feat(timeout): add agent timeout handling #141

Merged
shoko merged 1 commit from feat/agent-timeout into main 2026-04-05 06:59:05 +02:00

View File

@@ -25,6 +25,7 @@ QUEUE_DAEMON_LOG_FILE="${QUEUE_DAEMON_LOG_FILE:-$QUEUE_DIR/daemon.log}"
QUEUE_DAEMON_INTERVAL_MINUTES="${QUEUE_DAEMON_INTERVAL_MINUTES:-5}" QUEUE_DAEMON_INTERVAL_MINUTES="${QUEUE_DAEMON_INTERVAL_MINUTES:-5}"
QUEUE_DAEMON_BATCH_SIZE="${QUEUE_DAEMON_BATCH_SIZE:-2}" QUEUE_DAEMON_BATCH_SIZE="${QUEUE_DAEMON_BATCH_SIZE:-2}"
QUEUE_CLEANUP_AGE_DAYS="${QUEUE_CLEANUP_AGE_DAYS:-7}" QUEUE_CLEANUP_AGE_DAYS="${QUEUE_CLEANUP_AGE_DAYS:-7}"
TASK_TIMEOUT_HOURS="${TASK_TIMEOUT_HOURS:-1}"
# Load user config overrides (~/.kugetsu/config) # Load user config overrides (~/.kugetsu/config)
if [ -f "$KUGETSU_DIR/config" ]; then if [ -f "$KUGETSU_DIR/config" ]; then
@@ -576,6 +577,8 @@ get_queue_stats() {
update_queue_item_state() { update_queue_item_state() {
local queue_id="$1" local queue_id="$1"
local new_state="$2" local new_state="$2"
local session_id="${3:-}"
local pid="${4:-}"
local item_file="$QUEUE_ITEMS_DIR/${queue_id}.json" local item_file="$QUEUE_ITEMS_DIR/${queue_id}.json"
if [ ! -f "$item_file" ]; then if [ ! -f "$item_file" ]; then
@@ -589,6 +592,8 @@ from datetime import datetime
item_file = "$item_file" item_file = "$item_file"
new_state = "$new_state" new_state = "$new_state"
session_id = "$session_id"
pid = "$pid"
with open(item_file, 'r') as f: with open(item_file, 'r') as f:
item = json.load(f) item = json.load(f)
@@ -597,6 +602,10 @@ item['state'] = new_state
if new_state == "notified": if new_state == "notified":
item['notified_at'] = datetime.now().isoformat() + "Z" item['notified_at'] = datetime.now().isoformat() + "Z"
if session_id:
item['opencode_session_id'] = session_id
if pid:
item['pid'] = int(pid) if pid.isdigit() else None
elif new_state == "completed": elif new_state == "completed":
item['completed_at'] = datetime.now().isoformat() + "Z" item['completed_at'] = datetime.now().isoformat() + "Z"
elif new_state == "error": elif new_state == "error":
@@ -609,6 +618,83 @@ print(f"Updated $queue_id to state: $new_state")
PYEOF PYEOF
} }
# Print the value stored under top-level key $2 of the JSON file $1.
# Prints nothing when the key is missing or its value is JSON null.
# Returns non-zero (printing nothing) when the file cannot be read or parsed.
# The path and key travel through argv, so quotes in a path cannot break the
# Python source.
_queue_json_field() {
  python3 -c '
import json
import sys

try:
    with open(sys.argv[1]) as handle:
        value = json.load(handle).get(sys.argv[2])
except Exception:
    sys.exit(1)
if value is not None:
    print(value)
' "$1" "$2" 2>/dev/null
}

# Print the whole number of seconds elapsed since ISO-8601 timestamp $1.
# Returns non-zero (printing nothing) when the timestamp cannot be parsed.
# update_queue_item_state writes notified_at as datetime.now().isoformat()
# plus a literal "Z", i.e. local wall-clock time, so a trailing "Z" is dropped
# and the value is compared against local time. A timestamp carrying an
# explicit UTC offset is compared in that offset instead.
_queue_elapsed_seconds() {
  python3 -c '
import sys
from datetime import datetime

stamp = sys.argv[1]
if stamp.endswith("Z"):
    stamp = stamp[:-1]
try:
    started = datetime.fromisoformat(stamp)
except ValueError:
    sys.exit(1)
now = datetime.now(started.tzinfo) if started.tzinfo else datetime.now()
print(int((now - started).total_seconds()))
' "$1" 2>/dev/null
}

# Set "state" to "timeout" in the session JSON file $1 and report the outcome
# on stdout; $2 is the issue reference used in the success message.
_mark_session_timeout() {
  python3 -c '
import json
import sys

session_path, issue_ref = sys.argv[1], sys.argv[2]
try:
    with open(session_path, "r") as f:
        session = json.load(f)
    session["state"] = "timeout"
    with open(session_path, "w") as f:
        json.dump(session, f, indent=2)
    print(f"Marked session for {issue_ref} as timeout")
except Exception as e:
    print(f"Error marking session: {e}")
' "$1" "$2"
}

# Fail queue items that have stayed in the "notified" state for at least
# TASK_TIMEOUT_HOURS (default 1) whole hours.
#
# For each timed-out item this:
#   1. sends TERM to the recorded pid, if it is a live process;
#   2. pkills opencode processes whose command line mentions the worktree of
#      the session whose opencode_session_id matches the item's;
#   3. moves the queue item to the "error" state via update_queue_item_state;
#   4. marks the session returned by get_session_for_issue as "timeout".
#
# Globals:   QUEUE_ITEMS_DIR, SESSIONS_DIR, TASK_TIMEOUT_HOURS (all read)
# Arguments: none
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   0; does nothing when QUEUE_ITEMS_DIR is not a directory
check_task_timeouts() {
  if [ ! -d "$QUEUE_ITEMS_DIR" ]; then
    return
  fi

  local timeout_hours="${TASK_TIMEOUT_HOURS:-1}"
  case "$timeout_hours" in
    '' | *[!0-9]*)
      # A non-numeric limit would make the -ge comparison below error out.
      echo "Invalid TASK_TIMEOUT_HOURS '$timeout_hours'; using 1" >&2
      timeout_hours=1
      ;;
  esac

  local item state notified_at queue_id pid session_id
  local elapsed_seconds hours_elapsed
  local worktree_path session_file sess_id issue_ref issue_session

  for item in "$QUEUE_ITEMS_DIR"/*.json; do
    [ -f "$item" ] || continue

    state=$(_queue_json_field "$item" state) || state=""
    [ "$state" = "notified" ] || continue

    notified_at=$(_queue_json_field "$item" notified_at) || notified_at=""
    [ -n "$notified_at" ] || continue

    # An unreadable timestamp must not count as "infinitely old": skip the
    # item instead of killing a task whose age is unknown.
    if ! elapsed_seconds=$(_queue_elapsed_seconds "$notified_at"); then
      echo "Skipping $item: cannot parse notified_at '$notified_at'" >&2
      continue
    fi
    hours_elapsed=$((elapsed_seconds / 3600))
    [ "$hours_elapsed" -ge "$timeout_hours" ] || continue

    queue_id=$(basename "$item" .json)
    pid=$(_queue_json_field "$item" pid) || pid=""
    session_id=$(_queue_json_field "$item" opencode_session_id) || session_id=""

    echo "Task $queue_id timed out after ${hours_elapsed}h (limit: ${timeout_hours}h)"

    case "$pid" in
      '' | 0 | *[!0-9]*)
        # No usable pid recorded. 0 is excluded because kill would then
        # signal the caller's own process group.
        ;;
      *)
        if kill -0 "$pid" 2>/dev/null; then
          echo "Killing process $pid"
          kill "$pid" 2>/dev/null || true
        fi
        ;;
    esac

    if [ -n "$session_id" ]; then
      worktree_path=""
      for session_file in "$SESSIONS_DIR"/*.json; do
        [ -f "$session_file" ] || continue
        sess_id=$(_queue_json_field "$session_file" opencode_session_id) || sess_id=""
        if [ "$sess_id" = "$session_id" ]; then
          worktree_path=$(_queue_json_field "$session_file" worktree_path) || worktree_path=""
          break
        fi
      done
      if [ -n "$worktree_path" ]; then
        # Best effort: pkill exits non-zero when nothing matches.
        # NOTE(review): the path is used as part of a regex, so regex
        # metacharacters in it are not matched literally.
        pkill -f "opencode.*$worktree_path" 2>/dev/null || true
      fi
    fi

    update_queue_item_state "$queue_id" "error"

    issue_ref=$(_queue_json_field "$item" issue_ref) || issue_ref=""
    if [ -n "$issue_ref" ]; then
      issue_session=$(get_session_for_issue "$issue_ref") || issue_session=""
      if [ -n "$issue_session" ] && [ "$issue_session" != "null" ]; then
        _mark_session_timeout "$SESSIONS_DIR/$issue_session" "$issue_ref"
      fi
    fi
  done
}
cleanup_old_queue_items() { cleanup_old_queue_items() {
local days="${QUEUE_CLEANUP_AGE_DAYS:-7}" local days="${QUEUE_CLEANUP_AGE_DAYS:-7}"
@@ -1267,6 +1353,7 @@ queue_daemon_loop() {
exit 0 exit 0
fi fi
check_task_timeouts
process_queue process_queue
done done
} }
@@ -1323,8 +1410,11 @@ process_queue() {
env_sh="${env_sh}set +a; " env_sh="${env_sh}set +a; "
nohup sh -c "${env_sh}opencode run 'Delegate task: ${message}' --continue --session '$pm_session'" >> "$log_file" 2>&1 & nohup sh -c "${env_sh}opencode run 'Delegate task: ${message}' --continue --session '$pm_session'" >> "$log_file" 2>&1 &
local fork_pid=$!
echo "Queued task $queue_id for PM agent" update_queue_item_state "$queue_id" "notified" "" "$fork_pid"
echo "Queued task $queue_id for PM agent (PID: $fork_pid)"
count=$((count + 1)) count=$((count + 1))
done done
} }