From 0fa778353ba6a56c57255ec4a7603db477807e73 Mon Sep 17 00:00:00 2001
From: shokollm <270575765+shokollm@users.noreply.github.com>
Date: Sun, 5 Apr 2026 04:53:27 +0000
Subject: [PATCH] feat(timeout): add agent timeout handling

Implements #137 - Agent timeout handling.

Changes:
- Add TASK_TIMEOUT_HOURS config (default: 1 hour)
- Update queue item to track opencode_session_id and pid
- Add check_task_timeouts() function that:
  - Checks notified tasks against timeout threshold
  - Kills process if exceeded
  - Marks session as 'timeout' state
- Integrate timeout check into queue daemon loop

Timeout behavior:
- Task is marked 'notified' when PM receives it
- If not completed within TASK_TIMEOUT_HOURS, task is killed
- Queue item marked 'error', session marked 'timeout'
- Items whose notified_at cannot be parsed are skipped, not killed
- The agent is started via exec so the stored PID is the agent's own
---
 skills/kugetsu/scripts/kugetsu | 144 ++++++++++++++++++++++++++++++++-
 1 file changed, 142 insertions(+), 2 deletions(-)

diff --git a/skills/kugetsu/scripts/kugetsu b/skills/kugetsu/scripts/kugetsu
index 6f337e1..816a2b3 100755
--- a/skills/kugetsu/scripts/kugetsu
+++ b/skills/kugetsu/scripts/kugetsu
@@ -25,6 +25,7 @@ QUEUE_DAEMON_LOG_FILE="${QUEUE_DAEMON_LOG_FILE:-$QUEUE_DIR/daemon.log}"
 QUEUE_DAEMON_INTERVAL_MINUTES="${QUEUE_DAEMON_INTERVAL_MINUTES:-5}"
 QUEUE_DAEMON_BATCH_SIZE="${QUEUE_DAEMON_BATCH_SIZE:-2}"
 QUEUE_CLEANUP_AGE_DAYS="${QUEUE_CLEANUP_AGE_DAYS:-7}"
+TASK_TIMEOUT_HOURS="${TASK_TIMEOUT_HOURS:-1}"
 
 # Load user config overrides (~/.kugetsu/config)
 if [ -f "$KUGETSU_DIR/config" ]; then
@@ -576,6 +577,8 @@ get_queue_stats() {
 update_queue_item_state() {
     local queue_id="$1"
     local new_state="$2"
+    local session_id="${3:-}"
+    local pid="${4:-}"
     local item_file="$QUEUE_ITEMS_DIR/${queue_id}.json"
 
     if [ ! -f "$item_file" ]; then
@@ -589,6 +592,8 @@ from datetime import datetime
 
 item_file = "$item_file"
 new_state = "$new_state"
+session_id = "$session_id"
+pid = "$pid"
 
 with open(item_file, 'r') as f:
     item = json.load(f)
@@ -597,6 +602,10 @@ item['state'] = new_state
 
 if new_state == "notified":
     item['notified_at'] = datetime.now().isoformat() + "Z"
+    if session_id:
+        item['opencode_session_id'] = session_id
+    if pid:
+        item['pid'] = int(pid) if pid.isdigit() else None
 elif new_state == "completed":
     item['completed_at'] = datetime.now().isoformat() + "Z"
 elif new_state == "error":
@@ -609,6 +618,131 @@ print(f"Updated $queue_id to state: $new_state")
 PYEOF
 }
 
+# Print the value stored under top-level key $2 of JSON file $1.
+# Prints an empty line when the file is unreadable, the key is missing, or the
+# value is null. Path and key are passed as arguments instead of being pasted
+# into the Python source, so quotes in a path cannot break the snippet.
+# Always returns 0 so a failed read cannot abort the caller.
+_json_file_field() {
+    python3 -c 'import json, sys; v = json.load(open(sys.argv[1])).get(sys.argv[2]); print("" if v is None else v)' "$1" "$2" 2>/dev/null || true
+}
+
+# Enforce TASK_TIMEOUT_HOURS on queue items still in the "notified" state.
+#
+# For each $QUEUE_ITEMS_DIR/*.json item whose notified_at is at least
+# TASK_TIMEOUT_HOURS old: kill the recorded pid if it is alive, pkill opencode
+# processes matching the session's worktree path, set the queue item to
+# "error" and set the issue's session file state to "timeout".
+# Takes no arguments and reports what it does on stdout.
+check_task_timeouts() {
+    if [ ! -d "$QUEUE_ITEMS_DIR" ]; then
+        return
+    fi
+
+    local timeout_hours="${TASK_TIMEOUT_HOURS:-1}"
+    # A non-integer limit makes the -ge test below error out on every pass,
+    # which would silently disable timeouts; fall back to the default.
+    case "$timeout_hours" in
+        ''|*[!0-9]*)
+            echo "Invalid TASK_TIMEOUT_HOURS '$timeout_hours', using 1"
+            timeout_hours=1
+            ;;
+    esac
+
+    local item state notified_at queue_id pid session_id
+    local notified_epoch now_epoch hours_elapsed
+    local worktree_path session_file sess_id issue_ref
+
+    for item in "$QUEUE_ITEMS_DIR"/*.json; do
+        [ -f "$item" ] || continue
+
+        state=$(_json_file_field "$item" state)
+        if [ "$state" != "notified" ]; then
+            continue
+        fi
+
+        notified_at=$(_json_file_field "$item" notified_at)
+        if [ -z "$notified_at" ]; then
+            continue
+        fi
+
+        queue_id=$(basename "$item" .json)
+        pid=$(_json_file_field "$item" pid)
+        session_id=$(_json_file_field "$item" opencode_session_id)
+
+        # update_queue_item_state stores datetime.now() (naive local time) with
+        # a literal "Z" appended. Drop the suffix so date reads the stamp as
+        # local time; keeping it skews the age by the host's UTC offset.
+        # An unparseable stamp is skipped: falling back to epoch 0 would make
+        # the task look decades old and kill it on the spot.
+        if ! notified_epoch=$(date -d "${notified_at%Z}" +%s 2>/dev/null); then
+            echo "Skipping $queue_id: cannot parse notified_at '$notified_at'"
+            continue
+        fi
+        now_epoch=$(date +%s)
+        hours_elapsed=$(( (now_epoch - notified_epoch) / 3600 ))
+
+        if [ "$hours_elapsed" -ge "$timeout_hours" ]; then
+            echo "Task $queue_id timed out after ${hours_elapsed}h (limit: ${timeout_hours}h)"
+
+            # Signal only a plain positive integer: kill 0 would target our
+            # own process group, and a missing pid must not reach kill at all.
+            case "$pid" in
+                ''|0|*[!0-9]*) ;;
+                *)
+                    if kill -0 "$pid" 2>/dev/null; then
+                        echo "Killing process $pid"
+                        kill "$pid" 2>/dev/null || true
+                    fi
+                    ;;
+            esac
+
+            if [ -n "$session_id" ]; then
+                worktree_path=""
+                for session_file in "$SESSIONS_DIR"/*.json; do
+                    [ -f "$session_file" ] || continue
+                    sess_id=$(_json_file_field "$session_file" opencode_session_id)
+                    if [ "$sess_id" = "$session_id" ]; then
+                        worktree_path=$(_json_file_field "$session_file" worktree_path)
+                        break
+                    fi
+                done
+
+                if [ -n "$worktree_path" ]; then
+                    # Best-effort sweep: pkill exits non-zero when nothing matched.
+                    pkill -f "opencode.*$worktree_path" 2>/dev/null || true
+                fi
+            fi
+
+            update_queue_item_state "$queue_id" "error"
+
+            issue_ref=$(_json_file_field "$item" issue_ref)
+            if [ -n "$issue_ref" ]; then
+                session_file=$(get_session_for_issue "$issue_ref") || true
+                if [ -n "$session_file" ] && [ "$session_file" != "null" ]; then
+                    # Values are passed as arguments (quoted heredoc) so quotes in
+                    # them cannot corrupt the Python source.
+                    python3 - "$SESSIONS_DIR/$session_file" "$issue_ref" << 'PYEOF'
+import json
+import sys
+
+session_path, issue_ref = sys.argv[1], sys.argv[2]
+try:
+    with open(session_path, 'r') as f:
+        session = json.load(f)
+    session['state'] = 'timeout'
+    with open(session_path, 'w') as f:
+        json.dump(session, f, indent=2)
+    print(f"Marked session for {issue_ref} as timeout")
+except Exception as e:
+    print(f"Error marking session: {e}")
+PYEOF
+                fi
+            fi
+        fi
+    done
+}
+
 cleanup_old_queue_items() {
     local days="${QUEUE_CLEANUP_AGE_DAYS:-7}"
 
@@ -1267,6 +1401,7 @@ queue_daemon_loop() {
             exit 0
         fi
 
+        check_task_timeouts
         process_queue
     done
 }
@@ -1323,8 +1458,13 @@ process_queue() {
         env_sh="${env_sh}set +a; "
 
-        nohup sh -c "${env_sh}opencode run 'Delegate task: ${message}' --continue --session '$pm_session'" >> "$log_file" 2>&1 &
+        # exec makes opencode replace the wrapper shell, so $! below is the
+        # agent's own PID and the timeout kill reaches it.
+        nohup sh -c "${env_sh}exec opencode run 'Delegate task: ${message}' --continue --session '$pm_session'" >> "$log_file" 2>&1 &
+        local fork_pid=$!
 
-        echo "Queued task $queue_id for PM agent"
+        update_queue_item_state "$queue_id" "notified" "" "$fork_pid"
+
+        echo "Queued task $queue_id for PM agent (PID: $fork_pid)"
         count=$((count + 1))
     done
 }
-- 
2.49.1