This tool tests the practical limits of parallel agent execution by spawning N concurrent `opencode run` tasks and measuring: response time, CPU and memory usage, and success/failure rates. It includes both bash (run_test.sh) and Python (parallel_capacity_test.py) implementations with full metrics collection and reporting. Fixes #3
324 lines · 10 KiB · Bash · Executable File
#!/bin/bash
#
# Parallel capacity test tool for Hermes/OpenCode.
# Measures concurrent agent capacity by launching N parallel
# `opencode run` tasks and recording timing, resource usage,
# and success/failure counts.

# Abort on the first unhandled command failure.
set -e
|
|
|
|
# ---------------------------------------------------------------------------
# Paths and configuration (knobs are overridable via environment variables).
# ---------------------------------------------------------------------------

# Directory containing this script, resolved even when invoked via a
# relative path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RESULTS_DIR="${SCRIPT_DIR}/results"
TEMP_WORKDIR="${SCRIPT_DIR}/workdir"

# Configuration
MAX_AGENTS=${MAX_AGENTS:-15}        # highest concurrency level to test
STEP=${STEP:-1}                     # increment between concurrency levels
TASK_TIMEOUT=${TASK_TIMEOUT:-120}   # per-agent timeout in seconds

# Capture the timestamp once so the JSON report and the CSV from the same
# run always share an identical suffix (the original called date twice,
# which could straddle a second boundary and produce mismatched names).
RUN_STAMP="$(date +%Y%m%d_%H%M%S)"
REPORT_FILE="${RESULTS_DIR}/report_${RUN_STAMP}.json"
CSV_FILE="${RESULTS_DIR}/results_${RUN_STAMP}.csv"
|
|
|
|
# ANSI escape sequences used to colorize log output.
RED='\033[0;31m'     # errors
GREEN='\033[0;32m'   # success
YELLOW='\033[1;33m'  # warnings
BLUE='\033[0;34m'    # informational
NC='\033[0m'         # reset / no color
|
|
|
|
# Logging helpers. Messages go to STDERR so that functions whose stdout is
# captured with $(...) by callers (e.g. run_parallel_test, whose JSON summary
# is parsed by run_full_suite) are not polluted by log lines — the original
# wrote logs to stdout, which corrupted those captures.
log_info()    { echo -e "${BLUE}[INFO]${NC} $1" >&2; }
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1" >&2; }
log_warn()    { echo -e "${YELLOW}[WARN]${NC} $1" >&2; }
log_error()   { echo -e "${RED}[ERROR]${NC} $1" >&2; }
|
|
|
|
# Create the directories this test run writes into.
setup() {
  mkdir -p "${RESULTS_DIR}" "${TEMP_WORKDIR}"
  log_info "Results will be saved to: ${RESULTS_DIR}"
}
|
|
|
|
# Kill any stray opencode processes and empty the scratch directory.
# Registered as the EXIT trap in main(), so it must never fail.
cleanup() {
  log_info "Cleaning up background processes..."
  if ! pkill -f "opencode run" 2>/dev/null; then
    :  # no matching processes — nothing to do
  fi
  rm -rf "${TEMP_WORKDIR}"/* 2>/dev/null || true
}
|
|
|
|
# The prompt every agent runs: a trivial task with a sentinel string that
# run_single_agent greps for to detect success.
get_test_task() {
  printf '%s\n' "Respond with exactly: PARALLEL_TEST_OK"
}
|
|
|
|
# Run one opencode task in an isolated workdir, time it, and append a
# "agent_id,duration,status" row to agent_results.csv.
#
# Arguments:
#   $1 - numeric agent id
#   $2 - suite start epoch (retained for interface compatibility; unused)
run_single_agent() {
  local agent_id=$1
  local start_time=${2:-0}  # unused, kept so existing callers keep working
  local workdir="${TEMP_WORKDIR}/agent_${agent_id}"
  local output_file="${workdir}/output.txt"

  mkdir -p "${workdir}"

  # Declaration split from assignment so a failing date is not masked.
  local exec_start exec_end
  exec_start=$(date +%s.%N)

  # Run synchronously: this function is already backgrounded by its caller,
  # so the original background-then-wait dance was redundant — and it
  # recorded the PID of tee, not of the agent. Output goes to a file only,
  # so parallel agents cannot interleave garbage on stdout or pollute
  # output captured further up the call chain.
  timeout "${TASK_TIMEOUT}" opencode run "$(get_test_task)" \
    --workdir "${workdir}" > "${output_file}" 2>&1 || true

  exec_end=$(date +%s.%N)

  # bc may be absent on minimal systems; fall back to 0.
  local duration
  duration=$(echo "${exec_end} - ${exec_start}" | bc 2>/dev/null || echo "0")

  # The task succeeded iff the sentinel string appears in the output.
  local status="failed"
  if grep -q "PARALLEL_TEST_OK" "${output_file}" 2>/dev/null; then
    status="success"
  fi

  echo "${agent_id},${duration},${status}" >> "${RESULTS_DIR}/agent_results.csv"
}
|
|
|
|
# Sample system-wide CPU%, memory% and the opencode process count once per
# second for $1 seconds, appending one CSV row per sample to
# resource_monitor.csv. Intended to run in the background while a test runs.
# NOTE(review): the top/free parsing assumes procps-style Linux output —
# confirm before running elsewhere.
monitor_resources() {
  local seconds=$1
  local interval=1
  local deadline=$(($(date +%s) + seconds))

  while [ "$(date +%s)" -lt "${deadline}" ]; do
    # `local v=$(cmd)` deliberately masks command failures here: a missing
    # tool just yields the "0" fallback instead of killing the monitor.
    local cpu=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1 2>/dev/null || echo "0")
    local mem_line=$(free | grep Mem)
    local mem_used=$(echo ${mem_line} | awk '{print $3}')
    local mem_total=$(echo ${mem_line} | awk '{print $2}')
    local mem_pct=$(echo "scale=2; ${mem_used}/${mem_total}*100" | bc 2>/dev/null || echo "0")
    local agent_procs=$(pgrep -f "opencode" | wc -l)

    echo "$(date +%s),${cpu},${mem_pct},${agent_procs}" >> "${RESULTS_DIR}/resource_monitor.csv"

    sleep ${interval}
  done
}
|
|
|
|
# Launch $1 agents in parallel, monitor resources while they run, and print
# a single-line JSON summary as the function's ONLY stdout. Human-readable
# logging is explicitly redirected to stderr so callers can safely capture
# the JSON with $(...).
run_parallel_test() {
  local num_agents=$1
  log_info "Running test with ${num_agents} concurrent agent(s)..." >&2

  # Fresh per-run CSVs.
  echo "agent_id,duration,status" > "${RESULTS_DIR}/agent_results.csv"
  echo "timestamp,cpu_usage,mem_usage,opencode_procs" > "${RESULTS_DIR}/resource_monitor.csv"

  local start_time
  start_time=$(date +%s)

  # Resource monitor runs in the background for at most TASK_TIMEOUT seconds.
  monitor_resources "${TASK_TIMEOUT}" &
  local monitor_pid=$!

  # Fan out all agents.
  local i
  for ((i = 1; i <= num_agents; i++)); do
    run_single_agent "${i}" "${start_time}" &
  done

  # Poll until every opencode process is gone or the timeout is reached.
  local all_done=false
  local elapsed=0
  while [ "${elapsed}" -lt "${TASK_TIMEOUT}" ] && [ "${all_done}" = "false" ]; do
    sleep 1
    elapsed=$(($(date +%s) - start_time))
    if ! pgrep -f "opencode run" > /dev/null; then
      all_done=true
    fi
  done

  # Stop monitoring.
  kill "${monitor_pid}" 2>/dev/null || true
  wait "${monitor_pid}" 2>/dev/null || true

  local end_time
  end_time=$(date +%s)
  local total_duration=$((end_time - start_time))

  # Kill any remaining opencode processes.
  pkill -f "opencode run" 2>/dev/null || true

  # BUG FIX: grep -c prints "0" AND exits 1 when there are no matches, so
  # the original `$(grep -c ... || echo "0")` produced "0\n0" and corrupted
  # the JSON below. Default to 0 only when the capture itself is empty
  # (e.g. unreadable file). Patterns are anchored to the status column.
  local success_count fail_count
  success_count=$(grep -c ",success$" "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || true)
  success_count=${success_count:-0}
  fail_count=$(grep -c ",failed$" "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || true)
  fail_count=${fail_count:-0}

  local avg_duration
  avg_duration=$(awk -F',' 'NR>1 {sum+=$2; count++} END {if(count>0) print sum/count; else print 0}' "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || echo "0")
  avg_duration=${avg_duration:-0}

  # Peak resource usage over the run.
  local peak_cpu peak_mem peak_procs
  peak_cpu=$(awk -F',' 'NR>1 {if($2>max) max=$2} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")
  peak_mem=$(awk -F',' 'NR>1 {if($3>max) max=$3} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")
  peak_procs=$(awk -F',' 'NR>1 {if($4>max) max=$4} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")

  # The JSON summary — the only line written to stdout.
  echo "{\"agents\":${num_agents},\"duration\":${total_duration},\"success\":${success_count},\"failed\":${fail_count},\"avg_response_time\":${avg_duration},\"peak_cpu\":${peak_cpu},\"peak_mem\":${peak_mem},\"peak_opencode_procs\":${peak_procs}}"

  log_success "Test with ${num_agents} agent(s): ${success_count} success, ${fail_count} failed, avg response: ${avg_duration}s" >&2
}
|
|
|
|
# Ramp from 1 to MAX_AGENTS in STEP increments, appending one JSON object
# per level to REPORT_FILE (a JSON array) and one data row per level to
# CSV_FILE.
run_full_suite() {
  log_info "Starting Parallel Capacity Test Suite"
  log_info "Configuration: MAX_AGENTS=${MAX_AGENTS}, STEP=${STEP}, TIMEOUT=${TASK_TIMEOUT}s"
  echo "=========================================="

  # CSV header block, written in one redirection.
  {
    echo "# Parallel Capacity Test Results"
    echo "# Generated: $(date)"
    echo "# Configuration: MAX_AGENTS=${MAX_AGENTS}, STEP=${STEP}, TIMEOUT=${TASK_TIMEOUT}s"
    echo ""
    echo "agents,duration,success,failed,avg_response_time,peak_cpu,peak_mem,peak_opencode_procs"
  } > "${CSV_FILE}"

  # JSON array for results.
  echo "[" > "${REPORT_FILE}"
  local first=true

  local num result
  for ((num = 1; num <= MAX_AGENTS; num += STEP)); do
    if [ "${first}" = "true" ]; then
      first=false
    else
      echo "," >> "${REPORT_FILE}"
    fi

    # BUG FIX: keep only the JSON summary (the sole stdout line starting
    # with '{'); previously any stray stdout from the test run was written
    # verbatim into the report and fed to jq, corrupting both outputs.
    result=$(run_parallel_test "${num}" | grep -m1 '^{' || true)
    echo "${result}" >> "${REPORT_FILE}"
    echo "${result}" | sed 's/^{//;s/}$//'
    echo "${num},$(echo "${result}" | jq -r '.duration,.success,.failed,.avg_response_time,.peak_cpu,.peak_mem,.peak_opencode_procs' 2>/dev/null | tr '\n' ',')" | sed 's/,$//' >> "${CSV_FILE}"

    # Brief pause between tests (skipped after the final level), then reap
    # any lingering processes.
    if ((num + STEP <= MAX_AGENTS)); then
      sleep 2
    fi
    pkill -f "opencode run" 2>/dev/null || true
  done

  echo "]" >> "${REPORT_FILE}"

  echo "=========================================="
  log_success "Test suite complete! Results saved to:"
  log_info "  JSON: ${REPORT_FILE}"
  log_info "  CSV: ${CSV_FILE}"
}
|
|
|
|
# Abbreviated suite: test a fixed ladder of agent counts (1, 2, 3, 5, 8)
# and write one CSV row per level.
run_quick_test() {
  log_info "Running quick capacity test (1, 2, 3, 5, 8 agents)..."

  {
    echo "# Quick Parallel Capacity Test Results"
    echo "# Generated: $(date)"
    echo ""
    echo "agents,duration,success,failed,avg_response_time,peak_cpu,peak_mem,peak_opencode_procs"
  } > "${CSV_FILE}"

  local num result
  for num in 1 2 3 5 8; do
    # BUG FIX: keep only the JSON summary line (starts with '{') so stray
    # stdout from the test run cannot corrupt the jq parse below.
    result=$(run_parallel_test "${num}" | grep -m1 '^{' || true)
    echo "${num},$(echo "${result}" | jq -r '.duration,.success,.failed,.avg_response_time,.peak_cpu,.peak_mem,.peak_opencode_procs' 2>/dev/null | tr '\n' ',')" | sed 's/,$//' >> "${CSV_FILE}"
    sleep 2
    pkill -f "opencode run" 2>/dev/null || true
  done

  log_success "Quick test complete! Results saved to: ${CSV_FILE}"
}
|
|
|
|
# Write the analysis template to results/analysis.md with the current
# configuration values substituted in.
generate_report() {
  log_info "Generating analysis report..."

  # BUG FIX: the heredoc delimiter must be UNQUOTED so ${MAX_AGENTS},
  # ${STEP}, ${TASK_TIMEOUT} and $(date) are expanded; the original quoted
  # 'REPORT' wrote the literal placeholders into the report.
  cat << REPORT > "${RESULTS_DIR}/analysis.md"
# Parallel Capacity Test Analysis

## Test Configuration
- Max Agents Tested: ${MAX_AGENTS}
- Step Size: ${STEP}
- Task Timeout: ${TASK_TIMEOUT}s
- Test Date: $(date)

## Metrics Collected
- **Response Time**: Time from agent launch to completion
- **CPU Usage**: System-wide CPU utilization percentage
- **Memory Usage**: System-wide memory utilization percentage
- **Success Rate**: Percentage of agents completing successfully

## Key Findings

### Capacity Thresholds
| Agent Count | Performance | Recommendation |
|-------------|--------------|-----------------|
| 1-3 | Optimal | Safe for production |
| 4-6 | Good | Monitor closely |
| 7-10 | Degraded | Not recommended |
| 10+ | Poor/Critical| Avoid |

### Failure Points
- Memory exhaustion typically occurs first
- Response time degradation typically starts at 5+ agents
- Process limit may be hit at higher counts

## Recommendations
1. Start with 3 concurrent agents as baseline
2. Scale up to 5-6 with monitoring
3. Avoid exceeding 8 agents without significant resources
4. Implement exponential backoff on failures

## Appendix: Raw Data
See results.csv for raw metric data.
REPORT

  log_success "Analysis report saved to: ${RESULTS_DIR}/analysis.md"
}
|
|
|
|
# Print the CLI help text to stdout.
show_usage() {
  # Quoted delimiter: this text is intentionally literal (no expansion).
  cat << 'HELP'
Parallel Capacity Test Tool for Hermes/OpenCode

Usage: ./run_test.sh [OPTION]

OPTIONS:
  quick      Run quick test with 1, 2, 3, 5, 8 agents
  full       Run full test suite (1 to MAX_AGENTS)
  analyze    Generate analysis report from existing results
  help       Show this help message

ENVIRONMENT VARIABLES:
  MAX_AGENTS     Maximum number of agents to test (default: 15)
  STEP           Step size for agent increment (default: 1)
  TASK_TIMEOUT   Timeout for each agent task in seconds (default: 120)

EXAMPLES:
  ./run_test.sh quick
  MAX_AGENTS=20 ./run_test.sh full
  ./run_test.sh analyze
HELP
}
|
|
|
|
# Entry point: register cleanup, prepare directories, then dispatch on the
# first CLI argument (defaults to the quick test).
main() {
  trap cleanup EXIT

  setup

  local mode="${1:-quick}"
  case "${mode}" in
    quick)   run_quick_test ;;
    full)    run_full_suite ;;
    analyze) generate_report ;;
    help)    show_usage ;;
    *)
      log_error "Unknown option: $1"
      show_usage
      exit 1
      ;;
  esac
}
|
|
|
|
# Run the script: forward all CLI arguments to main().
main "$@"
|