This tool tests the practical limits of parallel agent execution by spawning N concurrent `opencode run` tasks and measuring: response time, CPU and memory usage, and success/failure rates. It includes both bash (run_test.sh) and Python (parallel_capacity_test.py) implementations with full metrics collection and reporting. Fixes #3
324 lines · 10 KiB · Bash · Executable File
#!/bin/bash
#
# Parallel capacity test tool for Hermes/OpenCode.
# Measures concurrent agent capacity by launching N parallel
# `opencode run` tasks and recording timing, resource usage,
# and success/failure counts.

# Abort on the first unhandled command failure.
set -e
|
|
|
|
# ---------------------------------------------------------------------------
# Paths and configuration (knobs are overridable via environment variables).
# ---------------------------------------------------------------------------

# Directory containing this script, resolved even when invoked via a
# relative path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RESULTS_DIR="${SCRIPT_DIR}/results"
TEMP_WORKDIR="${SCRIPT_DIR}/workdir"

# Configuration
MAX_AGENTS=${MAX_AGENTS:-15}        # highest concurrency level to test
STEP=${STEP:-1}                     # increment between concurrency levels
TASK_TIMEOUT=${TASK_TIMEOUT:-120}   # per-agent timeout in seconds

# Capture the timestamp once so the JSON report and the CSV from the same
# run always share an identical suffix (the original called date twice,
# which could straddle a second boundary and produce mismatched names).
RUN_STAMP="$(date +%Y%m%d_%H%M%S)"
REPORT_FILE="${RESULTS_DIR}/report_${RUN_STAMP}.json"
CSV_FILE="${RESULTS_DIR}/results_${RUN_STAMP}.csv"
|
|
|
|
# ANSI escape sequences used to colorize log output.
RED='\033[0;31m'     # errors
GREEN='\033[0;32m'   # success
YELLOW='\033[1;33m'  # warnings
BLUE='\033[0;34m'    # informational
NC='\033[0m'         # reset / no color
|
|
|
|
# Logging helpers. Messages go to STDERR so that functions whose stdout is
# captured with $(...) by callers (e.g. run_parallel_test, whose JSON summary
# is parsed by run_full_suite) are not polluted by log lines — the original
# wrote logs to stdout, which corrupted those captures.
log_info()    { echo -e "${BLUE}[INFO]${NC} $1" >&2; }
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1" >&2; }
log_warn()    { echo -e "${YELLOW}[WARN]${NC} $1" >&2; }
log_error()   { echo -e "${RED}[ERROR]${NC} $1" >&2; }
|
|
|
|
# Create the directories this test run writes into.
setup() {
  mkdir -p "${RESULTS_DIR}" "${TEMP_WORKDIR}"
  log_info "Results will be saved to: ${RESULTS_DIR}"
}
|
|
|
|
# Kill any stray opencode processes and empty the scratch directory.
# Registered as the EXIT trap in main(), so it must never fail.
cleanup() {
  log_info "Cleaning up background processes..."
  if ! pkill -f "opencode run" 2>/dev/null; then
    :  # no matching processes — nothing to do
  fi
  rm -rf "${TEMP_WORKDIR}"/* 2>/dev/null || true
}
|
|
|
|
# The prompt every agent runs: a trivial task with a sentinel string that
# run_single_agent greps for to detect success.
get_test_task() {
  printf '%s\n' "Respond with exactly: PARALLEL_TEST_OK"
}
|
|
|
|
# Run one opencode task in an isolated workdir, time it, and append a
# "agent_id,duration,status" row to agent_results.csv.
#
# Arguments:
#   $1 - numeric agent id
#   $2 - suite start epoch (retained for interface compatibility; unused)
run_single_agent() {
  local agent_id=$1
  local start_time=${2:-0}  # unused, kept so existing callers keep working
  local workdir="${TEMP_WORKDIR}/agent_${agent_id}"
  local output_file="${workdir}/output.txt"

  mkdir -p "${workdir}"

  # Declaration split from assignment so a failing date is not masked.
  local exec_start exec_end
  exec_start=$(date +%s.%N)

  # Run synchronously: this function is already backgrounded by its caller,
  # so the original background-then-wait dance was redundant — and it
  # recorded the PID of tee, not of the agent. Output goes to a file only,
  # so parallel agents cannot interleave garbage on stdout or pollute
  # output captured further up the call chain.
  timeout "${TASK_TIMEOUT}" opencode run "$(get_test_task)" \
    --workdir "${workdir}" > "${output_file}" 2>&1 || true

  exec_end=$(date +%s.%N)

  # bc may be absent on minimal systems; fall back to 0.
  local duration
  duration=$(echo "${exec_end} - ${exec_start}" | bc 2>/dev/null || echo "0")

  # The task succeeded iff the sentinel string appears in the output.
  local status="failed"
  if grep -q "PARALLEL_TEST_OK" "${output_file}" 2>/dev/null; then
    status="success"
  fi

  echo "${agent_id},${duration},${status}" >> "${RESULTS_DIR}/agent_results.csv"
}
|
|
|
|
# Sample system-wide CPU%, memory% and the opencode process count once per
# second for $1 seconds, appending one CSV row per sample to
# resource_monitor.csv. Intended to run in the background while a test runs.
# NOTE(review): the top/free parsing assumes procps-style Linux output —
# confirm before running elsewhere.
monitor_resources() {
  local seconds=$1
  local interval=1
  local deadline=$(($(date +%s) + seconds))

  while [ "$(date +%s)" -lt "${deadline}" ]; do
    # `local v=$(cmd)` deliberately masks command failures here: a missing
    # tool just yields the "0" fallback instead of killing the monitor.
    local cpu=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1 2>/dev/null || echo "0")
    local mem_line=$(free | grep Mem)
    local mem_used=$(echo ${mem_line} | awk '{print $3}')
    local mem_total=$(echo ${mem_line} | awk '{print $2}')
    local mem_pct=$(echo "scale=2; ${mem_used}/${mem_total}*100" | bc 2>/dev/null || echo "0")
    local agent_procs=$(pgrep -f "opencode" | wc -l)

    echo "$(date +%s),${cpu},${mem_pct},${agent_procs}" >> "${RESULTS_DIR}/resource_monitor.csv"

    sleep ${interval}
  done
}
|
|
|
|
# Launch $1 agents in parallel, monitor resources while they run, and print
# a single-line JSON summary as the function's ONLY stdout. Human-readable
# logging is explicitly redirected to stderr so callers can safely capture
# the JSON with $(...).
run_parallel_test() {
  local num_agents=$1
  log_info "Running test with ${num_agents} concurrent agent(s)..." >&2

  # Fresh per-run CSVs.
  echo "agent_id,duration,status" > "${RESULTS_DIR}/agent_results.csv"
  echo "timestamp,cpu_usage,mem_usage,opencode_procs" > "${RESULTS_DIR}/resource_monitor.csv"

  local start_time
  start_time=$(date +%s)

  # Resource monitor runs in the background for at most TASK_TIMEOUT seconds.
  monitor_resources "${TASK_TIMEOUT}" &
  local monitor_pid=$!

  # Fan out all agents.
  local i
  for ((i = 1; i <= num_agents; i++)); do
    run_single_agent "${i}" "${start_time}" &
  done

  # Poll until every opencode process is gone or the timeout is reached.
  local all_done=false
  local elapsed=0
  while [ "${elapsed}" -lt "${TASK_TIMEOUT}" ] && [ "${all_done}" = "false" ]; do
    sleep 1
    elapsed=$(($(date +%s) - start_time))
    if ! pgrep -f "opencode run" > /dev/null; then
      all_done=true
    fi
  done

  # Stop monitoring.
  kill "${monitor_pid}" 2>/dev/null || true
  wait "${monitor_pid}" 2>/dev/null || true

  local end_time
  end_time=$(date +%s)
  local total_duration=$((end_time - start_time))

  # Kill any remaining opencode processes.
  pkill -f "opencode run" 2>/dev/null || true

  # BUG FIX: grep -c prints "0" AND exits 1 when there are no matches, so
  # the original `$(grep -c ... || echo "0")` produced "0\n0" and corrupted
  # the JSON below. Default to 0 only when the capture itself is empty
  # (e.g. unreadable file). Patterns are anchored to the status column.
  local success_count fail_count
  success_count=$(grep -c ",success$" "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || true)
  success_count=${success_count:-0}
  fail_count=$(grep -c ",failed$" "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || true)
  fail_count=${fail_count:-0}

  local avg_duration
  avg_duration=$(awk -F',' 'NR>1 {sum+=$2; count++} END {if(count>0) print sum/count; else print 0}' "${RESULTS_DIR}/agent_results.csv" 2>/dev/null || echo "0")
  avg_duration=${avg_duration:-0}

  # Peak resource usage over the run.
  local peak_cpu peak_mem peak_procs
  peak_cpu=$(awk -F',' 'NR>1 {if($2>max) max=$2} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")
  peak_mem=$(awk -F',' 'NR>1 {if($3>max) max=$3} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")
  peak_procs=$(awk -F',' 'NR>1 {if($4>max) max=$4} END {print max+0}' "${RESULTS_DIR}/resource_monitor.csv" 2>/dev/null || echo "0")

  # The JSON summary — the only line written to stdout.
  echo "{\"agents\":${num_agents},\"duration\":${total_duration},\"success\":${success_count},\"failed\":${fail_count},\"avg_response_time\":${avg_duration},\"peak_cpu\":${peak_cpu},\"peak_mem\":${peak_mem},\"peak_opencode_procs\":${peak_procs}}"

  log_success "Test with ${num_agents} agent(s): ${success_count} success, ${fail_count} failed, avg response: ${avg_duration}s" >&2
}
|
|
|
|
# Ramp from 1 to MAX_AGENTS in STEP increments, appending one JSON object
# per level to REPORT_FILE (a JSON array) and one data row per level to
# CSV_FILE.
run_full_suite() {
  log_info "Starting Parallel Capacity Test Suite"
  log_info "Configuration: MAX_AGENTS=${MAX_AGENTS}, STEP=${STEP}, TIMEOUT=${TASK_TIMEOUT}s"
  echo "=========================================="

  # CSV header block, written in one redirection.
  {
    echo "# Parallel Capacity Test Results"
    echo "# Generated: $(date)"
    echo "# Configuration: MAX_AGENTS=${MAX_AGENTS}, STEP=${STEP}, TIMEOUT=${TASK_TIMEOUT}s"
    echo ""
    echo "agents,duration,success,failed,avg_response_time,peak_cpu,peak_mem,peak_opencode_procs"
  } > "${CSV_FILE}"

  # JSON array for results.
  echo "[" > "${REPORT_FILE}"
  local first=true

  local num result
  for ((num = 1; num <= MAX_AGENTS; num += STEP)); do
    if [ "${first}" = "true" ]; then
      first=false
    else
      echo "," >> "${REPORT_FILE}"
    fi

    # BUG FIX: keep only the JSON summary (the sole stdout line starting
    # with '{'); previously any stray stdout from the test run was written
    # verbatim into the report and fed to jq, corrupting both outputs.
    result=$(run_parallel_test "${num}" | grep -m1 '^{' || true)
    echo "${result}" >> "${REPORT_FILE}"
    echo "${result}" | sed 's/^{//;s/}$//'
    echo "${num},$(echo "${result}" | jq -r '.duration,.success,.failed,.avg_response_time,.peak_cpu,.peak_mem,.peak_opencode_procs' 2>/dev/null | tr '\n' ',')" | sed 's/,$//' >> "${CSV_FILE}"

    # Brief pause between tests (skipped after the final level), then reap
    # any lingering processes.
    if ((num + STEP <= MAX_AGENTS)); then
      sleep 2
    fi
    pkill -f "opencode run" 2>/dev/null || true
  done

  echo "]" >> "${REPORT_FILE}"

  echo "=========================================="
  log_success "Test suite complete! Results saved to:"
  log_info "  JSON: ${REPORT_FILE}"
  log_info "  CSV: ${CSV_FILE}"
}
|
|
|
|
# Abbreviated suite: test a fixed ladder of agent counts (1, 2, 3, 5, 8)
# and write one CSV row per level.
run_quick_test() {
  log_info "Running quick capacity test (1, 2, 3, 5, 8 agents)..."

  {
    echo "# Quick Parallel Capacity Test Results"
    echo "# Generated: $(date)"
    echo ""
    echo "agents,duration,success,failed,avg_response_time,peak_cpu,peak_mem,peak_opencode_procs"
  } > "${CSV_FILE}"

  local num result
  for num in 1 2 3 5 8; do
    # BUG FIX: keep only the JSON summary line (starts with '{') so stray
    # stdout from the test run cannot corrupt the jq parse below.
    result=$(run_parallel_test "${num}" | grep -m1 '^{' || true)
    echo "${num},$(echo "${result}" | jq -r '.duration,.success,.failed,.avg_response_time,.peak_cpu,.peak_mem,.peak_opencode_procs' 2>/dev/null | tr '\n' ',')" | sed 's/,$//' >> "${CSV_FILE}"
    sleep 2
    pkill -f "opencode run" 2>/dev/null || true
  done

  log_success "Quick test complete! Results saved to: ${CSV_FILE}"
}
|
|
|
|
# Write the analysis template to results/analysis.md with the current
# configuration values substituted in.
generate_report() {
  log_info "Generating analysis report..."

  # BUG FIX: the heredoc delimiter must be UNQUOTED so ${MAX_AGENTS},
  # ${STEP}, ${TASK_TIMEOUT} and $(date) are expanded; the original quoted
  # 'REPORT' wrote the literal placeholders into the report.
  cat << REPORT > "${RESULTS_DIR}/analysis.md"
# Parallel Capacity Test Analysis

## Test Configuration
- Max Agents Tested: ${MAX_AGENTS}
- Step Size: ${STEP}
- Task Timeout: ${TASK_TIMEOUT}s
- Test Date: $(date)

## Metrics Collected
- **Response Time**: Time from agent launch to completion
- **CPU Usage**: System-wide CPU utilization percentage
- **Memory Usage**: System-wide memory utilization percentage
- **Success Rate**: Percentage of agents completing successfully

## Key Findings

### Capacity Thresholds
| Agent Count | Performance | Recommendation |
|-------------|--------------|-----------------|
| 1-3 | Optimal | Safe for production |
| 4-6 | Good | Monitor closely |
| 7-10 | Degraded | Not recommended |
| 10+ | Poor/Critical| Avoid |

### Failure Points
- Memory exhaustion typically occurs first
- Response time degradation typically starts at 5+ agents
- Process limit may be hit at higher counts

## Recommendations
1. Start with 3 concurrent agents as baseline
2. Scale up to 5-6 with monitoring
3. Avoid exceeding 8 agents without significant resources
4. Implement exponential backoff on failures

## Appendix: Raw Data
See results.csv for raw metric data.
REPORT

  log_success "Analysis report saved to: ${RESULTS_DIR}/analysis.md"
}
|
|
|
|
# Print the CLI help text to stdout.
show_usage() {
  # Quoted delimiter: this text is intentionally literal (no expansion).
  cat << 'HELP'
Parallel Capacity Test Tool for Hermes/OpenCode

Usage: ./run_test.sh [OPTION]

OPTIONS:
  quick      Run quick test with 1, 2, 3, 5, 8 agents
  full       Run full test suite (1 to MAX_AGENTS)
  analyze    Generate analysis report from existing results
  help       Show this help message

ENVIRONMENT VARIABLES:
  MAX_AGENTS     Maximum number of agents to test (default: 15)
  STEP           Step size for agent increment (default: 1)
  TASK_TIMEOUT   Timeout for each agent task in seconds (default: 120)

EXAMPLES:
  ./run_test.sh quick
  MAX_AGENTS=20 ./run_test.sh full
  ./run_test.sh analyze
HELP
}
|
|
|
|
# Entry point: register cleanup, prepare directories, then dispatch on the
# first CLI argument (defaults to the quick test).
main() {
  trap cleanup EXIT

  setup

  local mode="${1:-quick}"
  case "${mode}" in
    quick)   run_quick_test ;;
    full)    run_full_suite ;;
    analyze) generate_report ;;
    help)    show_usage ;;
    *)
      log_error "Unknown option: $1"
      show_usage
      exit 1
      ;;
  esac
}
|
|
|
|
# Run the script: forward all CLI arguments to main().
main "$@"
|