merge: resolve conflicts with main (psutil with fallback, use --dir flag)

This commit is contained in:
shokollm
2026-03-31 03:29:00 +00:00
25 changed files with 5350 additions and 75 deletions

View File

@@ -19,12 +19,78 @@ from typing import List, Optional
try:
import psutil
HAS_PSUTIL = True
except ImportError:
HAS_PSUTIL = False
print("[WARN] psutil not available - resource monitoring will be limited")
def get_memory_percent() -> float:
"""Get memory usage percent by reading /proc/meminfo (Linux)"""
try:
with open("/proc/meminfo", "r") as f:
meminfo = f.read()
total = 0
available = 0
for line in meminfo.splitlines():
if line.startswith("MemTotal:"):
total = int(line.split()[1])
elif line.startswith("MemAvailable:"):
available = int(line.split()[1])
break
if total > 0:
used = total - available
return (used / total) * 100
except (FileNotFoundError, PermissionError, ValueError):
pass
return 0.0
def count_opencode_processes() -> int:
"""Count opencode processes using pgrep or /proc scanning"""
try:
result = subprocess.run(
["pgrep", "-c", "-x", "opencode"], capture_output=True, text=True, timeout=5
)
if result.returncode == 0:
return int(result.stdout.strip())
except (subprocess.TimeoutExpired, ValueError, subprocess.SubprocessError):
pass
try:
count = 0
for pid_dir in os.listdir("/proc"):
if not pid_dir.isdigit():
continue
try:
with open(f"/proc/{pid_dir}/comm", "r") as f:
if "opencode" in f.read().lower():
count += 1
except (PermissionError, FileNotFoundError):
continue
return count
except FileNotFoundError:
return 0
return 0
def get_cpu_percent() -> float:
"""Get CPU usage by reading /proc/stat"""
try:
with open("/proc/stat", "r") as f:
line = f.readline()
parts = line.split()
if parts[0] == "cpu":
values = [int(x) for x in parts[1:8]]
idle = values[3]
total = sum(values)
if total > 0:
return ((total - idle) / total) * 100
except (FileNotFoundError, PermissionError, ValueError, IndexError):
pass
return 0.0
@dataclass
class AgentResult:
agent_id: int
@@ -95,8 +161,13 @@ class ResourceMonitor:
def _collect_sample(self) -> ResourceSample:
timestamp = time.time()
try:
opencode_procs = len([p for p in psutil.process_iter(['name'])
if 'opencode' in p.info['name'].lower()])
opencode_procs = len(
[
p
for p in psutil.process_iter(["name"])
if "opencode" in p.info["name"].lower()
]
)
except Exception:
opencode_procs = 0
@@ -112,7 +183,7 @@ class ResourceMonitor:
cpu_percent=cpu_percent,
memory_percent=memory_percent,
opencode_processes=opencode_procs,
agent_count=self._current_agent_count
agent_count=self._current_agent_count,
)
@@ -135,36 +206,35 @@ class ParallelCapacityTester:
try:
result = subprocess.run(
['opencode', 'run', task, '--dir', workdir],
["opencode", "run", task, "--dir", workdir],
capture_output=True,
text=True,
timeout=self.timeout
timeout=self.timeout,
)
duration = time.time() - start_time
output = result.stdout + result.stderr
success = 'PARALLEL_TEST_OK' in output
success = "PARALLEL_TEST_OK" in output
return AgentResult(
agent_id=agent_id,
duration=duration,
status='success' if success else 'failed',
status="success" if success else "failed",
return_code=result.returncode,
output=output[:500]
output=output[:500],
)
except subprocess.TimeoutExpired:
return AgentResult(
agent_id=agent_id,
duration=self.timeout,
status='timeout',
return_code=-1
status="timeout",
return_code=-1,
)
except Exception as e:
return AgentResult(
agent_id=agent_id,
duration=time.time() - start_time,
status='failed',
status="failed",
return_code=-1,
error=str(e)
)
def _run_parallel_agents(self, num_agents: int) -> TestRun:
@@ -194,7 +264,7 @@ class ParallelCapacityTester:
elapsed = int(time.time() - start_time)
all_done = all(not t.is_alive() for t in threads)
subprocess.run(['pkill', '-f', 'opencode run'], capture_output=True)
subprocess.run(["pkill", "-f", "opencode run"], capture_output=True)
for t in threads:
t.join(timeout=5)
@@ -202,9 +272,9 @@ class ParallelCapacityTester:
resource_samples = self.monitor.stop()
total_duration = time.time() - start_time
success_count = sum(1 for r in results if r.status == 'success')
failed_count = sum(1 for r in results if r.status == 'failed')
timeout_count = sum(1 for r in results if r.status == 'timeout')
success_count = sum(1 for r in results if r.status == "success")
failed_count = sum(1 for r in results if r.status == "failed")
timeout_count = sum(1 for r in results if r.status == "timeout")
durations = [r.duration for r in results]
avg_duration = statistics.mean(durations) if durations else 0
@@ -221,7 +291,9 @@ class ParallelCapacityTester:
else:
peak_cpu = avg_cpu = peak_mem = avg_mem = peak_procs = 0
print(f"[RESULT] {num_agents} agents: {success_count} success, {failed_count} failed, {timeout_count} timeout")
print(
f"[RESULT] {num_agents} agents: {success_count} success, {failed_count} failed, {timeout_count} timeout"
)
return TestRun(
agent_count=num_agents,
@@ -237,11 +309,12 @@ class ParallelCapacityTester:
avg_cpu_percent=avg_cpu,
peak_memory_percent=peak_mem,
avg_memory_percent=avg_mem,
peak_opencode_procs=peak_procs
peak_opencode_procs=peak_procs,
)
def run_capacity_test(self, max_agents: int = 10, step: int = 1,
quick: bool = False) -> List[TestRun]:
def run_capacity_test(
self, max_agents: int = 10, step: int = 1, quick: bool = False
) -> List[TestRun]:
if quick:
agent_counts = [1, 2, 3, 5, 8]
else:
@@ -253,7 +326,7 @@ class ParallelCapacityTester:
self.results = []
for count in agent_counts:
subprocess.run(['pkill', '-f', 'opencode run'], capture_output=True)
subprocess.run(["pkill", "-f", "opencode run"], capture_output=True)
time.sleep(2)
result = self._run_parallel_agents(count)
self.results.append(result)
@@ -266,21 +339,25 @@ class ParallelCapacityTester:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
json_file = output_path / f"results_{timestamp}.json"
with open(json_file, 'w') as f:
with open(json_file, "w") as f:
data = [asdict(run) for run in self.results]
json.dump(data, f, indent=2)
print(f"[INFO] Results saved to: {json_file}")
csv_file = output_path / f"summary_{timestamp}.csv"
with open(csv_file, 'w') as f:
f.write("agents,duration,success,failed,timeout,avg_response,stddev,min_response,max_response,peak_cpu,avg_cpu,peak_mem,avg_mem,peak_procs\n")
with open(csv_file, "w") as f:
f.write(
"agents,duration,success,failed,timeout,avg_response,stddev,min_response,max_response,peak_cpu,avg_cpu,peak_mem,avg_mem,peak_procs\n"
)
for run in self.results:
f.write(f"{run.agent_count},{run.total_duration:.2f},{run.success_count},"
f"{run.failed_count},{run.timeout_count},{run.avg_response_time:.2f},"
f"{run.stddev_response_time:.2f},{run.min_response_time:.2f},"
f"{run.max_response_time:.2f},{run.peak_cpu_percent:.1f},"
f"{run.avg_cpu_percent:.1f},{run.peak_memory_percent:.1f},"
f"{run.avg_memory_percent:.1f},{run.peak_opencode_procs}\n")
f.write(
f"{run.agent_count},{run.total_duration:.2f},{run.success_count},"
f"{run.failed_count},{run.timeout_count},{run.avg_response_time:.2f},"
f"{run.stddev_response_time:.2f},{run.min_response_time:.2f},"
f"{run.max_response_time:.2f},{run.peak_cpu_percent:.1f},"
f"{run.avg_cpu_percent:.1f},{run.peak_memory_percent:.1f},"
f"{run.avg_memory_percent:.1f},{run.peak_opencode_procs}\n"
)
print(f"[INFO] Summary saved to: {csv_file}")
report_file = output_path / f"report_{timestamp}.md"
@@ -290,44 +367,62 @@ class ParallelCapacityTester:
return str(json_file), str(csv_file), str(report_file)
def _generate_markdown_report(self, output_file: Path):
with open(output_file, 'w') as f:
with open(output_file, "w") as f:
f.write("# Parallel Capacity Test Report\n\n")
f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
f.write(
f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
)
f.write("## Summary\n\n")
f.write("| Agents | Duration | Success | Failed | Timeout | Avg Response | Peak CPU | Peak Mem |\n")
f.write("|--------|----------|---------|--------|---------|--------------|----------|----------|\n")
f.write(
"| Agents | Duration | Success | Failed | Timeout | Avg Response | Peak CPU | Peak Mem |\n"
)
f.write(
"|--------|----------|---------|--------|---------|--------------|----------|----------|\n"
)
for run in self.results:
f.write(f"| {run.agent_count} | {run.total_duration:.1f}s | "
f"{run.success_count} | {run.failed_count} | "
f"{run.timeout_count} | {run.avg_response_time:.1f}s | "
f"{run.peak_cpu_percent:.1f}% | {run.peak_memory_percent:.1f}% |\n")
f.write(
f"| {run.agent_count} | {run.total_duration:.1f}s | "
f"{run.success_count} | {run.failed_count} | "
f"{run.timeout_count} | {run.avg_response_time:.1f}s | "
f"{run.peak_cpu_percent:.1f}% | {run.peak_memory_percent:.1f}% |\n"
)
f.write("\n## Key Findings\n\n")
successful_runs = [r for r in self.results if r.success_count == r.agent_count]
successful_runs = [
r for r in self.results if r.success_count == r.agent_count
]
optimal = max(successful_runs, key=lambda r: r.agent_count, default=None)
if optimal:
f.write(f"### Optimal Configuration\n")
f.write(f"- **{optimal.agent_count} agents** achieved perfect success rate\n")
f.write(f" - Average response time: {optimal.avg_response_time:.1f}s\n")
f.write(
f"- **{optimal.agent_count} agents** achieved perfect success rate\n"
)
f.write(
f" - Average response time: {optimal.avg_response_time:.1f}s\n"
)
f.write(f" - Peak CPU: {optimal.peak_cpu_percent:.1f}%\n")
f.write(f" - Peak Memory: {optimal.peak_memory_percent:.1f}%\n\n")
f.write("## Recommendations\n\n")
if optimal:
f.write(f"1. **Recommended max agents:** {optimal.agent_count} for stable operation\n")
f.write(
f"1. **Recommended max agents:** {optimal.agent_count} for stable operation\n"
)
f.write("2. **Monitor closely:** 5+ agents\n")
f.write("3. **Implement circuit breaker** when failure rate exceeds threshold\n")
f.write(
"3. **Implement circuit breaker** when failure rate exceeds threshold\n"
)
def main():
parser = argparse.ArgumentParser(description='Parallel Capacity Test Tool')
parser.add_argument('--agents', '-n', type=int, default=10)
parser.add_argument('--timeout', '-t', type=int, default=120)
parser.add_argument('--step', '-s', type=int, default=1)
parser.add_argument('--quick', '-q', action='store_true')
parser.add_argument('--output', '-o', type=str, default=None)
parser = argparse.ArgumentParser(description="Parallel Capacity Test Tool")
parser.add_argument("--agents", "-n", type=int, default=10)
parser.add_argument("--timeout", "-t", type=int, default=120)
parser.add_argument("--step", "-s", type=int, default=1)
parser.add_argument("--quick", "-q", action="store_true")
parser.add_argument("--output", "-o", type=str, default=None)
args = parser.parse_args()
script_dir = Path(__file__).parent
output_dir = args.output or str(script_dir / 'results')
output_dir = args.output or str(script_dir / "results")
print("=" * 60)
print("Parallel Capacity Test Tool for Hermes/OpenCode")
@@ -339,7 +434,9 @@ def main():
tester = ParallelCapacityTester(timeout=args.timeout)
try:
tester.run_capacity_test(max_agents=args.agents, step=args.step, quick=args.quick)
tester.run_capacity_test(
max_agents=args.agents, step=args.step, quick=args.quick
)
json_file, csv_file, report_file = tester.save_results(output_dir)
print("\n" + "=" * 60)
print("TEST COMPLETE")
@@ -352,5 +449,5 @@ def main():
sys.exit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()