merge: resolve conflicts with main (psutil with fallback, use --dir flag)
This commit is contained in:
@@ -19,12 +19,78 @@ from typing import List, Optional
|
||||
|
||||
try:
|
||||
import psutil
|
||||
|
||||
HAS_PSUTIL = True
|
||||
except ImportError:
|
||||
HAS_PSUTIL = False
|
||||
print("[WARN] psutil not available - resource monitoring will be limited")
|
||||
|
||||
|
||||
def get_memory_percent(meminfo_path: str = "/proc/meminfo") -> float:
    """Return system memory usage as a percentage (0.0-100.0).

    Parses *meminfo_path* (Linux /proc/meminfo format) and computes
    used = MemTotal - MemAvailable.  On kernels older than 3.14 that
    lack a MemAvailable field, falls back to approximating available
    memory as MemFree + Buffers + Cached instead of treating it as 0
    (which would wrongly report 100% usage).

    Args:
        meminfo_path: file to parse; defaults to the live /proc/meminfo.

    Returns:
        Usage percent, or 0.0 if the file is missing, unreadable,
        malformed, or reports a non-positive MemTotal.
    """
    fields = {}
    try:
        with open(meminfo_path, "r") as f:
            for line in f:
                # Lines look like "MemTotal:  16316412 kB".
                key, _, rest = line.partition(":")
                parts = rest.split()
                if parts:
                    fields[key] = int(parts[0])
    except (OSError, ValueError):
        # OSError covers FileNotFoundError/PermissionError; ValueError
        # covers a malformed numeric field.
        return 0.0

    total = fields.get("MemTotal", 0)
    if total <= 0:
        return 0.0
    if "MemAvailable" in fields:
        available = fields["MemAvailable"]
    else:
        # Pre-3.14 kernel fallback: approximate "available" memory.
        available = (
            fields.get("MemFree", 0)
            + fields.get("Buffers", 0)
            + fields.get("Cached", 0)
        )
    return ((total - available) / total) * 100
|
||||
|
||||
|
||||
def count_opencode_processes(name: str = "opencode") -> int:
    """Count running processes whose command name matches *name*.

    Tries ``pgrep -c -x`` first (exact-name count).  If pgrep is
    missing, fails, times out, or finds no match, falls back to
    scanning ``/proc/<pid>/comm`` for a case-insensitive substring
    match (Linux only).

    Args:
        name: process name to look for; defaults to "opencode".

    Returns:
        Number of matching processes, or 0 if neither method finds
        a match or /proc does not exist.
    """
    # Fast path: exact-match count via pgrep.  OSError is included
    # because subprocess.run raises FileNotFoundError (an OSError,
    # not a SubprocessError) when the pgrep binary itself is absent.
    try:
        result = subprocess.run(
            ["pgrep", "-c", "-x", name],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if result.returncode == 0:
            return int(result.stdout.strip())
    except (subprocess.SubprocessError, ValueError, OSError):
        pass

    # Fallback: scan /proc for matching command names.
    try:
        entries = os.listdir("/proc")
    except FileNotFoundError:
        return 0
    needle = name.lower()
    count = 0
    for pid_dir in entries:
        if not pid_dir.isdigit():
            continue
        try:
            with open(f"/proc/{pid_dir}/comm", "r") as f:
                if needle in f.read().lower():
                    count += 1
        except OSError:
            # Process exited or comm is unreadable; skip it.
            continue
    return count
|
||||
|
||||
|
||||
def get_cpu_percent(stat_path: str = "/proc/stat") -> float:
    """Return aggregate CPU utilisation as a percentage (0.0-100.0).

    Parses the first ("cpu") line of *stat_path* (Linux /proc/stat
    format) and computes (total - idle) / total over the first seven
    counters (user, nice, system, idle, iowait, irq, softirq).

    NOTE(review): per proc(5), these counters are cumulative since
    boot, so this yields the average utilisation over the whole
    uptime, not an instantaneous reading; sampling twice and diffing
    would be needed for a point-in-time value.

    Args:
        stat_path: file to parse; defaults to the live /proc/stat.

    Returns:
        Utilisation percent, or 0.0 if the file is missing,
        malformed, or the counters are all zero.
    """
    try:
        with open(stat_path, "r") as f:
            parts = f.readline().split()
    except OSError:
        return 0.0
    # Guard against an empty or unexpected first line.
    if not parts or parts[0] != "cpu":
        return 0.0
    try:
        values = [int(x) for x in parts[1:8]]
    except ValueError:
        return 0.0
    if len(values) < 4:
        # Need at least the idle counter (index 3).
        return 0.0
    idle = values[3]
    total = sum(values)
    if total > 0:
        return ((total - idle) / total) * 100
    return 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentResult:
|
||||
agent_id: int
|
||||
@@ -95,8 +161,13 @@ class ResourceMonitor:
|
||||
def _collect_sample(self) -> ResourceSample:
|
||||
timestamp = time.time()
|
||||
try:
|
||||
opencode_procs = len([p for p in psutil.process_iter(['name'])
|
||||
if 'opencode' in p.info['name'].lower()])
|
||||
opencode_procs = len(
|
||||
[
|
||||
p
|
||||
for p in psutil.process_iter(["name"])
|
||||
if "opencode" in p.info["name"].lower()
|
||||
]
|
||||
)
|
||||
except Exception:
|
||||
opencode_procs = 0
|
||||
|
||||
@@ -112,7 +183,7 @@ class ResourceMonitor:
|
||||
cpu_percent=cpu_percent,
|
||||
memory_percent=memory_percent,
|
||||
opencode_processes=opencode_procs,
|
||||
agent_count=self._current_agent_count
|
||||
agent_count=self._current_agent_count,
|
||||
)
|
||||
|
||||
|
||||
@@ -135,36 +206,35 @@ class ParallelCapacityTester:
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['opencode', 'run', task, '--dir', workdir],
|
||||
["opencode", "run", task, "--dir", workdir],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=self.timeout
|
||||
timeout=self.timeout,
|
||||
)
|
||||
duration = time.time() - start_time
|
||||
output = result.stdout + result.stderr
|
||||
success = 'PARALLEL_TEST_OK' in output
|
||||
success = "PARALLEL_TEST_OK" in output
|
||||
|
||||
return AgentResult(
|
||||
agent_id=agent_id,
|
||||
duration=duration,
|
||||
status='success' if success else 'failed',
|
||||
status="success" if success else "failed",
|
||||
return_code=result.returncode,
|
||||
output=output[:500]
|
||||
output=output[:500],
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return AgentResult(
|
||||
agent_id=agent_id,
|
||||
duration=self.timeout,
|
||||
status='timeout',
|
||||
return_code=-1
|
||||
status="timeout",
|
||||
return_code=-1,
|
||||
)
|
||||
except Exception as e:
|
||||
return AgentResult(
|
||||
agent_id=agent_id,
|
||||
duration=time.time() - start_time,
|
||||
status='failed',
|
||||
status="failed",
|
||||
return_code=-1,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
def _run_parallel_agents(self, num_agents: int) -> TestRun:
|
||||
@@ -194,7 +264,7 @@ class ParallelCapacityTester:
|
||||
elapsed = int(time.time() - start_time)
|
||||
all_done = all(not t.is_alive() for t in threads)
|
||||
|
||||
subprocess.run(['pkill', '-f', 'opencode run'], capture_output=True)
|
||||
subprocess.run(["pkill", "-f", "opencode run"], capture_output=True)
|
||||
|
||||
for t in threads:
|
||||
t.join(timeout=5)
|
||||
@@ -202,9 +272,9 @@ class ParallelCapacityTester:
|
||||
resource_samples = self.monitor.stop()
|
||||
total_duration = time.time() - start_time
|
||||
|
||||
success_count = sum(1 for r in results if r.status == 'success')
|
||||
failed_count = sum(1 for r in results if r.status == 'failed')
|
||||
timeout_count = sum(1 for r in results if r.status == 'timeout')
|
||||
success_count = sum(1 for r in results if r.status == "success")
|
||||
failed_count = sum(1 for r in results if r.status == "failed")
|
||||
timeout_count = sum(1 for r in results if r.status == "timeout")
|
||||
|
||||
durations = [r.duration for r in results]
|
||||
avg_duration = statistics.mean(durations) if durations else 0
|
||||
@@ -221,7 +291,9 @@ class ParallelCapacityTester:
|
||||
else:
|
||||
peak_cpu = avg_cpu = peak_mem = avg_mem = peak_procs = 0
|
||||
|
||||
print(f"[RESULT] {num_agents} agents: {success_count} success, {failed_count} failed, {timeout_count} timeout")
|
||||
print(
|
||||
f"[RESULT] {num_agents} agents: {success_count} success, {failed_count} failed, {timeout_count} timeout"
|
||||
)
|
||||
|
||||
return TestRun(
|
||||
agent_count=num_agents,
|
||||
@@ -237,11 +309,12 @@ class ParallelCapacityTester:
|
||||
avg_cpu_percent=avg_cpu,
|
||||
peak_memory_percent=peak_mem,
|
||||
avg_memory_percent=avg_mem,
|
||||
peak_opencode_procs=peak_procs
|
||||
peak_opencode_procs=peak_procs,
|
||||
)
|
||||
|
||||
def run_capacity_test(self, max_agents: int = 10, step: int = 1,
|
||||
quick: bool = False) -> List[TestRun]:
|
||||
def run_capacity_test(
|
||||
self, max_agents: int = 10, step: int = 1, quick: bool = False
|
||||
) -> List[TestRun]:
|
||||
if quick:
|
||||
agent_counts = [1, 2, 3, 5, 8]
|
||||
else:
|
||||
@@ -253,7 +326,7 @@ class ParallelCapacityTester:
|
||||
self.results = []
|
||||
|
||||
for count in agent_counts:
|
||||
subprocess.run(['pkill', '-f', 'opencode run'], capture_output=True)
|
||||
subprocess.run(["pkill", "-f", "opencode run"], capture_output=True)
|
||||
time.sleep(2)
|
||||
result = self._run_parallel_agents(count)
|
||||
self.results.append(result)
|
||||
@@ -266,21 +339,25 @@ class ParallelCapacityTester:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
json_file = output_path / f"results_{timestamp}.json"
|
||||
with open(json_file, 'w') as f:
|
||||
with open(json_file, "w") as f:
|
||||
data = [asdict(run) for run in self.results]
|
||||
json.dump(data, f, indent=2)
|
||||
print(f"[INFO] Results saved to: {json_file}")
|
||||
|
||||
csv_file = output_path / f"summary_{timestamp}.csv"
|
||||
with open(csv_file, 'w') as f:
|
||||
f.write("agents,duration,success,failed,timeout,avg_response,stddev,min_response,max_response,peak_cpu,avg_cpu,peak_mem,avg_mem,peak_procs\n")
|
||||
with open(csv_file, "w") as f:
|
||||
f.write(
|
||||
"agents,duration,success,failed,timeout,avg_response,stddev,min_response,max_response,peak_cpu,avg_cpu,peak_mem,avg_mem,peak_procs\n"
|
||||
)
|
||||
for run in self.results:
|
||||
f.write(f"{run.agent_count},{run.total_duration:.2f},{run.success_count},"
|
||||
f"{run.failed_count},{run.timeout_count},{run.avg_response_time:.2f},"
|
||||
f"{run.stddev_response_time:.2f},{run.min_response_time:.2f},"
|
||||
f"{run.max_response_time:.2f},{run.peak_cpu_percent:.1f},"
|
||||
f"{run.avg_cpu_percent:.1f},{run.peak_memory_percent:.1f},"
|
||||
f"{run.avg_memory_percent:.1f},{run.peak_opencode_procs}\n")
|
||||
f.write(
|
||||
f"{run.agent_count},{run.total_duration:.2f},{run.success_count},"
|
||||
f"{run.failed_count},{run.timeout_count},{run.avg_response_time:.2f},"
|
||||
f"{run.stddev_response_time:.2f},{run.min_response_time:.2f},"
|
||||
f"{run.max_response_time:.2f},{run.peak_cpu_percent:.1f},"
|
||||
f"{run.avg_cpu_percent:.1f},{run.peak_memory_percent:.1f},"
|
||||
f"{run.avg_memory_percent:.1f},{run.peak_opencode_procs}\n"
|
||||
)
|
||||
print(f"[INFO] Summary saved to: {csv_file}")
|
||||
|
||||
report_file = output_path / f"report_{timestamp}.md"
|
||||
@@ -290,44 +367,62 @@ class ParallelCapacityTester:
|
||||
return str(json_file), str(csv_file), str(report_file)
|
||||
|
||||
def _generate_markdown_report(self, output_file: Path):
|
||||
with open(output_file, 'w') as f:
|
||||
with open(output_file, "w") as f:
|
||||
f.write("# Parallel Capacity Test Report\n\n")
|
||||
f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
|
||||
f.write(
|
||||
f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
||||
)
|
||||
f.write("## Summary\n\n")
|
||||
f.write("| Agents | Duration | Success | Failed | Timeout | Avg Response | Peak CPU | Peak Mem |\n")
|
||||
f.write("|--------|----------|---------|--------|---------|--------------|----------|----------|\n")
|
||||
f.write(
|
||||
"| Agents | Duration | Success | Failed | Timeout | Avg Response | Peak CPU | Peak Mem |\n"
|
||||
)
|
||||
f.write(
|
||||
"|--------|----------|---------|--------|---------|--------------|----------|----------|\n"
|
||||
)
|
||||
for run in self.results:
|
||||
f.write(f"| {run.agent_count} | {run.total_duration:.1f}s | "
|
||||
f"{run.success_count} | {run.failed_count} | "
|
||||
f"{run.timeout_count} | {run.avg_response_time:.1f}s | "
|
||||
f"{run.peak_cpu_percent:.1f}% | {run.peak_memory_percent:.1f}% |\n")
|
||||
f.write(
|
||||
f"| {run.agent_count} | {run.total_duration:.1f}s | "
|
||||
f"{run.success_count} | {run.failed_count} | "
|
||||
f"{run.timeout_count} | {run.avg_response_time:.1f}s | "
|
||||
f"{run.peak_cpu_percent:.1f}% | {run.peak_memory_percent:.1f}% |\n"
|
||||
)
|
||||
f.write("\n## Key Findings\n\n")
|
||||
successful_runs = [r for r in self.results if r.success_count == r.agent_count]
|
||||
successful_runs = [
|
||||
r for r in self.results if r.success_count == r.agent_count
|
||||
]
|
||||
optimal = max(successful_runs, key=lambda r: r.agent_count, default=None)
|
||||
if optimal:
|
||||
f.write(f"### Optimal Configuration\n")
|
||||
f.write(f"- **{optimal.agent_count} agents** achieved perfect success rate\n")
|
||||
f.write(f" - Average response time: {optimal.avg_response_time:.1f}s\n")
|
||||
f.write(
|
||||
f"- **{optimal.agent_count} agents** achieved perfect success rate\n"
|
||||
)
|
||||
f.write(
|
||||
f" - Average response time: {optimal.avg_response_time:.1f}s\n"
|
||||
)
|
||||
f.write(f" - Peak CPU: {optimal.peak_cpu_percent:.1f}%\n")
|
||||
f.write(f" - Peak Memory: {optimal.peak_memory_percent:.1f}%\n\n")
|
||||
f.write("## Recommendations\n\n")
|
||||
if optimal:
|
||||
f.write(f"1. **Recommended max agents:** {optimal.agent_count} for stable operation\n")
|
||||
f.write(
|
||||
f"1. **Recommended max agents:** {optimal.agent_count} for stable operation\n"
|
||||
)
|
||||
f.write("2. **Monitor closely:** 5+ agents\n")
|
||||
f.write("3. **Implement circuit breaker** when failure rate exceeds threshold\n")
|
||||
f.write(
|
||||
"3. **Implement circuit breaker** when failure rate exceeds threshold\n"
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Parallel Capacity Test Tool')
|
||||
parser.add_argument('--agents', '-n', type=int, default=10)
|
||||
parser.add_argument('--timeout', '-t', type=int, default=120)
|
||||
parser.add_argument('--step', '-s', type=int, default=1)
|
||||
parser.add_argument('--quick', '-q', action='store_true')
|
||||
parser.add_argument('--output', '-o', type=str, default=None)
|
||||
parser = argparse.ArgumentParser(description="Parallel Capacity Test Tool")
|
||||
parser.add_argument("--agents", "-n", type=int, default=10)
|
||||
parser.add_argument("--timeout", "-t", type=int, default=120)
|
||||
parser.add_argument("--step", "-s", type=int, default=1)
|
||||
parser.add_argument("--quick", "-q", action="store_true")
|
||||
parser.add_argument("--output", "-o", type=str, default=None)
|
||||
args = parser.parse_args()
|
||||
|
||||
script_dir = Path(__file__).parent
|
||||
output_dir = args.output or str(script_dir / 'results')
|
||||
output_dir = args.output or str(script_dir / "results")
|
||||
|
||||
print("=" * 60)
|
||||
print("Parallel Capacity Test Tool for Hermes/OpenCode")
|
||||
@@ -339,7 +434,9 @@ def main():
|
||||
tester = ParallelCapacityTester(timeout=args.timeout)
|
||||
|
||||
try:
|
||||
tester.run_capacity_test(max_agents=args.agents, step=args.step, quick=args.quick)
|
||||
tester.run_capacity_test(
|
||||
max_agents=args.agents, step=args.step, quick=args.quick
|
||||
)
|
||||
json_file, csv_file, report_file = tester.save_results(output_dir)
|
||||
print("\n" + "=" * 60)
|
||||
print("TEST COMPLETE")
|
||||
@@ -352,5 +449,5 @@ def main():
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user