Fix: Switch fetch_page from subprocess to urllib, add early-exit to fetch_all_pages, add sort_by to browse_events

- fetch_page: replace subprocess.run(curl) with urllib (stdlib, no external process).
- fetch_all_pages: add matches_max/non_matches_max params for early exit.
  When both are set, stop fetching once both quotas are satisfied.
- browse_events: add sort_by param (None = fast early exit, 'volume' = full fetch + sort);
  see the usage sketch below. Early exit is only used when sort_by=None
  (no client-side sort needed).
- Remove the subprocess import (no longer needed after the migration).
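
A quick usage sketch of the two modes (hypothetical query string; the calls
match the new browse_events signature in the diff):

    # Fast path: sort_by=None lets fetch_all_pages stop paging as soon as
    # both quotas are met; results keep the API's own ordering.
    fast = browse_events("champions league", matches_max=10, non_matches_max=10)

    # Full path: sort_by="volume" fetches every page, then sorts both match
    # and non-match events client-side by volume (descending).
    ranked = browse_events("champions league", sort_by="volume")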
shoko
2026-03-25 18:53:11 +00:00
parent 3a9f8fb365
commit 764c75e712


@@ -42,52 +42,68 @@ def fetch_page(q, page=1, max_retries=MAX_RETRIES, initial_delay=INITIAL_RETRY_D
     url = (f"{base}?q={q.replace(' ', '%20')}&limit={PAGE_SIZE}&page={page}"
            f"&search_profiles=false&search_tags=false"
            f"&keep_closed_markets=0&events_status=active&cache=false")
     delay = initial_delay
     for attempt in range(max_retries):
-        time.sleep(delay)
-        r = subprocess.run(
-            ["curl", "-s", url, "--max-time", "10", "-H", "User-Agent: curl/7.88.1"],
-            capture_output=True
-        )
-        if r.returncode == 0 and len(r.stdout) > 0:
-            try:
-                return json.loads(r.stdout.decode('utf-8'))
-            except json.JSONDecodeError:
-                if attempt < max_retries - 1:
-                    delay *= 2  # Exponential backoff
-                    continue
-                return None
-        else:
-            # Rate limit or other error - exponential backoff
+        if attempt > 0:
+            time.sleep(delay)
+        try:
+            req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+            with urlopen(req, timeout=10) as r:
+                return json.loads(r.read())
+        except Exception:
             if attempt < max_retries - 1:
                 delay *= 2
                 continue
             return None
     return None
 
-def fetch_all_pages(q, max_pages=100):
+def fetch_all_pages(q, matches_max=None, non_matches_max=None):
     """
-    Fetch ALL pages until pagination ends.
-    max_pages is a safety cap to prevent infinite loops.
+    Fetch pages until pagination ends, or until quotas are satisfied.
+
+    Args:
+        q: search query
+        matches_max: stop early once we have this many match events (None = no limit)
+        non_matches_max: stop early once we have this many non-match events (None = no limit)
+
+    Returns:
+        {"events": [...], "total_raw": N, "partial": bool}
     """
     all_events = []
     total_raw = 0
-    for page in range(1, max_pages + 1):
-        time.sleep(0.2)  # small delay between pages (API rate limit is generous)
+    match_count = 0
+    non_match_count = 0
+    page = 0
+    while True:
+        page += 1
+        time.sleep(0.2)
         data = fetch_page(q, page)
         if data is None:
             break
         events = data.get("events", [])
         total_raw = data.get("pagination", {}).get("totalResults", 0)
         all_events.extend(events)
-        # Stop when we get 0 events (no more pages),
-        # OR when we've fetched >= total results
+
+        # Count matches/non-matches in this page
+        for e in events:
+            if is_match_market(e):
+                match_count += 1
+            else:
+                non_match_count += 1
+
+        # Stop if we got what we wanted (only when caps are set)
+        if matches_max is not None and non_matches_max is not None:
+            if match_count >= matches_max and non_match_count >= non_matches_max:
+                break
+
+        # Stop when we get 0 events (no more pages)
         if len(events) == 0:
             break
+        # Stop when we've fetched all known results
         if len(all_events) >= total_raw:
             break
     partial = (total_raw > 0 and len(all_events) < total_raw)
     return {"events": all_events, "total_raw": total_raw, "partial": partial}
@@ -321,18 +337,39 @@ def sort_events(events):
 # ============================================================
 # BROWSE
 # ============================================================
-def browse_events(q, matches_max=10, non_matches_max=10, tradeable_only=True):
-    result = fetch_all_pages(q)
+def browse_events(q, matches_max=10, non_matches_max=10, tradeable_only=True, sort_by=None):
+    """
+    Browse Polymarket events.
+
+    Args:
+        q: search query
+        matches_max: max number of match markets to return
+        non_matches_max: max number of non-match markets to return
+        tradeable_only: filter to tradeable events only
+        sort_by: None (fast, API order) or "volume" (full fetch, sort by volume desc)
+    """
+    # Pass quotas to fetch_all_pages for early-exit optimization.
+    # Only use early-exit when sort_by is None (no client-side sort needed).
+    use_early_exit = (sort_by is None)
+    fetch_matches_max = matches_max if use_early_exit else None
+    fetch_non_matches_max = non_matches_max if use_early_exit else None
+    result = fetch_all_pages(q, matches_max=fetch_matches_max, non_matches_max=fetch_non_matches_max)
     events = result["events"]
     match_events, non_match_events = filter_events(events, tradeable_only)
-    sorted_match = sort_events(match_events)
+
+    # Sort if requested; otherwise preserve API order
+    if sort_by == "volume":
+        match_events = sort_events(match_events)
+        non_match_events = sort_events(non_match_events)
+
     return {
         "query": q,
         "total_raw": result["total_raw"],
         "total_fetched": len(events),
         "total_match": len(match_events),
         "total_non_match": len(non_match_events),
-        "match_events": sorted_match[:matches_max],
+        "match_events": match_events[:matches_max],
         "non_match_events": non_match_events[:non_matches_max],
         "partial": result.get("partial", False),
     }
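
For context, sort_events appears only as unchanged context in this diff; given
the "sort by volume desc" docstring on sort_by, it is presumably something like:

    def sort_events(events):
        # Assumed shape: each event dict carries a numeric "volume" field.
        return sorted(events, key=lambda e: e.get("volume", 0), reverse=True)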