Add parallel fetching, caching, and max_total parameter #26
@@ -220,6 +220,7 @@ def fetch_all_pages(
|
|||||||
|
|
||||||
total_raw = 0
|
total_raw = 0
|
||||||
page_count = 0
|
page_count = 0
|
||||||
|
page1_data = None
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
page_count += 1
|
page_count += 1
|
||||||
@@ -227,33 +228,39 @@ def fetch_all_pages(
|
|||||||
if data is None:
|
if data is None:
|
||||||
break
|
break
|
||||||
total_raw = data.get("pagination", {}).get("totalResults", 0)
|
total_raw = data.get("pagination", {}).get("totalResults", 0)
|
||||||
|
if page_count == 1:
|
||||||
|
page1_data = data
|
||||||
if total_raw > 0:
|
if total_raw > 0:
|
||||||
break
|
break
|
||||||
if not data.get("events"):
|
if not data.get("events"):
|
||||||
break
|
break
|
||||||
|
|
||||||
if total_raw == 0:
|
if total_raw == 0 or page1_data is None:
|
||||||
return {"events": [], "total_raw": 0, "partial": False}
|
return {"events": [], "total_raw": 0, "partial": False}
|
||||||
|
|
||||||
# API always returns exactly 5 events per page regardless of 'limit' param.
|
page1_events = page1_data.get("events", [])
|
||||||
# This is integer ceiling division: ceil(total_raw / 5) = (total_raw + 5 - 1) // 5 = (total_raw + 4) // 5
|
actual_page_size = len(page1_events)
|
||||||
total_pages = (total_raw + 4) // 5
|
|
||||||
|
# Use actual events per page from API for ceiling division
|
||||||
|
# ceil(total_raw / actual_page_size) = (total_raw + actual_page_size - 1) // actual_page_size
|
||||||
|
total_pages = (total_raw + actual_page_size - 1) // actual_page_size
|
||||||
concurrency = min(MAX_PARALLEL_FETCHES, total_pages)
|
concurrency = min(MAX_PARALLEL_FETCHES, total_pages)
|
||||||
|
|
||||||
all_page_data: dict[int, list[Any]] = {}
|
all_page_data: dict[int, list[Any]] = {1: page1_events}
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=concurrency) as executor:
|
if total_pages > 1:
|
||||||
futures = {
|
with ThreadPoolExecutor(max_workers=concurrency) as executor:
|
||||||
executor.submit(_fetch_page_with_index, q, page): page
|
futures = {
|
||||||
for page in range(1, total_pages + 1)
|
executor.submit(_fetch_page_with_index, q, page): page
|
||||||
}
|
for page in range(2, total_pages + 1)
|
||||||
for future in as_completed(futures):
|
}
|
||||||
try:
|
for future in as_completed(futures):
|
||||||
page_num, data = future.result()
|
try:
|
||||||
if data is not None:
|
page_num, data = future.result()
|
||||||
all_page_data[page_num] = data.get("events", [])
|
if data is not None:
|
||||||
except Exception:
|
all_page_data[page_num] = data.get("events", [])
|
||||||
pass
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
all_events = []
|
all_events = []
|
||||||
for page_num in sorted(all_page_data.keys()):
|
for page_num in sorted(all_page_data.keys()):
|
||||||
|
|||||||
@@ -1117,6 +1117,7 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
{"id": "n1", "title": "Non-match 1", "markets": []},
|
{"id": "n1", "title": "Non-match 1", "markets": []},
|
||||||
{"id": "n2", "title": "Non-match 2", "markets": []},
|
{"id": "n2", "title": "Non-match 2", "markets": []},
|
||||||
|
{"id": "e1", "title": "Extra 1", "markets": []},
|
||||||
],
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
@@ -1130,21 +1131,35 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
"markets": [],
|
"markets": [],
|
||||||
},
|
},
|
||||||
{"id": "n3", "title": "Non-match 3", "markets": []},
|
{"id": "n3", "title": "Non-match 3", "markets": []},
|
||||||
|
{"id": "e2", "title": "Extra 2", "markets": []},
|
||||||
|
{"id": "e3", "title": "Extra 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Extra 4", "markets": []},
|
||||||
],
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
page3 = {
|
page3 = {
|
||||||
"events": [],
|
"events": [
|
||||||
|
{"id": "e5", "title": "Extra 5", "markets": []},
|
||||||
|
{"id": "e6", "title": "Extra 6", "markets": []},
|
||||||
|
{"id": "e7", "title": "Extra 7", "markets": []},
|
||||||
|
{"id": "e8", "title": "Extra 8", "markets": []},
|
||||||
|
{"id": "e9", "title": "Extra 9", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
page4 = {
|
page4 = {
|
||||||
"events": [],
|
"events": [
|
||||||
|
{"id": "e10", "title": "Extra 10", "markets": []},
|
||||||
|
{"id": "e11", "title": "Extra 11", "markets": []},
|
||||||
|
{"id": "e12", "title": "Extra 12", "markets": []},
|
||||||
|
{"id": "e13", "title": "Extra 13", "markets": []},
|
||||||
|
{"id": "e14", "title": "Extra 14", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
|
|
||||||
mock_fetch_page.return_value = page1
|
mock_fetch_page.return_value = page1
|
||||||
mock_parallel_fetch.side_effect = [
|
mock_parallel_fetch.side_effect = [
|
||||||
(1, page1),
|
|
||||||
(2, page2),
|
(2, page2),
|
||||||
(3, page3),
|
(3, page3),
|
||||||
(4, page4),
|
(4, page4),
|
||||||
@@ -1155,7 +1170,7 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(mock_fetch_page.call_count, 1)
|
self.assertEqual(mock_fetch_page.call_count, 1)
|
||||||
self.assertEqual(mock_parallel_fetch.call_count, 4)
|
self.assertEqual(mock_parallel_fetch.call_count, 3)
|
||||||
self.assertEqual(len(result["events"]), 6)
|
self.assertEqual(len(result["events"]), 6)
|
||||||
|
|
||||||
@patch("browse._read_cache", return_value=None)
|
@patch("browse._read_cache", return_value=None)
|
||||||
@@ -1168,27 +1183,45 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
from browse import fetch_all_pages
|
from browse import fetch_all_pages
|
||||||
|
|
||||||
page1 = {
|
page1 = {
|
||||||
"events": [{"id": "e1", "title": "Event 1", "markets": []}],
|
"events": [
|
||||||
|
{"id": "e1", "title": "Event 1", "markets": []},
|
||||||
|
{"id": "e2", "title": "Event 2", "markets": []},
|
||||||
|
{"id": "e3", "title": "Event 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Event 4", "markets": []},
|
||||||
|
{"id": "e5", "title": "Event 5", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 15, "hasMore": True},
|
"pagination": {"totalResults": 15, "hasMore": True},
|
||||||
}
|
}
|
||||||
page2 = {
|
page2 = {
|
||||||
"events": [{"id": "e2", "title": "Event 2", "markets": []}],
|
"events": [
|
||||||
|
{"id": "e6", "title": "Event 6", "markets": []},
|
||||||
|
{"id": "e7", "title": "Event 7", "markets": []},
|
||||||
|
{"id": "e8", "title": "Event 8", "markets": []},
|
||||||
|
{"id": "e9", "title": "Event 9", "markets": []},
|
||||||
|
{"id": "e10", "title": "Event 10", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 15, "hasMore": True},
|
"pagination": {"totalResults": 15, "hasMore": True},
|
||||||
}
|
}
|
||||||
page3 = {
|
page3 = {
|
||||||
"events": [{"id": "e3", "title": "Event 3", "markets": []}],
|
"events": [
|
||||||
|
{"id": "e11", "title": "Event 11", "markets": []},
|
||||||
|
{"id": "e12", "title": "Event 12", "markets": []},
|
||||||
|
{"id": "e13", "title": "Event 13", "markets": []},
|
||||||
|
{"id": "e14", "title": "Event 14", "markets": []},
|
||||||
|
{"id": "e15", "title": "Event 15", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 15, "hasMore": False},
|
"pagination": {"totalResults": 15, "hasMore": False},
|
||||||
}
|
}
|
||||||
|
|
||||||
mock_fetch_page.return_value = page1
|
mock_fetch_page.return_value = page1
|
||||||
mock_parallel_fetch.side_effect = [(1, page1), (2, page2), (3, page3)]
|
mock_parallel_fetch.side_effect = [(2, page2), (3, page3)]
|
||||||
|
|
||||||
result = fetch_all_pages("test", use_cache=False)
|
result = fetch_all_pages("test", use_cache=False)
|
||||||
|
|
||||||
self.assertEqual(mock_fetch_page.call_count, 1)
|
self.assertEqual(mock_fetch_page.call_count, 1)
|
||||||
self.assertEqual(mock_parallel_fetch.call_count, 3)
|
self.assertEqual(mock_parallel_fetch.call_count, 2)
|
||||||
self.assertEqual(len(result["events"]), 3)
|
self.assertEqual(len(result["events"]), 15)
|
||||||
self.assertTrue(result["partial"])
|
self.assertFalse(result["partial"])
|
||||||
|
|
||||||
@patch("browse._read_cache", return_value=None)
|
@patch("browse._read_cache", return_value=None)
|
||||||
@patch("browse._fetch_page_with_index")
|
@patch("browse._fetch_page_with_index")
|
||||||
@@ -1222,6 +1255,8 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
"gameId": "3",
|
"gameId": "3",
|
||||||
"markets": [],
|
"markets": [],
|
||||||
},
|
},
|
||||||
|
{"id": "e1", "title": "Extra 1", "markets": []},
|
||||||
|
{"id": "e2", "title": "Extra 2", "markets": []},
|
||||||
],
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
@@ -1230,31 +1265,168 @@ class TestFetchAllPages(unittest.TestCase):
|
|||||||
{"id": "n1", "title": "Non-match 1", "markets": []},
|
{"id": "n1", "title": "Non-match 1", "markets": []},
|
||||||
{"id": "n2", "title": "Non-match 2", "markets": []},
|
{"id": "n2", "title": "Non-match 2", "markets": []},
|
||||||
{"id": "n3", "title": "Non-match 3", "markets": []},
|
{"id": "n3", "title": "Non-match 3", "markets": []},
|
||||||
|
{"id": "e3", "title": "Extra 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Extra 4", "markets": []},
|
||||||
],
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
page3 = {
|
page3 = {
|
||||||
"events": [],
|
"events": [
|
||||||
|
{"id": "e5", "title": "Extra 5", "markets": []},
|
||||||
|
{"id": "e6", "title": "Extra 6", "markets": []},
|
||||||
|
{"id": "e7", "title": "Extra 7", "markets": []},
|
||||||
|
{"id": "e8", "title": "Extra 8", "markets": []},
|
||||||
|
{"id": "e9", "title": "Extra 9", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
page4 = {
|
page4 = {
|
||||||
"events": [],
|
"events": [
|
||||||
|
{"id": "e10", "title": "Extra 10", "markets": []},
|
||||||
|
{"id": "e11", "title": "Extra 11", "markets": []},
|
||||||
|
{"id": "e12", "title": "Extra 12", "markets": []},
|
||||||
|
{"id": "e13", "title": "Extra 13", "markets": []},
|
||||||
|
{"id": "e14", "title": "Extra 14", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 20, "hasMore": True},
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
}
|
}
|
||||||
|
|
||||||
mock_fetch_page.return_value = page1
|
mock_fetch_page.return_value = page1
|
||||||
mock_parallel_fetch.side_effect = [
|
mock_parallel_fetch.side_effect = [(2, page2), (3, page3), (4, page4)]
|
||||||
(1, page1),
|
|
||||||
(2, page2),
|
|
||||||
(3, page3),
|
|
||||||
(4, page4),
|
|
||||||
]
|
|
||||||
|
|
||||||
result = fetch_all_pages(
|
result = fetch_all_pages(
|
||||||
"test", matches_max=3, non_matches_max=3, use_cache=False
|
"test", matches_max=3, non_matches_max=3, use_cache=False
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(mock_parallel_fetch.call_count, 4)
|
self.assertEqual(mock_parallel_fetch.call_count, 3)
|
||||||
|
self.assertEqual(len(result["events"]), 6)
|
||||||
|
|
||||||
|
@patch("browse._read_cache", return_value=None)
|
||||||
|
@patch("browse._fetch_page_with_index")
|
||||||
|
@patch("browse.fetch_page")
|
||||||
|
def test_no_quota_fetches_all_pages(
|
||||||
|
self, mock_fetch_page, mock_parallel_fetch, mock_cache
|
||||||
|
):
|
||||||
|
"""Without quotas, fetches all pages until pagination ends."""
|
||||||
|
from browse import fetch_all_pages
|
||||||
|
|
||||||
|
page1 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "e1", "title": "Event 1", "markets": []},
|
||||||
|
{"id": "e2", "title": "Event 2", "markets": []},
|
||||||
|
{"id": "e3", "title": "Event 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Event 4", "markets": []},
|
||||||
|
{"id": "e5", "title": "Event 5", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 15, "hasMore": True},
|
||||||
|
}
|
||||||
|
page2 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "e6", "title": "Event 6", "markets": []},
|
||||||
|
{"id": "e7", "title": "Event 7", "markets": []},
|
||||||
|
{"id": "e8", "title": "Event 8", "markets": []},
|
||||||
|
{"id": "e9", "title": "Event 9", "markets": []},
|
||||||
|
{"id": "e10", "title": "Event 10", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 15, "hasMore": True},
|
||||||
|
}
|
||||||
|
page3 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "e11", "title": "Event 11", "markets": []},
|
||||||
|
{"id": "e12", "title": "Event 12", "markets": []},
|
||||||
|
{"id": "e13", "title": "Event 13", "markets": []},
|
||||||
|
{"id": "e14", "title": "Event 14", "markets": []},
|
||||||
|
{"id": "e15", "title": "Event 15", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 15, "hasMore": False},
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_fetch_page.return_value = page1
|
||||||
|
mock_parallel_fetch.side_effect = [(2, page2), (3, page3)]
|
||||||
|
|
||||||
|
result = fetch_all_pages("test", use_cache=False)
|
||||||
|
|
||||||
|
self.assertEqual(mock_fetch_page.call_count, 1)
|
||||||
|
self.assertEqual(mock_parallel_fetch.call_count, 2)
|
||||||
|
self.assertEqual(len(result["events"]), 15)
|
||||||
|
self.assertFalse(result["partial"])
|
||||||
|
|
||||||
|
@patch("browse._read_cache", return_value=None)
|
||||||
|
@patch("browse._fetch_page_with_index")
|
||||||
|
@patch("browse.fetch_page")
|
||||||
|
def test_quota_one_side_only_keeps_fetching(
|
||||||
|
self, mock_fetch_page, mock_parallel_fetch, mock_cache
|
||||||
|
):
|
||||||
|
"""If only one quota is met, keeps fetching."""
|
||||||
|
from browse import fetch_all_pages
|
||||||
|
|
||||||
|
page1 = {
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"id": "m1",
|
||||||
|
"title": "Match 1",
|
||||||
|
"seriesSlug": "x",
|
||||||
|
"gameId": "1",
|
||||||
|
"markets": [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "m2",
|
||||||
|
"title": "Match 2",
|
||||||
|
"seriesSlug": "x",
|
||||||
|
"gameId": "2",
|
||||||
|
"markets": [],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "m3",
|
||||||
|
"title": "Match 3",
|
||||||
|
"seriesSlug": "x",
|
||||||
|
"gameId": "3",
|
||||||
|
"markets": [],
|
||||||
|
},
|
||||||
|
{"id": "e1", "title": "Extra 1", "markets": []},
|
||||||
|
{"id": "e2", "title": "Extra 2", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
|
}
|
||||||
|
page2 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "n1", "title": "Non-match 1", "markets": []},
|
||||||
|
{"id": "n2", "title": "Non-match 2", "markets": []},
|
||||||
|
{"id": "n3", "title": "Non-match 3", "markets": []},
|
||||||
|
{"id": "e3", "title": "Extra 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Extra 4", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
|
}
|
||||||
|
page3 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "e5", "title": "Extra 5", "markets": []},
|
||||||
|
{"id": "e6", "title": "Extra 6", "markets": []},
|
||||||
|
{"id": "e7", "title": "Extra 7", "markets": []},
|
||||||
|
{"id": "e8", "title": "Extra 8", "markets": []},
|
||||||
|
{"id": "e9", "title": "Extra 9", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
|
}
|
||||||
|
page4 = {
|
||||||
|
"events": [
|
||||||
|
{"id": "e10", "title": "Extra 10", "markets": []},
|
||||||
|
{"id": "e11", "title": "Extra 11", "markets": []},
|
||||||
|
{"id": "e12", "title": "Extra 12", "markets": []},
|
||||||
|
{"id": "e13", "title": "Extra 13", "markets": []},
|
||||||
|
{"id": "e14", "title": "Extra 14", "markets": []},
|
||||||
|
],
|
||||||
|
"pagination": {"totalResults": 20, "hasMore": True},
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_fetch_page.return_value = page1
|
||||||
|
mock_parallel_fetch.side_effect = [(2, page2), (3, page3), (4, page4)]
|
||||||
|
|
||||||
|
result = fetch_all_pages(
|
||||||
|
"test", matches_max=3, non_matches_max=3, use_cache=False
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(mock_parallel_fetch.call_count, 3)
|
||||||
self.assertEqual(len(result["events"]), 6)
|
self.assertEqual(len(result["events"]), 6)
|
||||||
|
|
||||||
|
|
||||||
@@ -1271,7 +1443,13 @@ class TestParallelFetchConcurrency(unittest.TestCase):
|
|||||||
from browse import fetch_all_pages
|
from browse import fetch_all_pages
|
||||||
|
|
||||||
page = {
|
page = {
|
||||||
"events": [{"id": "e1", "title": "Event 1", "markets": []}],
|
"events": [
|
||||||
|
{"id": "e1", "title": "Event 1", "markets": []},
|
||||||
|
{"id": "e2", "title": "Event 2", "markets": []},
|
||||||
|
{"id": "e3", "title": "Event 3", "markets": []},
|
||||||
|
{"id": "e4", "title": "Event 4", "markets": []},
|
||||||
|
{"id": "e5", "title": "Event 5", "markets": []},
|
||||||
|
],
|
||||||
"pagination": {"totalResults": 50, "hasMore": True},
|
"pagination": {"totalResults": 50, "hasMore": True},
|
||||||
}
|
}
|
||||||
mock_fetch_page.return_value = page
|
mock_fetch_page.return_value = page
|
||||||
@@ -1279,8 +1457,9 @@ class TestParallelFetchConcurrency(unittest.TestCase):
|
|||||||
|
|
||||||
result = fetch_all_pages("test", use_cache=False)
|
result = fetch_all_pages("test", use_cache=False)
|
||||||
|
|
||||||
total_pages = (50 + 4) // 5 # = 10 pages (API returns 5 per page)
|
total_pages = (50 + 5 - 1) // 5 # = 10 pages (API returns 5 per page)
|
||||||
self.assertEqual(mock_parallel_fetch.call_count, total_pages)
|
# Page 1 is fetched in probe loop, so executor only fetches pages 2-10 (9 calls)
|
||||||
|
self.assertEqual(mock_parallel_fetch.call_count, total_pages - 1)
|
||||||
|
|
||||||
@patch("browse._read_cache", return_value=None)
|
@patch("browse._read_cache", return_value=None)
|
||||||
@patch("browse._fetch_page_with_index")
|
@patch("browse._fetch_page_with_index")
|
||||||
|
|||||||
Reference in New Issue
Block a user