Add parallel fetching, caching, and max_total parameter #26

Merged
shoko merged 6 commits from feat/parallel-fetch-cache into master 2026-03-26 18:23:20 +01:00
2 changed files with 18 additions and 21 deletions
Showing only changes of commit 1ae60f5661 - Show all commits

View File

@@ -235,7 +235,7 @@ def fetch_all_pages(
if total_raw == 0: if total_raw == 0:
return {"events": [], "total_raw": 0, "partial": False} return {"events": [], "total_raw": 0, "partial": False}
total_pages = (total_raw + PAGE_SIZE - 1) // PAGE_SIZE total_pages = (total_raw + 4) // 5
concurrency = min(MAX_PARALLEL_FETCHES, total_pages) concurrency = min(MAX_PARALLEL_FETCHES, total_pages)
all_page_data: dict[int, list[Any]] = {} all_page_data: dict[int, list[Any]] = {}

View File

@@ -1118,7 +1118,7 @@ class TestFetchAllPages(unittest.TestCase):
{"id": "n1", "title": "Non-match 1", "markets": []}, {"id": "n1", "title": "Non-match 1", "markets": []},
{"id": "n2", "title": "Non-match 2", "markets": []}, {"id": "n2", "title": "Non-match 2", "markets": []},
], ],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page2 = { page2 = {
"events": [ "events": [
@@ -1131,15 +1131,15 @@ class TestFetchAllPages(unittest.TestCase):
}, },
{"id": "n3", "title": "Non-match 3", "markets": []}, {"id": "n3", "title": "Non-match 3", "markets": []},
], ],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page3 = { page3 = {
"events": [], "events": [],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page4 = { page4 = {
"events": [], "events": [],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
mock_fetch_page.return_value = page1 mock_fetch_page.return_value = page1
@@ -1169,15 +1169,15 @@ class TestFetchAllPages(unittest.TestCase):
page1 = { page1 = {
"events": [{"id": "e1", "title": "Event 1", "markets": []}], "events": [{"id": "e1", "title": "Event 1", "markets": []}],
"pagination": {"totalResults": 150, "hasMore": True}, "pagination": {"totalResults": 15, "hasMore": True},
} }
page2 = { page2 = {
"events": [{"id": "e2", "title": "Event 2", "markets": []}], "events": [{"id": "e2", "title": "Event 2", "markets": []}],
"pagination": {"totalResults": 150, "hasMore": True}, "pagination": {"totalResults": 15, "hasMore": True},
} }
page3 = { page3 = {
"events": [{"id": "e3", "title": "Event 3", "markets": []}], "events": [{"id": "e3", "title": "Event 3", "markets": []}],
"pagination": {"totalResults": 150, "hasMore": False}, "pagination": {"totalResults": 15, "hasMore": False},
} }
mock_fetch_page.return_value = page1 mock_fetch_page.return_value = page1
@@ -1223,7 +1223,7 @@ class TestFetchAllPages(unittest.TestCase):
"markets": [], "markets": [],
}, },
], ],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page2 = { page2 = {
"events": [ "events": [
@@ -1231,15 +1231,15 @@ class TestFetchAllPages(unittest.TestCase):
{"id": "n2", "title": "Non-match 2", "markets": []}, {"id": "n2", "title": "Non-match 2", "markets": []},
{"id": "n3", "title": "Non-match 3", "markets": []}, {"id": "n3", "title": "Non-match 3", "markets": []},
], ],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page3 = { page3 = {
"events": [], "events": [],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
page4 = { page4 = {
"events": [], "events": [],
"pagination": {"totalResults": 200, "hasMore": True}, "pagination": {"totalResults": 20, "hasMore": True},
} }
mock_fetch_page.return_value = page1 mock_fetch_page.return_value = page1
@@ -1267,23 +1267,20 @@ class TestParallelFetchConcurrency(unittest.TestCase):
def test_parallel_fetch_uses_batch_size_of_5( def test_parallel_fetch_uses_batch_size_of_5(
self, mock_fetch_page, mock_parallel_fetch, mock_cache self, mock_fetch_page, mock_parallel_fetch, mock_cache
): ):
"""With 10 pages (totalResults=500), verify concurrency=5 means 5 calls per batch.""" """With 10 pages (totalResults=50), verify 10 calls are made with concurrency=5."""
from browse import fetch_all_pages from browse import fetch_all_pages
page = { page = {
"events": [{"id": "e1", "title": "Event 1", "markets": []}], "events": [{"id": "e1", "title": "Event 1", "markets": []}],
"pagination": {"totalResults": 500, "hasMore": True}, "pagination": {"totalResults": 50, "hasMore": True},
} }
mock_fetch_page.return_value = page mock_fetch_page.return_value = page
mock_parallel_fetch.return_value = (1, page) mock_parallel_fetch.return_value = (1, page)
result = fetch_all_pages("test", use_cache=False) result = fetch_all_pages("test", use_cache=False)
total_pages = (500 + 50 - 1) // 50 # = 10 pages total_pages = (50 + 4) // 5 # = 10 pages (API returns 5 per page)
concurrency = min(5, total_pages) # = 5 self.assertEqual(mock_parallel_fetch.call_count, total_pages)
expected_batches = (total_pages + concurrency - 1) // concurrency # = 2 batches
self.assertEqual(mock_parallel_fetch.call_count, 10)
@patch("browse._read_cache", return_value=None) @patch("browse._read_cache", return_value=None)
@patch("browse._fetch_page_with_index") @patch("browse._fetch_page_with_index")
@@ -1394,7 +1391,7 @@ class TestMaxTotalParameter(unittest.TestCase):
"markets": [], "markets": [],
} }
], ],
"pagination": {"totalResults": 500, "hasMore": True}, "pagination": {"totalResults": 50, "hasMore": True},
}, },
) )
) )
@@ -1432,7 +1429,7 @@ class TestMaxTotalParameter(unittest.TestCase):
"markets": [], "markets": [],
}, },
], ],
"pagination": {"totalResults": 100, "hasMore": True}, "pagination": {"totalResults": 5, "hasMore": True},
} }
mock_fetch_page.return_value = page1 mock_fetch_page.return_value = page1
mock_parallel_fetch.side_effect = [(1, page1)] mock_parallel_fetch.side_effect = [(1, page1)]