From 1820d3acf3bc3dcbd9578543403d357bd5d72caf Mon Sep 17 00:00:00 2001 From: Oscar Arbelaez Date: Mon, 3 Feb 2025 11:01:52 +0000 Subject: [PATCH] Only do 5 calls to the openalex API at a time --- src/bibx/clients/openalex.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/bibx/clients/openalex.py b/src/bibx/clients/openalex.py index 726ba58..cef9830 100644 --- a/src/bibx/clients/openalex.py +++ b/src/bibx/clients/openalex.py @@ -11,8 +11,9 @@ logger = logging.getLogger(__name__) -MAX_WORKS_PER_PAGE = 200 -MAX_IDS_PER_REQUEST = 80 +_MAX_WORKS_PER_PAGE = 200 +_MAX_IDS_PER_REQUEST = 80 +_MAX_CONNECTIONS = 5 class AuthorPosition(Enum): @@ -145,9 +146,9 @@ def list_recent_articles(self, query: str, limit: int = 600) -> list[Work]: "cited_by_count:>1", ] ) - pages = (limit // MAX_WORKS_PER_PAGE) + 1 + pages = (limit // _MAX_WORKS_PER_PAGE) + 1 results: list[Work] = [] - with ThreadPoolExecutor(max_workers=min(pages, 25)) as executor: + with ThreadPoolExecutor(max_workers=min(pages, _MAX_CONNECTIONS)) as executor: futures = [ executor.submit( self._fetch_works, @@ -155,7 +156,7 @@ def list_recent_articles(self, query: str, limit: int = 600) -> list[Work]: "select": select, "filter": filter_, "sort": "publication_year:desc", - "per_page": MAX_WORKS_PER_PAGE, + "per_page": _MAX_WORKS_PER_PAGE, "page": page, }, ) @@ -175,17 +176,17 @@ def list_articles_by_openalex_id(self, ids: list[str]) -> list[Work]: return [] select = ",".join(Work.model_fields.keys()) results: list[Work] = [] - with ThreadPoolExecutor(max_workers=5) as executor: + with ThreadPoolExecutor(max_workers=_MAX_CONNECTIONS) as executor: futures = [ executor.submit( self._fetch_works, { "select": select, "filter": f"ids.openalex:{'|'.join(ids)},type:types/article", - "per_page": MAX_IDS_PER_REQUEST, + "per_page": _MAX_IDS_PER_REQUEST, }, ) - for ids in chunks(ids, MAX_IDS_PER_REQUEST) + for ids in chunks(ids, _MAX_IDS_PER_REQUEST) ] for future in as_completed(futures): work_response = future.result()