diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 96b8a4a..1f33671 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -3,13 +3,13 @@ import logging from typing import TextIO -from bibx._entities.article import Article -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.openalex import OpenAlexCollectionBuilder -from bibx._entities.collection_builders.scopus_bib import ScopusBibCollectionBuilder -from bibx._entities.collection_builders.scopus_ris import ScopusRisCollectionBuilder -from bibx._entities.collection_builders.wos import WosCollectionBuilder from bibx.algorithms.sap import Sap +from bibx.article import Article +from bibx.builders.openalex import HandleReferences, OpenAlexCollectionBuilder +from bibx.builders.scopus_bib import ScopusBibCollectionBuilder +from bibx.builders.scopus_ris import ScopusRisCollectionBuilder +from bibx.builders.wos import WosCollectionBuilder +from bibx.collection import Collection from bibx.exceptions import BibXError logger = logging.getLogger(__name__) @@ -17,6 +17,7 @@ __all__ = [ "Article", "Collection", + "HandleReferences", "Sap", "query_openalex", "read_any", @@ -25,12 +26,16 @@ "read_wos", ] -__version__ = "0.3.1" +__version__ = "0.4.0" -def query_openalex(query: str, limit: int = 600) -> Collection: +def query_openalex( + query: str, + limit: int = 600, + references: HandleReferences = HandleReferences.BASIC, +) -> Collection: """Query OpenAlex and return a collection.""" - return OpenAlexCollectionBuilder(query, limit).build() + return OpenAlexCollectionBuilder(query, limit, references=references).build() def read_scopus_bib(*files: TextIO) -> Collection: diff --git a/src/bibx/_entities/__init__.py b/src/bibx/_entities/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/bibx/_entities/collection_builders/__init__.py b/src/bibx/_entities/collection_builders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/bibx/_entities/collection_builders/base.py b/src/bibx/_entities/collection_builders/base.py deleted file mode 100644 index 0f79dc2..0000000 --- a/src/bibx/_entities/collection_builders/base.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import Protocol - -from bibx._entities.collection import Collection - - -class CollectionBuilder(Protocol): - def build(self) -> Collection: ... diff --git a/src/bibx/_entities/collection_builders/cross_ref.py b/src/bibx/_entities/collection_builders/cross_ref.py deleted file mode 100644 index 0239d0b..0000000 --- a/src/bibx/_entities/collection_builders/cross_ref.py +++ /dev/null @@ -1,15 +0,0 @@ -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder - - -class CrossRefCollectionBuilder(CollectionBuilder): - def __init__(self, query: str, count: int = 100) -> None: - self._query = query - self._count = count - - def with_count(self, count: int) -> "CrossRefCollectionBuilder": - self._count = count - return self - - def build(self) -> Collection: - return Collection([]) diff --git a/src/bibx/_entities/collection_builders/generic.py b/src/bibx/_entities/collection_builders/generic.py deleted file mode 100644 index 4a62245..0000000 --- a/src/bibx/_entities/collection_builders/generic.py +++ /dev/null @@ -1,10 +0,0 @@ -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder - - -class GenericCollectionBuilder(CollectionBuilder): - def __init__(self, *collections: Collection) -> None: - self._collections = collections - - def build(self) -> Collection: - return Collection([]) diff --git a/src/bibx/algorithms/preprocess.py b/src/bibx/algorithms/preprocess.py index cfba3a8..48598f1 100644 --- a/src/bibx/algorithms/preprocess.py +++ b/src/bibx/algorithms/preprocess.py @@ -5,7 +5,7 @@ from xlsxwriter import Workbook from xlsxwriter.worksheet import Worksheet -from bibx import Collection +from bibx.collection import Collection from .sap import BRANCH, LEAF, ROOT, TRUNK, Sap diff --git a/src/bibx/algorithms/sap.py b/src/bibx/algorithms/sap.py index e789638..b2d7396 100644 --- a/src/bibx/algorithms/sap.py +++ b/src/bibx/algorithms/sap.py @@ -4,7 +4,8 @@ import networkx as nx from networkx.algorithms.community.louvain import louvain_communities -from bibx import Article, Collection +from bibx.article import Article +from bibx.collection import Collection YEAR = "year" LEAF = "leaf" diff --git a/src/bibx/_entities/article.py b/src/bibx/article.py similarity index 89% rename from src/bibx/_entities/article.py rename to src/bibx/article.py index e5372c1..08fb3e4 100644 --- a/src/bibx/_entities/article.py +++ b/src/bibx/article.py @@ -15,6 +15,8 @@ def _keep_longest(a: str, b: str) -> str: @dataclass class Article: + """A scientific article.""" + label: str ids: set[str] authors: list[str] = field(default_factory=list) @@ -55,10 +57,12 @@ def merge(self, other: "Article") -> "Article": @property def key(self) -> str: + """Return the first ID of the article.""" return next(iter(sorted(self.ids))) @property def simple_label(self) -> Optional[str]: + """Return a simple label for the article.""" pieces = { "AU": self.authors[0].replace(",", "") if self.authors else None, "PY": str(self.year) if self.year else None, @@ -73,6 +77,7 @@ def simple_label(self) -> Optional[str]: @property def permalink(self) -> Optional[str]: + """Return the permalink of the article.""" if self._permalink is not None: return self._permalink if self.doi is not None: @@ -81,21 +86,25 @@ def permalink(self) -> Optional[str]: @property def simple_id(self) -> Optional[str]: + """Return a simple ID for the article.""" if self.authors and self.year is not None: author = self.authors[0].split(" ")[0].replace(",", "") return f"{author}{self.year}".lower() return None def __repr__(self) -> str: + """Return a string representation of the article.""" return f"Article(ids={self.ids!r}, authors={self.authors!r})" def add_simple_id(self) -> "Article": + """Add a simple ID to the article.""" if self.simple_id is None: return self self.ids.add(f"simple:{self.simple_id}") return self def set_simple_label(self) -> "Article": + """Set the simple label as the label of the article.""" if self.simple_label is None: return self self.label = self.simple_label @@ -104,6 +113,7 @@ def set_simple_label(self) -> "Article": def info( self, ) -> dict[str, Union[str, int, list[str], None]]: + """Return a dictionary with the information of the article.""" return { "permalink": self.permalink, "label": self.label, diff --git a/src/bibx/builders/__init__.py b/src/bibx/builders/__init__.py new file mode 100644 index 0000000..6df31bd --- /dev/null +++ b/src/bibx/builders/__init__.py @@ -0,0 +1 @@ +"""Builders for diverse Collection types.""" diff --git a/src/bibx/builders/base.py b/src/bibx/builders/base.py new file mode 100644 index 0000000..cdb7bfd --- /dev/null +++ b/src/bibx/builders/base.py @@ -0,0 +1,11 @@ +from typing import Protocol + +from bibx.collection import Collection + + +class CollectionBuilder(Protocol): + """Protocol for classes that build collections of articles.""" + + def build(self) -> Collection: + """Build a collection of articles.""" + ... diff --git a/src/bibx/_entities/collection_builders/openalex.py b/src/bibx/builders/openalex.py similarity index 94% rename from src/bibx/_entities/collection_builders/openalex.py rename to src/bibx/builders/openalex.py index 9be957f..5b3ac9e 100644 --- a/src/bibx/_entities/collection_builders/openalex.py +++ b/src/bibx/builders/openalex.py @@ -3,9 +3,9 @@ from typing import Optional from urllib.parse import urlparse -from bibx._entities.article import Article -from bibx._entities.collection import Collection +from bibx.article import Article from bibx.clients.openalex import OpenAlexClient, Work +from bibx.collection import Collection from .base import CollectionBuilder @@ -13,11 +13,15 @@ class HandleReferences(Enum): + """How to handle references when building an openalex collection.""" + BASIC = "basic" FULL = "full" class OpenAlexCollectionBuilder(CollectionBuilder): + """Builder for collections of articles from the OpenAlex API.""" + def __init__( self, query: str, diff --git a/src/bibx/_entities/collection_builders/scopus_bib.py b/src/bibx/builders/scopus_bib.py similarity index 92% rename from src/bibx/_entities/collection_builders/scopus_bib.py rename to src/bibx/builders/scopus_bib.py index 3714348..e953f72 100644 --- a/src/bibx/_entities/collection_builders/scopus_bib.py +++ b/src/bibx/builders/scopus_bib.py @@ -6,19 +6,23 @@ import bibtexparser -from bibx._entities.article import Article -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder +from bibx.article import Article +from bibx.collection import Collection from bibx.exceptions import MissingCriticalInformationError +from .base import CollectionBuilder + class ScopusBibCollectionBuilder(CollectionBuilder): + """Builder for collections of articles from Scopus BibTeX files.""" + def __init__(self, *scopus_files: TextIO) -> None: self._files = scopus_files for file in self._files: file.seek(0) def build(self) -> Collection: + """Build a collection of articles from Scopus BibTeX files.""" articles = self._get_articles_from_files() return Collection(Collection.deduplicate_articles(list(articles))) diff --git a/src/bibx/_entities/collection_builders/scopus_ris.py b/src/bibx/builders/scopus_ris.py similarity index 96% rename from src/bibx/_entities/collection_builders/scopus_ris.py rename to src/bibx/builders/scopus_ris.py index b1ac019..da828d6 100644 --- a/src/bibx/_entities/collection_builders/scopus_ris.py +++ b/src/bibx/builders/scopus_ris.py @@ -4,11 +4,12 @@ from collections.abc import Iterable from typing import Optional, TextIO -from bibx._entities.article import Article -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder +from bibx.article import Article +from bibx.collection import Collection from bibx.exceptions import InvalidScopusFileError, MissingCriticalInformationError +from .base import CollectionBuilder + logger = logging.getLogger(__name__) _RIS_PATTERN = re.compile(r"^(((?P[A-Z0-9]{2}))[ ]{2}-[ ]{1})?(?P(.*))$") @@ -37,12 +38,15 @@ def _joined(raw: Optional[list[str]]) -> Optional[str]: class ScopusRisCollectionBuilder(CollectionBuilder): + """Builder for collections of articles from Scopus RIS files.""" + def __init__(self, *ris_files: TextIO) -> None: self._files = ris_files for file in self._files: file.seek(0) def build(self) -> Collection: + """Build a collection of articles from Scopus RIS files.""" articles = self._get_articles_from_files() return Collection(Collection.deduplicate_articles(list(articles))) diff --git a/src/bibx/_entities/collection_builders/simple.py b/src/bibx/builders/simple.py similarity index 50% rename from src/bibx/_entities/collection_builders/simple.py rename to src/bibx/builders/simple.py index 742b3fc..278c418 100644 --- a/src/bibx/_entities/collection_builders/simple.py +++ b/src/bibx/builders/simple.py @@ -1,11 +1,15 @@ -from bibx._entities.article import Article -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder +from bibx.article import Article +from bibx.collection import Collection + +from .base import CollectionBuilder class SimpleCollectionBuilder(CollectionBuilder): + """Builder for collections of articles from a list of articles.""" + def __init__(self, articles: list[Article]) -> None: self.articles = articles def build(self) -> Collection: + """Build a collection of articles from a list of articles.""" return Collection(Collection.deduplicate_articles(self.articles)) diff --git a/src/bibx/_entities/collection_builders/wos.py b/src/bibx/builders/wos.py similarity index 72% rename from src/bibx/_entities/collection_builders/wos.py rename to src/bibx/builders/wos.py index 01d0c38..bcd40fb 100644 --- a/src/bibx/_entities/collection_builders/wos.py +++ b/src/bibx/builders/wos.py @@ -7,15 +7,16 @@ from dataclasses import dataclass from typing import Any, Callable, ClassVar, Optional, TextIO, Union -from bibx._entities.article import Article -from bibx._entities.collection import Collection -from bibx._entities.collection_builders.base import CollectionBuilder +from bibx.article import Article +from bibx.collection import Collection from bibx.exceptions import ( InvalidIsiLineError, InvalidIsiReferenceError, MissingCriticalInformationError, ) +from .base import CollectionBuilder + logger = logging.getLogger(__name__) @@ -46,7 +47,7 @@ def _integer(values: list[str]) -> int: @dataclass(frozen=True) -class IsiField: +class _IsiField: key: str description: str parser: Callable @@ -57,6 +58,8 @@ def parse(self, value: list[str]) -> Union[str, int, list[str]]: class WosCollectionBuilder(CollectionBuilder): + """Builder for collections of articles from Web of Science (WoS) ISI files.""" + ISI_LINE_PATTERN = re.compile( r"^(null|.)?((?P[A-Z0-9]{2})| )( (?P.*))?$" ) @@ -72,175 +75,177 @@ class WosCollectionBuilder(CollectionBuilder): ) FIELDS: ClassVar = { - "AB": IsiField("AB", "Abstract", _joined, ["abstract"]), - "AF": IsiField("AF", "Author Full Names", _ident, ["author_full_names"]), - "AR": IsiField("AR", "Article Number", _joined, ["article_number"]), - "AU": IsiField("AU", "Authors", _ident, ["authors"]), - "BA": IsiField("BA", "Book Authors", _ident, ["book_authors"]), - "BE": IsiField("BE", "Editors", _ident, ["editors"]), - "BF": IsiField( + "AB": _IsiField("AB", "Abstract", _joined, ["abstract"]), + "AF": _IsiField("AF", "Author Full Names", _ident, ["author_full_names"]), + "AR": _IsiField("AR", "Article Number", _joined, ["article_number"]), + "AU": _IsiField("AU", "Authors", _ident, ["authors"]), + "BA": _IsiField("BA", "Book Authors", _ident, ["book_authors"]), + "BE": _IsiField("BE", "Editors", _ident, ["editors"]), + "BF": _IsiField( "BF", "Book Authors Full Name", _ident, ["book_authors_full_name"] ), - "BN": IsiField( + "BN": _IsiField( "BN", "International Standard Book Number (ISBN)", _joined, ["international_standard_book_number"], ), - "BP": IsiField("BP", "Beginning Page", _joined, ["beginning_page"]), - "BS": IsiField("BS", "Book Series Subtitle", _joined, ["book_series_subtitle"]), - "C1": IsiField("C1", "Author Address", _ident, ["author_address"]), - "CA": IsiField("CA", "Group Authors", _ident, ["group_authors"]), - "CL": IsiField("CL", "Conference Location", _joined, ["conference_location"]), - "CR": IsiField( + "BP": _IsiField("BP", "Beginning Page", _joined, ["beginning_page"]), + "BS": _IsiField( + "BS", "Book Series Subtitle", _joined, ["book_series_subtitle"] + ), + "C1": _IsiField("C1", "Author Address", _ident, ["author_address"]), + "CA": _IsiField("CA", "Group Authors", _ident, ["group_authors"]), + "CL": _IsiField("CL", "Conference Location", _joined, ["conference_location"]), + "CR": _IsiField( "CR", "Cited References", _ident, ["cited_references", "references", "citations"], ), - "CT": IsiField( + "CT": _IsiField( "CT", "Conference Title", functools.partial(_joined, separator="\n"), ["conference_title"], ), - "CY": IsiField("CY", "Conference Date", _joined, ["conference_date"]), - "DE": IsiField("DE", "Author Keywords", _delimited, ["author_keywords"]), - "DI": IsiField( + "CY": _IsiField("CY", "Conference Date", _joined, ["conference_date"]), + "DE": _IsiField("DE", "Author Keywords", _delimited, ["author_keywords"]), + "DI": _IsiField( "DI", "Digital Object Identifier (DOI)", _joined, ["digital_object_identifier", "DOI"], ), - "DT": IsiField("DT", "Document Type", _joined, ["document_type"]), - "D2": IsiField( + "DT": _IsiField("DT", "Document Type", _joined, ["document_type"]), + "D2": _IsiField( "D2", "Book Digital Object Identifier (DOI)", _joined, ["book_digital_object_identifier"], ), - "ED": IsiField("ED", "Editors", _ident, ["editors"]), - "EM": IsiField("EM", "E-mail Address", _ident, ["email_address"]), - "EI": IsiField( + "ED": _IsiField("ED", "Editors", _ident, ["editors"]), + "EM": _IsiField("EM", "E-mail Address", _ident, ["email_address"]), + "EI": _IsiField( "EI", "Electronic International Standard Serial Number (eISSN)", _joined, ["eissn"], ), - "EP": IsiField("EP", "Ending Page", _joined, ["ending_page"]), - "FU": IsiField( + "EP": _IsiField("EP", "Ending Page", _joined, ["ending_page"]), + "FU": _IsiField( "FU", "Funding Agency and Grant Number", _delimited, ["funding_agency_and_grant_number"], ), - "FX": IsiField("FX", "Funding Text", _joined, ["funding_text"]), - "GA": IsiField( + "FX": _IsiField("FX", "Funding Text", _joined, ["funding_text"]), + "GA": _IsiField( "GA", "Document Delivery Number", _joined, ["document_delivery_number"], ), - "GP": IsiField("GP", "Book Group Authors", _ident, ["book_group_authors"]), - "HO": IsiField("HO", "Conference Host", _joined, ["conference_host"]), - "ID": IsiField( + "GP": _IsiField("GP", "Book Group Authors", _ident, ["book_group_authors"]), + "HO": _IsiField("HO", "Conference Host", _joined, ["conference_host"]), + "ID": _IsiField( "ID", "Keywords Plus", _delimited, ["keywords_plus", "keywords"] ), - "IS": IsiField("IS", "Issue", _joined, ["issue"]), - "J9": IsiField( + "IS": _IsiField("IS", "Issue", _joined, ["issue"]), + "J9": _IsiField( "J9", "29-Character Source Abbreviation", _joined, ["source_abbreviation"], ), - "JI": IsiField( + "JI": _IsiField( "JI", "ISO Source Abbreviation", _joined, ["iso_source_abbreviation"] ), - "LA": IsiField("LA", "Language", _joined, ["language"]), - "MA": IsiField("MA", "Meeting Abstract", _joined, ["meeting_abstract"]), - "NR": IsiField( + "LA": _IsiField("LA", "Language", _joined, ["language"]), + "MA": _IsiField("MA", "Meeting Abstract", _joined, ["meeting_abstract"]), + "NR": _IsiField( "NR", "Cited Reference Count", _integer, ["cited_reference_count"] ), - "OI": IsiField( + "OI": _IsiField( "OI", "ORCID Identifier (Open Researcher and Contributor ID)", _delimited, ["orcid_identifier"], ), - "P2": IsiField( + "P2": _IsiField( "P2", "Chapter count (Book Citation Index)", _integer, ["chapter_count"], ), - "PA": IsiField( + "PA": _IsiField( "PA", "Publisher Address", functools.partial(_joined, separator="\n"), ["publisher_address"], ), - "PD": IsiField("PD", "Publication Date", _joined, ["publication_date"]), - "PG": IsiField("PG", "Page Count", _integer, ["page_count"]), - "PI": IsiField("PI", "Publisher City", _joined, ["publisher_city"]), - "PM": IsiField("PM", "PubMed ID", _joined, ["pubmed_id"]), - "PN": IsiField("PN", "Part Number", _joined, ["part_number"]), - "PT": IsiField( + "PD": _IsiField("PD", "Publication Date", _joined, ["publication_date"]), + "PG": _IsiField("PG", "Page Count", _integer, ["page_count"]), + "PI": _IsiField("PI", "Publisher City", _joined, ["publisher_city"]), + "PM": _IsiField("PM", "PubMed ID", _joined, ["pubmed_id"]), + "PN": _IsiField("PN", "Part Number", _joined, ["part_number"]), + "PT": _IsiField( "PT", "Publication Type (J=Journal; B=Book; S=Series; P=Patent)", _joined, ["publication_type"], ), - "PU": IsiField("PU", "Publisher", _joined, ["publisher"]), - "PY": IsiField( + "PU": _IsiField("PU", "Publisher", _joined, ["publisher"]), + "PY": _IsiField( "PY", "Year Published", _integer, ["year_published", "year", "publication_year"], ), - "RI": IsiField( + "RI": _IsiField( "RI", "ResearcherID Number", _delimited, ["researcherid_number"] ), - "RP": IsiField("RP", "Reprint Address", _joined, ["reprint_address"]), - "SC": IsiField("SC", "Research Areas", _delimited, ["research_areas"]), - "SE": IsiField("SE", "Book Series Title", _joined, ["book_series_title"]), - "SI": IsiField("SI", "Special Issue", _joined, ["special_issue"]), - "SN": IsiField( + "RP": _IsiField("RP", "Reprint Address", _joined, ["reprint_address"]), + "SC": _IsiField("SC", "Research Areas", _delimited, ["research_areas"]), + "SE": _IsiField("SE", "Book Series Title", _joined, ["book_series_title"]), + "SI": _IsiField("SI", "Special Issue", _joined, ["special_issue"]), + "SN": _IsiField( "SN", "International Standard Serial Number (ISSN)", _joined, ["issn"], ), - "SO": IsiField("SO", "Publication Name", _joined, ["publication_name"]), - "SP": IsiField( + "SO": _IsiField("SO", "Publication Name", _joined, ["publication_name"]), + "SP": _IsiField( "SP", "Conference Sponsors", functools.partial(_delimited, delimiter=", "), ["conference_sponsors"], ), - "SU": IsiField("SU", "Supplement", _joined, ["supplement"]), - "TC": IsiField( + "SU": _IsiField("SU", "Supplement", _joined, ["supplement"]), + "TC": _IsiField( "TC", "Web of Science Core Collection Times Cited Count", _integer, ["wos_times_cited_count", "wos_times_cited"], ), - "TI": IsiField("TI", "Document Title", _joined, ["title"]), - "U1": IsiField("U1", "Usage Count (Last 180 Days)", _integer, ["usage_count"]), - "U2": IsiField("U2", "Usage Count (Since 2013)", _integer, ["usage_count"]), - "UT": IsiField( + "TI": _IsiField("TI", "Document Title", _joined, ["title"]), + "U1": _IsiField("U1", "Usage Count (Last 180 Days)", _integer, ["usage_count"]), + "U2": _IsiField("U2", "Usage Count (Since 2013)", _integer, ["usage_count"]), + "UT": _IsiField( "UT", "Unique Article Identifier", _joined, ["unique_article_identifier"], ), - "VL": IsiField("VL", "Volume", _joined, ["volume"]), - "WC": IsiField( + "VL": _IsiField("VL", "Volume", _joined, ["volume"]), + "WC": _IsiField( "WC", "Web of Science Categories", _delimited, ["web_of_science_categories"], ), - "Z9": IsiField( + "Z9": _IsiField( "Z9", "Total Times Cited Count (WoS Core, BCI, and CSCD)", _integer, @@ -254,6 +259,7 @@ def __init__(self, *isi_files: TextIO) -> None: file.seek(0) def build(self) -> Collection: + """Build a collection of articles from Web of Science (WoS) ISI files.""" articles = self._get_articles_from_files() return Collection(list(articles)) diff --git a/src/bibx/cli.py b/src/bibx/cli.py index 4ff177e..323bfd1 100644 --- a/src/bibx/cli.py +++ b/src/bibx/cli.py @@ -7,7 +7,6 @@ from rich import print as rprint from bibx import ( - Collection, query_openalex, read_any, read_scopus_bib, @@ -16,6 +15,8 @@ ) from bibx.algorithms.preprocess import Preprocess from bibx.algorithms.sap import Sap +from bibx.builders.openalex import HandleReferences +from bibx.collection import Collection app = typer.Typer() @@ -76,9 +77,15 @@ def sap(filename: str) -> None: @app.command() -def openalex(query: list[str]) -> None: +def openalex( + query: list[str], + references: HandleReferences = typer.Option( + help="how to handle references", + default=HandleReferences.BASIC, + ), +) -> None: """Run the sap algorithm on a seed file of any supported format.""" - c = query_openalex(" ".join(query)) + c = query_openalex(" ".join(query), references=references) s = Sap() graph = s.create_graph(c) graph = s.clean_graph(graph) diff --git a/src/bibx/clients/openalex.py b/src/bibx/clients/openalex.py index 8219e84..ea39096 100644 --- a/src/bibx/clients/openalex.py +++ b/src/bibx/clients/openalex.py @@ -155,7 +155,7 @@ def list_recent_articles(self, query: str, limit: int = 600) -> list[Work]: "fetched %d works in page %d", len(work_response.results), page ) results.extend(work_response.results) - if page * MAX_WORKS_PER_PAGE >= work_response.meta.count: + if page * MAX_WORKS_PER_PAGE >= min(work_response.meta.count, limit): break except (requests.RequestException, ValidationError) as error: raise OpenAlexError(str(error)) from error diff --git a/src/bibx/_entities/collection.py b/src/bibx/collection.py similarity index 97% rename from src/bibx/_entities/collection.py rename to src/bibx/collection.py index d03a085..fc33677 100644 --- a/src/bibx/_entities/collection.py +++ b/src/bibx/collection.py @@ -7,13 +7,15 @@ import networkx as nx -from bibx._entities.article import Article +from .article import Article logger = logging.getLogger(__name__) @dataclass class Collection: + """A collection of scientific articles.""" + articles: list[Article] def merge(self, other: "Collection") -> "Collection": @@ -81,6 +83,7 @@ def deduplicate_articles( cls, articles: list[Article], ) -> list[Article]: + """Deduplicate a list of articles.""" article_by_id = cls._uniqe_articles_by_id(articles) unique_articles: list[Article] = [] @@ -108,6 +111,7 @@ def deduplicate_articles( @property def citation_pairs(self) -> Iterable[tuple[Article, Article]]: + """Return a generator with all citation pairs.""" for article in self.articles: if not article.references: continue diff --git a/tests/entities/collection_builders/test_scopus_bib.py b/tests/builders/test_scopus_bib.py similarity index 100% rename from tests/entities/collection_builders/test_scopus_bib.py rename to tests/builders/test_scopus_bib.py diff --git a/tests/entities/collection_builders/test_scopus_ris.py b/tests/builders/test_scopus_ris.py similarity index 100% rename from tests/entities/collection_builders/test_scopus_ris.py rename to tests/builders/test_scopus_ris.py diff --git a/tests/entities/collection_builders/test_wos.py b/tests/builders/test_wos.py similarity index 100% rename from tests/entities/collection_builders/test_wos.py rename to tests/builders/test_wos.py diff --git a/tests/entities/test_collection.py b/tests/test_collection.py similarity index 97% rename from tests/entities/test_collection.py rename to tests/test_collection.py index d65a083..de9bcc5 100644 --- a/tests/entities/test_collection.py +++ b/tests/test_collection.py @@ -1,5 +1,5 @@ -from bibx._entities.article import Article -from bibx._entities.collection import Collection +from bibx.article import Article +from bibx.collection import Collection articles = [ Article(