Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions src/bibx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
import logging
from typing import TextIO

from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx._entities.collection_builders.openalex import OpenAlexCollectionBuilder
from bibx._entities.collection_builders.scopus_bib import ScopusBibCollectionBuilder
from bibx._entities.collection_builders.scopus_ris import ScopusRisCollectionBuilder
from bibx._entities.collection_builders.wos import WosCollectionBuilder
from bibx.algorithms.sap import Sap
from bibx.article import Article
from bibx.builders.openalex import HandleReferences, OpenAlexCollectionBuilder
from bibx.builders.scopus_bib import ScopusBibCollectionBuilder
from bibx.builders.scopus_ris import ScopusRisCollectionBuilder
from bibx.builders.wos import WosCollectionBuilder
from bibx.collection import Collection
from bibx.exceptions import BibXError

logger = logging.getLogger(__name__)

__all__ = [
"Article",
"Collection",
"HandleReferences",
"Sap",
"query_openalex",
"read_any",
Expand All @@ -25,12 +26,16 @@
"read_wos",
]

__version__ = "0.3.1"
__version__ = "0.4.0"


def query_openalex(query: str, limit: int = 600) -> Collection:
def query_openalex(
query: str,
limit: int = 600,
references: HandleReferences = HandleReferences.BASIC,
) -> Collection:
"""Query OpenAlex and return a collection."""
return OpenAlexCollectionBuilder(query, limit).build()
return OpenAlexCollectionBuilder(query, limit, references=references).build()


def read_scopus_bib(*files: TextIO) -> Collection:
Expand Down
Empty file removed src/bibx/_entities/__init__.py
Empty file.
Empty file.
7 changes: 0 additions & 7 deletions src/bibx/_entities/collection_builders/base.py

This file was deleted.

15 changes: 0 additions & 15 deletions src/bibx/_entities/collection_builders/cross_ref.py

This file was deleted.

10 changes: 0 additions & 10 deletions src/bibx/_entities/collection_builders/generic.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/bibx/algorithms/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from xlsxwriter import Workbook
from xlsxwriter.worksheet import Worksheet

from bibx import Collection
from bibx.collection import Collection

from .sap import BRANCH, LEAF, ROOT, TRUNK, Sap

Expand Down
3 changes: 2 additions & 1 deletion src/bibx/algorithms/sap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import networkx as nx
from networkx.algorithms.community.louvain import louvain_communities

from bibx import Article, Collection
from bibx.article import Article
from bibx.collection import Collection

YEAR = "year"
LEAF = "leaf"
Expand Down
10 changes: 10 additions & 0 deletions src/bibx/_entities/article.py → src/bibx/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def _keep_longest(a: str, b: str) -> str:

@dataclass
class Article:
"""A scientific article."""

label: str
ids: set[str]
authors: list[str] = field(default_factory=list)
Expand Down Expand Up @@ -55,10 +57,12 @@ def merge(self, other: "Article") -> "Article":

@property
def key(self) -> str:
    """Return the first ID of the article in sorted order.

    ``ids`` is a set, so it has no inherent order; the IDs are sorted and
    the first one in that ordering is returned, which makes the result
    deterministic. Raises ``StopIteration`` when ``ids`` is empty.
    """
    ordered_ids = sorted(self.ids)
    return next(iter(ordered_ids))

@property
def simple_label(self) -> Optional[str]:
"""Return a simple label for the article."""
pieces = {
"AU": self.authors[0].replace(",", "") if self.authors else None,
"PY": str(self.year) if self.year else None,
Expand All @@ -73,6 +77,7 @@ def simple_label(self) -> Optional[str]:

@property
def permalink(self) -> Optional[str]:
"""Return the permalink of the article."""
if self._permalink is not None:
return self._permalink
if self.doi is not None:
Expand All @@ -81,21 +86,25 @@ def permalink(self) -> Optional[str]:

@property
def simple_id(self) -> Optional[str]:
    """Return a short lowercase ID built from the first author and the year.

    The ID is the first space-separated token of the first author, with
    commas removed, followed by the year. ``None`` is returned when the
    article has no authors or no year.
    """
    if not self.authors or self.year is None:
        return None
    first_token = self.authors[0].split(" ")[0]
    author = first_token.replace(",", "")
    return f"{author}{self.year}".lower()

def __repr__(self) -> str:
    """Return a string representation of the article.

    Only ``ids`` and ``authors`` are included; the remaining fields are
    left out of the representation.
    """
    return f"Article(ids={self.ids!r}, authors={self.authors!r})"

def add_simple_id(self) -> "Article":
    """Store the simple ID among the article's IDs and return the article.

    When ``simple_id`` is available it is added to ``ids`` with a
    ``simple:`` prefix; otherwise ``ids`` is left untouched. ``self`` is
    returned in both cases so that calls can be chained.
    """
    if self.simple_id is not None:
        self.ids.add(f"simple:{self.simple_id}")
    return self

def set_simple_label(self) -> "Article":
"""Set the simple label as the label of the article."""
if self.simple_label is None:
return self
self.label = self.simple_label
Expand All @@ -104,6 +113,7 @@ def set_simple_label(self) -> "Article":
def info(
self,
) -> dict[str, Union[str, int, list[str], None]]:
"""Return a dictionary with the information of the article."""
return {
"permalink": self.permalink,
"label": self.label,
Expand Down
1 change: 1 addition & 0 deletions src/bibx/builders/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Builders for diverse Collection types."""
11 changes: 11 additions & 0 deletions src/bibx/builders/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import Protocol

from bibx.collection import Collection


class CollectionBuilder(Protocol):
    """Protocol for classes that build collections of articles.

    A builder exposes a single ``build()`` method that takes no arguments
    and returns a ``Collection``.
    """

    def build(self) -> Collection:
        """Build a collection of articles.

        This is only the interface declaration; each concrete builder
        supplies its own implementation.
        """
        ...
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,25 @@
from typing import Optional
from urllib.parse import urlparse

from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx.article import Article
from bibx.clients.openalex import OpenAlexClient, Work
from bibx.collection import Collection

from .base import CollectionBuilder

logger = logging.getLogger(__name__)


class HandleReferences(Enum):
    """How to handle references when building an openalex collection."""

    # NOTE(review): what BASIC and FULL each do is decided by the code that
    # consumes this enum, which is not visible here -- confirm there before
    # documenting the difference between the two members.
    BASIC = "basic"
    FULL = "full"


class OpenAlexCollectionBuilder(CollectionBuilder):
"""Builder for collections of articles from the OpenAlex API."""

def __init__(
self,
query: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,23 @@

import bibtexparser

from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx._entities.collection_builders.base import CollectionBuilder
from bibx.article import Article
from bibx.collection import Collection
from bibx.exceptions import MissingCriticalInformationError

from .base import CollectionBuilder


class ScopusBibCollectionBuilder(CollectionBuilder):
"""Builder for collections of articles from Scopus BibTeX files."""

def __init__(self, *scopus_files: TextIO) -> None:
self._files = scopus_files
for file in self._files:
file.seek(0)

def build(self) -> Collection:
"""Build a collection of articles from Scopus BibTeX files."""
articles = self._get_articles_from_files()
return Collection(Collection.deduplicate_articles(list(articles)))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from collections.abc import Iterable
from typing import Optional, TextIO

from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx._entities.collection_builders.base import CollectionBuilder
from bibx.article import Article
from bibx.collection import Collection
from bibx.exceptions import InvalidScopusFileError, MissingCriticalInformationError

from .base import CollectionBuilder

logger = logging.getLogger(__name__)

_RIS_PATTERN = re.compile(r"^(((?P<key>[A-Z0-9]{2}))[ ]{2}-[ ]{1})?(?P<value>(.*))$")
Expand Down Expand Up @@ -37,12 +38,15 @@ def _joined(raw: Optional[list[str]]) -> Optional[str]:


class ScopusRisCollectionBuilder(CollectionBuilder):
"""Builder for collections of articles from Scopus RIS files."""

def __init__(self, *ris_files: TextIO) -> None:
self._files = ris_files
for file in self._files:
file.seek(0)

def build(self) -> Collection:
"""Build a collection of articles from Scopus RIS files."""
articles = self._get_articles_from_files()
return Collection(Collection.deduplicate_articles(list(articles)))

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx._entities.collection_builders.base import CollectionBuilder
from bibx.article import Article
from bibx.collection import Collection

from .base import CollectionBuilder


class SimpleCollectionBuilder(CollectionBuilder):
    """Builder that turns an in-memory list of articles into a collection."""

    def __init__(self, articles: list[Article]) -> None:
        """Keep the articles that ``build`` will turn into a collection."""
        self.articles = articles

    def build(self) -> Collection:
        """Deduplicate the stored articles and wrap them in a ``Collection``."""
        unique_articles = Collection.deduplicate_articles(self.articles)
        return Collection(unique_articles)
Loading