Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/bibx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"read_wos",
]

__version__ = "0.3.0"
__version__ = "0.3.1"


def query_openalex(query: str, limit: int = 600) -> Collection:
Expand Down
27 changes: 19 additions & 8 deletions src/bibx/_entities/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@ def _keep(a: T, b: T) -> T:
return a if a is not None else b


def _keep_longest(a: str, b: str) -> str:
return a if len(a) > len(b) else b


@dataclass
class Article:
label: str
ids: set[str]
authors: list[str] = field(default_factory=list)
year: Optional[int] = None
Expand All @@ -20,7 +25,6 @@ class Article:
issue: Optional[str] = None
page: Optional[str] = None
doi: Optional[str] = None
_label: Optional[str] = None
_permalink: Optional[str] = None
times_cited: Optional[int] = None
references: list["Article"] = field(default_factory=list)
Expand All @@ -31,6 +35,7 @@ class Article:
def merge(self, other: "Article") -> "Article":
"""Merge two articles into a new one."""
return Article(
label=_keep_longest(self.label, other.label),
ids=self.ids.union(other.ids),
authors=self.authors if self.authors else other.authors,
year=_keep(self.year, other.year),
Expand All @@ -40,7 +45,6 @@ def merge(self, other: "Article") -> "Article":
issue=_keep(self.issue, other.issue),
page=_keep(self.page, other.page),
doi=_keep(self.doi, other.doi),
_label=_keep(self._label, other._label),
_permalink=_keep(self._permalink, other._permalink),
times_cited=_keep(self.times_cited, other.times_cited),
references=self.references or other.references,
Expand All @@ -54,17 +58,17 @@ def key(self) -> str:
return next(iter(sorted(self.ids)))

@property
def label(self) -> str:
if self._label is not None:
return self._label
def simple_label(self) -> Optional[str]:
pieces = {
"AU": self.authors[0].replace(",", "") if self.authors else "anonymous",
"AU": self.authors[0].replace(",", "") if self.authors else None,
"PY": str(self.year) if self.year else None,
"J9": str(self.journal) if self.journal else None,
"VL": f"V{self.volume}" if self.volume else None,
"BP": f"P{self.page}" if self.page else None,
"DI": f"DOI {self.doi}" if self.doi else None,
}
if not any(pieces.values()):
return None
return ", ".join(value for value in pieces.values() if value)

@property
Expand All @@ -85,10 +89,17 @@ def simple_id(self) -> Optional[str]:
def __repr__(self) -> str:
return f"Article(ids={self.ids!r}, authors={self.authors!r})"

def add_simple_id(self) -> None:
def add_simple_id(self) -> "Article":
if self.simple_id is None:
return
return self
self.ids.add(f"simple:{self.simple_id}")
return self

def set_simple_label(self) -> "Article":
    """Replace ``label`` with ``simple_label`` when one can be computed.

    The existing label is left untouched when ``simple_label`` is ``None``.

    Returns:
        ``self``, so the call can be chained.
    """
    # ``simple_label`` is a computed property; read it once instead of
    # rebuilding it for both the check and the assignment.
    simple_label = self.simple_label
    if simple_label is not None:
        self.label = simple_label
    return self

def info(
self,
Expand Down
3 changes: 2 additions & 1 deletion src/bibx/_entities/collection_builders/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _extract_doi(url: str) -> str:
@classmethod
def _work_to_article(cls, work: Work) -> Article:
article = Article(
label=work.id,
ids={
f"{source}:{id_}"
if source != "doi"
Expand All @@ -93,7 +94,6 @@ def _work_to_article(cls, work: Work) -> Article:
issue=work.biblio.issue,
page=work.biblio.first_page,
doi=cls._extract_doi(work.doi) if work.doi else None,
_label=work.id,
_permalink=work.primary_location and work.primary_location.landing_page_url,
times_cited=work.cited_by_count,
references=[cls._reference_to_article(r) for r in work.referenced_works],
Expand All @@ -108,6 +108,7 @@ def _work_to_article(cls, work: Work) -> Article:
@staticmethod
def _reference_to_article(reference: str) -> Article:
return Article(
label=reference,
ids={f"openalex:{reference}"},
_permalink=reference,
sources={"openalex"},
Expand Down
47 changes: 25 additions & 22 deletions src/bibx/_entities/collection_builders/scopus_bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,29 @@ def _article_from_entry(self, entry: dict) -> Article:
doi = entry.get("doi")
if doi is not None:
ids.add(f"doi:{doi}")
article = Article(
ids=ids,
authors=entry["author"].split(" and "),
year=int(entry["year"]),
title=entry.get("title"),
journal=entry.get("journal"),
volume=entry.get("volume"),
issue=entry.get("issue"),
page=entry.get("art_number"),
doi=entry.get("doi"),
references=list(self._articles_from_references(entry.get("references"))),
keywords=entry.get("keywords", "").split("; "),
extra=entry,
sources={json.dumps(entry)},
times_cited=times_cited,
return (
Article(
label=doi or entry.get("title", "replaceme"),
ids=ids,
authors=entry["author"].split(" and "),
year=int(entry["year"]),
title=entry.get("title"),
journal=entry.get("journal"),
volume=entry.get("volume"),
issue=entry.get("issue"),
page=entry.get("art_number"),
doi=entry.get("doi"),
references=list(
self._articles_from_references(entry.get("references"))
),
keywords=entry.get("keywords", "").split("; "),
extra=entry,
sources={json.dumps(entry)},
times_cited=times_cited,
)
.add_simple_id()
.set_simple_label()
)
article.add_simple_id()
return article

def _articles_from_references(self, references: Optional[str]) -> Iterable[Article]:
if references is None:
Expand All @@ -76,13 +81,11 @@ def _article_from_reference(reference: str) -> Article:
author = reference.split(",", maxsplit=2)[0].strip()
match = re.search(r"(10.\d{4,9}/[-._;()/:A-Z0-9]+)", reference)
doi = match.groups()[0] if match else None
article = Article(
return Article(
label=reference,
ids=set() if doi is None else {f"doi:{doi}"},
authors=[author],
year=year,
_label=reference,
doi=doi,
sources={reference},
)
article.add_simple_id()
return article
).add_simple_id()
48 changes: 25 additions & 23 deletions src/bibx/_entities/collection_builders/scopus_ris.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def _article_form_reference(cls, scopusref: str) -> Article:
doi, _ = cls._find_doi(scopusref)
if not authors or not year:
raise MissingCriticalInformationError()
article = Article(
return Article(
label=scopusref,
ids=set() if doi is None else {f"doi:{doi}"},
authors=[f"{first_name} {last_name.replace(' ', '').replace('.', '')}"],
year=int(year),
Expand All @@ -114,9 +115,7 @@ def _article_form_reference(cls, scopusref: str) -> Article:
volume=volume_info.get("volume"),
page=volume_info.get("page"),
doi=doi,
)
article.add_simple_id()
return article
).add_simple_id()

@classmethod
def _parse_references(cls, refs: list[str]) -> list[Article]:
Expand All @@ -134,7 +133,6 @@ def _parse_references(cls, refs: list[str]) -> list[Article]:
def _ris_to_dict(record: str) -> dict[str, list[str]]:
parsed = defaultdict(list)
current = None

for line in record.split("\n"):
match = _RIS_PATTERN.match(line)
if not match:
Expand Down Expand Up @@ -163,25 +161,29 @@ def _article_from_record(cls, record: str) -> Article:
authors = data.get("AU", [])
if not authors or not year:
raise MissingCriticalInformationError()
doi = data.get("DO")
article = Article(
ids=set() if doi is None else {f"doi:{doi}"},
title=_joined(data.get("TI")),
authors=authors,
year=year,
journal=_joined(data.get("J2")),
volume=_joined(data.get("VL")),
issue=_joined(data.get("IS")),
page=_joined(data.get("SP")),
doi=_joined(data.get("DO")),
keywords=data.get("KW", []),
references=cls._parse_references(data.get("N1:References", [])),
sources={"scopus"},
extra=data,
times_cited=times_cited,
doi_list = data.get("DO")
doi = doi_list[0] if doi_list else None
return (
Article(
label=doi or "replaceme",
ids=set() if doi is None else {f"doi:{doi}"},
title=_joined(data.get("TI")),
authors=authors,
year=year,
journal=_joined(data.get("J2")),
volume=_joined(data.get("VL")),
issue=_joined(data.get("IS")),
page=_joined(data.get("SP")),
doi=doi,
keywords=data.get("KW", []),
references=cls._parse_references(data.get("N1:References", [])),
sources={"scopus"},
extra=data,
times_cited=times_cited,
)
.add_simple_id()
.set_simple_label()
)
article.add_simple_id()
return article

@classmethod
def _parse_file(cls, file: TextIO) -> Iterable[Article]:
Expand Down
43 changes: 23 additions & 20 deletions src/bibx/_entities/collection_builders/wos.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,26 +297,29 @@ def _parse_article_from_str(cls, article_as_str: str) -> Article:
article_data[field].append(parsed["value"])
processed = cls._parse_all(dict(article_data))
doi = processed.get("DOI")
article = Article(
ids=set() if doi is None else {f"doi:{doi}"},
authors=processed.get("authors", []),
year=processed.get("year"),
title=processed.get("title"),
journal=processed.get("source_abbreviation"),
volume=processed.get("volume"),
issue=processed.get("issue"),
page=processed.get("beginning_page"),
doi=doi,
times_cited=processed.get("times_cited"),
references=list(
cls._get_articles_from_references(processed.get("references"))
),
keywords=processed.get("keywords", []),
extra=processed,
sources={article_as_str},
return (
Article(
label=doi or "replaceme",
ids=set() if doi is None else {f"doi:{doi}"},
authors=processed.get("authors", []),
year=processed.get("year"),
title=processed.get("title"),
journal=processed.get("source_abbreviation"),
volume=processed.get("volume"),
issue=processed.get("issue"),
page=processed.get("beginning_page"),
doi=doi,
times_cited=processed.get("times_cited"),
references=list(
cls._get_articles_from_references(processed.get("references"))
),
keywords=processed.get("keywords", []),
extra=processed,
sources={article_as_str},
)
.add_simple_id()
.set_simple_label()
)
article.add_simple_id()
return article

@classmethod
def _parse_reference_from_str(cls, reference: str) -> Article:
Expand All @@ -327,8 +330,8 @@ def _parse_reference_from_str(cls, reference: str) -> Article:
processed = cls._parse_all(data)
doi = processed.get("DOI")
article = Article(
label=reference,
ids=set() if doi is None else {f"doi:{doi}"},
_label=reference,
title=processed.get("title"),
authors=processed.get("authors", []),
# FIXME: Year is required here
Expand Down
Loading