Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ jobs:
uses: astral-sh/setup-uv@v2
with:
enable-cache: true
version: "0.5.7"
version: "0.5.26"

- name: Install dependencies
run: |
uv venv .venv
uv pip install ".[dev]"
uv sync

- name: Make sure we didn't forget anything in pre-commit
run: |
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ jobs:
uses: astral-sh/setup-uv@v2
with:
enable-cache: true
version: "0.5.7"
version: "0.5.26"

- name: Run tests for ${{ matrix.python-version }}
run: |
uv venv .venv
uv pip install ".[dev]"
uv sync
uv run pre-commit run --all
uv run pytest
7 changes: 4 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ repos:
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
args: [--fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.13.0'
rev: "v1.13.0"
hooks:
- id: mypy
- id: mypy
additional_dependencies: ["types-requests>=2.32.0.20241016"]
93 changes: 81 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,108 @@
name = "bibx"
description = "Python bibliometric tools."
authors = [
{name = "Core of Science Team", email = "technology@coreofscience.org"},
{ name = "Core of Science Team", email = "technology@coreofscience.org" },
]
license = "MIT"
readme = "README.md"
keywords = [
"bibliometrics",
"science",
"text mining",
]
keywords = ["bibliometrics", "science", "text mining"]
dynamic = ["version"]
dependencies = [
"bibtexparser~=1.4.0",
"networkx~=3.0",
"pydantic~=2.10.6",
"requests~=2.32.3",
"typer[all]~=0.9.0",
"xlsxwriter~=3.2.0",
]
requires-python = ">=3.9"
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Text Processing",
"Typing :: Typed",
]


[project.optional-dependencies]
[dependency-groups]
dev = [
"pytest~=7.2.0",
"pytest~=8.3.4",
"pre-commit~=2.20.0",
"ruff~=0.3.3",
"ruff~=0.8.2",
"mypy~=1.9.0",
"types-requests>=2.32.0.20241016",
"ipython>=8.18.1",
]

[project.scripts]
bibx = "bibx.cli:app"

[tool.ruff.lint]
select = ["I", "E", "F", "UP", "W"]
ignore = ["E501"]
select = [
"F", # Pyflakes
"W",
"E", # pycodestyle
"C90", # mccabe
"I", # isort
"D", # pydocstyle
"UP", # pyupgrade
"N", # pep8-naming
"YTT", # flake8-2020
"ANN", # flake8-annotations
"S", # flake8-bandit
"BLE", # flake8-blind-except
"FBT", # flake8-boolean-trap
"B", # flake8-bugbear
"A", # flake8-builtins
"C4", # flake8-comprehensions
"T10", # flake8-debugger
"EM", # flake8-errmsg
"ISC", # flake8-implicit-str-concat
"ICN", # flake8-import-conventions
"G", # flake8-logging-format
"T20", # flake8-print
"Q", # flake8-quotes
"RET", # flake8-return
"SIM", # flake8-simplify
"TID", # flake8-tidy-imports
"TID", # flake8-tidy-imports
"DTZ", # flake8-datetimez
"ARG", # flake8-unused-arguments
"PGH", # pygrep-hooks
"PLC",
"PLE",
"PLR",
"PLW", # Pylint
"RUF", # Ruff-specific rules
]
ignore = [
"A002",
"B008",
"D100",
"D106",
"D107",
"D203",
"D213",
"D406",
"D407",
"DTZ003",
"FBT001",
"FBT003",
"ISC001",
"N815",
"PGH003",
"S101",
"T201",
]

[tool.mypy]
mypy_path = "./stubs/"
Expand Down
33 changes: 19 additions & 14 deletions src/bibx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""BibX is a library to work with bibliographic data."""

import logging
from typing import TextIO

from bibx._entities.article import Article
from bibx._entities.collection import Collection
from bibx._entities.collection_builders.openalex import OpenAlexCollectionBuilder
from bibx._entities.collection_builders.scopus_bib import ScopusBibCollectionBuilder
from bibx._entities.collection_builders.scopus_ris import ScopusRisCollectionBuilder
from bibx._entities.collection_builders.wos import WosCollectionBuilder
Expand All @@ -15,18 +18,23 @@
"Article",
"Collection",
"Sap",
"query_openalex",
"read_any",
"read_scopus_bib",
"read_scopus_ris",
"read_wos",
"read_any",
]

__version__ = "0.2.0"
__version__ = "0.3.0"


def query_openalex(query: str, limit: int = 600) -> Collection:
    """Build a collection from an OpenAlex query.

    :param query: query handed to the OpenAlex collection builder.
    :param limit: second argument of the builder; presumably caps how many
        works are fetched (confirm against the builder).
    :return: the collection produced by the builder.
    """
    builder = OpenAlexCollectionBuilder(query, limit)
    return builder.build()


def read_scopus_bib(*files: TextIO) -> Collection:
"""
Takes any number of bibtex files from scopus and generates a collection.
"""Take any number of bibtex files from scopus and generates a collection.

:param files: Scopus bib files open.
:return: the collection
Expand All @@ -35,8 +43,7 @@ def read_scopus_bib(*files: TextIO) -> Collection:


def read_scopus_ris(*files: TextIO) -> Collection:
"""
Takes any number of ris files from scopus and generates a collection.
"""Take any number of ris files from scopus and generates a collection.

:param files: Scopus ris files open.
:return: the collection
Expand All @@ -45,8 +52,7 @@ def read_scopus_ris(*files: TextIO) -> Collection:


def read_wos(*files: TextIO) -> Collection:
"""
Takes any number of wos text files and returns a collection.
"""Take any number of wos text files and returns a collection.

:param files: WoS files open.
:return: the collection
Expand All @@ -55,16 +61,15 @@ def read_wos(*files: TextIO) -> Collection:


def read_any(file: TextIO) -> Collection:
"""
Tries to read a file with the supported formats.
"""
"""Try to read a file with the supported formats."""
for handler in (read_wos, read_scopus_ris, read_scopus_bib):
try:
return handler(file)
except BibXError as e:
logger.debug(f"Error: {e}")
logger.debug("Error: %s", e)
except ValueError:
logger.debug(
f"Error: the {handler.__name__} function does not support this file"
"Error: the %s function does not support this file", handler.__name__
)
raise ValueError("Unsupported file type")
message = "Unsupported file type"
raise ValueError(message)
84 changes: 75 additions & 9 deletions src/bibx/_entities/article.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Optional
from typing import Optional, TypeVar, Union

T = TypeVar("T")


def _keep(a: T, b: T) -> T:
    """Return ``a`` unless it is ``None``, in which case return ``b``.

    Only ``None`` triggers the fallback; falsy values such as ``0`` or ``""``
    are kept.
    """
    if a is None:
        return b
    return a


@dataclass
class Article:
ids: set[str]
authors: list[str] = field(default_factory=list)
year: Optional[int] = None
title: Optional[str] = None
Expand All @@ -14,23 +21,40 @@ class Article:
page: Optional[str] = None
doi: Optional[str] = None
_label: Optional[str] = None
_permalink: Optional[str] = None
times_cited: Optional[int] = None
references: list["Article"] = field(default_factory=list)
keywords: list[str] = field(default_factory=list)
sources: set[str] = field(default_factory=set)
extra: Mapping = field(default_factory=dict)

def merge(self, other: "Article") -> "Article":
    """Combine this article with ``other`` into a new ``Article``.

    ``ids`` and ``sources`` are unioned. For the remaining fields except
    ``extra`` the value from ``self`` wins when present and ``other`` only
    fills the gaps: scalar fields fall back when ``self`` holds ``None``,
    list fields when ``self`` holds an empty list.
    """
    # Scalar fields: keep our own value unless it is None.
    scalar_fields = (
        "year",
        "title",
        "journal",
        "volume",
        "issue",
        "page",
        "doi",
        "_label",
        "_permalink",
        "times_cited",
    )
    scalars = {
        name: _keep(getattr(self, name), getattr(other, name))
        for name in scalar_fields
    }
    return Article(
        ids=self.ids.union(other.ids),
        authors=self.authors or other.authors,
        references=self.references or other.references,
        keywords=self.keywords or other.keywords,
        sources=self.sources.union(other.sources),
        # NOTE(review): unlike the fields above, keys present in both
        # ``extra`` mappings take the value from ``other``.
        extra={**self.extra, **other.extra},
        **scalars,
    )

@property
def key(self):
if self.authors:
author = self.authors[0].split(" ")[0].replace(",", "")
else:
author = "anonymous"
year = self.year
return f"{author}{year}".lower()
def key(self) -> str:
return next(iter(sorted(self.ids)))

@property
def label(self):
def label(self) -> str:
if self._label is not None:
return self._label
pieces = {
Expand All @@ -42,3 +66,45 @@ def label(self):
"DI": f"DOI {self.doi}" if self.doi else None,
}
return ", ".join(value for value in pieces.values() if value)

@property
def permalink(self) -> Optional[str]:
if self._permalink is not None:
return self._permalink
if self.doi is not None:
return f"https://doi.org/{self.doi}"
return None

@property
def simple_id(self) -> Optional[str]:
if self.authors and self.year is not None:
author = self.authors[0].split(" ")[0].replace(",", "")
return f"{author}{self.year}".lower()
return None

def __repr__(self) -> str:
return f"Article(ids={self.ids!r}, authors={self.authors!r})"

def add_simple_id(self) -> None:
if self.simple_id is None:
return
self.ids.add(f"simple:{self.simple_id}")

def info(
self,
) -> dict[str, Union[str, int, list[str], None]]:
return {
"permalink": self.permalink,
"label": self.label,
"authors": self.authors,
"year": self.year,
"title": self.title,
"journal": self.journal,
"volume": self.volume,
"issue": self.issue,
"page": self.page,
"doi": self.doi,
"times_cited": self.times_cited,
"keywords": self.keywords,
"sources": list(self.sources),
}
Loading