Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions adsrefpipe/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,9 @@ def update_resolved_reference_records(self, session: object, resolved_list: List
"score": r.score,
"reference_raw": r.reference_raw,
"external_identifier": _ensure_list(getattr(r, "external_identifier", None)) or [],
"scix_id": getattr(r, "scix_id", None),
"publication_year": getattr(r, "publication_year", None),
"refereed_status": getattr(r, "refereed_status", None),
})

session.bulk_update_mappings(ResolvedReference, mappings)
Expand Down Expand Up @@ -474,7 +477,9 @@ def populate_resolved_reference_records_pre_resolved(self, references: List, his
scix_id = '0000',
score=-1,
reference_raw=ref.get('refraw', None),
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [],
publication_year=ref.get('publication_year', None),
refereed_status=ref.get('refereed_status', None))
resolved_records.append(resolved_record)
# add the id and remove xml_reference that is now in database
ref['id'] = 'H%dI%d' % (history_id, item_num)
Expand Down Expand Up @@ -578,7 +583,9 @@ def populate_tables_post_resolved(self, resolved_reference: List, source_bibcode
scix_id=ref.get('scix_id',None),
score=ref.get('score', None),
reference_raw=ref.get('refstring', None),
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [])
external_identifier=_ensure_list(ref.get('external_identifier', None)) or [],
publication_year=ref.get('publication_year', None),
refereed_status=ref.get('refereed_status', None))
resolved_records.append(resolved_record)
if resolved_classic:
compare_record = CompareClassic(history_id=history_id,
Expand Down
14 changes: 11 additions & 3 deletions adsrefpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,11 @@ class ResolvedReference(Base):
reference_raw = Column(String)
external_identifier = Column(ARRAY(String))
scix_id = Column(String)
publication_year = Column(Integer)
refereed_status = Column(Integer)

def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str, external_identifier: list = None, scix_id: str = None):
def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode: str, score: float, reference_raw: str,
external_identifier: list = None, scix_id: str = None, publication_year: int = None, refereed_status: int = None):
"""
initializes a resolved reference object

Expand All @@ -228,6 +231,8 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode:
:param score: confidence score of the resolved reference
:param reference_raw: raw reference string
:param external_identifier: list of external identifiers associated with the reference, e.g. ["doi:...", "arxiv:...", "ascl:..."]
:param publication_year: publication year
:param refereed_status: refereed status flag (0 or 1)
"""
self.history_id = history_id
self.item_num = item_num
Expand All @@ -237,6 +242,8 @@ def __init__(self, history_id: int, item_num: int, reference_str: str, bibcode:
self.reference_raw = reference_raw
self.external_identifier = external_identifier or []
self.scix_id = scix_id
self.publication_year = publication_year
self.refereed_status = refereed_status

def toJSON(self) -> dict:
"""
Expand All @@ -252,7 +259,9 @@ def toJSON(self) -> dict:
'item_num': self.item_num,
**({'reference_raw': self.reference_raw} if self.reference_raw else {}),
'external_identifier': self.external_identifier,
**({'scix_id': self.scix_id} if self.scix_id else {})
**({'scix_id': self.scix_id} if self.scix_id else {}),
**({'publication_year': self.publication_year} if self.publication_year is not None else {}),
**({'refereed_status': self.refereed_status} if self.refereed_status is not None else {}),
}


Expand Down Expand Up @@ -299,4 +308,3 @@ def toJSON(self) -> dict:
'score': self.score,
'state': self.state,
}

72 changes: 71 additions & 1 deletion adsrefpipe/tests/unittests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@ def _get_scix_id(rec):
return getattr(rec, "scix_id", None)


def _get_publication_year(rec):
    """
    Read ``publication_year`` from either a dict (bulk mappings) or an ORM object.

    :param rec: a mapping, an attribute-bearing object, or None
    :return: the stored publication year, or None when rec is None or lacks the field
    """
    field = "publication_year"
    if isinstance(rec, dict):
        # bulk-mapping payloads are plain dicts; a missing key yields None
        return rec.get(field)
    # for None (or any object without the attribute) getattr falls back to None
    return getattr(rec, field, None)


def _get_refereed_status(rec):
    """
    Read ``refereed_status`` from either a dict (bulk mappings) or an ORM object.

    :param rec: a mapping, an attribute-bearing object, or None
    :return: the stored refereed status, or None when rec is None or lacks the field
    """
    field = "refereed_status"
    if isinstance(rec, dict):
        # bulk-mapping payloads are plain dicts; a missing key yields None
        return rec.get(field)
    # for None (or any object without the attribute) getattr falls back to None
    return getattr(rec, field, None)


def _make_session_scope_cm(session):
"""
Return a context manager mock that behaves like app.session_scope()
Expand Down Expand Up @@ -717,6 +739,8 @@ def test_populate_tables_post_resolved_with_classic(self):
'score': 1.0,
'external_identifier': ['doi:10.1234/abc', 'arxiv:2301.00001'],
'scix_id': 'scix:ABCD-1234-ref1',
'publication_year': 2023,
'refereed_status': 1,
},
{
'id': 'H1I2',
Expand All @@ -725,6 +749,8 @@ def test_populate_tables_post_resolved_with_classic(self):
'score': 0.8,
'external_identifier': ['ascl:2301.001', 'doi:10.9999/xyz'],
'scix_id': 'scix:ABCD-1234-ref2',
'publication_year': 2021,
'refereed_status': 0,
}
]

Expand Down Expand Up @@ -756,6 +782,10 @@ def test_populate_tables_post_resolved_with_classic(self):

self.assertEqual(_get_scix_id(resolved_records[0]), 'scix:ABCD-1234-ref1')
self.assertEqual(_get_scix_id(resolved_records[1]), 'scix:ABCD-1234-ref2')
self.assertEqual(_get_publication_year(resolved_records[0]), 2023)
self.assertEqual(_get_publication_year(resolved_records[1]), 2021)
self.assertEqual(_get_refereed_status(resolved_records[0]), 1)
self.assertEqual(_get_refereed_status(resolved_records[1]), 0)

@patch("adsrefpipe.app.ProcessedHistory")
@patch("adsrefpipe.app.ResolvedReference")
Expand Down Expand Up @@ -1058,13 +1088,17 @@ def test_resolved_reference_toJSON_includes_scix_id(self):
reference_raw="Some ref raw",
external_identifier=["doi:10.1234/xyz"],
scix_id="scix:ABCD-1234-0004",
publication_year=2020,
refereed_status=1,
)
got = rr.toJSON()
self.assertEqual(got["history_id"], 123)
self.assertEqual(got["item_num"], 1)
self.assertEqual(got["bibcode"], "2020A&A...000A...1X")
self.assertEqual(got["external_identifier"], ["doi:10.1234/xyz"])
self.assertEqual(got["scix_id"], "scix:ABCD-1234-0004")
self.assertEqual(got["publication_year"], 2020)
self.assertEqual(got["refereed_status"], 1)

def test_resolved_reference_toJSON_omits_scix_id_when_none(self):
"""Test ResolvedReference.toJSON omits scix_id when not set"""
Expand All @@ -1077,9 +1111,13 @@ def test_resolved_reference_toJSON_omits_scix_id_when_none(self):
reference_raw="Some ref raw",
external_identifier=["doi:10.1234/xyz"],
scix_id=None,
publication_year=None,
refereed_status=0,
)
got = rr.toJSON()
self.assertTrue("scix_id" not in got)
self.assertTrue("publication_year" not in got)
self.assertEqual(got["refereed_status"], 0)


class TestDatabaseNoStubdata(unittest.TestCase):
Expand Down Expand Up @@ -1126,6 +1164,31 @@ def test_app(self):
assert self.app._config.get('SQLALCHEMY_URL') == 'postgresql://mock/mock'
assert self.app.conf.get('SQLALCHEMY_URL') == 'postgresql://mock/mock'

def test_update_resolved_reference_records_includes_new_columns(self):
    """Check that the bulk-update mapping carries publication_year and refereed_status."""
    # build one fully populated record, including the two new columns
    record_fields = {
        "history_id": 1,
        "item_num": 2,
        "reference_str": "Some reference",
        "bibcode": "2023A&A...657A...1X",
        "score": 1.0,
        "reference_raw": "Some reference",
        "external_identifier": ["doi:10.1234/example"],
        "scix_id": "scix:ABCD-1234-9999",
        "publication_year": 2023,
        "refereed_status": 1,
    }
    rr = ResolvedReference(**record_fields)

    outcome = self.app.update_resolved_reference_records(self.mock_session, [rr])
    self.assertTrue(outcome)

    # inspect the positional arguments of the single bulk_update_mappings call
    self.mock_session.bulk_update_mappings.assert_called_once()
    positional_args = self.mock_session.bulk_update_mappings.call_args[0]
    called_model, called_mappings = positional_args
    self.assertIs(called_model, ResolvedReference)
    self.assertEqual(len(called_mappings), 1)

    payload = called_mappings[0]
    self.assertEqual(payload["publication_year"], 2023)
    self.assertEqual(payload["refereed_status"], 1)

def test_query_reference_tbl_when_empty(self):
""" verify reference_source table being empty """
self.app.diagnostic_query = MagicMock(return_value=[])
Expand Down Expand Up @@ -1164,6 +1227,8 @@ def test_populate_tables(self):
"id": "H1I1",
"external_identifier": ["arxiv:1009.5514", "doi:10.1234/abc"],
"scix_id": "scix:ABCD-1234-0005",
"publication_year": 2011,
"refereed_status": 1,
},
{
"score": "1.0",
Expand All @@ -1173,6 +1238,8 @@ def test_populate_tables(self):
"id": "H1I2",
"external_identifier": ["arxiv:1709.02923", "ascl:2301.001"],
"scix_id": "scix:ABCD-1234-0006",
"publication_year": 2017,
"refereed_status": 0,
}
]

Expand Down Expand Up @@ -1221,6 +1288,10 @@ def test_populate_tables(self):
self.assertEqual(got[1]["external_identifier"], ["arxiv:1709.02923", "ascl:2301.001"])
self.assertEqual(got[0]["scix_id"], "scix:ABCD-1234-0005")
self.assertEqual(got[1]["scix_id"], "scix:ABCD-1234-0006")
self.assertEqual(got[0]["publication_year"], 2011)
self.assertEqual(got[1]["publication_year"], 2017)
self.assertEqual(got[0]["refereed_status"], 1)
self.assertEqual(got[1]["refereed_status"], 0)

def test_get_parser_error(self):
""" test get_parser when it errors for unrecognized source filename """
Expand All @@ -1242,4 +1313,3 @@ def _fake_get_parser(path):

if __name__ == '__main__':
unittest.main()

39 changes: 39 additions & 0 deletions alembic/versions/835999dfb9e3_add_scix_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""add scix_id

Revision ID: 835999dfb9e3
Revises: 08ca70bd6f5f
Create Date: 2026-02-11 12:45:45.441650

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '835999dfb9e3'
down_revision = '08ca70bd6f5f'
branch_labels = None
depends_on = None


def upgrade():
    """
    Add the nullable string column ``scix_id`` to ``resolved_reference``.

    The add is skipped when the column is already present.

    :raises RuntimeError: if the ``resolved_reference`` table does not exist
    """
    table = "resolved_reference"
    inspector = sa.inspect(op.get_bind())

    # a missing table means the schema and alembic_version disagree; stop here
    if not inspector.has_table(table):
        raise RuntimeError(
            "Migration 835999dfb9e3 requires table `resolved_reference`, "
            "but it does not exist. Database schema and alembic_version are out of sync."
        )

    existing = [col["name"] for col in inspector.get_columns(table)]
    if "scix_id" in existing:
        return
    op.add_column(table, sa.Column("scix_id", sa.String(), nullable=True))


def downgrade():
    """
    Drop the ``scix_id`` column from ``resolved_reference``.

    Does nothing when the table is missing or the column is already gone.
    """
    table = "resolved_reference"
    inspector = sa.inspect(op.get_bind())

    if inspector.has_table(table):
        existing = [col["name"] for col in inspector.get_columns(table)]
        if "scix_id" in existing:
            op.drop_column(table, "scix_id")
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""add publication_year and refereed_status

Revision ID: 9a4b1e8b6c7d
Revises: 835999dfb9e3
Create Date: 2026-03-11 00:00:00.000000

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "9a4b1e8b6c7d"
down_revision = "835999dfb9e3"
branch_labels = None
depends_on = None


def upgrade():
    """
    Add the nullable integer columns ``publication_year`` and ``refereed_status``
    to ``resolved_reference``.

    Each column is added only if it is not already present.

    :raises RuntimeError: if the ``resolved_reference`` table does not exist
    """
    table = "resolved_reference"
    inspector = sa.inspect(op.get_bind())

    # a missing table means the schema and alembic_version disagree; stop here
    if not inspector.has_table(table):
        raise RuntimeError(
            "Migration 9a4b1e8b6c7d requires table `resolved_reference`, "
            "but it does not exist. Database schema and alembic_version are out of sync."
        )

    # snapshot of the column names taken once, before any column is added
    existing = {col["name"] for col in inspector.get_columns(table)}
    for column_name in ("publication_year", "refereed_status"):
        if column_name not in existing:
            op.add_column(table, sa.Column(column_name, sa.Integer(), nullable=True))


def downgrade():
    """
    Drop ``refereed_status`` and then ``publication_year`` from ``resolved_reference``.

    Does nothing when the table is missing; each column is dropped only if present.
    """
    table = "resolved_reference"
    inspector = sa.inspect(op.get_bind())

    if inspector.has_table(table):
        # snapshot of the column names taken once, before any column is dropped
        existing = {col["name"] for col in inspector.get_columns(table)}
        # reverse of the order in which upgrade() adds them
        for column_name in ("refereed_status", "publication_year"):
            if column_name in existing:
                op.drop_column(table, column_name)
Loading