From 11be5781cc5c91d0c1a53ac57c4f870b51b69aa5 Mon Sep 17 00:00:00 2001 From: ADS Administration Date: Wed, 1 Apr 2026 08:33:38 -0400 Subject: [PATCH 1/3] SciX ID included in output --- config.py | 2 +- referencesrv/resolver/common.py | 7 ++++--- referencesrv/resolver/solrtestdata.py | 4 +++- referencesrv/resolver/solve.py | 3 +-- .../tests/unittests/test_referencesrv_parser.py | 5 ++++- .../tests/unittests/test_referencesrv_resolver.py | 14 +++++++------- referencesrv/views.py | 4 +++- 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/config.py b/config.py index 024e9c5..4e59f8c 100644 --- a/config.py +++ b/config.py @@ -13,7 +13,7 @@ REFERENCE_SERVICE_MAX_RECORDS_SOLR = 100 REFERENCE_SERVICE_QUERY_FIELDS_SOLR = "author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm," \ - "year,title,pub,pub_raw,aff_raw,[fields aff_raw=1]," \ + "year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],scix_id," \ "volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype" # maximum references that can be resolved in one call diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py index 2a41a21..a844800 100644 --- a/referencesrv/resolver/common.py +++ b/referencesrv/resolver/common.py @@ -264,7 +264,7 @@ class Solution(object): * score * source_hypothesis (the hypothesis that eventually got it right) """ - def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None): + def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None): """ :param cited_bibcode: @@ -276,6 +276,7 @@ def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_b self.score = score self.citing_bibcode = str(citing_bibcode) self.source_hypothesis = source_hypothesis + self.scix_id = scix_id def __str__(self): """ @@ -283,7 +284,7 @@ def __str__(self): :return: """ if isinstance(self.score, Evidences): - return '%.1f 
%s'%(self.score.avg(),self.cited_bibcode) + return '%.1f bibcode:%s scixid:%s'%(self.score.avg(),self.cited_bibcode, self.scix_id) raise NoSolution("NotResolved") def __repr__(self): @@ -459,4 +460,4 @@ def predicate(x): non_numbers = filterfalse(predicate, t2) sorted_numbers = sorted(numbers) sorted_non_numbers = sorted(non_numbers, key=str) - return sorted_numbers + sorted_non_numbers \ No newline at end of file + return sorted_numbers + sorted_non_numbers diff --git a/referencesrv/resolver/solrtestdata.py b/referencesrv/resolver/solrtestdata.py index f34ac7e..889d365 100644 --- a/referencesrv/resolver/solrtestdata.py +++ b/referencesrv/resolver/solrtestdata.py @@ -20,6 +20,7 @@ def get_test_data(): u'numFound': 2, u'docs': [ {u'bibcode': u'2019AAS...23320704A', + u'scix_id': u'scix:6ANE-YQXJ-KRH0', u'author': [u'Accomazzi, Alberto'], u'title': [u'The NASA Astrophysics Data System\xe2\u20ac\u2122s Decadal Plan for the 2020s'], u'doctype': u'abstract', @@ -32,6 +33,7 @@ def get_test_data(): u'identifier': [u'2019AAS...23320704A'], u'page': [u'207.04']}, {u'bibcode': u'2019AAS...23338108A', + u'scix_id': u'scix:AGA3-9D3P-Y7EF', u'author': [u'Accomazzi, Alberto', u'Kurtz, Michael J.', u'Henneken, Edwin', u'Grant, Carolyn S.', u'Thompson, Donna M.', u'Chyla, Roman', u'McDonald, Stephen', u'Blanco-Cuaresma, Sergi', u'Shapurian, Golnaz', u'Hostetler, Timothy', u'Templeton, Matthew', u'Lockhart, Kelly'], u'title': [u'Transitioning from ADS Classic to the new ADS search platform'], u'doctype': u'abstract', @@ -45,4 +47,4 @@ def get_test_data(): u'page': [u'381.08']} ] } - } \ No newline at end of file + } diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py index b52c0bc..00f4f59 100644 --- a/referencesrv/resolver/solve.py +++ b/referencesrv/resolver/solve.py @@ -291,8 +291,7 @@ def solve_for_fields(hypothesis): current_app.logger.debug("score %s %s %s"%(sol['bibcode'], score.get_score(), score)) score, sol = choose_solution(scored, query_string, 
hypothesis) - - return Solution(sol["bibcode"], score, hypothesis.name) + return Solution(sol["bibcode"], score, hypothesis.name, scix_id=sol["scix_id"]) raise OverflowOrNone("Got either too many or no records from solr") diff --git a/referencesrv/tests/unittests/test_referencesrv_parser.py b/referencesrv/tests/unittests/test_referencesrv_parser.py index 58de7ac..61bd058 100644 --- a/referencesrv/tests/unittests/test_referencesrv_parser.py +++ b/referencesrv/tests/unittests/test_referencesrv_parser.py @@ -756,6 +756,7 @@ def test_01(self): u'year': u'2020', u'page': u'2', u'bibcode': u'2020JHEP...09..002P', + u'scix_id': u'scix:5KGH-MC98-7AYN', u'author': [u'Penington, Geoffrey'], u'issue': u'9', u'aff_raw': u'Stanford Institute for Theoretical Physics, Stanford University, 450 Jane Stanford Way, 94305, Stanford, CA, USA', u'pub': u'Journal of High Energy Physics', @@ -769,7 +770,7 @@ def test_01(self): } }) r = self.client.post(path='/text', data=json.dumps({'reference': ['Penington, G, 2020, JHEP, 9']})) - self.assertEqual(r.data, b"1.0 2020JHEP...09..002P -- Penington, G, 2020, JHEP, 9") + self.assertEqual(r.data, b"1.0 bibcode:2020JHEP...09..002P scixid:scix:5KGH-MC98-7AYN -- Penington, G, 2020, JHEP, 9") def test_02(self): """ test text endpoint when request is to return in json format """ @@ -785,6 +786,7 @@ def test_02(self): u'year': u'2020', u'page': u'2', u'bibcode': u'2020JHEP...09..002P', + u'scix_id': u'scix:5KGH-MC98-7AYN', u'author': [u'Penington, Geoffrey'], u'issue': u'9', u'aff_raw': u'Stanford Institute for Theoretical Physics, Stanford University, 450 Jane Stanford Way, 94305, Stanford, CA, USA', u'pub': u'Journal of High Energy Physics', @@ -802,6 +804,7 @@ def test_02(self): data=json.dumps({'reference': ['Penington, G, 2020, JHEP, 9']}), headers={'accept':'application/json'}) self.assertEqual(json.loads(r.data), {"resolved": [{"refstring": "Penington, G, 2020, JHEP, 9", + "scix_id":"scix:5KGH-MC98-7AYN", "score": "1.0", "bibcode": 
"2020JHEP...09..002P"}]}) diff --git a/referencesrv/tests/unittests/test_referencesrv_resolver.py b/referencesrv/tests/unittests/test_referencesrv_resolver.py index 0df3e6a..1b2cd17 100755 --- a/referencesrv/tests/unittests/test_referencesrv_resolver.py +++ b/referencesrv/tests/unittests/test_referencesrv_resolver.py @@ -356,8 +356,8 @@ def test_Solution(self): """ e = Evidences() e.add_evidence(1, 'bibcode') - s = Solution(cited_bibcode='2013SPIE.8004.2013Z', score=e) - self.assertEqual(str(s), '1.0 2013SPIE.8004.2013Z') + s = Solution(cited_bibcode='2013SPIE.8004.2013Z', scix_id='foo', score=e) + self.assertEqual(str(s), '1.0 bibcode:2013SPIE.8004.2013Z scixid:foo') self.assertEqual(repr(s), "'2013SPIE.8004.2013Z'") @@ -417,7 +417,7 @@ def test_solve_reference(self): 'volume': '233', 'year': '2019', 'page': '207.04'} - self.assertEqual(str(solve_reference(Hypotheses(ref))), '1.0 2019AAS...23320704A') + self.assertEqual(str(solve_reference(Hypotheses(ref))), '1.0 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0') # testing with first author only and page # eventhough other authors are missing but because of page match is found ref = {'authors': 'Accomazzi, A.', @@ -425,14 +425,14 @@ def test_solve_reference(self): 'volume': '233', 'year': '2019', 'page': '381.08'} - self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23338108A') + self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23338108A scixid:scix:AGA3-9D3P-Y7EF') # testing with first author only and no page, hence record with only the first author is returned ref = {'authors': 'Accomazzi, A.', 'journal': 'AAS233 Meeting', 'volume': '233', 'year': '2019', 'page': '0'} - self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23320704A') + self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0') # when we have multiple solutions and not enough reference information to decide which # 
page and author are the deciding factor between these two test records # here first author and page are wrong @@ -454,7 +454,7 @@ def test_solve_reference(self): # however the first record is authored by one author only and # it is the same first author of the second record # verify that the first record is returned - self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23320704A') + self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0') def test_add_volume_evidence(self): @@ -637,7 +637,7 @@ def test_Querier(self): self.assertEqual(solrquery.make_params('author:("Accomazzi, A") AND year:"2019" AND bibstem:(AAS)'), {'q': 'author:("Accomazzi, A") AND year:"2019" AND bibstem:(AAS)', 'rows': '100', - 'fl': u'author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm,year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype'}) + 'fl': u'author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm,year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],scix_id,volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype'}) # no author_norm solution = {u'bibcode': u'2013JARS....7.3461V', diff --git a/referencesrv/views.py b/referencesrv/views.py index c561c5a..06d9820 100644 --- a/referencesrv/views.py +++ b/referencesrv/views.py @@ -123,7 +123,9 @@ def format_resolved_reference(returned_format, resolved, reference, id, cache=Tr cache_resolved_set(reference, resolved) if 'application/json' in returned_format: resolved = resolved.split() - result = {'refstring': reference, 'score': resolved[0], 'bibcode': resolved[1]} + bibcode = resolved[1].replace('bibcode:','').strip() + scix_id = resolved[2].replace('scixid:','').strip() + result = {'refstring': reference, 'score': resolved[0], 'bibcode': bibcode, 'scix_id':scix_id} if comment: result['comment'] = comment if id: From 
c994853626c17c96dc5660ae56c2f2c2a74a2ed5 Mon Sep 17 00:00:00 2001 From: ADS Administration Date: Fri, 3 Apr 2026 15:59:58 -0400 Subject: [PATCH 2/3] PR feedback implementation --- referencesrv/resolver/common.py | 2 +- referencesrv/resolver/solve.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py index a844800..0b32bc2 100644 --- a/referencesrv/resolver/common.py +++ b/referencesrv/resolver/common.py @@ -264,7 +264,7 @@ class Solution(object): * score * source_hypothesis (the hypothesis that eventually got it right) """ - def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None): + def __init__(self, cited_bibcode, score, source_hypothesis='not given', cited_bibcode=None, citing_bibcode=None, scix_id=None): """ :param cited_bibcode: diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py index 00f4f59..7e44751 100644 --- a/referencesrv/resolver/solve.py +++ b/referencesrv/resolver/solve.py @@ -288,10 +288,10 @@ def solve_for_fields(hypothesis): current_app.logger.debug("evidences from %s"%(hypothesis.name)) for score, sol in sorted2(scored): - current_app.logger.debug("score %s %s %s"%(sol['bibcode'], score.get_score(), score)) + current_app.logger.debug("score %s %s %s"%(sol.get('bibcode',None), score.get_score(), score)) score, sol = choose_solution(scored, query_string, hypothesis) - return Solution(sol["bibcode"], score, hypothesis.name, scix_id=sol["scix_id"]) + return Solution(sol.get("bibcode",None), score, hypothesis.name, scix_id=sol.get("scix_id",None)) raise OverflowOrNone("Got either too many or no records from solr") From 8347b6a543a41093b2ffa34c5649821a3e3377f4 Mon Sep 17 00:00:00 2001 From: ADS Administration Date: Tue, 7 Apr 2026 12:24:43 -0400 Subject: [PATCH 3/3] More SciX ID integration --- referencesrv/resolver/common.py | 2 +- referencesrv/resolver/solve.py | 33 
+++++++++++++++++++++------------ referencesrv/views.py | 4 ++-- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py index 0b32bc2..a844800 100644 --- a/referencesrv/resolver/common.py +++ b/referencesrv/resolver/common.py @@ -264,7 +264,7 @@ class Solution(object): * score * source_hypothesis (the hypothesis that eventually got it right) """ - def __init__(self, cited_bibcode, score, source_hypothesis='not given', cited_bibcode=None, citing_bibcode=None, scix_id=None): + def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None): """ :param cited_bibcode: diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py index 7e44751..4bf66ae 100644 --- a/referencesrv/resolver/solve.py +++ b/referencesrv/resolver/solve.py @@ -157,7 +157,7 @@ def inspect_doubtful_solutions(scored_solutions, query_string, hypothesis): non_veto_solutions = [(evidences, solution) for evidences, solution in scored_solutions if not evidences.has_veto()] if len(non_veto_solutions) == 1: sol = non_veto_solutions - raise Undecidable("Try again if desperate", considered_solutions=[(sol[0][0].get_score(), sol[0][1]["bibcode"])]) + raise Undecidable("Try again if desperate", considered_solutions=[(sol[0][0].get_score(), sol[0][1].get("bibcode",None), sol[0][1].get("scix_id",None))]) # Some of the following rules only make sense for fielded # hypotheses. Always be aware that input_fields might be None @@ -170,7 +170,7 @@ def inspect_doubtful_solutions(scored_solutions, query_string, hypothesis): # we should base this on the result bibstem, I guess. 
for evidences, solution in scored_solutions: if evidences.single_veto_from("page") and not input_fields.get("page"): - raise Undecidable("Try again if desperate", considered_solutions=[(evidences.get_score(), solution["bibcode"])]) + raise Undecidable("Try again if desperate", considered_solutions=[(evidences.get_score(), solution.get("bibcode",None), solution.get("scix_id",None))]) raise NoSolution(reason="No unique non-vetoed doubtful solution", ref=query_string) @@ -212,7 +212,7 @@ def inspect_ambiguous_solutions(scored_solutions, query_string, hypothesis): current_app.logger.debug("Breaking ambiguity with %s suspecting it's a duplicate book"%non_vetoed[-2][1]["bibcode"]) return non_vetoed[-1] - to_stash = [(score.get_score(), sol["bibcode"]) + to_stash = [(score.get_score(), sol.get("bibcode", None), sol.get("scix_id", None)) for score, sol in non_vetoed if score>current_app.config['EVIDENCE_SCORE_RANGE'][0]] current_app.logger.debug("Unsolved ambiguity, stashing %s"%(to_stash)) raise Undecidable("Ambiguous %s."%(query_string), considered_solutions=to_stash) @@ -234,7 +234,6 @@ def choose_solution(candidates, query_string, hypothesis): """ min_score = current_app.config['MIN_SCORE_FIRST_ROUND'] filtered = [(score, solution) for score, solution in candidates if score >= min_score*len(score)] - if len(filtered)==0: if candidates: current_app.logger.debug("No score above minimal score, inspecting doubtful solutions.") @@ -339,6 +338,9 @@ def solve_reference(ref): try: return solve_for_fields(hypothesis) except Undecidable as ex: + # The list of possible solutions is the list of triples sent back + # when the Undecidable exception is thrown in the solve_for_fields call. + # These are generated in inspect_doubtful_solutions. 
possible_solutions.extend(ex.considered_solutions) reason = ex.reason except (NoSolution, OverflowOrNone) as ex: @@ -354,18 +356,25 @@ def solve_reference(ref): # all others and accept that if possible_solutions: current_app.logger.debug("Considering stashed ties: %s"%(possible_solutions)) - cands = {} - for score, sol in possible_solutions: - cands.setdefault(sol, []).append((score, sol)) - for bibcode in cands: - cands[bibcode] = max(cands[bibcode]) + scx2bbc = {} + for score, sol, scixid in possible_solutions: + # The entries in the possible_solutions will always have SciX IDs, but not + # necessarily bibcodes. So, the dictionary of candidates will be keyed on + # SciX IDs and a mapping is kept for bibcodes when appropriate. + if sol: + scx2bbc[scixid] = sol + cands.setdefault(scixid, []).append((score, scixid)) + for scix in cands: + cands[scix] = max(cands[scix]) scored = sorted(zip(cands.values(), cands.keys())) - if len(scored)==1: - return Solution(scored[0][1], scored[0][0], "only remaining of tied solutions") + # Determine the bibcode (if any) from the correspondence created earlier + bibcode = scx2bbc.get(scored[0][1], None) + return Solution(bibcode, scored[0][0], "only remaining of tied solutions", scix_id=scored[0][1]) elif scored[-1][0]>scored[-2][0]: - return Solution(scored[0][1], scored[0][0], "best tied solution") + bibcode = scx2bbc.get(scored[0][1], None) + return Solution(bibcode, scored[0][0], "best tied solution", scix_id=scored[0][1]) else: current_app.logger.debug("Remaining ties, giving up") if reason: diff --git a/referencesrv/views.py b/referencesrv/views.py index 06d9820..22853be 100644 --- a/referencesrv/views.py +++ b/referencesrv/views.py @@ -167,7 +167,7 @@ def text_resolve(reference, returned_format, id): :param returned_format: :return: """ - not_resolved = '0.0 %s' % (19 * '.') + not_resolved = '0.0 bibcode:%s scixid:%s' % (19 * '.', 19 * '.') try: resolved = cache_resolved_get(reference) if resolved: @@ -222,7 +222,7 @@ def 
xml_resolve(parsed_reference, returned_format): :param returned_format: :return: """ - not_resolved = '0.0 %s' % (19 * '.') + not_resolved = '0.0 bibcode:%s scixid:%s' % (19 * '.', 19 * '.') try: resolved = str(solve_reference(Hypotheses(parsed_reference))) if resolved.startswith('0.0'):