From 11be5781cc5c91d0c1a53ac57c4f870b51b69aa5 Mon Sep 17 00:00:00 2001
From: ADS Administration <ads@cfa.harvard.edu>
Date: Wed, 1 Apr 2026 08:33:38 -0400
Subject: [PATCH 1/3] SciX ID included in output

---
 config.py                                          |  2 +-
 referencesrv/resolver/common.py                    |  7 ++++---
 referencesrv/resolver/solrtestdata.py              |  4 +++-
 referencesrv/resolver/solve.py                     |  3 +--
 .../tests/unittests/test_referencesrv_parser.py    |  5 ++++-
 .../tests/unittests/test_referencesrv_resolver.py  | 14 +++++++-------
 referencesrv/views.py                              |  4 +++-
 7 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/config.py b/config.py
index 024e9c5..4e59f8c 100644
--- a/config.py
+++ b/config.py
@@ -13,7 +13,7 @@
 REFERENCE_SERVICE_MAX_RECORDS_SOLR = 100
 
 REFERENCE_SERVICE_QUERY_FIELDS_SOLR = "author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm," \
-                                      "year,title,pub,pub_raw,aff_raw,[fields aff_raw=1]," \
+                                      "year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],scix_id," \
                                       "volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype"
 
 # maximum references that can be resolved in one call
diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py
index 2a41a21..a844800 100644
--- a/referencesrv/resolver/common.py
+++ b/referencesrv/resolver/common.py
@@ -264,7 +264,7 @@ class Solution(object):
     * score
     * source_hypothesis (the hypothesis that eventually got it right)
     """
-    def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None):
+    def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None):
         """
 
         :param cited_bibcode:
@@ -276,6 +276,7 @@ def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_b
         self.score = score
         self.citing_bibcode = str(citing_bibcode)
         self.source_hypothesis = source_hypothesis
+        self.scix_id = scix_id
     
     def __str__(self):
         """
@@ -283,7 +284,7 @@ def __str__(self):
         :return:
         """
         if isinstance(self.score, Evidences):
-            return '%.1f %s'%(self.score.avg(),self.cited_bibcode)
+            return '%.1f bibcode:%s scixid:%s'%(self.score.avg(),self.cited_bibcode, self.scix_id)
         raise NoSolution("NotResolved")
 
     def __repr__(self):
@@ -459,4 +460,4 @@ def predicate(x):
     non_numbers = filterfalse(predicate, t2)
     sorted_numbers = sorted(numbers)
     sorted_non_numbers = sorted(non_numbers, key=str)
-    return sorted_numbers + sorted_non_numbers
\ No newline at end of file
+    return sorted_numbers + sorted_non_numbers
diff --git a/referencesrv/resolver/solrtestdata.py b/referencesrv/resolver/solrtestdata.py
index f34ac7e..889d365 100644
--- a/referencesrv/resolver/solrtestdata.py
+++ b/referencesrv/resolver/solrtestdata.py
@@ -20,6 +20,7 @@ def get_test_data():
                  u'numFound': 2,
                  u'docs': [
                      {u'bibcode': u'2019AAS...23320704A',
+                      u'scix_id': u'scix:6ANE-YQXJ-KRH0',
                       u'author': [u'Accomazzi, Alberto'],
                       u'title': [u'The NASA Astrophysics Data System\xe2\u20ac\u2122s Decadal Plan for the 2020s'],
                       u'doctype': u'abstract',
@@ -32,6 +33,7 @@ def get_test_data():
                       u'identifier': [u'2019AAS...23320704A'],
                       u'page': [u'207.04']},
                      {u'bibcode': u'2019AAS...23338108A',
+                      u'scix_id': u'scix:AGA3-9D3P-Y7EF',
                       u'author': [u'Accomazzi, Alberto', u'Kurtz, Michael J.', u'Henneken, Edwin', u'Grant, Carolyn S.', u'Thompson, Donna M.', u'Chyla, Roman', u'McDonald, Stephen', u'Blanco-Cuaresma, Sergi', u'Shapurian, Golnaz', u'Hostetler, Timothy', u'Templeton, Matthew', u'Lockhart, Kelly'],
                       u'title': [u'Transitioning from ADS Classic to the new ADS search platform'],
                       u'doctype': u'abstract',
@@ -45,4 +47,4 @@ def get_test_data():
                       u'page': [u'381.08']}
                  ]
                  }
-            }
\ No newline at end of file
+            }
diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py
index b52c0bc..00f4f59 100644
--- a/referencesrv/resolver/solve.py
+++ b/referencesrv/resolver/solve.py
@@ -291,8 +291,7 @@ def solve_for_fields(hypothesis):
                 current_app.logger.debug("score %s %s %s"%(sol['bibcode'], score.get_score(), score))
 
             score, sol = choose_solution(scored, query_string, hypothesis)
-
-            return Solution(sol["bibcode"], score, hypothesis.name)
+            return Solution(sol["bibcode"], score, hypothesis.name, scix_id=sol["scix_id"])
 
     raise OverflowOrNone("Got either too many or no records from solr")
 
diff --git a/referencesrv/tests/unittests/test_referencesrv_parser.py b/referencesrv/tests/unittests/test_referencesrv_parser.py
index 58de7ac..61bd058 100644
--- a/referencesrv/tests/unittests/test_referencesrv_parser.py
+++ b/referencesrv/tests/unittests/test_referencesrv_parser.py
@@ -756,6 +756,7 @@ def test_01(self):
                                                                       u'year': u'2020',
                                                                       u'page': u'2',
                                                                       u'bibcode': u'2020JHEP...09..002P',
+                                                                      u'scix_id': u'scix:5KGH-MC98-7AYN',
                                                                       u'author': [u'Penington, Geoffrey'], u'issue': u'9',
                                                                       u'aff_raw': u'Stanford Institute for Theoretical Physics, Stanford University, 450 Jane Stanford Way, 94305, Stanford, CA, USA',
                                                                       u'pub': u'Journal of High Energy Physics',
@@ -769,7 +770,7 @@ def test_01(self):
                                                           }
                                             })
             r = self.client.post(path='/text', data=json.dumps({'reference': ['Penington, G, 2020, JHEP, 9']}))
-            self.assertEqual(r.data, b"1.0 2020JHEP...09..002P -- Penington, G, 2020, JHEP, 9")
+            self.assertEqual(r.data, b"1.0 bibcode:2020JHEP...09..002P scixid:scix:5KGH-MC98-7AYN -- Penington, G, 2020, JHEP, 9")
 
     def test_02(self):
         """ test text endpoint when request is to return in json format """
@@ -785,6 +786,7 @@ def test_02(self):
                                                                       u'year': u'2020',
                                                                       u'page': u'2',
                                                                       u'bibcode': u'2020JHEP...09..002P',
+                                                                      u'scix_id': u'scix:5KGH-MC98-7AYN',
                                                                       u'author': [u'Penington, Geoffrey'], u'issue': u'9',
                                                                       u'aff_raw': u'Stanford Institute for Theoretical Physics, Stanford University, 450 Jane Stanford Way, 94305, Stanford, CA, USA',
                                                                       u'pub': u'Journal of High Energy Physics',
@@ -802,6 +804,7 @@ def test_02(self):
                                  data=json.dumps({'reference': ['Penington, G, 2020, JHEP, 9']}),
                                  headers={'accept':'application/json'})
             self.assertEqual(json.loads(r.data), {"resolved": [{"refstring": "Penington, G, 2020, JHEP, 9",
+                                                                "scix_id":"scix:5KGH-MC98-7AYN",
                                                                 "score": "1.0",
                                                                 "bibcode": "2020JHEP...09..002P"}]})
 
diff --git a/referencesrv/tests/unittests/test_referencesrv_resolver.py b/referencesrv/tests/unittests/test_referencesrv_resolver.py
index 0df3e6a..1b2cd17 100755
--- a/referencesrv/tests/unittests/test_referencesrv_resolver.py
+++ b/referencesrv/tests/unittests/test_referencesrv_resolver.py
@@ -356,8 +356,8 @@ def test_Solution(self):
         """
         e = Evidences()
         e.add_evidence(1, 'bibcode')
-        s = Solution(cited_bibcode='2013SPIE.8004.2013Z', score=e)
-        self.assertEqual(str(s), '1.0 2013SPIE.8004.2013Z')
+        s = Solution(cited_bibcode='2013SPIE.8004.2013Z', scix_id='foo', score=e)
+        self.assertEqual(str(s), '1.0 bibcode:2013SPIE.8004.2013Z scixid:foo')
         self.assertEqual(repr(s), "'2013SPIE.8004.2013Z'")
 
 
@@ -417,7 +417,7 @@ def test_solve_reference(self):
                'volume': '233',
                'year': '2019',
                'page': '207.04'}
-        self.assertEqual(str(solve_reference(Hypotheses(ref))), '1.0 2019AAS...23320704A')
+        self.assertEqual(str(solve_reference(Hypotheses(ref))), '1.0 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0')
         # testing with first author only and page
         # eventhough other authors are missing but because of page match is found
         ref = {'authors': 'Accomazzi, A.',
@@ -425,14 +425,14 @@ def test_solve_reference(self):
                'volume': '233',
                'year': '2019',
                'page': '381.08'}
-        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23338108A')
+        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23338108A scixid:scix:AGA3-9D3P-Y7EF')
         # testing with first author only and no page, hence record with only the first author is returned
         ref = {'authors': 'Accomazzi, A.',
                'journal': 'AAS233 Meeting',
                'volume': '233',
                'year': '2019',
                'page': '0'}
-        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23320704A')
+        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0')
         # when we have multiple solutions and not enough reference information to decide which
         # page and author are the deciding factor between these two test records
         # here first author and page are wrong
@@ -454,7 +454,7 @@ def test_solve_reference(self):
         # however the first record is authored by one author only and
         # it is the same first author of the second record
         # verify that the first record is returned
-        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 2019AAS...23320704A')
+        self.assertEqual(str(solve_reference(Hypotheses(ref))), '0.8 bibcode:2019AAS...23320704A scixid:scix:6ANE-YQXJ-KRH0')
 
 
     def test_add_volume_evidence(self):
@@ -637,7 +637,7 @@ def test_Querier(self):
         self.assertEqual(solrquery.make_params('author:("Accomazzi, A") AND year:"2019" AND bibstem:(AAS)'),
                          {'q': 'author:("Accomazzi, A") AND year:"2019" AND bibstem:(AAS)',
                           'rows': '100',
-                          'fl': u'author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm,year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype'})
+                          'fl': u'author,[fields author=10]author_norm,[fields author_norm=10],first_author_norm,year,title,pub,pub_raw,aff_raw,[fields aff_raw=1],scix_id,volume,issue,page,page_range,bibstem,bibcode,identifier,doi,doctype'})
 
         # no author_norm
         solution = {u'bibcode': u'2013JARS....7.3461V',
diff --git a/referencesrv/views.py b/referencesrv/views.py
index c561c5a..06d9820 100644
--- a/referencesrv/views.py
+++ b/referencesrv/views.py
@@ -123,7 +123,9 @@ def format_resolved_reference(returned_format, resolved, reference, id, cache=Tr
         cache_resolved_set(reference, resolved)
     if 'application/json' in returned_format:
         resolved = resolved.split()
-        result = {'refstring': reference, 'score': resolved[0], 'bibcode': resolved[1]}
+        bibcode = resolved[1].replace('bibcode:','').strip()
+        scix_id = resolved[2].replace('scixid:','').strip()
+        result = {'refstring': reference, 'score': resolved[0], 'bibcode': bibcode, 'scix_id':scix_id}
         if comment:
             result['comment'] = comment
         if id:

From c994853626c17c96dc5660ae56c2f2c2a74a2ed5 Mon Sep 17 00:00:00 2001
From: ADS Administration <ads@cfa.harvard.edu>
Date: Fri, 3 Apr 2026 15:59:58 -0400
Subject: [PATCH 2/3] PR feedback implementation

---
 referencesrv/resolver/common.py | 2 +-
 referencesrv/resolver/solve.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py
index a844800..0b32bc2 100644
--- a/referencesrv/resolver/common.py
+++ b/referencesrv/resolver/common.py
@@ -264,7 +264,7 @@ class Solution(object):
     * score
     * source_hypothesis (the hypothesis that eventually got it right)
     """
-    def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None):
+    def __init__(self, cited_bibcode, score, source_hypothesis='not given', cited_bibcode=None, citing_bibcode=None, scix_id=None):
         """
 
         :param cited_bibcode:
diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py
index 00f4f59..7e44751 100644
--- a/referencesrv/resolver/solve.py
+++ b/referencesrv/resolver/solve.py
@@ -288,10 +288,10 @@ def solve_for_fields(hypothesis):
 
             current_app.logger.debug("evidences from %s"%(hypothesis.name))
             for score, sol in sorted2(scored):
-                current_app.logger.debug("score %s %s %s"%(sol['bibcode'], score.get_score(), score))
+                current_app.logger.debug("score %s %s %s"%(sol.get('bibcode',None), score.get_score(), score))
 
             score, sol = choose_solution(scored, query_string, hypothesis)
-            return Solution(sol["bibcode"], score, hypothesis.name, scix_id=sol["scix_id"])
+            return Solution(sol.get("bibcode",None), score, hypothesis.name, scix_id=sol.get("scix_id",None))
 
     raise OverflowOrNone("Got either too many or no records from solr")
 

From 8347b6a543a41093b2ffa34c5649821a3e3377f4 Mon Sep 17 00:00:00 2001
From: ADS Administration <ads@cfa.harvard.edu>
Date: Tue, 7 Apr 2026 12:24:43 -0400
Subject: [PATCH 3/3] More SciX ID integration

---
 referencesrv/resolver/common.py |  2 +-
 referencesrv/resolver/solve.py  | 33 +++++++++++++++++++++------------
 referencesrv/views.py           |  4 ++--
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/referencesrv/resolver/common.py b/referencesrv/resolver/common.py
index 0b32bc2..a844800 100644
--- a/referencesrv/resolver/common.py
+++ b/referencesrv/resolver/common.py
@@ -264,7 +264,7 @@ class Solution(object):
     * score
     * source_hypothesis (the hypothesis that eventually got it right)
     """
-    def __init__(self, cited_bibcode, score, source_hypothesis='not given', cited_bibcode=None, citing_bibcode=None, scix_id=None):
+    def __init__(self, cited_bibcode, score, source_hypothesis='not given', citing_bibcode=None, scix_id=None):
         """
 
         :param cited_bibcode:
diff --git a/referencesrv/resolver/solve.py b/referencesrv/resolver/solve.py
index 7e44751..4bf66ae 100644
--- a/referencesrv/resolver/solve.py
+++ b/referencesrv/resolver/solve.py
@@ -157,7 +157,7 @@ def inspect_doubtful_solutions(scored_solutions, query_string, hypothesis):
     non_veto_solutions = [(evidences, solution) for evidences, solution in scored_solutions if not evidences.has_veto()]
     if len(non_veto_solutions) == 1:
         sol = non_veto_solutions
-        raise Undecidable("Try again if desperate", considered_solutions=[(sol[0][0].get_score(), sol[0][1]["bibcode"])])
+        raise Undecidable("Try again if desperate", considered_solutions=[(sol[0][0].get_score(), sol[0][1].get("bibcode",None), sol[0][1].get("scix_id",None))])
 
     # Some of the following rules only make sense for fielded
     # hypotheses.  Always be aware that input_fields might be None
@@ -170,7 +170,7 @@ def inspect_doubtful_solutions(scored_solutions, query_string, hypothesis):
         # we should base this on the result bibstem, I guess.
         for evidences, solution in scored_solutions:
             if evidences.single_veto_from("page") and not input_fields.get("page"):
-                raise Undecidable("Try again if desperate", considered_solutions=[(evidences.get_score(), solution["bibcode"])])
+                raise Undecidable("Try again if desperate", considered_solutions=[(evidences.get_score(), solution.get("bibcode",None), solution.get("scix_id",None))])
 
     raise NoSolution(reason="No unique non-vetoed doubtful solution", ref=query_string)
 
@@ -212,7 +212,7 @@ def inspect_ambiguous_solutions(scored_solutions, query_string, hypothesis):
         current_app.logger.debug("Breaking ambiguity with %s suspecting it's a duplicate book"%non_vetoed[-2][1]["bibcode"])
         return non_vetoed[-1]
 
-    to_stash = [(score.get_score(), sol["bibcode"])
+    to_stash = [(score.get_score(), sol.get("bibcode", None), sol.get("scix_id", None))
                 for score, sol in non_vetoed if score>current_app.config['EVIDENCE_SCORE_RANGE'][0]]
     current_app.logger.debug("Unsolved ambiguity, stashing %s"%(to_stash))
     raise Undecidable("Ambiguous %s."%(query_string), considered_solutions=to_stash)
@@ -234,7 +234,6 @@ def choose_solution(candidates, query_string, hypothesis):
     """
     min_score = current_app.config['MIN_SCORE_FIRST_ROUND']
     filtered = [(score, solution) for score, solution in candidates if score >= min_score*len(score)]
-
     if len(filtered)==0:
         if candidates:
             current_app.logger.debug("No score above minimal score, inspecting doubtful solutions.")
@@ -339,6 +338,9 @@ def solve_reference(ref):
         try:
             return solve_for_fields(hypothesis)
         except Undecidable as ex:
+            # Each entry of ex.considered_solutions is a (score, bibcode, scix_id)
+            # triple attached to the Undecidable raised inside solve_for_fields; they
+            # are built in inspect_doubtful_solutions and inspect_ambiguous_solutions.
             possible_solutions.extend(ex.considered_solutions)
             reason = ex.reason
         except (NoSolution, OverflowOrNone) as ex:
@@ -354,18 +356,25 @@ def solve_reference(ref):
     # all others and accept that
     if possible_solutions:
         current_app.logger.debug("Considering stashed ties: %s"%(possible_solutions))
-
         cands = {}
-        for score, sol in possible_solutions:
-            cands.setdefault(sol, []).append((score, sol))
-        for bibcode in cands:
-            cands[bibcode] = max(cands[bibcode])
+        scx2bbc = {}
+        for score, sol, scixid in possible_solutions:
+            # Entries in possible_solutions are expected to always have a SciX ID but
+            # not necessarily a bibcode, so the candidates dictionary is keyed on
+            # SciX ID and a SciX ID -> bibcode mapping is kept where one is known.
+            if sol:
+                scx2bbc[scixid] = sol
+            cands.setdefault(scixid, []).append((score, scixid))
+        for scix in cands:
+            cands[scix] = max(cands[scix])
         scored = sorted(zip(cands.values(), cands.keys()))
-
         if len(scored)==1:
-            return Solution(scored[0][1], scored[0][0], "only remaining of tied solutions")
+            # Determine the bibcode (if any) from the correspondence created earlier
+            bibcode = scx2bbc.get(scored[0][1], None)
+            return Solution(bibcode, scored[0][0], "only remaining of tied solutions", scix_id=scored[0][1])
         elif scored[-1][0]>scored[-2][0]:
-            return Solution(scored[0][1], scored[0][0], "best tied solution")
+            bibcode = scx2bbc.get(scored[0][1], None)
+            return Solution(bibcode, scored[0][0], "best tied solution", scix_id=scored[0][1])
         else:
             current_app.logger.debug("Remaining ties, giving up")
     if reason:
diff --git a/referencesrv/views.py b/referencesrv/views.py
index 06d9820..22853be 100644
--- a/referencesrv/views.py
+++ b/referencesrv/views.py
@@ -167,7 +167,7 @@ def text_resolve(reference, returned_format, id):
     :param returned_format:
     :return:
     """
-    not_resolved = '0.0 %s' % (19 * '.')
+    not_resolved = '0.0 bibcode:%s scixid:%s' % (19 * '.', 19 * '.')
     try:
         resolved = cache_resolved_get(reference)
         if resolved:
@@ -222,7 +222,7 @@ def xml_resolve(parsed_reference, returned_format):
     :param returned_format:
     :return:
     """
-    not_resolved = '0.0 %s' % (19 * '.')
+    not_resolved = '0.0 bibcode:%s scixid:%s' % (19 * '.', 19 * '.')
     try:
         resolved = str(solve_reference(Hypotheses(parsed_reference)))
         if resolved.startswith('0.0'):