forked from adsabs/ADSDocMatchPipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommon.py
More file actions
71 lines (61 loc) · 2.58 KB
/
common.py
File metadata and controls
71 lines (61 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
This module contains helper functions that are shared by the match_to_pub and match_to_arxiv modules.
"""
import os
from adsputils import setup_logging
# module-level logger, created through adsputils.setup_logging under the name 'docmatch_log'
logger = setup_logging('docmatch_log')
def get_filenames(filename):
    """
    read input file and return the list of filenames it contains, one per line
    (per the original description these are arXiv metadata full filenames)

    :param filename: path of the input file listing one filename per line
    :return: list of the lines with trailing CR/LF removed; an empty list
             if the file could not be opened or read (the failure is logged)
    """
    filenames = []
    try:
        with open(filename, 'r') as fp:
            # build the list in one step so a failure part-way through the file
            # leaves the result empty instead of half filled; the loop variable
            # is deliberately not called `filename` so the argument stays intact
            filenames = [line.rstrip('\r\n') for line in fp]
    except Exception as e:
        # best effort: report and fall through to return what we have.
        # the message needs placeholders, otherwise logging cannot merge the
        # extra argument into it and the actual error never reaches the log
        logger.error('Unable to open/read input file %s: %s', filename, e)
    return filenames
def format_results(results, separator):
    """
    turn one result dict into an output line, plus the data needed for manual inspection

    :param results: dict that is read for the keys source_bibcode, matched_bibcode,
                    confidence, score, comment, status_code and inspection
    :param separator: string placed between the fields of the output line
    :return: tuple (line, for_inspection); for_inspection is a list
             [source_bibcode, confidence, bibcodes, scores, comment] when the
             result carries a non-empty 'inspection' entry, otherwise None
    """
    if not results.get('matched_bibcode', None):
        # when error, return status_code
        error_line = '%s status_code=%s' % (results.get('comment', ''), results.get('status_code', ''))
        return error_line, None

    columns = ('source_bibcode', 'matched_bibcode', 'confidence', 'score', 'comment')
    match = separator.join(str(results.get(column, '')) for column in columns)

    inspection = results.get('inspection', None)
    if not inspection:
        # single match
        return match, None

    # found low confidence match or multiple matches
    for_inspection = [
        results.get('source_bibcode'),
        results.get('confidence'),
        inspection.get('bibcodes'),
        inspection.get('scores'),
        inspection.get('comment'),
    ]
    return match, for_inspection
def write_for_inspection_hits(result_filename, inspection_hits):
    """
    append one row for a match that needs manual inspection to <result_filename>.csv,
    writing the header line first when the file does not exist yet

    :param result_filename: output path without extension; '.csv' is appended
    :param inspection_hits: sequence indexed as
                            [0] source bibcode, [1] confidence,
                            [2] list of matched bibcodes, [3] list of matching scores,
                            [4] comment
    :return: None
    """
    csv_file = result_filename + '.csv'

    hyperlink_format = '"=HYPERLINK(""https://ui.adsabs.harvard.edu/abs/%s/abstract"",""%s"")",'
    score_format = '"%s",'

    source_bibcode = inspection_hits[0]
    confidence = inspection_hits[1]
    bibcodes = inspection_hits[2]
    scores = inspection_hits[3]
    comment = inspection_hits[4]

    # the row is assembled before the file is touched, so malformed input raises
    # without leaving an open handle or a header-only file behind
    # source bibcode, empty column reserved for curators adding verified bibcode, and the score
    # (values are wrapped in 1-tuples so a tuple value is rendered, not taken as format arguments)
    cells = [hyperlink_format % (source_bibcode, source_bibcode), ',', score_format % (confidence,)]
    cells.extend(hyperlink_format % (bibcode, bibcode) + score_format % (score,)
                 for bibcode, score in zip(bibcodes, scores))
    if len(bibcodes) == 1:
        # presumably pads the columns of the second match named in the header,
        # so the comment lines up with rows that have two matches
        cells.append(', ,')
    cells.append('"%s"' % (comment,))
    csv_line = ''.join(cells)

    # append mode creates the file when it is missing, so one open() serves both cases
    is_new_file = not os.path.exists(csv_file)
    with open(csv_file, 'a') as fp:
        if is_new_file:
            # new file, write header line
            fp.write('source bibcode (link),verified bibcode,confidence,matched bibcode (link),matched scores,matched bibcode (link),matched scores\n')
        fp.write('%s\n' % (csv_line,))