#!/usr/bin/env python
"""Updates entries in an existing bibtex file"""
import logging
from mcutils.utils.bibtex import file, pubmed
from copy import deepcopy
logger = logging.getLogger(__name__)
def _first_present_tag(tags, names):
    """Return the first of ``names`` that is a key in ``tags`` (``None`` if none is)."""
    for name in names:
        if name in tags:
            return name
    return None


def _strip_doi_resolver(value):
    """Return ``value`` with any leading DOI resolver URL prefix removed."""
    for prefix in (
        'http://dx.doi.org/',
        'https://dx.doi.org/',
        'http://doi.org/',
        'https://doi.org/',
    ):
        if value.startswith(prefix):
            value = value[len(prefix):]
    return value


def update_pmids(input_bibtex: file.BibTexSet):
    """
    Creates a new bibtex set updated from pubmed

    The input is deep-copied; only the copy is altered and returned.

    For every entry an identifier is collected:

    - entries with a ``pmid`` or ``PMID`` tag are looked up directly;
    - otherwise a DOI is gathered from the bioRxiv URL (for entries whose
      journal mentions biorxiv or whose publisher mentions Cold Spring
      Harbor Laboratory), from a ``doi``/``DOI`` tag, or, failing that, from
      an ``adsurl`` tag. These DOIs are then converted to PMIDs in a single
      call to ``pubmed.pmid_from_doi``.

    Every entry for which pubmed returns an article has its tags replaced by
    the tags generated from that article. Tags for which pubmed only provides
    the placeholder ``'?'`` keep the value of the existing entry (if any).
    Finally, newlines in all tag values are replaced by spaces.

    :param input_bibtex: set of bibtex entries to update
    :return: updated copy of `input_bibtex`
    :raises ValueError: if pubmed returns a different number of results than
        the number of identifiers that were sent
    """
    new_bibtex = deepcopy(input_bibtex)
    pmid_replace = []  # (entry, pmid) pairs to be refreshed from pubmed
    doi_replace = []  # (entry, doi) pairs whose PMID still has to be resolved

    for entry in new_bibtex:
        tags = entry.tags

        pmid_name = _first_present_tag(tags, ('pmid', 'PMID'))
        if pmid_name is not None:
            pmid_replace.append((entry, tags[pmid_name]))
            continue

        # No PMID available: try to find a DOI instead.
        if (
            ('journal' in tags and 'biorxiv' in tags['journal'].lower()) or
            ('publisher' in tags and 'Cold Spring Harbor Laboratory' in tags['publisher'])
        ):
            url_name = _first_present_tag(tags, ('url', 'URL', 'Url'))
            if url_name is None:
                # Logged rather than printed, so that the bibtex written to
                # stdout by the command line script stays clean.
                logger.warning(f'No URL found for {entry}')
                continue
            doi = pubmed.biorxiv_to_doi(tags[url_name])
            if doi is not None:
                doi_replace.append((entry, doi))
            else:
                logger.info(f'No DOI found on {tags[url_name]}')

        doi_name = _first_present_tag(tags, ('doi', 'DOI'))
        if doi_name is not None:
            doi_replace.append((entry, _strip_doi_resolver(tags[doi_name])))
        elif 'adsurl' in tags:
            doi = pubmed.ads_to_doi(tags['adsurl'])
            if doi is not None:
                doi_replace.append((entry, doi))
            else:
                logger.info(f'No DOI found on {tags["adsurl"]}')

    # Convert all DOIs to PMIDs in one go; one result per DOI is expected.
    dois = [doi for _, doi in doi_replace]
    pmids = pubmed.pmid_from_doi(dois)
    if len(pmids) != len(doi_replace):
        raise ValueError(
            f'Received {len(pmids)} PMIDs for {len(doi_replace)} DOIs '
            f'(PMIDs: {pmids}; DOIs: {dois})'
        )
    for (entry, _), pmid in zip(doi_replace, pmids):
        if pmid is not None:
            pmid_replace.append((entry, pmid))

    # process entries with PMID
    queried = [pmid for _, pmid in pmid_replace]
    articles = pubmed.query_mult(queried)
    missing = [pmid for article, pmid in zip(articles, queried) if article is None]
    if missing:
        logger.info(f'Failed to find article for {missing}')
    if len(articles) != len(pmid_replace):
        raise ValueError(
            f'Received {len(articles)} articles for {len(pmid_replace)} PMIDs '
            f'(PMIDs: {queried})'
        )

    for (entry, _), article in zip(pmid_replace, articles):
        if article is None:
            continue
        new_tags = pubmed.to_bibtex(article).tags
        # Where pubmed only has a placeholder, fall back on the existing value.
        for tag_name in new_tags:
            if new_tags[tag_name] == '?':
                new_tags[tag_name] = entry.tags.get(tag_name, '?')
        if entry.tags.get('title', '').lower() != new_tags.get('title', '').lower():
            logger.info(f'replacing "{entry.tags.get("title", "")}" with "{new_tags.get("title", "")}"')
        # Assign the merged tags; regenerating them here would throw away
        # the placeholder fall-backs filled in above.
        entry.tags = new_tags

    for entry in new_bibtex:
        for tag_name in entry.tags:
            entry.tags[tag_name] = entry.tags[tag_name].replace('\n', ' ')
    return new_bibtex
def run_from_args(args):
    """
    Runs the update using the parsed command line arguments

    Reads the bibtex file at `args.input` and updates it through
    :func:`update_pmids`. The result is written to `args.output` if that is
    set; otherwise its ``repr`` is printed to stdout.

    :param args: Namespace with the attributes `input` and `output`
    """
    source = file.BibTexFile(args.input)
    updated = update_pmids(source)
    if not args.output:
        # no output file requested: dump the result to stdout
        print(repr(updated))
        return
    updated.write(args.output)
def add_to_parser(parser):
    """
    Registers the command line arguments of this script on `parser`

    Two positional arguments are added: the required `input` and the optional
    `output`, which defaults to ``None``.

    :param parser: argument parser (or sub-parser) to add the arguments to
    """
    positionals = (
        ('input', {'help': 'input bibtex file'}),
        ('output', {
            'default': None,
            'nargs': '?',
            'help': 'output bibtex file (default: to stdout)',
        }),
    )
    for dest, options in positionals:
        parser.add_argument(dest, **options)