Source code for mcutils.scripts.bibtex.update

#!/usr/bin/env python
"""Updates entries in an existing bibtex file"""
import logging
from mcutils.utils.bibtex import file, pubmed
from copy import deepcopy

logger = logging.getLogger(__name__)


def update_pmids(input_bibtex: file.BibTexSet):
    """
    Creates a new bibtex set updated from pubmed

    The input set is deep-copied; the copy is updated and returned, the
    input itself is left untouched.

    For each entry a PMID is determined as follows:

    - if the entry has a ``pmid`` or ``PMID`` tag, that value is used;
    - otherwise DOI candidates are collected and handed to
      ``pubmed.pmid_from_doi``:

      - for bioRxiv entries (``journal`` contains "biorxiv" or ``publisher``
        contains "Cold Spring Harbor Laboratory") a DOI is derived from the
        entry's URL tag through ``pubmed.biorxiv_to_doi``; a bioRxiv entry
        without any URL tag is skipped altogether;
      - a ``doi`` or ``DOI`` tag is used after removing a leading
        ``http://dx.doi.org/`` or ``https://doi.org/``;
      - failing that, an ``adsurl`` tag is converted through
        ``pubmed.ads_to_doi``.

    Every entry whose PMID yields an article from ``pubmed.query_mult`` gets
    its tags replaced by ``pubmed.to_bibtex(article).tags``, where tags
    reported as ``'?'`` fall back to the value the entry had before.
    Finally, newlines in all tag values are replaced by spaces.

    :param input_bibtex: set of bibtex entries to update
    :return: updated deep copy of ``input_bibtex``
    :raises ValueError: if ``pubmed.pmid_from_doi`` or ``pubmed.query_mult``
        returns a different number of results than values requested
    """
    new_bibtex = deepcopy(input_bibtex)
    pmid_replace = []  # (entry, pmid) pairs to be queried
    doi_replace = []  # (entry, doi) pairs that still need a pmid

    for entry in new_bibtex:
        for pmid_name in ('pmid', 'PMID'):
            if pmid_name in entry.tags:
                pmid_replace.append((entry, entry.tags[pmid_name]))
                break
        else:
            # No PMID on the entry: try to find a DOI instead
            if (
                    ('journal' in entry.tags and 'biorxiv' in entry.tags['journal'].lower()) or
                    ('publisher' in entry.tags and 'Cold Spring Harbor Laboratory' in entry.tags['publisher'])
            ):
                for key in ('url', 'URL', 'Url'):
                    if key in entry.tags:
                        break
                else:
                    # Logged rather than printed: stdout may carry the
                    # resulting bibtex when no output file is given.
                    logger.warning(f'No URL found for {entry}')
                    continue
                doi = pubmed.biorxiv_to_doi(entry.tags[key])
                if doi is not None:
                    doi_replace.append((entry, doi))
                else:
                    logger.info(f'No DOI found on {entry.tags[key]}')
            # NOTE(review): a bioRxiv entry that also has a doi tag ends up in
            # doi_replace twice; kept as in the original - confirm intent.
            for doi_name in ('doi', 'DOI'):
                if doi_name in entry.tags:
                    value = entry.tags[doi_name]
                    # Reduce a full resolver URL to the bare DOI
                    for starter in (
                            'http://dx.doi.org/',
                            'https://doi.org/',
                    ):
                        if value.startswith(starter):
                            value = value[len(starter):]
                    doi_replace.append((entry, value))
                    break
            else:
                if 'adsurl' in entry.tags:
                    doi = pubmed.ads_to_doi(entry.tags['adsurl'])
                    if doi is not None:
                        doi_replace.append((entry, doi))
                    else:
                        logger.info(f'No DOI found on {entry.tags["adsurl"]}')

    # Translate the collected DOIs into PMIDs (skipped when there are none)
    dois = [doi for _, doi in doi_replace]
    pmids = pubmed.pmid_from_doi(dois) if dois else []
    if len(pmids) != len(doi_replace):
        raise ValueError(
            f'Requested PMIDs for {len(doi_replace)} DOIs, but received '
            f'{len(pmids)} results (DOIs: {dois}; PMIDs: {pmids})'
        )
    for (entry, _), pmid in zip(doi_replace, pmids):
        if pmid is not None:
            pmid_replace.append((entry, pmid))

    # process entries with PMID
    requested = [pmid for _, pmid in pmid_replace]
    articles = pubmed.query_mult(requested) if requested else []
    if len(articles) != len(pmid_replace):
        raise ValueError(
            f'Requested articles for {len(pmid_replace)} PMIDs, but received '
            f'{len(articles)} results (PMIDs: {requested})'
        )
    failed = [pmid for article, pmid in zip(articles, requested) if article is None]
    if failed:
        logger.info(f'Failed to find article for {failed}')

    for (entry, _), article in zip(pmid_replace, articles):
        if article is not None:
            new_tags = pubmed.to_bibtex(article).tags
            # Tags reported as '?' fall back to what the entry already had
            for tag_name in new_tags:
                if new_tags[tag_name] == '?':
                    new_tags[tag_name] = entry.tags.get(tag_name, '?')
            if entry.tags.get('title', '').lower() != new_tags.get('title', '').lower():
                logger.info(f'replacing "{entry.tags.get("title", "")}" with "{new_tags.get("title", "")}"')
            # Use the merged tags; converting the article again would drop
            # the fallback values filled in above.
            entry.tags = new_tags

    # Tag values must not contain newlines
    for entry in new_bibtex:
        for tag_name in entry.tags:
            entry.tags[tag_name] = entry.tags[tag_name].replace('\n', ' ')
    return new_bibtex
def run_from_args(args):
    """
    Runs the script based on a Namespace containing the command line arguments

    Reads ``args.input`` as a bibtex file and passes it to ``update_pmids``.
    The result is written to ``args.output`` when that is set; otherwise its
    ``repr`` is printed to stdout.

    :param args: Namespace with the attributes ``input`` and ``output``
    """
    source = file.BibTexFile(args.input)
    updated = update_pmids(source)
    if not args.output:
        # No output file requested: dump the result on stdout
        print(repr(updated))
        return
    updated.write(args.output)
def add_to_parser(parser):
    """
    Creates the parser of the command line arguments

    Registers two positional arguments on ``parser``: the required ``input``
    and the optional ``output``, which defaults to None.

    :param parser: object with an ``add_argument`` method, to which the
        arguments are added in place
    """
    positionals = (
        ('input', {'help': 'input bibtex file'}),
        ('output', {
            'default': None,
            'nargs': '?',
            'help': 'output bibtex file (default: to stdout)',
        }),
    )
    for name, options in positionals:
        parser.add_argument(name, **options)