Created
October 15, 2013 16:36
-
-
Save newville/6994546 to your computer and use it in GitHub Desktop.
Convert ISI Web of Science Plaintext to bibtex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
""" | |
translate ISI bibliography (in plaintext) to bibtex | |
isi2bib savedrecs.txt > savedrecs.bib | |
Notes: | |
1. this script tries very hard to make sensible lists | |
of author names, but the task is hard, and results | |
should be checked carefully. | |
2. every entry is assumed to be a journal article. | |
3. no attempt is made to typeset article titles. | |
""" | |
import sys | |
from collections import OrderedDict | |
# Two-letter ISI field tags whose value is reduced to its first
# whitespace-separated word, mapped to the record-dictionary key it is
# stored under.
ISINAMES = {'VL': 'vol', 'IS': 'issue', 'PD': 'month',
            'PY': 'year', 'BP': 'page1', 'EP': 'page2'}


def parse_isi(fname):
    """Convert an ISI plaintext export to a list of record dictionaries.

    Every line is read as a two-character field tag (columns 0-1) and a
    value (column 3 onwards).  A line whose tag columns are blank
    continues the field of the previous line.  A record is opened by a
    ``PT`` line and closed by an ``ER`` line; lines outside a record are
    ignored.

    Parameters
    ----------
    fname : str
        Path of the ISI plaintext file to read.

    Returns
    -------
    list of dict
        One dictionary per record, in file order.  Each has ``'title'``
        and ``'source'`` (``TI`` / ``SO`` lines joined with single
        spaces), ``'authors'`` (one list per ``AU`` line, obtained by
        splitting the line on commas) and, when the corresponding field
        is present and non-empty, the keys listed in ``ISINAMES``.
    """
    citations = []
    rec, auths = None, None   # None while no record is open
    last_key = ''
    with open(fname, 'r') as fileh:
        for line in fileh:
            # Strip only the line terminator: slicing off the last
            # character would truncate a final line that has no newline
            # and would leave a stray '\r' on CRLF files.
            line = line.rstrip('\r\n')
            if not line.strip():
                # A blank separator line is not a continuation line.
                last_key = ''
                continue
            key, val = line[0:2], line[3:]
            if not key.strip():
                key = last_key   # continuation of the previous field
            if key == 'PT':      # start record
                rec = {'title': '', 'source': ''}
                auths = []
            elif rec is None:
                # Header/footer lines, or fields before any 'PT':
                # there is no open record to attach them to.
                pass
            elif key == 'ER':    # end record
                rec['authors'] = auths
                citations.append(rec)
                rec, auths = None, None
            elif key == 'TI':
                rec['title'] = ("%s %s" % (rec['title'], val)).strip()
            elif key == 'SO':
                rec['source'] = ("%s %s" % (rec['source'], val)).strip()
            elif key == 'AU':
                auths.append(val.split(','))
            elif key in ISINAMES:
                words = val.split()
                if words:        # an empty field carries no value to keep
                    rec[ISINAMES[key]] = words[0]
            last_key = key
    return citations
def _format_author(authnames):
    """Return ``(display_name, lastname)`` for one comma-split author entry.

    With a single element the text is split at its first space into
    given name and surname (or used whole as the surname when it has no
    space).  Otherwise element 0 is the surname (title-cased), element 1
    the given name(s) and any further elements form a suffix.  A given
    name written entirely in upper case is treated as a run of initials
    and rendered as ``A.~B.``.
    """
    if len(authnames) == 1:
        firstname, suffix = '', ''
        lastname = authnames[0]
        if ' ' in lastname:
            firstname, lastname = lastname.split(' ', 1)
    else:
        lastname = authnames[0].strip().title()
        firstname = authnames[1].strip()
        suffix = ' '.join(authnames[2:]).strip()
    # Only a non-empty given name can be a run of initials; without this
    # guard an author with no given name would be rendered as ". Name".
    if firstname and firstname == firstname.upper():
        initials = [char for char in firstname if char not in '. ']
        firstname = '.~'.join(initials) + '.' if initials else ''
    display = ' '.join(part for part in (firstname, lastname) if part)
    if suffix:
        display = "%s, %s" % (display, suffix)
    return display, lastname


def _label_suffix(index):
    "letters for the index-th duplicate label: 0 -> 'a', 25 -> 'z', 26 -> 'aa'"
    letters = ''
    index += 1
    while index > 0:
        index, rem = divmod(index - 1, 26)
        letters = chr(ord('a') + rem) + letters
    return letters


def generate_bibtex(citations):
    """Convert a list of record dictionaries (from parse_isi) to BibTeX.

    Parameters
    ----------
    citations : list of dict
        Records with ``'authors'`` (list of comma-split name lists),
        ``'title'`` and ``'source'``, and optionally ``'year'``,
        ``'vol'``, ``'issue'``, ``'page1'`` and ``'page2'``.

    Returns
    -------
    OrderedDict
        Maps each citation label to the text of its ``@article`` entry,
        in input order.  A label is the first author's surname followed
        by the year; repeated labels get the suffix ``a``, ``b``, ...,
        ``z``, ``aa``, ...  Title and journal are title-cased.  A missing
        year or volume is written as an empty field.
    """
    header = ('@article{%s,\n'
              'author = "{%s}",\n'
              'title = "{%s}",\n'
              'journal = "{%s}",\n'
              'year = "%s",\n'
              'volume = "%s", ')
    output = OrderedDict()
    for cite in citations:
        year = cite.get('year', '')
        formatted = [_format_author(names) for names in cite['authors']]
        authorlist = ' and '.join(display for display, _ in formatted)

        # The label is built from the first author's surname; whitespace
        # is removed because it cannot appear in a BibTeX key.
        citename = formatted[0][1] if formatted else 'Anon'
        base = "%s%s" % (''.join(citename.split()), year)
        label = base
        duplicate = 0
        while label in output:
            label = base + _label_suffix(duplicate)
            duplicate += 1

        para = header % (label, authorlist.strip(),
                         cite['title'].strip().title(),
                         cite['source'].strip().title(),
                         year, cite.get('vol', ''))
        if 'issue' in cite:
            para = "%s\n number = \"%s\"," % (para, cite['issue'])
        if 'page1' in cite:
            # A page range is only written when the end page is known
            # and differs from the start page.
            if 'page2' in cite and cite['page2'] != cite['page1']:
                pageline = "pages = \"%s--%s\"," % (cite['page1'],
                                                    cite['page2'])
            else:
                pageline = "pages = \"%s\"," % (cite['page1'])
            para = "%s\n %s" % (para, pageline)
        para = "%s\n }\n" % para
        output[label] = para
    return output
if __name__ == '__main__':
    # Every command-line argument is the path of an ISI plaintext file;
    # the BibTeX entries of each file are written to standard output in
    # the order the files are given.
    for recfile in sys.argv[1:]:
        for bibtex in generate_bibtex(parse_isi(recfile)).values():
            # The parenthesised single-argument form prints the same text
            # under Python 2 and Python 3, whereas the bare print
            # statement is a syntax error on Python 3.
            print(bibtex)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment