Skip to content

Instantly share code, notes, and snippets.

@newville
Created October 15, 2013 16:36
Show Gist options
  • Save newville/6994546 to your computer and use it in GitHub Desktop.
Save newville/6994546 to your computer and use it in GitHub Desktop.
Convert ISI Web of Science Plaintext to bibtex
#!/usr/bin/env python2.7
"""
translate ISI bibliography (in plaintext) to bibtex
isi2bib savedrecs.txt > savedrecs.bib
Notes:
1. this script tries very hard to make sensible lists
of author names, but the task is hard, and results
should be checked carefully.
2. every entry is assumed to be a journal article.
3. no attempt is made to typeset article titles.
"""
import sys
from collections import OrderedDict
# Two-letter ISI field tags whose value is stored as a single token,
# mapped to the key used for it in a parsed record dictionary.
ISINAMES = {'VL': 'vol', 'IS': 'issue', 'PD': 'month',
            'PY': 'year', 'BP': 'page1', 'EP': 'page2'}


def parse_isi(fname):
    """Convert an ISI plaintext export to a list of record dictionaries.

    Each line is read as a two-character tag, one separator character,
    and a value.  A line whose tag field is blank continues the field
    of the previous tagged line.  A record starts at a ``PT`` line and
    is stored when its ``ER`` line is reached.

    Parameters:
        fname: path of the ISI plaintext file to read.

    Returns:
        A list of dicts, one per record.  Every dict has ``'title'`` and
        ``'source'`` (space-joined text, with leading padding) and
        ``'authors'`` (a list with one entry per ``AU`` line, each entry
        being that line's value split on commas).  The keys named in
        ``ISINAMES`` are present only when the record carries the tag
        with a non-empty value; only the first whitespace-separated
        token of that value is kept.
    """
    citations = []
    rec = None        # record being built; None while outside PT..ER
    auths = []
    last_key = ''
    with open(fname, 'r') as fileh:
        for raw in fileh:
            # Strip only the line terminator so a final line without a
            # newline keeps its last character and CRLF leaves no '\r'.
            line = raw.rstrip('\r\n')
            if not line.strip():
                # Blank separator line: carries no tag and no value.
                continue
            key = line[0:2]
            val = line[3:]
            continued = not key.strip()
            if continued:
                key = last_key
            if key == 'PT':  # start record
                rec = {'title': ' ', 'source': ' '}
                auths = []
            elif rec is None:
                # File header/footer tags outside any record: ignore.
                pass
            elif key == 'ER':  # end record
                rec['authors'] = auths
                citations.append(rec)
                rec = None
            elif key == 'TI':
                rec['title'] = "%s %s" % (rec['title'], val)
            elif key == 'SO':
                rec['source'] = "%s %s" % (rec['source'], val)
            elif key == 'AU':
                auths.append(val.split(','))
            elif key in ISINAMES and not continued:
                tokens = val.split()
                if tokens:  # an empty value would otherwise raise IndexError
                    rec[ISINAMES[key]] = tokens[0]
            last_key = key
    return citations
def _format_author(authnames):
    """Return ``(display_name, lastname)`` for one parsed author entry.

    ``authnames`` is the comma-split value of one ``AU`` line: either
    ``[name]`` or ``[last, first, suffix...]``.
    """
    if len(authnames) == 1:
        firstname, suffix = '', ''
        lastname = authnames[0]
        if ' ' in lastname:
            firstname, lastname = lastname.split(' ', 1)
    else:
        lastname = authnames[0].strip().title()
        firstname = authnames[1].strip()
        suffix = ' '.join(authnames[2:]).strip()
    # An all-uppercase first name is treated as a run of initials and is
    # rewritten as "J.~A.".  An empty first name is left alone so that a
    # single-word author does not come out as ". Name".
    if firstname and firstname == firstname.upper():
        initials = [letter for letter in firstname
                    if letter != '.' and letter != ' ']
        if initials:
            firstname = '.~'.join(initials) + '.'
    display = ' '.join(part for part in (firstname, lastname) if part)
    if suffix:
        display = "%s, %s" % (display, suffix)
    return display, lastname


def _unique_label(base, taken):
    """Return ``base``, or ``base`` plus a letter suffix not yet in ``taken``."""
    label = base
    app = 'a'
    while label in taken:
        label = "%s%s" % (base, app)
        app = chr(1 + ord(app))
    return label


def generate_bibtex(citations):
    """Convert a list of record dictionaries (from parse_isi) to BibTeX.

    Parameters:
        citations: list of dicts with ``'authors'``, ``'title'`` and
            ``'source'``; ``'year'``, ``'vol'``, ``'issue'``, ``'page1'``
            and ``'page2'`` are optional and their fields are omitted
            from the entry when absent.

    Returns:
        An OrderedDict mapping each citation label to the text of its
        ``@article`` entry, in input order.  A label is the first
        author's last name (whitespace removed, ``Anon`` when there are
        no authors) followed by the year, with a letter appended when
        needed to keep labels unique.
    """
    output = OrderedDict()
    for cite in citations:
        year = cite.get('year', '')
        names = []
        citename = None
        for authnames in cite['authors']:
            display, lastname = _format_author(authnames)
            if citename is None:
                citename = lastname
            names.append(display)
        if citename is None:
            citename = 'Anon'
        # Whitespace is not allowed inside a BibTeX citation key.
        citename = ''.join(citename.split())
        label = _unique_label("%s%s" % (citename, year), output)

        # Entry lines are joined with '\n'; the spacing inside each
        # line is part of the emitted format.
        entry = ['@article{%s,' % label,
                 'author = "{%s}",' % ' and '.join(names).strip(),
                 'title = "{%s}",' % cite['title'].strip().title(),
                 'journal = "{%s}",' % cite['source'].strip().title()]
        if year:
            entry.append('year = "%s",' % year)
        if 'vol' in cite:
            entry.append('volume = "%s", ' % cite['vol'])
        if 'issue' in cite:
            entry.append(' number = "%s",' % cite['issue'])
        if 'page1' in cite:
            if 'page2' in cite and cite['page2'] != cite['page1']:
                pageline = 'pages = "%s--%s",' % (cite['page1'],
                                                  cite['page2'])
            else:
                pageline = 'pages = "%s",' % cite['page1']
            entry.append(' %s' % pageline)
        entry.append(' }\n')
        output[label] = '\n'.join(entry)
    return output
if __name__ == '__main__':
    # Treat every command-line argument as an ISI plaintext file and
    # write its BibTeX entries to standard output.
    for recfile in sys.argv[1:]:
        for bibtex in generate_bibtex(parse_isi(recfile)).values():
            # The parenthesised single-argument form prints the same
            # text under Python 2.7 and is also valid Python 3.
            print(bibtex)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment