Skip to content

Instantly share code, notes, and snippets.

Created October 15, 2013 16:36
Show Gist options
  • Save newville/6994546 to your computer and use it in GitHub Desktop.
Save newville/6994546 to your computer and use it in GitHub Desktop.
Convert ISI Web of Science Plaintext to bibtex
#!/usr/bin/env python2.7
"""
Translate an ISI Web of Science bibliography (plain-text export) to BibTeX.

Usage:
    isi2bib savedrecs.txt > savedrecs.bib

Notes:
  1. This script tries very hard to make sensible lists
     of author names, but the task is hard, and results
     should be checked carefully.
  2. Every entry is assumed to be a journal article.
  3. No attempt is made to typeset article titles.
"""
import sys
from collections import OrderedDict
# Maps two-letter ISI field tags to the keys used in the record dictionaries
# returned by parse_isi().  Only the first whitespace-separated token of each
# of these fields is kept.
ISINAMES = {'VL': 'vol', 'IS': 'issue', 'PD': 'month',
            'PY': 'year', 'BP': 'page1', 'EP': 'page2'}


def parse_isi(fname):
    """Convert an ISI plain-text export into a list of record dictionaries.

    A record starts at a ``PT`` line and ends at an ``ER`` line.  A line
    whose two-character tag is blank continues the field of the previous
    line.  Lines outside a ``PT`` ... ``ER`` pair and unrecognised tags are
    ignored.

    Parameters:
        fname: path of the ISI plain-text file to read.

    Returns:
        A list with one dict per record, in file order.  Each dict has
          'title'   -- ``TI`` lines joined with spaces (leading blanks are
                       left in place; callers should strip)
          'source'  -- ``SO`` lines, joined the same way
          'authors' -- one entry per ``AU`` line, each being that line
                       split on commas (pieces are not stripped)
        plus, when present in the file, the ISINAMES values
        ('vol', 'issue', 'month', 'year', 'page1', 'page2').

    Raises:
        IOError/OSError if the file cannot be opened.
    """
    citations = []
    rec = None       # record being built; None while outside PT ... ER
    auths = []
    last_key = ''
    # 'with' guarantees the handle is closed even if parsing fails.
    with open(fname, 'r') as fileh:
        for line in fileh:
            # Drop only the line terminator, so a final line without a
            # newline keeps its last character and '\r' never leaks in.
            line = line.rstrip('\r\n')
            if not line.strip():
                # Blank separator line: nothing to continue afterwards.
                last_key = ''
                continue
            key = line[0:2]
            val = line[3:]
            if not key.strip():
                # Blank tag: continuation of the previous field.
                key = last_key
            if key == 'PT':      # start record
                rec = {'title': ' ', 'source': ' '}
                auths = []
            elif rec is None:
                # Header/trailer lines outside any record.
                pass
            elif key == 'ER':    # end record
                rec['authors'] = auths
                citations.append(rec)
                rec = None
            elif key == 'TI':
                rec['title'] = "%s %s" % (rec['title'], val)
            elif key == 'SO':
                rec['source'] = "%s %s" % (rec['source'], val)
            elif key == 'AU':
                # Keep the comma-separated pieces (last name, first
                # name/initials, optional suffix) for generate_bibtex().
                auths.append(val.split(','))
            elif key in ISINAMES:
                tokens = val.split()
                if tokens:       # an empty field carries no value
                    rec[ISINAMES[key]] = tokens[0]
            last_key = key
    return citations
def _format_author(authnames):
    """Format one author; return a ``(display_name, lastname)`` tuple.

    authnames is the list of comma-separated pieces of one author entry,
    e.g. ``['Smith', ' JA']`` or ``['Doe', ' Jane', ' Jr']``.
    """
    parts = [part.strip() for part in authnames]
    suffix = ''
    if len(parts) < 2:
        # No comma in the entry: text before the first space is taken as
        # the first name and the remainder as the last name.
        firstname = ''
        lastname = parts[0] if parts else ''
        if ' ' in lastname:
            firstname, lastname = lastname.split(' ', 1)
    else:
        lastname = parts[0].title()
        firstname = parts[1]
        suffix = ' '.join(parts[2:]).strip()
    # An all-uppercase first name is treated as a run of initials and
    # rewritten as "J.~A." (tie keeps the initials on one line).  The
    # emptiness check stops a missing first name from turning into ".".
    if firstname and firstname == firstname.upper():
        initials = [letter for letter in firstname if letter not in '. ']
        if initials:
            firstname = '.~'.join(initials) + '.'
    display = ("%s %s" % (firstname, lastname)).strip()
    if suffix:
        display = "%s, %s" % (display, suffix)
    return display, lastname


def _unique_label(base, taken):
    """Return base, or base plus a, b, ..., z, aa, ab, ... if already taken."""
    label = base
    attempt = 0
    while label in taken:
        # Bijective base-26 so the suffix stays alphabetic past 'z'.
        letters = ''
        number = attempt + 1
        while number > 0:
            number, rem = divmod(number - 1, 26)
            letters = chr(ord('a') + rem) + letters
        label = "%s%s" % (base, letters)
        attempt += 1
    return label


def generate_bibtex(citations):
    """Convert a list of record dictionaries (from parse_isi) to BibTeX.

    Parameters:
        citations: iterable of dicts.  Keys read are 'authors' (list of
            lists of name pieces), 'title', 'source', 'year', 'vol' and,
            optionally, 'issue', 'page1', 'page2'.  Missing 'title',
            'source', 'year' or 'vol' are emitted as empty strings.

    Returns:
        An OrderedDict mapping each citation key to the text of its
        ``@article`` entry, in input order.  The key is the first author's
        last name (whitespace removed) followed by the year; a letter
        suffix is appended when that key is already in use.
    """
    output = OrderedDict()
    for cite in citations:
        year = cite.get('year', '')
        names = []
        citename = None
        for authnames in cite.get('authors', []):
            display, lastname = _format_author(authnames)
            if not display:
                continue        # empty author entry
            if citename is None:
                citename = lastname
            names.append(display)
        # NOTE(review): the whole list is wrapped in one brace group by
        # the template below; BibTeX does not split names on "and" inside
        # braces -- confirm this is the intended output.
        authorlist = ' and '.join(names)

        # Whitespace is not allowed in a BibTeX key; 'Anon' avoids the
        # literal "None" when a record has no usable author.
        base = "%s%s" % (''.join((citename or 'Anon').split()), year)
        # Keys of `output` are exactly the labels used so far.
        label = _unique_label(base, output)

        para = ('@article{%s,\n'
                'author = "{%s}",\n'
                'title = "{%s}",\n'
                'journal = "{%s}",\n'
                'year = "%s",\n'
                'volume = "%s", ') % (label, authorlist,
                                      cite.get('title', '').strip(),
                                      cite.get('source', '').strip(),
                                      year, cite.get('vol', ''))
        if 'issue' in cite:
            para = "%s\n number = \"%s\"," % (para, cite['issue'])
        if 'page1' in cite:
            if 'page2' in cite and cite['page2'] != cite['page1']:
                pageline = "pages = \"%s--%s\"," % (cite['page1'],
                                                    cite['page2'])
            else:
                # Single page, or identical first and last page.
                pageline = "pages = \"%s\"," % (cite['page1'])
            para = "%s\n %s" % (para, pageline)
        para = "%s\n }\n" % para
        output[label] = para
    return output
if __name__ == '__main__':
    # Every command-line argument is an ISI plain-text export.  All records
    # are collected first so that citation keys are made unique across the
    # whole output stream rather than per input file.
    all_citations = []
    for recfile in sys.argv[1:]:
        all_citations.extend(parse_isi(recfile))
    for bibtex in generate_bibtex(all_citations).values():
        # Parenthesised single-argument print behaves identically on
        # Python 2 and Python 3.
        print(bibtex)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment