Skip to content

Instantly share code, notes, and snippets.

@hpiwowar
Created June 23, 2011 23:24
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hpiwowar/1043879 to your computer and use it in GitHub Desktop.
Save hpiwowar/1043879 to your computer and use it in GitHub Desktop.
For converting ISI Web of Science tab-delimited exports into bibtex format
#!/usr/bin/env python
# Initially written by Heather Piwowar, June 2011
# Public domain: have at it!
# For converting ISI Web of Science tab-delimited exports into bibtex format
import csv
import glob
import random
from collections import defaultdict
import codecs
lookup = dict(AF="author", TI="title", JI="journal", LA="language", DT="keywords", DI="doi",
AB="abstract", DE="keywords", ID="keywords", C1="address", PU="publisher",
PI="address", PA="address", PD="month", PY="year", VL="volume", IS="issue", BP="pages",
AR="number", SB="isbn", EM="email", FU="funding", UT="issn", SC="categories",
FX="acknowledgements")
def get_updated_heading(heading):
try:
lookup_value = lookup[heading]
except:
lookup_value = ""
return(lookup_value)
def update_bib_dict(line, header_line, filename):
bib_dict = defaultdict(str)
bib_dict["filename"] = filename
for (item, old_heading) in zip(line, header_line):
new_heading = get_updated_heading(old_heading)
if new_heading and item:
if bib_dict[new_heading]:
bib_dict[new_heading] = "; ".join([bib_dict[new_heading], item])
else:
bib_dict[new_heading] = item
if bib_dict["author"]:
bib_dict["author"] = bib_dict["author"].replace("; ", " and ")
if bib_dict["doi"] and not bib_dict["url"]:
bib_dict["url"] = "http://dx.doi.org/" + bib_dict["doi"]
return(bib_dict)
def parse_wos_txt(filename):
"""Takes a filename and returns a dictionary"""
print filename
wos_txt_fh = csv.reader(open(filename, "r"), delimiter="\t")
header = wos_txt_fh.next()
bib_list = []
for line in wos_txt_fh:
bib_list.append(update_bib_dict(line, header, filename))
return(bib_list)
def export_as_bibtex(bib_list, fh, filename):
counter = 0
for bib_dict in bib_list:
counter += 1
bib_id = filename + "-" + str(counter)
fh.write("@article{" + str(bib_id) + ",")
lines = [heading + " = {" + str(bib_dict[heading]) + "}" for heading in bib_dict]
fh.write(",\r\n".join(lines))
fh.write("\r\n}\r\n")
def convert_dir_of_wos_txt_into_bibtex(mydir, output_filename):
mydict = dict()
counter = 0
writing_fh = codecs.open(output_filename, "w", "utf-8")
for filename in glob.glob(mydir + "/*.txt"):
bib_list = parse_wos_txt(filename)
export_as_bibtex(bib_list, writing_fh, filename)
writing_fh.close()
#convert_dir_of_wos_txt_into_bibtex("Pangaea", "Pangaea_raw.bib")
#convert_dir_of_wos_txt_into_bibtex("GEOROC", "GEOROC_raw.bib")
#convert_dir_of_wos_txt_into_bibtex("GEO", "GEO_raw.bib")
convert_dir_of_wos_txt_into_bibtex("TreeBase", "TreeBase_raw.bib")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment