Skip to content

Instantly share code, notes, and snippets.

@harej
Created April 22, 2016 19:31
Show Gist options
  • Save harej/6165200daca600dfc69400ddc9087da7 to your computer and use it in GitHub Desktop.
Save harej/6165200daca600dfc69400ddc9087da7 to your computer and use it in GitHub Desktop.
Creates a CSV based on Citoid output
import requests
import csv
from collections import defaultdict
def get_citation(inputstring, timeout=30):
    """Look up citation metadata for a search term via the Citoid API.

    Args:
        inputstring: The value sent as the ``search`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot block the script forever.

    Returns:
        The first element of the decoded JSON body when the body is a
        list; otherwise the decoded body itself, unchanged.

    Raises:
        IndexError: If the service answers with an empty JSON list.
    """
    # Let requests build the query string: characters such as "&", "#",
    # "+" or spaces in the search term are percent-encoded instead of
    # silently corrupting the URL.
    r = requests.get(
        "https://citoid.wikimedia.org/api",
        params={"format": "mediawiki", "search": inputstring},
        timeout=timeout,
    )
    payload = r.json()
    if isinstance(payload, list):
        return payload[0]
    # NOTE(review): presumably a failed lookup comes back as a dict with an
    # "Error" key (that is what the caller tests for) -- confirm against the
    # Citoid docs. Indexing such a dict with [0] would raise KeyError, so it
    # is handed back as-is for the caller to inspect.
    return payload
# Citation fields that are never exported to the CSV.
_SKIPPED_KEYS = frozenset(
    ["tags", "notes", "extra", "accessDate", "abstractNote"]
)


def _add_header(headers, name):
    """Append *name* to the ordered column list unless already present."""
    if name not in headers:
        headers.append(name)


def _format_item(item):
    """Render one element of a list-valued field as a single CSV cell."""
    if isinstance(item, (list, tuple)):
        # Author entries are indexed as two name parts in the data this
        # script was written for; joining every part present also copes
        # with one-part or three-part entries instead of raising IndexError,
        # and gives other list-of-list fields a readable cell rather than
        # a Python list repr.
        return " ".join(str(part) for part in item)
    return item


def _flatten_citation(citation_data, headers):
    """Convert one citation dict into a flat ``{column: cell}`` mapping.

    New column names are appended to *headers* (mutated in place) in the
    order they are first encountered.
    """
    entry = {}
    for key, value in citation_data.items():
        if key in _SKIPPED_KEYS:
            continue
        # Every remaining key gets a column, even when its value is a list
        # or dict and the column itself therefore stays empty; this keeps
        # the column layout identical to what the script always produced.
        _add_header(headers, key)
        if isinstance(value, str):
            entry[key] = value
        elif isinstance(value, (int, float)) and not isinstance(value, bool):
            entry[key] = str(value)
        elif isinstance(value, list):
            # Spread list items over numbered columns: author1, author2, ...
            for counter, item in enumerate(value, start=1):
                new_key = key + str(counter)
                _add_header(headers, new_key)
                entry[new_key] = _format_item(item)
        # Dicts and any other value types are deliberately not exported.
    return entry


def main(sourcefile, outputfile="citation_data.csv"):
    """Build a CSV of citation data for each row of *sourcefile*.

    The first column of every row in *sourcefile* is passed to
    ``get_citation``; results containing an ``"Error"`` key are skipped.
    The remaining results are flattened into one row each and written to
    *outputfile* with a header row covering every column seen.

    Args:
        sourcefile: Path of the input CSV; only its first column is read.
        outputfile: Path of the CSV to write. Defaults to the file name the
            script has always used.
    """
    manifest = []  # one flat dict per successfully looked-up citation
    headers = []  # column names, in first-seen order

    # newline="" is what the csv module requires for files it reads/writes.
    with open(sourcefile, newline="") as f:
        for row in csv.reader(f):
            if not row:
                continue  # blank line in the input: nothing to look up
            citation_data = get_citation(row[0])
            if "Error" in citation_data:
                continue
            manifest.append(_flatten_citation(citation_data, headers))

    # Without newline="" the csv writer emits blank rows on Windows; UTF-8
    # keeps non-ASCII citation text from failing on other locale encodings.
    with open(outputfile, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writeheader()
        writer.writerows(manifest)
if __name__ == "__main__":
    import sys

    # Take the input CSV path from the command line when one is given;
    # otherwise fall back to the file name the script was written for.
    main(sys.argv[1] if len(sys.argv) > 1 else "nioshtic-sample.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment