Created
July 27, 2018 01:28
-
-
Save Phoenix-Effect/b1e0acc4e494e813e4c56a502d7e3894 to your computer and use it in GitHub Desktop.
Downloads a batch of DOI numbers from Airtable, fetches the information related to each DOI, and uploads it back to Airtable.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from habanero import Crossref | |
from airtable import Airtable | |
from airtable.auth import AirtableAuth | |
import time | |
import pprint | |
import requests | |
import datetime | |
import html.parser | |
# Airtable connection settings. The first three are passed, in the order
# (baseurl, tableid, apikey), to the Airtable client constructor; `view` is
# passed as the `view` argument when fetching records.
# Replace the placeholder strings with real values before running.
apikey: str = "API KEY"
baseurl: str = "BASE ID"
tableid: str = "ENTER TABLE NAME"
view: str = "ENTER VIEW NAME"
def string_to_length(string):
    """Turn a page string into a length, returned as a string.

    If ``string`` contains no hyphen, only its numeric characters are kept
    and returned unchanged (for example ``"e1234"`` gives ``"1234"``).

    If it contains a hyphen, the numeric characters before the first hyphen
    and those between the first and second hyphen are read as the first and
    last page, and ``last - first + 1`` is returned.

    When either side of the range cannot be read as an integer (for example
    ``"123-"``), ``"0"`` is returned instead of raising ``ValueError``.
    """
    if '-' not in string:
        return ''.join(ch for ch in string if ch.isnumeric())

    # A hyphen is present, so the split always yields at least two parts.
    first, last = string.split('-')[:2]
    left = ''.join(ch for ch in first if ch.isnumeric())
    right = ''.join(ch for ch in last if ch.isnumeric())
    try:
        num = int(right) - int(left) + 1
    except ValueError:
        # Empty side, or a "numeric" character int() rejects (e.g. a fraction).
        return "0"
    return str(num)
def doi_to_line(y):
    """Flatten a works response into a dict of simple fields.

    ``y`` is a mapping with a ``'status'`` key and, when the status is
    ``"ok"``, a ``'message'`` mapping holding the record.

    Returns the string ``"Error"`` when the status is not ``"ok"``.
    Otherwise returns a dict with the keys ``ref-count``, ``publisher``,
    ``DOI``, ``type``, ``created``, ``length``, ``source``, ``ref-by``,
    ``title``, ``url``, ``auth-FL``, ``auth-LF`` and ``subject``.

    Optional parts of the record fall back to defaults: ``length`` is
    ``"0"`` without a ``page`` entry, ``title``/``url``/``subject`` are
    ``""`` when missing or empty, and both author fields are ``"Unknown"``
    when there is no ``author`` entry. Authors lacking either ``given`` or
    ``family`` are left out of the author strings.

    Raises ``KeyError`` if one of the non-optional record keys is absent.
    """
    if y['status'] != "ok":
        return "Error"

    msg = y['message']
    x = {
        'ref-count': msg['reference-count'],
        'publisher': msg['publisher'],
        'DOI': msg['DOI'],
        'type': msg['type'],
        'created': msg['created']['timestamp'],
        'source': msg['source'],
        'ref-by': msg['is-referenced-by-count'],
    }

    x['length'] = string_to_length(msg['page']) if 'page' in msg else "0"

    # 'title' and 'link' are lists; an empty list must not raise IndexError.
    titles = msg.get('title') or []
    x['title'] = titles[0] if titles else ""
    links = msg.get('link') or []
    x['url'] = links[0]['URL'] if links else ""

    if 'author' in msg:
        # Collect (given, family) once and format it both ways.
        names = [
            (author['given'], author['family'])
            for author in msg['author']
            if 'given' in author and 'family' in author
        ]
        x['auth-FL'] = ", ".join(given + ' ' + family for given, family in names)
        x['auth-LF'] = ", ".join(family + ";" + given for given, family in names)
    else:
        x['auth-FL'] = "Unknown"
        x['auth-LF'] = "Unknown"

    x['subject'] = ", ".join(msg['subject']) if 'subject' in msg else ""
    return x
def upload_to_airtable(line, id):
    """Update the Airtable record ``id`` with values taken from ``line``.

    ``line`` must provide the keys ``DOI``, ``title``, ``created``,
    ``publisher``, ``source``, ``subject``, ``auth-FL`` and ``apa``; a
    missing key raises ``KeyError``. The ``sys-fixit`` column is always
    written as ``False``.
    """
    client = Airtable(baseurl, tableid, apikey)

    # (Airtable column, key in `line`) pairs, in the order they are written.
    leading_columns = (
        ("DOI or ISBN", 'DOI'),
        ("Title", 'title'),
        ("sys-timestamp", 'created'),
        ("Sponsor", 'publisher'),
        ("Hosting site", 'source'),
        ("Tags", 'subject'),
    )
    data = {column: line[key] for column, key in leading_columns}
    data["sys-fixit"] = False
    data["Creator_FirstLast"] = line['auth-FL']
    data["APA Reference"] = line['apa']

    client.update(id, data)
def addReference(doi, url):
    """Fetch a formatted reference from ``url`` and store it in ``doi['apa']``.

    Sends a GET request to ``url`` with bibliography request headers, decodes
    the response as UTF-8, stores the HTML-unescaped text under the ``'apa'``
    key of the ``doi`` dict (mutated in place), and prints the raw response
    text. Returns ``None``.
    """
    # NOTE(review): 'style' is sent as its own header here; DOI content
    # negotiation is usually documented as "Accept: text/x-bibliography;
    # style=apa" -- confirm the server honours this form.
    headers = {'Accept': 'text/bibliography', 'style': 'apa'}
    r = requests.get(url, headers=headers)
    r.encoding = 'utf-8'
    # HTMLParser().unescape() was removed in Python 3.9; html.unescape() is
    # the supported equivalent and is available via the existing html import.
    doi['apa'] = html.unescape(r.text)
    print(r.text)
# MAIN program
# Fetch every record in the configured view, look up each flagged article,
# and write the collected fields back to the same Airtable record.
airtable = Airtable(baseurl, tableid, apikey)
articles = airtable.get_all(view=view)
cr = Crossref()
cr.mailto = "suhail.ghafoor@asu.edu"
for article in articles:
    fields = article['fields']
    kind = fields.get('Kind of resource')
    url = fields.get('URL')
    # Only records that carry 'sys-fixit', whose first 'Kind of resource'
    # entry is 'Article', and that have a URL to look up are processed.
    # Records missing any of these are skipped instead of raising KeyError.
    if 'sys-fixit' not in fields or not kind or kind[0] != 'Article' or not url:
        continue
    x = cr.works(url)
    y = doi_to_line(x)
    if y == "Error":
        # doi_to_line signals a failed lookup with the string "Error";
        # it cannot be given an 'apa' entry or uploaded.
        print("Skipping record " + str(article['id']) + ": lookup failed")
        continue
    addReference(y, url)
    pprint.pprint(y)
    upload_to_airtable(y, article['id'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment