Skip to content

Instantly share code, notes, and snippets.

@ravenscroftj
Created April 13, 2018 15:50
Show Gist options
  • Save ravenscroftj/a42fddfe7412ad21a134872ddf5678ef to your computer and use it in GitHub Desktop.
Save ravenscroftj/a42fddfe7412ad21a134872ddf5678ef to your computer and use it in GitHub Desktop.
Python Unpaywall Wrapper
"""
Simple function wrapper for the unpaywall API.
"""
import requests
# Contact address sent as the `email` query parameter on the request made by
# unpaywall() below.
YOUR_EMAIL = "test@gmail.com" # you should put your own email address here
def unpaywall(doi, retry=0, pdfonly=True):
    """Find a legal open access version of a paper via the Unpaywall API.

    Args:
        doi: DOI of the paper to look up; it is appended to the API URL.
        retry: Number of retries already performed after an HTTP 500
            response. Callers normally leave this at 0; the function
            increments it when it calls itself again.
        pdfonly: When true, only a direct PDF link is an acceptable result.
            When false, the best location's general ``url`` is returned if
            no PDF link is available.

    Returns:
        The URL of the best open access location as reported by the API
        (its ``url_for_pdf`` when present, otherwise its ``url`` unless
        ``pdfonly`` is set), or None when the DOI is unknown, the API keeps
        failing, the response cannot be interpreted, or no suitable open
        access copy exists. Diagnostics are printed to stdout.
    """
    r = requests.get(
        "https://api.unpaywall.org/v2/{}".format(doi),
        params={"email": YOUR_EMAIL},
    )

    if r.status_code == 404:
        print("Invalid/unknown DOI {}".format(doi))
        return None

    if r.status_code == 500:
        print("Unpaywall API failed. Try: {}/3".format(retry+1))
        if retry < 3:
            # Forward pdfonly so a retry honours the caller's choice instead
            # of silently falling back to the default.
            return unpaywall(doi, retry + 1, pdfonly)
        print("Retried 3 times and failed. Giving up")
        return None

    # Parse the body exactly once. Decode errors raised by Response.json()
    # are ValueError subclasses, so no extra import is needed to catch them.
    try:
        record = r.json()
    except ValueError:
        print("Response was not json")
        print(r.text)
        return None

    if not isinstance(record, dict):
        # Valid JSON, but not the object the lookups below expect.
        print("Something weird happened")
        print(r.text)
        return None

    if "best_oa_location" not in record:
        print("best_oa_location not set")
        print(r.text)
    best_loc = record.get("best_oa_location")

    # .get() keeps a payload without these keys from raising KeyError.
    if not record.get("is_oa") or best_loc is None:
        print("No OA paper found for {}".format(doi))
        return None

    pdf_url = best_loc.get("url_for_pdf")
    if pdf_url is not None:
        return pdf_url

    if pdfonly:
        print("No PDF found..")
        print(best_loc)
        return None

    # No direct PDF link, but the caller accepts any open access URL.
    return best_loc.get("url")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment