Created
April 13, 2018 15:50
-
-
Save ravenscroftj/a42fddfe7412ad21a134872ddf5678ef to your computer and use it in GitHub Desktop.
Python Unpaywall Wrapper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple function wrapper for the unpaywall API. | |
""" | |
import requests | |
YOUR_EMAIL = "test@gmail.com"  # Sent as the `email` query parameter on every Unpaywall request; replace with your own address.
def unpaywall(doi, retry=0, pdfonly=True):
    """Find a legal open access version of a paper via the Unpaywall API.

    Args:
        doi: DOI of the paper to look up.
        retry: Number of failed attempts already made. Callers normally
            leave this at 0; it is kept for backward compatibility.
        pdfonly: When true, only a direct PDF link is acceptable and None
            is returned if the best location has none. When false, the
            location's generic ``url`` is used as a fallback.

    Returns:
        The URL of the best open access location (the direct PDF link when
        one is available), or None when the DOI is unknown, the API keeps
        failing, the response cannot be interpreted, or no suitable open
        access copy exists.

    Raises:
        requests.exceptions.RequestException: on network-level failures,
            including the request timing out.
    """
    max_tries = 3

    # Retry in a loop so the caller's ``pdfonly`` choice is never lost and
    # the number of requests matches the "n/3" shown in the messages.
    while True:
        r = requests.get(
            "https://api.unpaywall.org/v2/{}".format(doi),
            params={"email": YOUR_EMAIL},
            timeout=30,  # without a timeout a stalled connection hangs forever
        )
        if r.status_code != 500:
            break
        retry += 1
        print("Unpaywall API failed. Try: {}/3".format(retry))
        if retry >= max_tries:
            print("Retried 3 times and failed. Giving up")
            return None

    if r.status_code == 404:
        print("Invalid/unknown DOI {}".format(doi))
        return None

    # Decode the body exactly once. The JSON decode errors raised by
    # ``Response.json`` are ValueError subclasses, so no ``json`` import is
    # needed to catch them.
    try:
        payload = r.json()
    except ValueError:
        print("Response was not json")
        print(r.text)
        return None

    if not isinstance(payload, dict):
        print("Something weird happened")
        print(r.text)
        return None

    if "best_oa_location" not in payload:
        print("best_oa_location not set")
        print(r.text)

    best_loc = payload.get("best_oa_location")
    if not payload.get("is_oa") or best_loc is None:
        print("No OA paper found for {}".format(doi))
        return None

    # Prefer the direct PDF link; previously this return was unreachable.
    pdf_url = best_loc.get("url_for_pdf")
    if pdf_url is not None:
        return pdf_url

    if pdfonly:
        print("No PDF found..")
        print(best_loc)
        return None

    return best_loc.get("url")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment