Created
January 9, 2023 09:35
-
-
Save mstimberg/9e2ae6db58ec31a325c12602dd6aee23 to your computer and use it in GitHub Desktop.
Add openaccess links to Neuromatch Academy further reading lists (via unpaywall API)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import re | |
import requests | |
# Pattern for a markdown DOI reference of the form
# ``doi: [<label>](https://doi.org/<DOI>)``.
# Group 1 is the complete reference (the anchor after which badges are
# appended), group 2 is the bare DOI.
# The label is matched lazily so that a second doi.org link later on the same
# line cannot be swallowed into the first reference, and the dot in
# ``doi.org`` is escaped so that it only matches a literal dot.
doi_link = r'(doi: \[.*?\]\(https://doi\.org/([a-zA-Z0-9/._\-()]+)\))'
# Base URL of the Unpaywall REST API; the DOI is appended directly to it.
unpaywall_api = 'https://api.unpaywall.org/v2/'
# Sent as the ``email`` query parameter of every Unpaywall request.
# Placeholder value: replace it with your own address before running.
email = 'email@example.com'  # ← EMAIL
def _oa_info_for(article, doi):
    """
    Condense one Unpaywall record for `doi` into the dict stored in the JSON file.

    Returns an empty dict when Unpaywall reports no open access version,
    ``{'open': True}`` when the DOI link itself is the best open access
    location, and otherwise ``{'open': False, 'type': ..., 'open_url': ...}``
    describing the best alternative location.
    """
    article_info = {}
    if not article['is_oa']:
        # Unpaywall does not know of any OA version
        return article_info
    best = article.get('best_oa_location')
    if not best:
        # Flagged as OA but without a usable location: treat like "no OA version"
        return article_info

    doi_url = 'https://doi.org/' + doi.lower()
    # Either URL field may be missing/null in a record, so guard before
    # lower-casing instead of crashing on None.
    candidates = (best.get('url'), best.get('url_for_landing_page'))
    if any(url and url.lower() == doi_url for url in candidates):
        # already OA
        article_info['open'] = True
        return article_info

    article_info['open'] = False
    version = best.get('version')
    if version == 'submittedVersion':
        article_info['type'] = 'preprint'
    elif version in ('publishedVersion', 'acceptedVersion'):
        article_info['type'] = 'postprint'
    else:
        print('*** UNKNOWN TYPE ***', version)
        article_info['type'] = 'unknown'
    article_info['open_url'] = best['url']
    return article_info


def gather_doi_info(fname):
    """
    Find all DOIs in file `fname` and query Unpaywall about open access links.
    Write all infos to a JSON file next to `fname` (same name, ``.json`` extension).

    Only the first DOI reference on each line is considered. Each distinct DOI
    is queried once. DOIs whose request does not return an OK status are
    reported on stdout and left out of the JSON file.
    """
    # Explicit UTF-8 so that reading does not depend on the locale encoding.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    literature_info = {}
    for line in content:
        match = re.search(doi_link, line)
        if not match:
            continue
        doi = match.group(2)
        print('doi: ', doi)
        if doi in literature_info:
            # Same DOI referenced on an earlier line: no need to query again
            continue
        # Without a timeout a stalled connection would block the script forever.
        r = requests.get(unpaywall_api + doi, params={'email': email}, timeout=30)
        if r.status_code != requests.codes.ok:
            print('Request returned', r.status_code)
            continue
        literature_info[doi] = _oa_info_for(r.json(), doi)

    # splitext instead of slicing off three characters, so the extension
    # length is not hard-coded.
    with open(os.path.splitext(fname)[0] + '.json', 'wt', encoding='utf-8') as f:
        json.dump(literature_info, f)
def write_badges_urls(fname):
    """
    Annotate the DOI references in `fname` with access badges, in place.

    For each DOI in `fname`: If the DOI already points to an open access link,
    add the ``{{ open_access }}`` short code. If not, add ``{{ closed_access }}``,
    and add an additional open access link if possible.

    The information is read from the JSON file next to `fname` (same name,
    ``.json`` extension). Lines that already contain one of the short codes
    are left untouched, so running the function again does not duplicate
    badges. Lines whose DOI has no entry in the JSON file are reported on
    stdout and left untouched.
    """
    # Explicit UTF-8 so that reading/writing does not depend on the locale encoding.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()
    with open(os.path.splitext(fname)[0] + '.json', encoding='utf-8') as f:
        literature_info = json.load(f)

    new_lines = []
    for line in content:
        match = re.search(doi_link, line)
        already_badged = '{{ closed_access }}' in line or '{{ open_access }}' in line
        if not match or already_badged:
            new_lines.append(line)
            continue

        full = match.group(1)
        doi = match.group(2)
        print('doi: ', doi)
        info = literature_info.get(doi)
        if info is None:
            # No entry for this DOI in the JSON file: its access status is
            # unknown, so do not claim anything about it.
            print('*** NO INFO FOR DOI ***', doi)
            new_lines.append(line)
            continue

        if not info:
            # Empty entry: no open access version is known
            suffix = ' {{ closed_access }}'
        elif info.get('open', False):
            suffix = ' {{ open_access }}'
        else:
            oa_type = info['type']
            best_oa_url = info['open_url']
            # Link text is the URL without its scheme
            best_oa_readable_url = best_oa_url.replace('https://', '').replace('http://', '')
            suffix = (' {{ closed_access }} '
                      '(' + oa_type + ': [' + best_oa_readable_url + '](' +
                      best_oa_url + ') {{ open_access }})')
        new_lines.append(line.replace(full, full + suffix))

    with open(fname, 'wt', encoding='utf-8') as f:
        f.writelines(new_lines)
# Process every ``further_reading.md`` below ``tutorials``.
# First pass (no JSON file next to the markdown file yet): query the open
# access information and store it. Later passes (JSON file present): use the
# stored information to annotate the markdown file.
for dirpath, _, filenames in os.walk('tutorials'):
    for name in filenames:
        if name != 'further_reading.md':
            continue
        full_fname = os.path.join(dirpath, name)
        print(full_fname)
        json_fname = full_fname[:-3] + '.json'
        if os.path.exists(json_fname):
            print('updating links...')
            write_badges_urls(full_fname)
        else:
            gather_doi_info(full_fname)
        # Blank line between the output of consecutive files
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment