Skip to content

Instantly share code, notes, and snippets.

@mstimberg
Created January 9, 2023 09:35
Show Gist options
  • Save mstimberg/9e2ae6db58ec31a325c12602dd6aee23 to your computer and use it in GitHub Desktop.
Save mstimberg/9e2ae6db58ec31a325c12602dd6aee23 to your computer and use it in GitHub Desktop.
Add open access links to Neuromatch Academy further reading lists (via the Unpaywall API)
import json
import os
import re
import requests
# Regular expression for a markdown DOI link of the form
# ``doi: [text](https://doi.org/<DOI>)``. Group 1 is the complete link
# (from ``doi:`` up to the closing parenthesis), group 2 is the bare DOI.
doi_link = r'(doi: \[.*\]\(https://doi.org/([a-zA-Z0-9/._\-()]+)\))'
# Base URL of the Unpaywall REST API; the DOI is appended to it for each query.
unpaywall_api = 'https://api.unpaywall.org/v2/'
# Sent as the ``email`` query parameter with every Unpaywall request.
# NOTE(review): this looks like a placeholder -- presumably it has to be
# replaced with a real address before running.
email = 'email@example.com' # ← EMAIL
def _classify_article(article, doi):
    """
    Condense one Unpaywall record into the info stored for `doi`.

    Returns ``{}`` if no open access version is known, ``{'open': True}`` if
    the DOI link itself leads to the open access version, and otherwise
    ``{'open': False, 'type': ..., 'open_url': ...}`` describing the best
    alternative open access location.
    """
    best = article.get('best_oa_location')
    if not article.get('is_oa') or not best:
        # Unpaywall does not know of any OA version
        return {}

    # Either URL may be null in the Unpaywall response, so normalise missing
    # values to '' before lower-casing them.
    doi_url = 'https://doi.org/' + doi.lower()
    location_urls = [(best.get(key) or '').lower()
                     for key in ('url', 'url_for_landing_page')]
    if doi_url in location_urls:
        # already OA
        return {'open': True}

    best_oa_url = best.get('url') or best.get('url_for_landing_page')
    if not best_oa_url:
        # An OA location without any usable link cannot be advertised
        return {}

    version = best.get('version')
    if version == 'submittedVersion':
        oa_type = 'preprint'
    elif version in ('publishedVersion', 'acceptedVersion'):
        oa_type = 'postprint'
    else:
        print('*** UNKNOWN TYPE ***', version)
        oa_type = 'unknown'
    return {'open': False, 'type': oa_type, 'open_url': best_oa_url}


def gather_doi_info(fname):
    """
    Find all DOIs in file `fname` and query Unpaywall about open access links.
    Write all infos to a JSON file.

    Each line of `fname` is searched for a DOI link (module-level pattern
    ``doi_link``; only the first match per line is used). For every DOI whose
    query succeeds, one entry keyed by the DOI is stored (see
    `_classify_article` for its layout). DOIs whose request fails -- network
    error, timeout or non-OK status -- are reported on stdout and left out.

    The JSON file is written next to `fname`, using its name with the last
    three characters (the ``.md`` suffix) replaced by ``.json``.
    """
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    literature_info = {}
    for line in content:
        match = re.search(doi_link, line)
        if not match:
            continue
        doi = match.group(2)
        print('doi: ', doi)
        try:
            # Without a timeout a stalled connection would hang the script.
            r = requests.get(unpaywall_api + doi, params={'email': email},
                             timeout=30)
        except requests.RequestException as exc:
            # One unreachable DOI should not discard everything gathered so far
            print('Request failed:', exc)
            continue
        if r.status_code != requests.codes.ok:
            print('Request returned', r.status_code)
            continue
        literature_info[doi] = _classify_article(r.json(), doi)

    with open(fname[:-3] + '.json', 'wt', encoding='utf-8') as f:
        json.dump(literature_info, f)
def write_badges_urls(fname):
    """
    For each DOI in `fname`: If the DOI already points to an open access link, add the ``{{ open_access }}`` short code.
    If not, add ``{{ closed_access }}``, and add an additional open access link if possible.

    The per-DOI information is read from the JSON file next to `fname` (same
    name with the last three characters, the ``.md`` suffix, replaced by
    ``.json``). `fname` is rewritten in place.

    Lines are left unchanged if they contain no DOI link, if they already
    carry one of the two short codes (so running the function again does not
    duplicate badges), or if the JSON file has no entry for their DOI (e.g.
    because the Unpaywall request failed when the information was gathered).
    """
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()
    with open(fname[:-3] + '.json', encoding='utf-8') as f:
        literature_info = json.load(f)

    new_lines = []
    for line in content:
        match = re.search(doi_link, line)
        if not match:
            new_lines.append(line)
            continue

        full = match.group(1)
        doi = match.group(2)
        print('doi: ', doi)

        if '{{ open_access }}' in line or '{{ closed_access }}' in line:
            # Already processed in an earlier run; do not add badges twice
            new_lines.append(line)
            continue

        info = literature_info.get(doi)
        if info is None:
            # Nothing was recorded for this DOI, so we cannot tell whether it
            # is open or closed -- keep the line as it is instead of failing.
            print('No open access information for', doi)
            new_lines.append(line)
            continue

        if not info:
            # Empty entry: no open access version is known
            new_line = line.replace(full, full + ' {{ closed_access }}')
        elif info.get('open', False):
            new_line = line.replace(full, full + ' {{ open_access }}')
        else:
            oa_type = info['type']
            best_oa_url = info['open_url']
            # Drop the scheme to get a shorter, human-readable link text
            best_oa_readable_url = best_oa_url.replace('https://', '').replace('http://', '')
            new_line = line.replace(full, full + ' {{ closed_access }} ' +
                                    '(' + oa_type + ': [' + best_oa_readable_url + '](' +
                                    best_oa_url + ') {{ open_access }})')
        new_lines.append(new_line)

    with open(fname, 'wt', encoding='utf-8') as f:
        f.writelines(new_lines)
# Driver: visit every ``further_reading.md`` below ``tutorials``. A file
# without a JSON file next to it gets its DOI information gathered; a file
# that already has one gets its links updated from it.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation -- confirm that `write_badges_urls` is meant to run only
# when the JSON file already exists.
for root, _, fnames in os.walk('tutorials'):
    for fname in fnames:
        if fname != 'further_reading.md':
            continue
        full_fname = os.path.join(root, fname)
        print(full_fname)
        if os.path.exists(full_fname[:-3] + '.json'):
            print('updating links...')
            write_badges_urls(full_fname)
        else:
            gather_doi_info(full_fname)
        # blank line separates the output of consecutive files
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment