Skip to content

Instantly share code, notes, and snippets.

@maxhawkins
Last active November 10, 2017 20:17
Show Gist options
  • Save maxhawkins/7128380393aef64b5f66dbea92944431 to your computer and use it in GitHub Desktop.
Save maxhawkins/7128380393aef64b5f66dbea92944431 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import html
import random
import requests
import sys
def get_abstract(abstract_id):
    """Fetch one document summary by id from PubMed's E-utilities JSON API.

    Args:
        abstract_id: The PubMed id to look up. It is converted with ``str()``,
            so an int or a string both work.

    Returns:
        The per-id entry of the response's ``result`` object, or ``None`` when
        that entry carries the error "cannot get document summary".

    Raises:
        requests.HTTPError: The HTTP response had an error status.
        requests.Timeout: The server did not answer within the timeout.
        RuntimeError: The entry carried any other error message.
    """
    abstract_id = str(abstract_id)
    url = ('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
           '?db=pubmed&retmode=json&rettype=abstract&id=' + abstract_id)

    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    item = resp.json()['result'][abstract_id]
    error = item.get('error')
    if error is None:
        return item
    if error == "cannot get document summary":
        # Callers treat None as "no abstract for this id".
        return None
    # StandardError does not exist on Python 3 (which this script requires),
    # so raising it would surface as a NameError instead of the API's message.
    raise RuntimeError(error)
def find_latest_abstract(first=29121706, last=35000000):
    """Binary-search PubMed ids for the most recently published paper.

    The search assumes ids are handed out in increasing order: every id up to
    the latest one has an abstract and every id after it does not. An id for
    which ``get_abstract`` returns ``None`` is therefore treated as lying past
    the latest paper, so a missing id below the true latest one can make the
    search stop early.

    Args:
        first: Lowest id to probe. ``first - 1`` is assumed to exist and is
            what gets returned if no probed id has an abstract.
        last: Initial guess for an id that does not exist yet. If it turns
            out to exist, the bound is pushed further out automatically.

    Returns:
        The id just below the point where the search converged, i.e. the
        highest id believed to have an abstract.

    Raises:
        RuntimeError: No missing id was found after the upper bound had been
            expanded the maximum number of times.
    """
    # A fixed upper bound goes stale as PubMed keeps growing, so instead of
    # failing as soon as `last` exists, widen the window with growing steps.
    expansions_left = 16
    span = max(last - first, 1)
    while get_abstract(last):
        if expansions_left == 0:
            raise RuntimeError(
                'find_latest_abstract: far-future abstract %d unexpectedly exists' % last)
        expansions_left -= 1
        # `last` is known to exist, so the answer is at or above it.
        first = last + 1
        last += span
        span *= 2

    # Invariant: `first - 1` is believed to exist; `last + 1` is believed not to.
    while first <= last:
        midpoint = (first + last) // 2
        if get_abstract(midpoint):
            first = midpoint + 1
        else:
            last = midpoint - 1
    return first - 1
def get_random_abstract(min_id, max_id):
    """Return the abstract for a randomly chosen id in ``[min_id, max_id]``.

    Ids are drawn uniformly with ``random.randint`` (both ends inclusive) and
    looked up with ``get_abstract``. Draws that yield no abstract are
    discarded and another id is tried, so this keeps looping until a lookup
    succeeds.
    """
    found = None
    while not found:
        found = get_abstract(random.randint(min_id, max_id))
    return found
def compose_tweet(abstract):
    """Build the tweet text for a PubMed abstract.

    The result is the HTML-unescaped ``abstract['title']``, a blank line, and
    the paper's PubMed URL built from ``abstract['uid']``. The whole text is
    kept within 280 characters as counted by ``len()``: a title that would
    overflow is cut short and ends in an ellipsis character.
    """
    max_chars = 280
    separator = "\n\n"
    marker = "…"

    heading = html.unescape(abstract['title'])
    link = 'https://www.ncbi.nlm.nih.gov/pubmed/%s' % abstract['uid']

    # Space left for the title once the link and the separator are paid for.
    budget = max_chars - len(link) - len(separator)
    if len(heading) > budget:
        keep = budget - len(marker)
        heading = heading[:keep] + marker
    return separator.join((heading, link))
def main():
    """Print a tweet for one randomly selected PubMed paper.

    Does nothing except print a notice when run on a Python older than 3.0.
    """
    if not sys.version_info >= (3, 0):
        print('python 3 is required to run this script')
        return

    # Lower end of the sampling range; the original author describes this id
    # as a paper from roughly 10 years ago (not verified here).
    oldest_id = 25635855
    # Upper end: the most recent paper id the binary search can find.
    newest_id = find_latest_abstract()

    picked = get_random_abstract(oldest_id, newest_id)
    print(compose_tweet(picked))
# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment