Skip to content

Instantly share code, notes, and snippets.

@liuyigh
Last active June 14, 2018 16:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liuyigh/1dc809424ff9db6cd80d30ed95abaa10 to your computer and use it in GitHub Desktop.
Save liuyigh/1dc809424ff9db6cd80d30ed95abaa10 to your computer and use it in GitHub Desktop.
Trialert: Notify You by Email When a Clinical Trial Is Updated
index name ctID lastUpdate
0 DV281 NCT03326752 November 1, 2017
1 SD101 NCT02521870 December 7, 2017
2 AZD1419 NCT02898662 December 21, 2017
3 SD101+MK1966 NCT02731742 December 11, 2017
import lxml.html as lh
import requests #, sqlite
import pandas as pd
import sys, traceback
# HTML body of the alert e-mail. It starts as this header only; the message
# is sent later only if the body no longer equals this initial value.
email = 'Trialert:<br>'

# Tracked trials are kept in a CSV file for now (sqlite in the future).
# The first CSV column becomes the DataFrame index.
trialDB = pd.read_csv(filepath_or_buffer='trialDB.csv', index_col=0)
# notify script error
def notifyException():
    """E-mail the traceback of the exception currently being handled.

    Call this from inside an ``except`` block: the exception is read from
    ``sys.exc_info()``. The formatted traceback is HTML-escaped, wrapped in
    ``<pre>`` and posted through the Mailgun HTTP API.

    Revise the URL, API key and addresses below according to your Mailgun
    setup.

    Raises:
        requests.RequestException: if the Mailgun request cannot be
            completed (including when it times out).
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
    # Prefix each formatted traceback entry so it stands out in the mail.
    pre = ''.join('!> ' + line for line in lines)
    # Escape '&' before '<' and '>' so the ampersands introduced by the
    # later replacements are not escaped a second time, and so that text
    # such as URL query strings ("?a=1&b=2") is shown literally.
    pre = pre.replace('&', '&amp;')
    pre = pre.replace('>', '&gt;')
    pre = pre.replace('<', '&lt;')
    body = '<pre>' + pre + '</pre>'
    # post message with mailgun api
    # A timeout keeps an unattended run from hanging forever on a stalled
    # connection.
    requests.post(
        "https://api.mailgun.net/v3/<your_domain>/messages",
        auth=("api", "<api_key>"),
        data={"from": "<your sending email>",
              "to": ["<receiving email>"],
              "subject": "trilert script error",
              "html": body},
        timeout=30)
# scrape clinicaltrials.gov
# Browser-like request headers sent with every page fetch.
hdr = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.11 (KHTML, like Gecko) "
        "Chrome/23.0.1271.64 Safari/537.11"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
    "Accept-Encoding": "none",
    "Accept-Language": "en-US,en;q=0.8",
    "Connection": "keep-alive",
}
# Compare each tracked trial's "Last Update Posted" date on its
# clinicaltrials.gov page with the date stored in trialDB. Every change is
# appended to the alert e-mail and written back into trialDB.
# NOTE(review): this scrapes the classic /ct2/show HTML page -- confirm the
# site still serves that layout; the XPath below depends on it.
try:
    for row in trialDB.itertuples():
        # The timeout keeps an unattended run from hanging on a stalled
        # connection.
        r = requests.get(
            'https://clinicaltrials.gov/ct2/show/{}'.format(row.ctID),
            headers=hdr,
            timeout=30)
        # Fail with a clear HTTP error instead of feeding an error page to
        # the XPath lookup below.
        r.raise_for_status()
        html = lh.fromstring(r.content)
        # The date is the second text node of the element containing the
        # "Last Update Posted" label; [3:] drops its 3-character prefix
        # (presumably '\n: ').
        lastUpdate = html.xpath("//span[@data-term='Last Update Posted']/../text()")[1][3:]  # or .strip('\n: ')
        if lastUpdate != row.lastUpdate:
            # str.format also copes with a trial name that pandas parsed as
            # a non-string value.
            email += '===========<br>Trial {} updated on {} :<br>https://clinicaltrials.gov/ct2/show/{}<br>'.format(
                row.name, lastUpdate, row.ctID)
            trialDB.loc[row.Index, 'lastUpdate'] = lastUpdate
except Exception:
    # Report scraping/parsing failures by mail. Updates found before the
    # failure are still alerted and saved below. KeyboardInterrupt and
    # SystemExit are deliberately not caught.
    notifyException()

## if updated, add link to email; if email != init, send email as alert; update CSV with new info.
## uncomment the next line to use it in a command line
# print(email)
if email != 'Trialert:<br>':
    # revise this according to your mailgun setup
    resp = requests.post(
        "https://api.mailgun.net/v3/<your_domain>/messages",
        auth=("api", "<api_key>"),
        data={"from": "<your sending email>",
              "to": ["<receiving email>"],
              "subject": "Clinical Trial Alert",
              "html": email},
        timeout=30)
    # Abort before saving if Mailgun rejected the message.
    resp.raise_for_status()

# Persist the new dates only after the alert (if any) has been accepted.
# Otherwise a failed send would leave the CSV already updated and the change
# would never be reported on a later run.
trialDB.to_csv('trialDB.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment