Skip to content

Instantly share code, notes, and snippets.

@bhpayne
Created December 27, 2020 00:45
Show Gist options
  • Save bhpayne/3190cbeb666cd773c89e629dea31d438 to your computer and use it in GitHub Desktop.
Save bhpayne/3190cbeb666cd773c89e629dea31d438 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
The arXiv RSS (https://arxiv.org/help/rss) is separate from
the arXiv API (https://arxiv.org/help/api/index)
The API supports /query
The RSS provides XML for a given topic
https://arxiv.org/help/robots
"""
# python3 rss_filter_email.py
import xmltodict
import datetime
import json
import requests
import os
#import smtplib
# --- configuration ---------------------------------------------------------
FEED_URL = 'https://export.arxiv.org/rss/quant-ph'
SENDGRID_URL = 'https://api.sendgrid.com/v3/mail/send'
REPORT_HEADER = "search of arxiv RSS feed: \n\n"
# An item matches when ALL words of at least one group occur in the text
# (case-insensitive).
KEYWORD_GROUPS = (('keyword1', 'keyword2'), ('keyword3',))
# (first name, last name) pairs looked for in each author entry (case-sensitive).
AUTHOR_NAMES = (('John', 'Doe'), ('John', 'Smith'))
SENDER = "ben@gmail.com"
# The report is currently mailed to the owner only.
RECIPIENTS = ("ben@gmail.com",)
# Wider distribution list; pass it to build_payload() to mail everybody.
ALL_RECIPIENTS = ("ben@gmail.com", "another@uni.edu")
REQUEST_TIMEOUT = 30  # seconds; keeps an unattended run from hanging forever


def _as_list(value):
    """Return *value* as a list.

    xmltodict yields a single dict (not a one-element list) when an element
    occurs exactly once, and None when the element is empty.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def _text_of(node):
    """Return the text of a parsed XML node, or '' when there is none.

    xmltodict represents an element without attributes as a plain string and
    an element with attributes as a dict holding the text under '#text'.
    """
    if isinstance(node, str):
        return node
    if isinstance(node, dict):
        text = node.get('#text')
        if isinstance(text, str):
            return text
    return ''


def _first_matching_group(text, keyword_groups):
    """Return the first keyword group whose words all occur in *text*, else None."""
    lowered = text.lower()
    for group in keyword_groups:
        # an empty group would otherwise match every text
        if group and all(word in lowered for word in group):
            return group
    return None


def _describe_entry(entry, keyword_groups, author_names):
    """Return ``(text, matched)`` for one feed item.

    *text* holds one line per reason the item matched, followed by its title
    and abstract (written once), followed by one diagnostic line per field
    that was missing from the item. *matched* is True when at least one
    keyword group or author name was found.
    """
    link = _text_of(entry.get('link'))
    title = _text_of(entry.get('title'))
    abstract = _text_of(entry.get('description'))
    creators = _text_of(entry.get('dc:creator'))
    reasons = []
    diagnostics = []

    if title:
        group = _first_matching_group(title, keyword_groups)
        if group is not None:
            reasons.append('found match for ' + ' '.join(group)
                           + ' in title for ' + link)
    else:
        diagnostics.append("'title' is not in RSS item")

    if abstract:
        group = _first_matching_group(abstract, keyword_groups)
        if group is not None:
            reasons.append('found match for ' + ' '.join(group)
                           + ' in abstract for ' + link)
    else:
        diagnostics.append("'description' or '#text' not found in RSS item")

    if creators:
        # the creator field is a comma separated run of <a> links;
        # put each link on its own line so names are matched per author
        for author_url in creators.replace('a>, <a', 'a>\n<a').split('\n'):
            for first, last in author_names:
                if first in author_url and last in author_url:
                    reasons.append('found ' + first + ' ' + last
                                   + ' as author for ' + link)
                    break  # one report per author entry is enough
    else:
        diagnostics.append("'dc:creator' not found in RSS item")

    lines = []
    if reasons:
        lines.extend(reason + "\n" for reason in reasons)
        lines.append("TITLE: " + title + "\n")
        lines.append("ABSTRACT: " + abstract + "\n\n")
    lines.extend(note + "\n" for note in diagnostics)
    return ''.join(lines), bool(reasons)


def build_report(rss_as_dict, keyword_groups=KEYWORD_GROUPS,
                 author_names=AUTHOR_NAMES):
    """Scan a parsed arXiv RSS feed and return ``(report, matched)``.

    rss_as_dict    -- result of ``xmltodict.parse`` on the feed
    keyword_groups -- see KEYWORD_GROUPS
    author_names   -- see AUTHOR_NAMES

    Every item of the feed is examined; a hit in one item does not stop the
    scan. *report* always starts with REPORT_HEADER. *matched* is True only
    when a keyword or author was actually found, so diagnostics about a
    malformed feed are never mistaken for a match.
    """
    feed = rss_as_dict.get('rdf:RDF') if isinstance(rss_as_dict, dict) else None
    if not isinstance(feed, dict):
        return REPORT_HEADER + "'rdf:RDF' not in RSS\n", False
    if 'item' not in feed:
        return REPORT_HEADER + "'item' not in RSS\n", False

    parts = [REPORT_HEADER]
    matched = False
    for entry in _as_list(feed['item']):
        if not isinstance(entry, dict):
            continue
        text, hit = _describe_entry(entry, keyword_groups, author_names)
        parts.append(text)
        matched = matched or hit
    return ''.join(parts), matched


def make_subject(matched, today=None):
    """Return the mail subject; *today* defaults to the current date."""
    if today is None:
        today = datetime.date.today()
    if matched:
        prefix = "keyword match for arxiv quant-ph on "
    else:
        prefix = "no matches to report on "
    return prefix + str(today)


def build_payload(subject, body, recipients=RECIPIENTS, sender=SENDER):
    """Return the JSON-serialisable body of a SendGrid v3 mail/send request."""
    return {"personalizations":
            [{"to": [{"email": address} for address in recipients]}],
            "from": {"email": sender},
            "subject": subject,
            "content": [{"type": "text/plain", "value": body}]}


def main():
    """Fetch the feed, filter it, and mail the report through SendGrid.

    The SendGrid API key is read from the file '.env' in the current
    directory. Returns the ``requests`` response of the mail/send call.
    """
    with open('.env', 'r') as fil:
        api_key = fil.read().strip()

    feed_response = requests.get(FEED_URL, timeout=REQUEST_TIMEOUT)
    if feed_response.ok:
        report, matched = build_report(xmltodict.parse(feed_response.text))
    else:
        # still send a mail so that a broken feed does not go unnoticed
        report = (REPORT_HEADER + "HTTP " + str(feed_response.status_code)
                  + " while fetching " + FEED_URL + "\n")
        matched = False

    # DigitalOcean blocks port 25, so smtplib against localhost does not work;
    # the mail goes out through the SendGrid HTTP API instead. The request is
    # the Python translation of
    #   curl --request POST --url https://api.sendgrid.com/v3/mail/send
    #        --header "Authorization: Bearer $SENDGRID_API_KEY"
    #        --header 'Content-Type: application/json' --data '{...}'
    headers = {
        'Authorization': 'Bearer ' + api_key,
        'Content-Type': 'application/json',
    }
    payload = build_payload(make_subject(matched), report)
    response = requests.post(SENDGRID_URL,
                             headers=headers,
                             data=json.dumps(payload),
                             timeout=REQUEST_TIMEOUT)
    # a successful POST to SendGrid produces no body; anything else is an error
    if response.text:
        print(response.text)
    return response


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment