Created
December 27, 2020 00:45
-
-
Save bhpayne/3190cbeb666cd773c89e629dea31d438 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
The arXiv RSS feed (https://arxiv.org/help/rss) is separate from
the arXiv API (https://arxiv.org/help/api/index).
The API supports /query.
The RSS feed provides XML for a given topic.
See also https://arxiv.org/help/robots
""" | |
# python3 rss_filter_email.py | |
import xmltodict | |
import datetime | |
import json | |
import requests | |
import os | |
#import smtplib | |
# Read the API key used for the mail service's bearer token. The whole
# content of `.env` is taken as the key, minus surrounding whitespace.
with open('.env', 'r') as fil:
    api_key = fil.read().strip()

# Download the quant-ph RSS feed. `requests` waits indefinitely unless a
# timeout is given, which would hang an unattended run if the server stalls.
r = requests.get('https://export.arxiv.org/rss/quant-ph', timeout=30)
rss_as_rdf = r.text
# Convert the XML document into nested dicts keyed by tag name.
rss_as_dict = xmltodict.parse(rss_as_rdf)
# Author names to watch for, as (first, last) pairs. Both parts must occur in
# the same author entry for it to count as a match.
WATCHED_AUTHORS = [('John', 'Doe'),
                   ('John', 'Smith')]


def _has_keywords(text):
    """Return True if text contains keyword1 and keyword2, or keyword3.

    The comparison is case-insensitive.
    """
    lowered = text.lower()
    return (('keyword1' in lowered and 'keyword2' in lowered)
            or 'keyword3' in lowered)


def _abstract_text(entry_dict):
    """Return the item's description['#text'] string, or None if unavailable."""
    description = entry_dict.get('description')
    # Require a mapping: on a plain string, `'#text' in description` would be
    # a substring test and indexing with '#text' would raise TypeError.
    if isinstance(description, dict) and isinstance(description.get('#text'), str):
        return description['#text']
    return None


def _describe(reason, entry_dict):
    """Format one hit: reason + link, then TITLE and ABSTRACT when present."""
    parts = [reason + str(entry_dict.get('link', '(no link)')) + "\n"]
    title = entry_dict.get('title')
    if isinstance(title, str):
        parts.append("TITLE: " + title + '\n')
    abstract = _abstract_text(entry_dict)
    if abstract is not None:
        parts.append("ABSTRACT: " + abstract + '\n\n')
    return ''.join(parts)


def _scan_entry(entry_dict):
    """Return the report text for one RSS item ('' when nothing to report).

    An item is reported at most once for a keyword hit (title is checked
    before abstract); author checks only run when no keyword matched.
    Missing fields produce a one-line diagnostic instead of an exception.
    """
    report = ''
    title = entry_dict.get('title')
    abstract = _abstract_text(entry_dict)

    if not isinstance(title, str):
        report += "'title' is not in RSS item\n"
    elif _has_keywords(title):
        return _describe('found match for keyword1 keyword2 in title for ',
                         entry_dict)

    if abstract is None:
        report += "'description' or '#text' not found in RSS item\n"
    elif _has_keywords(abstract):
        return report + _describe(
            'found match for keyword1 keyword2 in abstract for ', entry_dict)

    creators = entry_dict.get('dc:creator')
    if not isinstance(creators, str):
        # Absent, or not plain text that can be split into authors.
        return report + "'dc:creator' not found in RSS item\n"
    # Authors arrive as one string of "<a ...>name</a>, <a ...>name</a>";
    # split it so each anchor is inspected on its own.
    for author_url in creators.replace('a>, <a', 'a>\n<a').split('\n'):
        for first, last in WATCHED_AUTHORS:
            if first in author_url and last in author_url:
                report += _describe(
                    'found ' + first + ' ' + last + ' as author for ',
                    entry_dict)
                # One report per author entry is enough.
                break
    return report


def _build_report(feed_dict):
    """Scan every item of the parsed feed and return the e-mail body text.

    Unlike a `break` inside the item loop, this visits ALL items, so a match
    early in the feed no longer hides later matches.
    """
    banner = "search of arxiv RSS feed: \n\n"
    if 'rdf:RDF' not in feed_dict:
        return banner + "'rdf:RDF' not in RSS"
    rdf = feed_dict['rdf:RDF']
    if not isinstance(rdf, dict) or 'item' not in rdf:
        return banner + "'item' not in RSS"
    items = rdf['item']
    # xmltodict yields a single dict (not a list) when there is exactly one
    # <item>; normalise so the loop always sees item dicts.
    if isinstance(items, dict):
        items = [items]
    return banner + ''.join(
        _scan_entry(entry) for entry in (items or []) if isinstance(entry, dict))


str_to_send = _build_report(rss_as_dict)
# DigitalOcean blocks port 25, so sending through a local SMTP server won't work:
# # http://effbot.org/pyfaq/how-do-i-send-mail-from-a-python-script.htm
#server = smtplib.SMTP("localhost")
#server.sendmail("ben", ["ben@gmail.com"], "hello")
# I set up a SendGrid account, so this works:
# curl --request POST --url https://api.sendgrid.com/v3/mail/send --header "Authorization: Bearer $SENDGRID_API_KEY" --header 'Content-Type: application/json' --data '{"personalizations": [{"to": [{"email": "ben@gmail.com"}]}],"from": {"email": "ben@gmail.com"},"subject": "Sending with SendGrid is Fun","content": [{"type": "text/plain", "value": "and easy to do anywhere, even with cURL"}]}'
# Using the site https://curl.trillworks.com/#python,
# I translated the above command into the following Python:
# Request headers for the SendGrid v3 API: bearer-token auth and a JSON body.
headers = {
    'Authorization': 'Bearer ' + api_key,
    'Content-Type': 'application/json',
}

# str_to_send starts out as a short fixed banner (under 30 characters), so a
# length below 30 means nothing was appended to it: no match, no diagnostic.
if len(str_to_send) < 30:
    subject = "no matches to report on "
else:
    subject = "keyword match for arxiv quant-ph on "


def _mail_payload(recipients):
    """Build the SendGrid mail/send JSON payload for the given addresses."""
    return {"personalizations":
            [{"to": [{"email": address} for address in recipients]}],
            "from": {"email": "ben@gmail.com"},
            "subject": subject + str(datetime.date.today()),
            "content": [{"type": "text/plain", "value": str_to_send}]}


# Payload that also addresses the second recipient. It is built but not sent;
# presumably meant for match notifications -- confirm before enabling.
data_dict = _mail_payload(["ben@gmail.com", "another@uni.edu"])
# Payload addressed to the owner only; this is the one actually posted.
data_dict_no_email = _mail_payload(["ben@gmail.com"])

# Send the e-mail as a POST. The timeout keeps an unattended run from hanging
# if the API never answers.
response = requests.post('https://api.sendgrid.com/v3/mail/send',
                         headers=headers,
                         data=json.dumps(data_dict_no_email),
                         timeout=30)
# A successful send produces no output; surface anything else so a rejected
# request (bad key, malformed payload, ...) does not pass unnoticed.
if not response.ok:
    print(response.status_code, response.text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment