@Abrownn
Last active October 15, 2018 00:04
RSS Feed bot for /r/Amazon
#/r/Amazon RSS feed news-bot for mod /u/Dequeued
import feedparser
import time
import praw

# The feed is re-parsed inside scanAndPost() on every cycle so fresh alerts are picked up
FEED_URL = "https://www.google.com/alerts/feeds/08942335556751662828/6437780261999102241"
def botLogin():
    # Fill in the credentials of a Reddit "script" app before running
    redditInstance = praw.Reddit(username="",
                                 password="",
                                 client_id="",
                                 client_secret="",
                                 user_agent="")
    return redditInstance

r = botLogin()
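
# A filled-in login might look like the sketch below. All values are
# placeholders, not real credentials; the five fields are the ones PRAW's
# script-app flow expects, and the user_agent follows Reddit's
# "platform:app:version (by /u/user)" convention:
#   praw.Reddit(username="AmazonNewsBot", password="...",
#               client_id="abc123", client_secret="def456",
#               user_agent="linux:amazon-rss-bot:v1.0 (by /u/Abrownn)")
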
badTitles = ("trump", "authoritarian", "capitalist", "sanders", "democrat", "republican", "democrats", "republicans", "billionaire", "communist", "socialist", "refugee", "astroturf", "stamps", "attacks", "cashiers", "conservative", "liberal", "facebook", "giant", "globalism", "homeless", "human", "immigrant", "greed", "inequality", "oppress", "terror", "underpay", "underpaid", "oligarch", "suffer", "union", "welfare", "whitehouse", "scandal")
whiteListBest = ('arstechnica.com', 'cnet.com', 'consumerist.com', 'discover.com', 'economist.com', 'hbr.org', 'marketwatch.com', 'clarkhoward.com', 'krebsonsecurity.com')
whiteListOkay = ('biggerpockets.com', 'bloomberg.com', 'bostonglobe.com', 'budgetbytes.com', 'businessinsider.com', 'businesswire.com', 'cbslocal.com', 'cbsnews.com', 'chicagotribune.com', 'cnbc.com', 'cnn.com', 'fool.com', 'forbes.com', 'fortune.com', 'foxbusiness.com', 'geekwire.com', 'independent.co.uk', 'inquisitr.com', 'kiplinger.com', 'kiro7.com', 'latimes.com', 'moneysavingexpert.com', 'nbcnews.com', 'nolo.com', 'npr.org', 'nytimes.com', 'quora.com', 'seattlepi.com', 'seattletimes.com', 'slate.com', 'snopes.com', 'techcrunch.com', 'telegraph.co.uk', 'theatlantic.com', 'theguardian.com', 'theintercept.com', 'thestar.com', 'thestarpress.com', 'thesundaytimes.co.uk', 'theverge.com', 'time.com', 'usatoday.com', 'usnews.com', 'variety.com', 'venturebeat.com', 'vice.com', 'washingtonpost.com', 'wsj.com', 'yahoo.com', 'zdnet.com')
whiteListMeh = ('cnn.com', 'gizmodo.com', 'theonion.com', 'slate.com', 'huffingtonpost.com')
masterWhiteList = whiteListBest + whiteListOkay + whiteListMeh  # every approved domain, the three tiers combined
targetSub = r.subreddit("Amazon")
postedIDs = [] #meant to accumulate RSS-feed entry IDs to prevent duplicate posting if an AMP or mobile site throws off site comparison
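
# The Google Alerts feed wraps every article in a redirect URL. The parsing
# helpers below assume links of this shape, where a fixed 42-character prefix
# (something like "https://www.google.com/url?rct=j&sa=t&url=") precedes the
# real article URL:
#   entry.link      = "https://www.google.com/url?rct=j&sa=t&url=https://www.example.com/story&ct=ga"
#   entry.link[42:] -> "https://www.example.com/story&ct=ga"
#   extracted domain -> "example.com"
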
def getDomain(link):
    # The real URL begins at character 42 of entry.link; the domain is the
    # third "/"-separated piece ("https:", "", "www.example.com")
    domain = link[42:].split("/")[2]
    if domain.startswith("www."):
        domain = domain[4:]  # lstrip("www.") would also eat leading w's and dots from the name itself
    return domain

def stripFeedPrefix(postID):
    # lstrip() removes a *character set*, which would also strip the digits
    # 2, 0, 1, 3 from the numeric ID, so remove the prefix explicitly instead
    prefix = "tag:google.com,2013:googlealerts/feed:"
    if postID.startswith(prefix):
        postID = postID[len(prefix):]
    return postID

def scanAndPost(r):
    feed = feedparser.parse(FEED_URL)
    bestArticles = []
    okayArticles = []
    badArticles = []
    recentDomains = []
    # Note the domains of the last 4 submissions so a different 5th site is posted, keeping the feed varied
    for post in r.redditor("AmazonNewsBot").submissions.new(limit=4):
        recentDomains.append(str(post.domain))
    for entry in feed.entries:
        postID = stripFeedPrefix(entry.id)
        domain = getDomain(entry.link)
        if domain not in masterWhiteList:
            continue  # not an approved site
        if postID in postedIDs:
            continue  # this alert was already posted
        if domain in recentDomains:
            continue  # this site was posted too recently
        if domain in whiteListBest:
            bestArticles.append(domain)
        elif domain in whiteListOkay:
            okayArticles.append(domain)
        else:
            badArticles.append(domain)
    # Post from the best tier that found anything; stop as soon as one article goes up
    if postFromList(bestArticles, feed, "Good"):
        return
    if postFromList(okayArticles, feed, "Okay"):
        return
    if postFromList(badArticles, feed, "Meh"):
        return
    print("Wow, zero good articles...")

def postFromList(sites, feed, label):
    # Find the first feed entry matching one of the collected domains and submit it
    for site in sites:
        for entry in feed.entries:
            if getDomain(entry.link) == site:
                link = entry.link[42:]
                # Pass this entry's own ID so the right alert is marked as posted
                formatting(link, entry.title, stripFeedPrefix(entry.id))
                print("Posting from " + label + " list")
                return True
    return False
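
# Tier order matters: anything on the best list always wins. An illustrative
# run: with bestArticles = [] and okayArticles = ["forbes.com"], the
# forbes.com entry is submitted and the "Meh" tier is never consulted.
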
def formatting(link, title, postID):
    link = link.rstrip("%")
    link = link.split("&")[0]  # drop Google's tracking parameters from the redirect URL
    title = title.replace("</b>", "")
    title = title.replace("<b>", "")
    title = title.replace("&#39;", "'")
    # Skip the article entirely if any word of its title is blacklisted
    for word in title.split(" "):
        if word.lower() in badTitles:
            return
    print("Title: " + title)
    print("Link: " + link)
    targetSub.submit(title, url=link)
    postedIDs.append(postID)
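
# Example of the title cleanup above: Google Alerts delivers titles with HTML
# highlighting, e.g. "Amazon <b>expands</b> Prime &#39;benefits&#39;", which
# the replaces turn into "Amazon expands Prime 'benefits'" before the
# blacklist check and submission.
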
while True:
    try:
        scanAndPost(r)
        print("Sleeping")
        time.sleep(16400)  # roughly 4.5 hours between scans
    except Exception as e:
        print(str(e))
        exit()  # stop on any error instead of retrying