RSS Feed bot for /r/Amazon
# /r/Amazon RSS feed news-bot for mod /u/Dequeued
import time

import feedparser
import praw

FEED_URL = "https://www.google.com/alerts/feeds/08942335556751662828/6437780261999102241"

# Google Alerts wraps each entry link in a redirect; the first 42 characters
# are the "https://www.google.com/url?...&url=" wrapper, and the real article
# URL starts immediately after them.
ALERT_PREFIX_LEN = 42

def botLogin():
    # Fill in the bot account's credentials and script-app keys before running.
    redditInstance = praw.Reddit(username="",
                                 password="",
                                 client_id="",
                                 client_secret="",
                                 user_agent="")
    return redditInstance


r = botLogin()
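# A minimal alternative sketch, assuming the credentials live in a local
# praw.ini file under a hypothetical "[amazonnewsbot]" section: PRAW can load
# them by site name instead of hard-coded strings, e.g.
#   r = praw.Reddit("amazonnewsbot")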

# Keyword blocklist: any headline containing one of these words is skipped.
badTitles = ("trump", "authoritarian", "capitalist", "sanders", "democrat", "republican", "democrats", "republicans", "billionaire", "communist", "socialist", "refugee", "astroturf", "stamps", "attacks", "cashiers", "conservative", "liberal", "facebook", "giant", "globalism", "homeless", "human", "immigrant", "greed", "inequality", "oppress", "terror", "underpay", "underpaid", "oligarch", "suffer", "union", "welfare", "whitehouse", "scandal")

# Source tiers, best to worst; the bot prefers the highest tier available.
whiteListBest = ('arstechnica.com', 'cnet.com', 'consumerist.com', 'discover.com', 'economist.com', 'hbr.org', 'marketwatch.com', 'clarkhoward.com', 'krebsonsecurity.com')
whiteListOkay = ('biggerpockets.com', 'bloomberg.com', 'bostonglobe.com', 'budgetbytes.com', 'businessinsider.com', 'businesswire.com', 'cbslocal.com', 'cbsnews.com', 'chicagotribune.com', 'cnbc.com', 'cnn.com', 'fool.com', 'forbes.com', 'fortune.com', 'foxbusiness.com', 'geekwire.com', 'independent.co.uk', 'inquisitr.com', 'kiplinger.com', 'kiro7.com', 'latimes.com', 'moneysavingexpert.com', 'nbcnews.com', 'nolo.com', 'npr.org', 'nytimes.com', 'quora.com', 'seattlepi.com', 'seattletimes.com', 'slate.com', 'snopes.com', 'techcrunch.com', 'telegraph.co.uk', 'theatlantic.com', 'theguardian.com', 'theintercept.com', 'thestar.com', 'thestarpress.com', 'thesundaytimes.co.uk', 'theverge.com', 'time.com', 'usatoday.com', 'usnews.com', 'variety.com', 'venturebeat.com', 'vice.com', 'washingtonpost.com', 'wsj.com', 'yahoo.com', 'zdnet.com')
whiteListMeh = ('cnn.com', 'gizmodo.com', 'theonion.com', 'slate.com', 'huffingtonpost.com')
masterWhiteList = whiteListBest + whiteListOkay + whiteListMeh

targetSub = r.subreddit("Amazon")
postedIDs = []  # accumulates posted RSS-feed entry IDs to prevent duplicate posting when an AMP or mobile URL throws off the domain comparison
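# For illustration, a hypothetical Google Alerts entry id and the normalized
# form used for de-duplication in scanAndPost() below (the numbers are made up):
#   entry.id = "tag:google.com,2013:googlealerts/feed:08942335556751662828/1234567890123456789"
#   postID   = "08942335556751662828/1234567890123456789"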

def extractDomain(link):
    # Strip the Google redirect wrapper, then pull the bare domain
    # (e.g. "theverge.com") out of what remains.
    url = link[ALERT_PREFIX_LEN:]
    domain = url.split("/")[2]
    if domain.startswith("www."):  # str.lstrip("www.") would also eat leading w's from the domain itself
        domain = domain[len("www."):]
    return domain


def scanAndPost(r):
    feed = feedparser.parse(FEED_URL)  # re-fetch the alert feed on every pass so new entries show up
    bestArticles = []
    okayArticles = []
    mehArticles = []
    recentDomains = []
    # Record the last 4 domains posted so the next post comes from a different site, keeping the feed varied.
    for post in r.redditor("AmazonNewsBot").submissions.new(limit=4):
        recentDomains.append(str(post.domain))
    for entry in feed.entries:
        # entry.id looks like "tag:google.com,2013:googlealerts/feed:<numbers>"; keep only the tail.
        postID = entry.id.replace("tag:google.com,2013:googlealerts/feed:", "")
        domain = extractDomain(entry.link)
        if domain not in masterWhiteList:
            continue
        if postID in postedIDs:
            continue
        if domain in recentDomains:
            continue
        if domain in whiteListBest:
            bestArticles.append((entry, postID))
        elif domain in whiteListOkay:
            okayArticles.append((entry, postID))
        else:
            mehArticles.append((entry, postID))
    # Post the first candidate from the highest-ranked tier that has one.
    for articles, label in ((bestArticles, "Good"), (okayArticles, "Okay"), (mehArticles, "Meh")):
        if articles:
            entry, postID = articles[0]
            link = entry.link[ALERT_PREFIX_LEN:]
            formatting(link, entry.title, postID)
            print("Posting from " + label + " list")
            return
    print("Wow, zero good articles...")

def formatting(link, title, postID):
    link = link.rstrip("%")
    link = link.split("&")[0]  # drop Google's tracking parameters from the article URL
    title = title.replace("</b>", "").replace("<b>", "")
    title = title.replace("&#39;", "'")  # decode the HTML entity Google Alerts uses for apostrophes
    # Skip the article entirely if the title contains any blocked keyword.
    for word in title.split(" "):
        if word.lower() in badTitles:
            return
    print("Title: " + title)
    print("Link: " + link)
    targetSub.submit(title, url=link)
    postedIDs.append(postID)
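
# Worked example of the link handling above, using a made-up alert entry:
#   entry.link = "https://www.google.com/url?rct=j&sa=t&url=https://www.theverge.com/2018/example&ct=ga"
#   entry.link[ALERT_PREFIX_LEN:] -> "https://www.theverge.com/2018/example&ct=ga"
#   extractDomain(entry.link)     -> "theverge.com"
#   formatting() then submits     -> "https://www.theverge.com/2018/example"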

while True:
    try:
        scanAndPost(r)
        print("Sleeping")
        time.sleep(16400)  # roughly four and a half hours between scans
    except Exception as e:
        print(str(e))
        exit()