Created
September 18, 2017 19:39
-
-
Save mick001/a24e63a4244cd018c2e61310194b512e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
"""
Created on Mon Sep 4 15:06:35 2017
@author: Michy
"""
import os | |
import praw | |
import logging | |
import argparse | |
import config_data | |
from datetime import datetime | |
from prawcore import NotFound | |
VERSION = '1.0' | |
def find_relevant_posts(reddit_obj, subreddit_name, keyword, limit=50, flag='new'):
    # This function looks for relevant posts in a given subreddit using the
    # supplied keyword. A post is relevant when the keyword appears, ignoring
    # case, in its title or in its self text. Stickied posts are skipped.
    #
    # Params:
    # @reddit_obj: a Reddit instance.
    # @subreddit_name: name of the subreddit to be searched (string)
    # @keyword: keyword to be used for the search (string)
    # @limit: maximum number of posts searched (integer).
    # @flag: Reddit's posts flag (string): 'new', 'rising', 'controversial'
    #        or 'top'. Any other value falls back to 'new'.
    #
    # Returns a tuple of two lists, titles and urls containing the titles and
    # the urls of the relevant posts, respectively.
    #
    # Use the instance that was passed in rather than a module-level global,
    # so the function also works when it is imported from another module.
    subreddit = reddit_obj.subreddit(subreddit_name)

    # Only the whitelisted listing names are looked up on the subreddit
    # object; everything else is served from the 'new' listing.
    if flag not in ('new', 'rising', 'controversial', 'top'):
        flag = 'new'
    submissions = getattr(subreddit, flag)(limit=limit)

    # The post text is lower-cased below, so the keyword must be lower-cased
    # as well or a keyword containing capitals could never match.
    needle = keyword.lower()

    titles = []
    urls = []
    for submission in submissions:
        if submission.stickied:
            continue
        if needle in submission.title.lower() or needle in submission.selftext.lower():
            titles.append(submission.title)
            urls.append(submission.url)
    return titles, urls
def find_relevant_posts_wider(reddit_obj, subreddit_names, keywords, limit=50, flag='new'):
    # Runs find_relevant_posts once for every (subreddit, keyword) pair and
    # concatenates the results, subreddit by subreddit and keyword by keyword.
    #
    # Params:
    # @reddit_obj: a Reddit instance.
    # @subreddit_names: names of the subreddits to be searched (list of strings)
    # @keywords: keywords to be used for the search (list of strings)
    # @limit: maximum number of posts searched per call (integer).
    # @flag: Reddit's posts flag (string).
    #
    # Returns a tuple of two lists, titles_wider and urls_wider containing the
    # titles and the urls of the relevant posts, respectively. A post matching
    # several keywords appears once per matching keyword.
    #
    titles_wider, urls_wider = [], []
    for name in subreddit_names:
        for word in keywords:
            found_titles, found_urls = find_relevant_posts(reddit_obj, name, word, limit, flag)
            # Pair the two result lists up before extending so both outputs
            # always grow by the same number of entries.
            pairs = list(zip(found_titles, found_urls))
            titles_wider.extend(title for title, _ in pairs)
            urls_wider.extend(url for _, url in pairs)
    return titles_wider, urls_wider
def save_findings(titles, urls, filename):
    # This function saves the results of the search. Each result is written
    # as the title on one line, the url on the next line, then a blank line.
    # Results are appended if the file already exists; otherwise the file is
    # created.
    #
    # Params:
    # @titles: titles of the posts (list of strings).
    # @urls: urls of the posts (list of strings).
    # @filename: name of the file to save (string). Relative names are
    #            resolved against the current working directory.
    #
    # Returns void.
    #
    filename = os.path.join(os.getcwd(), filename)
    # Mode 'a' creates a missing file and appends to an existing one, so no
    # separate existence check is needed. The encoding is fixed to UTF-8
    # because post titles are arbitrary text and the platform default
    # encoding may not be able to represent them.
    with open(filename, 'a', encoding='utf-8') as f:
        f.writelines('{}\n{}\n\n'.format(t, u) for t, u in zip(titles, urls))
    print("Search results saved in {}".format(filename))
def check_subreddit_exists(reddit, subreddit):
    # Tells whether a subreddit exists by asking Reddit for an exact name
    # match and treating a NotFound error as "does not exist".
    #
    # Params:
    # @reddit: a Reddit instance.
    # @subreddit: subreddit to be checked (string).
    #
    # Returns: True if the lookup completes without NotFound, False otherwise.
    #
    try:
        reddit.subreddits.search_by_name(subreddit, exact=True)
    except NotFound:
        return False
    return True
def check_limit_range(limit):
    # Argparse type checker for the limit parameter: converts the value to an
    # integer and accepts it only when it lies in the 1-500 range.
    #
    # Params:
    # @limit: limit to be checked (anything int() accepts)
    #
    # Returns: limit as an integer.
    # Raises: argparse.ArgumentTypeError if limit is outside the 1-500 range.
    #
    limit = int(limit)
    if 1 <= limit <= 500:
        return limit
    raise argparse.ArgumentTypeError("{} is not a valid value".format(limit))
def setup_argparser():
    # This function sets up the argument parser and parses the command line.
    #
    # Options:
    # -s/--subreddits: one or more subreddit names (required).
    # -k/--keywords: one or more keywords (required).
    # -l/--limit: maximum number of posts searched, 1 - 500 (default 50).
    # -f/--flag: one of 'new', 'rising', 'controversial', 'top' (default 'new').
    # -o/--output: optional output file name.
    # -v/--verbose: print the results to the console.
    #
    # Returns the parsed arguments. subreddits and keywords are lists of
    # strings.
    #
    parser = argparse.ArgumentParser(description='Reddit Browsing Bot version {}'.format(VERSION))
    # nargs='+' collects every value given after the option into a list.
    # Without it the option yields a single string, and code iterating over
    # it would walk individual characters instead of names.
    parser.add_argument('-s', '--subreddits', type=str, nargs='+', required=True, help='Subreddits to look into.')
    parser.add_argument('-k', '--keywords', type=str, nargs='+', required=True, help='Keywords to search for.')
    parser.add_argument('-l', '--limit', type=check_limit_range, default=50, help='Maximum number of searches. Must be included in the range 1 - 500')
    parser.add_argument('-f', '--flag', type=str, default='new', choices=['new', 'rising', 'controversial', 'top'], help='Reddit flags.')
    parser.add_argument('-o', '--output', type=str, help='Output file name.')
    parser.add_argument('-v', '--verbose', action='store_true', help='Be verbose? Prints output if flag is set.')
    args = parser.parse_args()
    return args
def setup_logger():
    # Configures root logging to write DEBUG-and-above records to
    # 'reddit_bot_log.log' and hands back a logger named after the bot
    # version.
    #
    # Returns logger.
    #
    logging.basicConfig(level=logging.DEBUG, filename='reddit_bot_log.log')
    bot_name = 'Reddit Browsing Bot V. {}'.format(VERSION)
    return logging.getLogger(name=bot_name)
def _split_cli_names(raw):
    # Normalises a command line value into a flat list of lower-case names.
    #
    # Params:
    # @raw: either a single string or a list of strings. Every string may
    #       hold several comma-separated names.
    #
    # Returns a list of stripped, lower-cased, non-empty names.
    #
    # A bare string is wrapped first: iterating it directly would yield
    # single characters rather than names.
    chunks = [raw] if isinstance(raw, str) else raw
    return [name.strip().lower()
            for chunk in chunks
            for name in chunk.split(',')
            if name.strip()]


# Main
if __name__ == '__main__':
    # Setup argument parser
    args = setup_argparser()

    # Initialize logger
    logger = setup_logger()

    # Retrieve arguments
    limit = args.limit
    flag = args.flag
    filename = args.output
    verbose = args.verbose

    # Initialize reddit instance
    reddit = praw.Reddit(client_id=config_data.client_id,
                         client_secret=config_data.client_secret,
                         username=config_data.username,
                         password=config_data.password,
                         user_agent='Reading bot looking for hot topics')
    logger.info("Reddit instance initiated.")

    # Check that every subreddit exists. Ignore those that do not exist
    subreddits = [sub for sub in _split_cli_names(args.subreddits)
                  if check_subreddit_exists(reddit, sub)]

    # Keep only keywords longer than one character
    keywords = [key for key in _split_cli_names(args.keywords) if len(key) > 1]

    print("Subreddits searched: {} \nKeywords used {}\n\n".format(subreddits, keywords))

    # Start search
    logger.info("Started search for {} in {} at {}".format(
        keywords,
        subreddits,
        datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    titles, urls = find_relevant_posts_wider(reddit, subreddits, keywords, limit, flag)
    logger.info("Search ended.")

    # Save findings if a filename has been provided.
    if filename is not None:
        logger.info("Saving data.")
        save_findings(titles, urls, filename)

    # If the program needs to be verbose or if filename has not been provided,
    # print output to the console
    if verbose or filename is None:
        for t, u in zip(titles, urls):
            print(t, u, sep='\n', end='\n\n')

    # Main ended
    logger.info("Main executon ended successfully.")
    print("\n\nExiting....")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment