Skip to content

Instantly share code, notes, and snippets.

@mgrady3
Last active January 9, 2016 18:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgrady3/dc5dae7968b81aec2931 to your computer and use it in GitHub Desktop.
Save mgrady3/dc5dae7968b81aec2931 to your computer and use it in GitHub Desktop.
NHLStreamCheck
#!/usr/bin/env python
# Check reddit.com/r/nhlstreams for a game thread for a given team
#
# based on:
# https://gist.github.com/cheesinglee/49add283073a9a517771
import argparse
import praw
import time
# adapted from https://gist.github.com/cheesinglee/49add283073a9a517771
# removed extraneous functionality
def process_post(post, keys):
    """Summarize a reddit submission as a plain dict.

    Copies the attributes named in *keys* off the submission (lower-casing
    any string values), then adds two derived fields:
      - 'has_thumbnail': True when the post carries a real thumbnail image
      - 'n_comments':    total comment count after expanding "more" stubs
    """
    attrs = vars(post)
    record = {}
    for name in keys:
        value = attrs[name]
        try:
            value = value.lower()
        except AttributeError:
            pass  # non-string attribute; keep as-is
        record[name] = value
    # 'default' / 'self' are reddit's placeholder thumbnail markers.
    record['has_thumbnail'] = post.thumbnail not in (u'default', u'self')
    # Expand every "load more comments" stub so the count is complete.
    post.replace_more_comments(limit=None, threshold=0)
    flattened = praw.helpers.flatten_tree(post.comments)
    record['n_comments'] = len(list(flattened))
    return record
### PARSE COMMAND LINE ARGS
def main():
    """Scan the newest /r/nhlstreams posts for a game thread for a team.

    Reads the team nickname (and optional --num post count) from the
    command line, scrapes the newest submissions, and prints the title and
    URL of every post whose title mentions the team.
    """
    parser = argparse.ArgumentParser()
    helpstr = "Enter an NHL Team Name without location. Example: \'Maple Leafs\' not \'Toronto Maple Leafs.\' "
    parser.add_argument("team", help=helpstr)
    # type=int: praw's get_new(limit=...) expects an integer, not a string.
    parser.add_argument("--num", type=int, help="Number of posts to scrape")
    args = parser.parse_args()
    # City -> nickname map; validation only consults the nickname values.
    # NOTE: the two New York clubs need distinct keys — a duplicate
    # "New York" key would silently drop one entry from the dict.
    teams = {
        "Boston": "Bruins",
        "Buffalo": "Sabres",
        "Detroit": "Red Wings",
        "Florida": "Panthers",
        "Montreal": "Canadiens",
        "Ottawa": "Senators",
        "Tampa Bay": "Lightning",
        "Toronto": "Maple Leafs",
        "Carolina": "Hurricanes",
        "Columbus": "Blue Jackets",
        "New Jersey": "Devils",
        "New York (NYI)": "Islanders",
        "New York (NYR)": "Rangers",
        "Philadelphia": "Flyers",
        "Pittsburgh": "Penguins",
        "Washington": "Capitals",
        "Anaheim": "Ducks",
        "Arizona": "Coyotes",
        "Calgary": "Flames",
        "Edmonton": "Oilers",
        "Los Angeles": "Kings",
        "San Jose": "Sharks",
        "Vancouver": "Canucks",
        "Chicago": "Blackhawks",
        "Colorado": "Avalanche",
        "Dallas": "Stars",
        "Minnesota": "Wild",
        "Nashville": "Predators",
        "Saint Louis": "Blues",
        "Winnipeg": "Jets"
    }
    # validate command line args
    if args.team not in teams.values():
        print("Error: Team Not Found")
        print("Enter a valid team name")
    else:
        print("Searching for stream for {0}".format(args.team))
        num_posts = args.num if args.num else 10  # DEFAULT VALUE
        print('Number of Posts to scrape: {}'.format(num_posts))

        ### Scrape Section
        POST_KEYS = ['title', 'created_utc']  # reddit post attributes to store; created time currently unused
        r = praw.Reddit('Reddit Dataset builder')
        ids = []
        posts = []
        searched = []
        urls = []
        ts = time.time()
        print("Scraping subreddit: {0}".format('nhlstreams'))
        sub = r.get_subreddit('nhlstreams')
        print('Scraping Posts...')
        # Generate list of newest submissions to /r/nhlstreams with length num_posts
        for post in sub.get_new(limit=num_posts):
            if post.id not in ids:
                posts.append(process_post(post, POST_KEYS))
                urls.append(post.permalink)
                ids.append(post.id)
        print('Scraped {0} posts'.format(len(posts)))
        tf = time.time()
        print('Total time elapsed: {0} seconds.'.format(round(tf - ts, 2)))

        # Search the scraped posts for thread titles with team name.
        # Titles are already lower-cased by process_post, so a single
        # lower-cased containment test is sufficient.
        # This could be wrapped into the scraping to speed up the script
        # ie. only store matching posts instead of scraping and storing all
        # posts then searching for match after the fact
        for idx, post in enumerate(posts):
            if args.team.lower() in post['title']:
                searched.append((post['title'], urls[idx]))

        ### RESULTS
        print('After search: ')
        if not searched:
            print('Failed to find Game Thread for {0} in newest {1} posts at /r/nhlstreams'.format(args.team, num_posts))
        else:
            print('Found Threads:')
            for match in searched:
                print("Thread: {0} \n URL: {1}".format(match[0], match[1]))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment