# /main.py

## main.py
import praw
import datetime
import time
import re
import mimetypes
import logging

# Thread titles we accept look like "WAYWT - March 1st": a capitalised word,
# a space, one or more digits and a lower-case suffix -- nothing before or
# after.
waywt_title_pattern = re.compile(r"^WAYWT - [A-Z][a-z]+ [0-9]+[a-z]+$")

# Captures the target of every `a href="..."` attribute found in a piece of
# HTML.
html_link_pattern = re.compile(r'a href="([^"]+)"')

def get_current_month_name():
    """
    Return today's month as its full, locale-dependent name (``%B``).
    """
    today = datetime.date.today()
    return today.strftime("%B")

def get_urls_from_comment(comment):
    """
    Collect the target of every link in the comment's HTML body.

    `comment` must expose a `body_html` attribute.  The result is a list of
    the matched `href` values, in the order they occur; it is empty when the
    body contains no links.
    """
    rendered_body = comment.body_html
    return html_link_pattern.findall(rendered_body)

def get_url_type(url):
    """
    Guess whether a URL points to an image.

    The guess relies only on the file extension found in the URL (through
    ``mimetypes.guess_type``); nothing is downloaded.  A query string or
    fragment after the extension is ignored, so ``http://host/pic.jpg?1``
    is still recognised as an image.

    Returns "image" when the guessed MIME type starts with "image/",
    otherwise "link" (including when no type can be guessed at all).
    """
    # Cut off "#fragment" and "?query" so the extension is the last thing in
    # the string; otherwise an ending such as ".jpg?1" is not a known
    # extension and the image would be reported as a plain link.
    bare_url = url.split("#", 1)[0].split("?", 1)[0]

    # The second item of the result (the encoding) is of no interest here.
    link_type, _ = mimetypes.guess_type(bare_url)

    if link_type is not None and link_type.startswith("image/"):
        return "image"

    return "link"

if __name__ == "__main__":
    # Script entry point: collect the best-scored comments from the recent
    # WAYWT threads and print them as a Markdown list, highest score first.

    # Minimum score a comment needs to make the list.
    MIN_SCORE = 75
    # Threads posted more than this many days ago are skipped.
    MAX_AGE_DAYS = 31
    # Seconds to wait after processing each thread.
    REQUEST_DELAY = 2

    logging.basicConfig(level=logging.INFO)

    # Connect to reddit
    reddit = praw.Reddit(user_agent='TopOfWAYWT Collector v0.1')

    # Query to search threads with WAYWT and the current month name in their
    # title
    query = "title:WAYWT and title:{} and author:MFAModerator".format(get_current_month_name())

    # Perform search on /r/malefashionadvice
    posts = reddit.search(query, subreddit="malefashionadvice")

    # This list will contain all the top comments
    comments = []

    today = datetime.date.today()

    # Go through each submission
    for submission in posts:
        # Age of the submission, as a difference between calendar dates
        posted_on = datetime.date.fromtimestamp(int(submission.created_utc))
        age = today - posted_on

        # Too old to belong to the current month? Ignore.
        if age.days > MAX_AGE_DAYS:
            continue

        # Title doesn't match "WAYWT - Month Day"? Ignore.
        if waywt_title_pattern.match(submission.title) is None:
            continue

        logging.info("Checking %s posted %s days ago...", submission.title, age.days)

        # Check each comment of the submission
        for comment in submission.comments:
            # MoreComments entries are skipped; only actual comments count.
            if isinstance(comment, praw.objects.MoreComments):
                continue

            # That's what we're looking for
            if comment.score >= MIN_SCORE:
                comments.append(comment)

        # Reddit says: Make no more than thirty requests per minute, so let's
        # sleep for 2 seconds.
        time.sleep(REQUEST_DELAY)

    logging.info("Found %s comments.", len(comments))

    # Highest score first
    comments.sort(key=lambda comment: comment.score, reverse=True)

    for rank, comment in enumerate(comments, 1):
        urls = get_urls_from_comment(comment)

        if not urls:
            logging.warning("No URLs found in comment %s.", comment.permalink)
            continue

        # Print information about the post: rank, permalink, author and
        # score.  The two trailing spaces are a Markdown hard line break.
        print("{}. [Post]({}) by *{}* (+{})  ".format(rank, comment.permalink, comment.author, comment.score))

        # Sort the URLs into plain links and images, reusing the list
        # extracted above instead of parsing the comment a second time.
        buckets = {
            "link": [],
            "image": [],
        }

        for url in urls:
            buckets[get_url_type(url)].append(url)

        # One Markdown link per URL, numbered within its category
        link_lines = []
        for key, values in buckets.items():
            name = key.capitalize()
            for index, url in enumerate(values, 1):
                link_lines.append("[{} {}]({})".format(name, index, url))

        # The first link is prefixed with 4 spaces so Markdown keeps it on
        # the list item level; every further link goes on its own line.
        print("    " + "\n".join(link_lines))
	import praw
	import datetime
	import time
	import re
	import mimetypes
	import logging

	# Thread titles we accept look like "WAYWT - March 1st": a capitalised
	# word, a space, one or more digits and a lower-case suffix -- nothing
	# before or after.
	waywt_title_pattern = re.compile(r"^WAYWT - [A-Z][a-z]+ [0-9]+[a-z]+$")

	# Captures the target of every `a href="..."` attribute found in a piece
	# of HTML.
	html_link_pattern = re.compile(r'a href="([^"]+)"')

	def get_current_month_name():
	    """
	    Return today's month as its full, locale-dependent name (``%B``).
	    """
	    today = datetime.date.today()
	    return today.strftime("%B")

	def get_urls_from_comment(comment):
	    """
	    Collect the target of every link in the comment's HTML body.

	    `comment` must expose a `body_html` attribute.  The result is a list
	    of the matched `href` values, in the order they occur; it is empty
	    when the body contains no links.
	    """
	    rendered_body = comment.body_html
	    return html_link_pattern.findall(rendered_body)

	def get_url_type(url):
	    """
	    Guess whether a URL points to an image.

	    The guess relies only on the file extension found in the URL (through
	    ``mimetypes.guess_type``); nothing is downloaded.  A query string or
	    fragment after the extension is ignored, so ``http://host/pic.jpg?1``
	    is still recognised as an image.

	    Returns "image" when the guessed MIME type starts with "image/",
	    otherwise "link" (including when no type can be guessed at all).
	    """
	    # Cut off "#fragment" and "?query" so the extension is the last thing
	    # in the string; otherwise an ending such as ".jpg?1" is not a known
	    # extension and the image would be reported as a plain link.
	    bare_url = url.split("#", 1)[0].split("?", 1)[0]

	    # The second item of the result (the encoding) is of no interest here.
	    link_type, _ = mimetypes.guess_type(bare_url)

	    if link_type is not None and link_type.startswith("image/"):
	        return "image"

	    return "link"

	if __name__ == "__main__":
	    # Script entry point: collect the best-scored comments from the recent
	    # WAYWT threads and print them as a Markdown list, highest score first.

	    # Minimum score a comment needs to make the list.
	    MIN_SCORE = 75
	    # Threads posted more than this many days ago are skipped.
	    MAX_AGE_DAYS = 31
	    # Seconds to wait after processing each thread.
	    REQUEST_DELAY = 2

	    logging.basicConfig(level=logging.INFO)

	    # Connect to reddit
	    reddit = praw.Reddit(user_agent='TopOfWAYWT Collector v0.1')

	    # Query to search threads with WAYWT and the current month name in
	    # their title
	    query = "title:WAYWT and title:{} and author:MFAModerator".format(get_current_month_name())

	    # Perform search on /r/malefashionadvice
	    posts = reddit.search(query, subreddit="malefashionadvice")

	    # This list will contain all the top comments
	    comments = []

	    today = datetime.date.today()

	    # Go through each submission
	    for submission in posts:
	        # Age of the submission, as a difference between calendar dates
	        posted_on = datetime.date.fromtimestamp(int(submission.created_utc))
	        age = today - posted_on

	        # Too old to belong to the current month? Ignore.
	        if age.days > MAX_AGE_DAYS:
	            continue

	        # Title doesn't match "WAYWT - Month Day"? Ignore.
	        if waywt_title_pattern.match(submission.title) is None:
	            continue

	        logging.info("Checking %s posted %s days ago...", submission.title, age.days)

	        # Check each comment of the submission
	        for comment in submission.comments:
	            # MoreComments entries are skipped; only actual comments count.
	            if isinstance(comment, praw.objects.MoreComments):
	                continue

	            # That's what we're looking for
	            if comment.score >= MIN_SCORE:
	                comments.append(comment)

	        # Reddit says: Make no more than thirty requests per minute, so
	        # let's sleep for 2 seconds.
	        time.sleep(REQUEST_DELAY)

	    logging.info("Found %s comments.", len(comments))

	    # Highest score first
	    comments.sort(key=lambda comment: comment.score, reverse=True)

	    for rank, comment in enumerate(comments, 1):
	        urls = get_urls_from_comment(comment)

	        if not urls:
	            logging.warning("No URLs found in comment %s.", comment.permalink)
	            continue

	        # Print information about the post: rank, permalink, author (in
	        # Markdown emphasis) and score.  The two trailing spaces are a
	        # Markdown hard line break.
	        print("{}. [Post]({}) by *{}* (+{})  ".format(rank, comment.permalink, comment.author, comment.score))

	        # Sort the URLs into plain links and images, reusing the list
	        # extracted above instead of parsing the comment a second time.
	        buckets = {
	            "link": [],
	            "image": [],
	        }

	        for url in urls:
	            buckets[get_url_type(url)].append(url)

	        # One Markdown link per URL, numbered within its category
	        link_lines = []
	        for key, values in buckets.items():
	            name = key.capitalize()
	            for index, url in enumerate(values, 1):
	                link_lines.append("[{} {}]({})".format(name, index, url))

	        # The first link is prefixed with 4 spaces so Markdown keeps it on
	        # the list item level; every further link goes on its own line.
	        print("    " + "\n".join(link_lines))