Skip to content

Instantly share code, notes, and snippets.

@KevOrr
Created September 8, 2014 00:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KevOrr/19dc0d4d75e9201742f6 to your computer and use it in GitHub Desktop.
Save KevOrr/19dc0d4d75e9201742f6 to your computer and use it in GitHub Desktop.
import time, urllib, sys
from html.parser import HTMLParser
from requests.exceptions import HTTPError
import praw, flask
from flask import Flask, make_response, render_template, url_for
from bs4 import BeautifulSoup as BS
# Upper bound on the number of comments requested per search; passed as
# `limit` when fetching in `search` and echoed in the timing summary.
LIMIT = 300

# The Flask application that the route decorators below register against.
app = Flask(__name__)

# Module-wide reddit client shared by every request handler.
# NOTE(review): the string is presumably the client's user agent -- confirm
# against the installed PRAW version.
r = praw.Reddit('Snoogle Comment Searcher v0.1 by elaifiknow')
@app.route('/')
def main():
    """Serve the static landing page ``index.html`` as UTF-8 HTML."""
    response = app.send_static_file('index.html')
    # The media type and charset belong in the Content-Type header; a header
    # literally named 'content' is not one that clients interpret.
    response.headers['Content-Type'] = 'text/html; charset=utf-8'
    return response
@app.route('/search')
def search():
    """Search a redditor's comments for keywords and serve the results page.

    Expects ``username`` and ``keywords`` (whitespace-separated) in the query
    string and redirects to ``/`` when either one is missing. The rendered
    ``search.html`` page gets the timing summary produced by ``render_times``
    appended to its ``<body>`` before it is prettified and returned.
    """
    last = time.time()

    args = flask.request.args
    if 'username' not in args or 'keywords' not in args:
        return flask.redirect('/')
    username = args['username']
    keywords = args['keywords'].split()

    try:
        # Get redditor
        # NOTE(review): if get_redditor/get_comments are lazy in the installed
        # PRAW version, an HTTPError would surface while do_search iterates
        # (outside this handler) and fetch time would be counted as search
        # time -- confirm.
        user = r.get_redditor(username)
    except HTTPError:
        # Redditor likely doesn't exist: render the page with a stand-in dict
        # carrying just the name and profile URL.
        user = {
            'name': username,
            '_url': 'http://www.reddit.com/u/' + username,
        }
        last = time.time()
        soup = BS(render_template('search.html', user=user))
        times = render_times(time.time() - last)
    else:
        # Get comments
        comments = user.get_comments(limit=LIMIT)
        # Read the clock once so the fetch and search intervals are contiguous.
        now = time.time()
        time_fetching, last = now - last, now

        # Start search
        count, results = do_search(comments, keywords)
        time_searching = time.time() - last

        # Sort results, most relevant first (relevancy is the first tuple item)
        results.sort(key=lambda item: item[0], reverse=True)

        # Start rendering
        last = time.time()
        html = render_template('search.html', results=results)
        time_rendering = time.time() - last

        soup = BS(html)
        times = render_times(time_fetching, time_searching, time_rendering,
                             count=count)

    # Add times, prettify, and serve
    soup.body.append(BS(times))
    response = make_response(soup.prettify())
    # The media type and charset belong in the Content-Type header; a header
    # literally named 'content' is not one that clients interpret.
    response.headers['Content-Type'] = 'text/html; charset=utf-8'
    return response
def do_search(comments, keywords):
    """Score every comment against the keywords and collect the ones that match.

    Matching is case-insensitive: a comment's relevancy is the total number of
    occurrences of all keywords in its ``body``.

    Args:
        comments: Iterable of objects exposing ``body`` and ``body_html``.
        keywords: Iterable of keyword strings.

    Returns:
        A ``(count, results)`` tuple. ``count`` is the number of comments
        consumed from ``comments``. ``results`` holds one tuple per comment
        with non-zero relevancy:
        ``(relevancy, comment, unescaped body_html, parent, row_class)``,
        where ``parent`` comes from ``get_parent`` and ``row_class``
        alternates ``'even'``/``'odd'`` by the comment's position in
        ``comments`` (the first comment is ``'even'``).
    """
    # HTMLParser().unescape was removed in Python 3.9; html.unescape is the
    # supported replacement.
    from html import unescape

    row_classes = ('even', 'odd')
    # Lower-case each keyword once instead of once per comment.
    needles = [keyword.lower() for keyword in keywords]

    results = []
    count = 0
    for index, comment in enumerate(comments):
        count = index + 1
        body = comment.body.lower()
        relevancy = sum(body.count(needle) for needle in needles)
        if relevancy:
            results.append((
                relevancy,
                comment,
                unescape(comment.body_html),
                get_parent(comment),
                row_classes[index % 2],
            ))
    return count, results
def get_parent(comment):
    """Return the object that ``comment`` is a reply to.

    Args:
        comment: A ``praw.objects.Comment``.

    Returns:
        ``comment.submission`` when ``comment.is_root`` is true, otherwise the
        result of ``r.get_info(thing_id=comment.parent_id)``.

    Raises:
        TypeError: If ``comment`` is a ``praw.objects.Submission`` or any
            other object that is not a ``praw.objects.Comment``.
    """
    if isinstance(comment, praw.objects.Comment):
        if comment.is_root:
            # Parent is the post, not another comment
            return comment.submission
        # Parent is another comment
        return r.get_info(thing_id=comment.parent_id)

    if isinstance(comment, praw.objects.Submission):
        raise TypeError('Submissions don\'t have parents!')

    # Report built-in types by bare name and everything else as
    # "module.Class". The module is compared by name because `__builtins__`
    # is a module (not a dict) when this file runs as __main__, so an `in`
    # test against it raises TypeError.
    cls = type(comment)
    type_name = cls.__name__
    if cls.__module__ != 'builtins':
        type_name = '{}.{}'.format(cls.__module__, type_name)
    raise TypeError(
        'comment must be of type praw.objects.Comment, not {}. '
        'If you are using your own class, please inherit from '
        'praw.objects.Comment'.format(type_name))
def render_times(*times, count=None):
    """Build the HTML fragment that reports how long a search took.

    Args:
        *times: Either three durations in seconds (fetching, searching,
            rendering) or a single duration (rendering only).
        count: Number of comments fetched; required with three durations.

    Returns:
        A ``<div class="search-times">`` fragment, or an empty string when the
        number of durations is neither one nor three. The three-duration form
        also reports the module-level ``LIMIT`` and the sum of the durations.

    Raises:
        TypeError: If three durations are given without ``count``.
    """
    phases = len(times)

    if phases == 1:
        # Redditor does not exist, so only the rendering time is shown.
        template = ('<div class="search-times">\n'
                    '<pre>Spent {: >7.3f} seconds rendering template</pre>\n'
                    '</div>')
        return template.format(*times)

    if phases != 3:
        # Unexpected number of durations: emit nothing rather than fail.
        return ''

    # Redditor exists, so all three times plus their total are shown.
    if count is None:
        raise TypeError('Must specify comment count')

    template = ('<div class="search-times">\n'
                '<pre>Spent {: >7.3f} seconds fetching {count} (limit {limit}) comments</pre>\n'
                '<pre>Spent {: >7.3f} seconds searching results</pre>\n'
                '<pre>Spent {: >7.3f} seconds rendering template</pre>\n'
                '<hr class="search-times-hr" />'
                '<pre>Spent {total: >7.3f} seconds total</pre>\n'
                '</div>')
    return template.format(*times, count=count, limit=LIMIT, total=sum(times))
if __name__ == '__main__':
    # Passing the word `debug` anywhere on the command line starts the server
    # in debug mode, bound explicitly to 127.0.0.1; otherwise it starts with
    # debugging off and the default host.
    debug_requested = 'debug' in sys.argv
    if not debug_requested:
        app.run(debug=False)
    else:
        app.run('127.0.0.1', debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment