Skip to content

Instantly share code, notes, and snippets.

@sleibrock
Created August 2, 2016 20:00
Show Gist options
  • Save sleibrock/d7d5393c4250bf9e95fd59afa555a3b8 to your computer and use it in GitHub Desktop.
Save sleibrock/d7d5393c4250bf9e95fd59afa555a3b8 to your computer and use it in GitHub Desktop.
Search "Who's Hiring" HackerNews threads
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
Scraping "Who's Hiring" Posts on Hackernews
Requires: BeautifulSoup4, Requests, Python 3
"""
from sys import argv
from requests import get as re_get
from bs4 import BeautifulSoup as BS
# Default path of the HTML file that main() writes the matching rows to.
DEF_OUTPUT = "output.html"
# Usage text printed when the script is started with fewer than two arguments.
HELP_MSG = """
Search HN "Who's Hiring" and export it to HTML file
Return comments if they match any search terms ie:
the search term is inside of a comment
Usage:
program.py <URL> <search_terms> [output] --and
* <URL> is the target URL to scan for
* <search_terms> is a comma-separated list of strings to look for
- [output] is an optional to redirect output to a target file
Ex:
program.py https://news.ycombinator.com/item?id=12202865 "Python,New York"
program.py https://news.ycombinator.com/item?id=12345678 "New York"
"""
def main(url, search_terms, output=DEF_OUTPUT, *flags):
    """
    Download a HN thread and save the rows that contain the search terms.

    Args:
        url: address of the thread page to download.
        search_terms: comma-separated string of substrings to look for.
            Matching is case-sensitive; surrounding whitespace and empty
            entries are dropped.
        output: path of the HTML file to write; defaults to DEF_OUTPUT.
        *flags: optional switches forwarded from the command line. Only
            "--and" is recognised: keep a row only when every term occurs
            in it. Without it a row is kept when any term occurs.

    Raises:
        ValueError: if no usable search term is given or a flag is unknown.
        requests.HTTPError: if the server answers with an error status.
    """
    and_flag = "--and"
    # The command line is forwarded positionally, so when no output file is
    # given the switch arrives in the `output` slot.
    if output == and_flag:
        flags = (and_flag,) + flags
        output = DEF_OUTPUT
    unknown = [str(flag) for flag in flags if flag != and_flag]
    if unknown:
        raise ValueError("Unknown option(s): {}".format(", ".join(unknown)))
    combine = all if and_flag in flags else any

    # An empty term is a substring of every comment, so discard those.
    terms = [term.strip() for term in search_terms.split(",")]
    terms = [term for term in terms if term]
    if not terms:
        raise ValueError("No search terms given")

    response = re_get(url)
    # Fail loudly instead of parsing an error page as an empty thread.
    response.raise_for_status()
    soup = BS(response.text, 'html.parser')
    comments = soup.find_all('tr', class_=["athing", "comtr"])

    matches = []
    for row in comments:
        text = row.text  # extract the text once per row, not once per term
        if combine(term in text for term in terms):
            matches.append(row)

    print("Comment size: {}".format(len(comments)))
    print("Matches size: {}".format(len(matches)))

    # Explicit encoding: comment text is not guaranteed to fit the platform
    # default codec.
    with open(output, 'w', encoding='utf-8') as f:
        f.writelines(str(row) + "\n" for row in matches)
if __name__ == "__main__":
    # Local import: the module only imports `argv` from sys at the top.
    import sys

    # Skip the script name without mutating sys.argv.
    cli_args = argv[1:]
    if len(cli_args) < 2:
        print(HELP_MSG)
        # Non-zero status so callers can tell the run did nothing.
        sys.exit(2)
    try:
        main(*cli_args)
    except Exception as e:
        # Top-level boundary: report the failure on stderr and signal it
        # through the exit status instead of exiting successfully.
        print("Error: {}".format(e), file=sys.stderr)
        sys.exit(1)
# end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment