Skip to content

Instantly share code, notes, and snippets.

@Q726kbXuN
Created April 28, 2021 14:49
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Q726kbXuN/15e61acc003bb6d46a458001fedf6829 to your computer and use it in GitHub Desktop.
Save Q726kbXuN/15e61acc003bb6d46a458001fedf6829 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import html
import sys
import json
import urllib.request
import os
import multiprocessing
# Hacker News username whose profile is fetched to find recent submissions.
# It is concatenated into the user API URL, so it must be set to a string
# before the script is run.
HACKER_NEWS_USER = None # Need a username to follow
# Only this many leading entries of the user's "submitted" list are examined.
NUM_POSTS_TO_CHECK = 20
# When True, every item load prints whether it was a cache hit or a miss.
SHOW_ACTIVITY = False
class RssDoc:
    """Accumulate a minimal RSS document as one string and emit it on demand.

    The channel header is fixed at construction time, ``add_item`` appends
    ``<item>`` elements, and ``write`` supplies the closing tags.
    """

    def __init__(self):
        """Start the document with the opening tags and channel metadata."""
        self.page = "<rss><channel>"
        self.page += "<title>Todo</title>"
        self.page += "<link>http://www.example.com/</link>"
        self.page += "<description>Todo</description>"
        self.page += "<language>en-us</language>"
        self.page += "<copyright></copyright>"
        self.page += "<category>News</category>"
        self.page += "<ttl>3600</ttl>"

    def add_item(self, title, link, desc):
        """Append one ``<item>`` element to the document.

        Args:
            title: Plain-text title; it is HTML-escaped here, so callers
                must pass it unescaped.
            link: Item URL; it is HTML-escaped here.
            desc: HTML body, embedded verbatim inside a CDATA section.
        """
        title = html.escape(title)
        link = html.escape(link)
        # A literal "]]>" would terminate the CDATA section early.  Splitting
        # it across two adjacent CDATA sections keeps the parsed text intact
        # (inserting a space, as before, silently altered the content).
        desc = desc.replace("]]>", "]]]]><![CDATA[>")
        self.page += f"""
<item>
<title>{title}</title>
<link>{link}</link>
<description><![CDATA[{desc}]]></description>
</item>
"""

    def write(self, target):
        """Emit the finished document.

        Args:
            target: A file path to (over)write, or ``"-"`` to print the
                document to stdout.
        """
        # Build the closed document without mutating self.page, so calling
        # write() more than once never stacks extra closing tags.
        document = self.page + "</channel></rss>"
        if target == "-":
            print(document)
        else:
            # Explicit UTF-8 so non-ASCII comment text does not depend on
            # the platform's locale encoding.
            with open(target, "w", encoding="utf-8") as f:
                f.write(document)
def fill_template(values):
    """Render the HTML body of one feed entry describing a reply.

    Args:
        values: Mapping with the keys 're_stack', 'story_id', 'story_name',
            'poster', 'parent_id', 'post_id' and 'body'.  'story_name',
            'poster' and 'body' are HTML-escaped; the other values are
            interpolated as-is.

    Returns:
        The markup as a string: an orange banner table linking to the story,
        followed by a table naming the poster and showing the reply text.
    """
    # Pull the fields out in the same order the template consumes them.
    prefix = values['re_stack']
    story_id = values['story_id']
    story_title = html.escape(values['story_name'])
    author = html.escape(values['poster'])
    parent_id = values['parent_id']
    post_id = values['post_id']
    comment_text = html.escape(values['body'])

    # Banner: the "Re: " prefix plus a link to the top-level story.
    banner = f"""
<table bgcolor="#ff9900" style="width: 100%; margin: 0; padding: 0;">
<tr style="margin: 0; padding: 0;"><td style="font-size: 14px; margin: 0; padding: 0;"></td>
<td style="font-size: 14px; display: block !important; max-width: 600px !important; clear: both !important; margin: 0 auto; padding: 0;">
<div style="max-width: 600px; display: block; margin: 0 auto; padding: 15px; font-weight: bold; text: black">
{prefix}<a href="https://news.ycombinator.com/item?id={story_id}" style="color: #000">{story_title}</a>
</div>
</td>
<td style="font-size: 14px; margin: 0; padding: 0;"></td>
</tr>
</table>
"""

    # Reply: who answered, links to the parent and the reply, then the text.
    reply = f"""<table style="width: 100%; margin: 0; padding: 0;">
<tr style="margin: 0; padding: 0;"><td style="font-size: 14px; margin: 0; padding: 0;"></td>
<td bgcolor="#FFFFFF" style="font-size: 14px; display: block !important; max-width: 600px !important; clear: both !important; margin: 0 auto; padding: 0;">
<div style="max-width: 600px; display: block; margin: 0 auto; padding: 15px;">
<table style="width: 100%; margin: 0; padding: 0;">
<tr style="margin: 0; padding: 0;">
<td style="font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; font-size: 14px; margin: 0; padding: 0; line-height: 1.6em; color: black">
<a href="https://news.ycombinator.com/user?id={author}" style="color: #000">{author}</a> replied to <a
href="https://news.ycombinator.com/item?id={parent_id}" style="color: #000">your comment</a> [<a
href="https://news.ycombinator.com/item?id={post_id}" style="color: #000">link</a>]:
<br /><br />
{comment_text}
</td>
</tr>
</table>
</div>
</td>
<td style="font-size: 14px; margin: 0; padding: 0;"></td>
</tr>
</table>
"""
    return banner + reply
def load_item(cache=None, story_id=None, desc=""):
    """Fetch one item, or the followed user's profile, from the Hacker News API.

    Args:
        cache: Optional dict mapping ``str(item id)`` to previously fetched
            items.  A hit is returned without any network access; a miss is
            stored after fetching.  Ignored for user-profile lookups.
        story_id: Id of the item to fetch.  When None, the profile of
            ``HACKER_NEWS_USER`` is fetched instead.
        desc: Label included in the activity messages printed when
            ``SHOW_ACTIVITY`` is true.

    Returns:
        The decoded JSON payload with its 'used' key set to True.

    Raises:
        TypeError: if ``story_id`` is None while ``HACKER_NEWS_USER`` is
            still None (the URL concatenation fails).
        urllib.error.URLError: if the request itself fails.
    """
    # The user profile has no item id.  Caching it under the key "None"
    # would pin a stale 'submitted' list, so only real ids use the cache.
    key = None if story_id is None else str(story_id)
    cacheable = cache is not None and key is not None

    if cacheable and key in cache:
        if SHOW_ACTIVITY:
            print(f"Cache hit '{desc}'")
        # Flag the entry as still referenced so it survives cache pruning.
        cache[key]['used'] = True
        return cache[key]

    if SHOW_ACTIVITY:
        print(f"Cache MISS '{desc}'")
    if story_id is None:
        url = "https://hacker-news.firebaseio.com/v0/user/" + HACKER_NEWS_USER + ".json"
    else:
        url = "https://hacker-news.firebaseio.com/v0/item/" + key + ".json"

    # Context manager so the HTTP connection is released even if reading fails.
    with urllib.request.urlopen(url) as resp:
        payload = resp.read()
    ret = json.loads(payload)
    ret['used'] = True
    if cacheable:
        cache[key] = ret
    return ret
def worker(story_id):
    """Fetch a single item by id for the process pool, bypassing any cache."""
    item = load_item(desc="Worker Loader", story_id=story_id)
    return item
def get_comments(dest_name):
    """Build an RSS feed of the replies to the followed user's recent posts.

    The first ``NUM_POSTS_TO_CHECK`` submissions of ``HACKER_NEWS_USER`` are
    fetched in parallel.  Every direct reply ("kid") of each submission
    becomes one feed item.  Replies and ancestor items are cached in
    ``helper_hackernews.json`` in the current directory; entries not touched
    during this run are dropped when the cache is rewritten.

    Args:
        dest_name: Path of the RSS file to write, or ``"-"`` for stdout.
    """
    cache_path = "helper_hackernews.json"
    cache = {}
    if os.path.isfile(cache_path):
        with open(cache_path, encoding='utf-8') as f:
            cache = json.load(f)
        # Mark everything stale; load_item() re-flags the entries it serves.
        for entry in cache.values():
            entry['used'] = False

    rss = RssDoc()
    todo = load_item(desc="Get user's activity")["submitted"][:NUM_POSTS_TO_CHECK]

    # The context manager shuts the worker processes down once every result
    # has been consumed, instead of leaving the pool open.
    with multiprocessing.Pool(processes=8) as pool:
        for story in pool.imap_unordered(worker, todo):
            kids = story.get("kids", [])
            if not kids:
                # No replies: skip the ancestor walk (and its fetches).
                continue

            # Walk up to the top-level story once per submission; the result
            # is the same for every reply.  Starting from the submission
            # itself also covers submissions that are stories, which have no
            # 'parent' key and previously raised KeyError.
            root = story
            re_stack = ""
            step_desc = "Get a parent"
            while 'parent' in root:
                root = load_item(story_id=root['parent'], cache=cache, desc=step_desc)
                re_stack += "Re: "
                step_desc = "Get a parent's parent"
            if not re_stack:
                # A reply directly to the user's own story still gets one "Re: ".
                re_stack = "Re: "

            for child in kids:
                sub_story = load_item(story_id=child, cache=cache, desc="Get a reply")
                post = {
                    're_stack': re_stack,
                    'poster': sub_story.get('by', '(none)'),
                    'post_id': child,
                    'parent_id': story['id'],
                    'body': sub_story.get('text', '(deleted)'),
                    'story_id': root['id'],
                    'story_name': root.get('title', ''),
                }
                rss.add_item(
                    # RssDoc.add_item() escapes the title itself; escaping it
                    # here as well produced double-escaped text ("&amp;amp;").
                    f"{post['re_stack']}{post['story_name']}",
                    f"https://news.ycombinator.com/item?id={post['post_id']}",
                    fill_template(post),
                )

    rss.write(dest_name)

    # Keep only the entries that were referenced during this run.
    cache = {x: y for x, y in cache.items() if y['used']}
    with open(cache_path, "w", encoding='utf-8', newline='') as f:
        json.dump(cache, f, indent=2, sort_keys=True)
if __name__ == "__main__":
    # Exactly one argument is expected: the RSS destination ('-' for stdout).
    if len(sys.argv) != 2:
        print("Need to specify filename to write RSS results to")
        print(" Use '-' to write to stdout")
    else:
        get_comments(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment