Skip to content

Instantly share code, notes, and snippets.

@adibenc
Created December 18, 2020 11:01
Show Gist options
  • Save adibenc/2cbc8f9431c789b598f4d10061db4358 to your computer and use it in GitHub Desktop.
Save adibenc/2cbc8f9431c789b598f4d10061db4358 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
# Base address of the site; the listing path below is appended to it.
url = 'https://news.ycombinator.com/'
# Path and query string of the upvoted listing. The page number is appended
# directly after "p=". "your.user.id" is a placeholder to be replaced.
upvoted = 'upvoted?id=your.user.id&p='
# File-name template for one saved page; "{}" receives the page number.
fmt = 'hn{}.html'
# HTML snippet that embeds one saved page; "{}" receives the file name.
iframe = (
    '<iframe src="{}" title="description" '
    'height="1500" width="800"></iframe>'
)
# Cookies sent with every request. The value is a placeholder -- presumably
# the session cookie of a logged-in account; confirm before use.
cookies = dict(user='your cookie')
def getUpvoted(page = 0, timeout = 30):
    """Fetch one page of the upvoted listing.

    The request URL is ``url + upvoted + str(page)`` and the module-level
    ``cookies`` dict is sent along with it.

    Args:
        page: Page number appended to the ``p=`` query parameter.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, so without this a stalled
            connection would block the caller indefinitely.

    Returns:
        The ``requests.Response`` for the page. The HTTP status is not
        checked here; callers see error responses as ordinary results.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` elapses.
    """
    ru = url + upvoted + str(page)
    return requests.get(ru, cookies=cookies, timeout=timeout)
def parse(res):
    """Return every ``<tr>`` element found in the HTML markup ``res``.

    ``res`` is handed to BeautifulSoup with the ``html.parser`` backend and
    the result of ``find_all('tr')`` is returned unchanged.
    """
    return BeautifulSoup(res, 'html.parser').find_all('tr')
def doGet(n=32):
    """Download upvoted pages 1 .. n-1 and save each one to disk.

    Every page is fetched with ``getUpvoted``, echoed to stdout and written
    as UTF-8 to the file named by ``fmt`` (``hn<i>.html``). If printing or
    saving a page fails, ``"<i> fail"`` is printed and the loop moves on to
    the next page.

    Args:
        n: Exclusive upper bound of the page range; the default of 32
            covers pages 1 through 31.

    Raises:
        Whatever ``getUpvoted`` raises: the fetch itself is deliberately
        left outside the ``try`` so network errors still propagate.
    """
    for i in range(1, n):
        r = getUpvoted(i)
        try:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(r.text)
            # Binary mode: the payload is already encoded bytes, which a
            # text-mode file rejects on Python 3. The context manager
            # closes the handle even if the write fails.
            with open(fmt.format(str(i)), "wb") as out:
                out.write(r.text.encode('utf-8'))
        except Exception:
            # Not a bare ``except:`` so Ctrl-C / SystemExit still stop the run.
            print("{} fail".format(i))
def doMerge(n=32):
    """Write ``alliframe.html``, embedding saved pages 1 .. n-1 as iframes.

    For each page number the file name is built from ``fmt``, the file is
    opened to confirm it can be read, and an ``iframe`` snippet pointing at
    it (followed by ``<br>``) is collected. The snippets are concatenated
    and written to ``alliframe.html`` in the current directory.

    Args:
        n: Exclusive upper bound of the page range; the default of 32
            covers pages 1 through 31.

    Raises:
        IOError / OSError: If a page file cannot be opened (nothing is
            written in that case) or the output file cannot be written.
    """
    frames = []
    for i in range(1, n):
        fname = fmt.format(str(i))
        # The page content is never used; opening is kept only so that a
        # missing or unreadable page still aborts the merge. The handle is
        # closed immediately instead of being leaked.
        with open(fname, "r"):
            pass
        frames.append(iframe.format(fname) + "<br>")
    with open("alliframe.html", "w") as out:
        out.write("".join(frames))
# Script entry: build alliframe.html from hn1.html .. hn31.html. doMerge opens
# each of those files, so they must already exist (doGet is what writes them,
# and it is not called here).
doMerge(32)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment