Skip to content

Instantly share code, notes, and snippets.

# Fetch the Hacker News front page. An explicit timeout is passed because
# requests otherwise waits indefinitely when the server never responds.
pagedata = requests.get("https://news.ycombinator.com/", timeout=10)
python -m pip install --upgrade pip
pip install pypiwin32
pip install requests bs4 selenium
pip install requests bs4 selenium
# You may need to add --user to the end, depending on your system
jsresults = {list} [['https://techcrunch.com/2019/03/03/facebook-phone-number-look-up/', 'https://twitter.com/rqou_/status/1101331385632022528', 'https://qmlbook.github.io/', 'https://retrobitch.wordpress.com/2019/02/12/pac-man-the-untold-story-of-how-we-really-played-the-ga
00 = {list} ['https://techcrunch.com/2019/03/03/facebook-phone-number-look-up/', 'https://twitter.com/rqou_/status/1101331385632022528', 'https://qmlbook.github.io/', 'https://retrobitch.wordpress.com/2019/02/12/pac-man-the-untold-story-of-how-we-really-played-the-gam
01 = {list} ['https://twitter.com/rqou_/status/1101331385632022528', 'kpcyrd']
02 = {list} ['https://qmlbook.github.io/', 'swang']
03 = {list} ['https://retrobitch.wordpress.com/2019/02/12/pac-man-the-untold-story-of-how-we-really-played-the-game/', 'mattigames']
04 = {list} ['https://github.com/jakevdp/PythonDataScienceHandbook', 'wespiser_2018']
05 = {list} ['https://arxiv.org/abs/1902.07254', 'fulafel']
06 = {list} ['https://www.wbez.org/shows/wbez-news/the-middle-
results = {list} [['https://ourworldindata.org/the-link-between-life-expectancy-and-health-spending-us-focus', 'chrismeller'], ['https://www.sqlite.org/json1.html', 'Sean1708'], ['https://github.com/Microsoft/nni', 'No commenters'], ['https://github.com/pugwonk/gif2xlsx/bl
00 = {list} ['https://ourworldindata.org/the-link-between-life-expectancy-and-health-spending-us-focus', 'chrismeller']
01 = {list} ['https://www.sqlite.org/json1.html', 'Sean1708']
02 = {list} ['https://github.com/Microsoft/nni', 'No commenters']
03 = {list} ['https://github.com/pugwonk/gif2xlsx/blob/master/README.md', 'cosmie']
04 = {list} ['https://www.universityofcalifornia.edu/press-room/uc-terminates-subscriptions-worlds-largest-scientific-publisher-push-open-access-publicly', 'pwthornton']
05 = {list} ['https://github.com/triska/lisprolog', 'jasim']
06 = {list} ['https://adnauseam.io/', 'Spare_account']
07 = {list} ['item?id=19274941', 'zrail']
08 = {list} ['https://www.nytimes.com/2019/02/24/business/china-pig-technology-fa
threadIDs = {list} ['19279396', '19277809', '19279003', '19278075', '19273955', '19278555', '19278936', '19274941', '19277846', '19277653', '19274406', '19278891', '19278302', '19276113', '19277263', '19276977', '19277978', '19275755', '19276751', '19277910', '19275738', '19
00 = {str} '19279396'
01 = {str} '19277809'
02 = {str} '19279003'
03 = {str} '19278075'
04 = {str} '19273955'
05 = {str} '19278555'
06 = {str} '19278936'
07 = {str} '19274941'
08 = {str} '19277846'
threadIDs = {list} ['19279396', '19279396', '19279396', '19279396', '19277809', '19277809', '19277809', '19277809', '19279003', '19279003', '19279003', '19279003', '19278075', '19278075', '19278075', '19278075', '19273955', '19273955', '19273955', '19273955', '19278555', '19
000 = {str} '19279396'
001 = {str} '19279396'
002 = {str} '19279396'
003 = {str} '19279396'
004 = {str} '19277809'
005 = {str} '19277809'
006 = {str} '19277809'
007 = {str} '19277809'
008 = {str} '19279003'
>>> pagedata
<Response [200]>
def jsscrapethread(jscleanthread):
    """Pull the title link(s) and a commenter name out of one thread's markup.

    The argument is converted with ``str()`` and then searched with two
    regular expressions, so any object whose string form is the thread's
    markup can be passed in.

    Args:
        jscleanthread: The thread markup, or an object whose ``str()`` yields it.

    Returns:
        A ``(links, commenter)`` tuple. ``links`` is a list of every ``href``
        value captured from an ``<a class="title" href="...">`` tag (empty if
        none match). ``commenter`` is the second ``#/user/<name>"`` match, or
        the string ``"No Commenters"`` when fewer than two matches exist.
    """
    markup = str(jscleanthread)

    jssinglethreadlinksearch = re.compile(r'\<a class="title" href="(.+?)"')
    jssinglethreadlink = jssinglethreadlinksearch.findall(markup)

    jscommenterIDsearch = re.compile(r'#\/user\/(.+?)"')
    jscommenterIDs = jscommenterIDsearch.findall(markup)

    # NOTE(review): index 1 (not 0) is used; presumably the first user link
    # belongs to the submitter rather than a commenter -- confirm against the
    # page structure.
    try:
        jsfirstcommenter = jscommenterIDs[1]
    except IndexError:
        # Only a too-short match list is expected here; anything else
        # (e.g. KeyboardInterrupt) should propagate instead of being hidden.
        jsfirstcommenter = "No Commenters"
    return jssinglethreadlink, jsfirstcommenter