Skip to content

Instantly share code, notes, and snippets.

@DanNi0130
Created February 25, 2019 08:52
Show Gist options
  • Save DanNi0130/251d8e03297b98d1727bcee5cf6846a7 to your computer and use it in GitHub Desktop.
Save DanNi0130/251d8e03297b98d1727bcee5cf6846a7 to your computer and use it in GitHub Desktop.
def jsscrapethread(jscleanthread):
    """Extract the thread link(s) and the first commenter from a thread page.

    The argument is converted with ``str()`` and searched with regular
    expressions, so any object whose string form is the page HTML works
    (a parsed document or a raw HTML string).

    Args:
        jscleanthread: The page of a single thread.

    Returns:
        A ``(links, first_commenter)`` tuple. ``links`` is a list with the
        ``href`` of every ``<a class="title" href="...">`` match (a list,
        not a single string). ``first_commenter`` is the second
        ``#/user/<id>"`` match on the page, or ``"No Commenters"`` when the
        page contains fewer than two such matches.
    """
    # Stringify once; both searches run over the same HTML text.
    jspagehtml = str(jscleanthread)

    jssinglethreadlink = re.findall(r'\<a class="title" href="(.+?)"', jspagehtml)
    jscommenterIDs = re.findall(r'#\/user\/(.+?)"', jspagehtml)

    # Index 1, not 0: presumably the first user ID on the page belongs to the
    # thread's author rather than a commenter -- TODO confirm against the site.
    try:
        jsfirstcommenter = jscommenterIDs[1]
    except IndexError:
        # Only a too-short match list means "no commenters"; any other
        # error (or Ctrl-C) should propagate instead of being swallowed.
        jsfirstcommenter = "No Commenters"

    return jssinglethreadlink, jsfirstcommenter
# Visit every comment link, scrape it, and collect one row per thread.
jsresults = []  # Each row is the thread's link list with the commenter appended
for jscommentlink in jscommentlinks:
    # driver.get() is called only to navigate; the page content is read
    # from driver.page_source below.
    driver.get(jscommentlink)
    jscleanthread = bs4.BeautifulSoup(driver.page_source, 'html.parser')

    # Scrape the data; note that the link comes back as a list, not a string.
    jslink, jscommenter = jsscrapethread(jscleanthread)
    jsresults.append(jslink + [jscommenter])

    # Pause 30 seconds between page loads -- presumably to throttle requests.
    # NOTE(review): the original indentation was lost; this sleep is assumed
    # to belong inside the loop -- confirm.
    time.sleep(30)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment