Skip to content

Instantly share code, notes, and snippets.

@wilspi
Created March 17, 2018 05:16
Show Gist options
  • Save wilspi/919fa1603e92947767f2a86f9a73c218 to your computer and use it in GitHub Desktop.
Save wilspi/919fa1603e92947767f2a86f9a73c218 to your computer and use it in GitHub Desktop.
Get Quotes from BrainyQuote.com
#!/usr/bin/python
from selenium import webdriver
from bs4 import BeautifulSoup
def getPage(url):
    """Return the HTML source of ``url`` as loaded in a Chrome browser.

    A new Chrome instance is started through chromedriver on every call
    and is shut down again before the function returns or raises.

    Args:
        url: Address of the page to load.

    Returns:
        The browser's ``page_source`` after navigating to ``url``.
    """
    # open with chromedriver
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        return browser.page_source
    finally:
        # Quit unconditionally so a failed navigation does not leave the
        # browser session open.
        browser.quit()
def main():
    """Print BrainyQuote topic links and fetch the quotes of one topic.

    Steps:
      1. Load the topics index page and collect every ``div.bqLn`` link
         whose href starts with ``/quotes/topics``.
      2. Print the name and URL of each collected topic.
      3. For the topic at index 1 only, load its page, gather the text of
         each ``span.bqQuoteLink`` link into ``topic['quotes']`` and print
         the resulting dict.
    """
    topics_url = 'http://www.brainyquote.com/quotes/topics.html'
    topic_prefix = "/quotes/topics"

    html = getPage(topics_url)
    soup = BeautifulSoup(html, 'html.parser')

    # Filter while collecting instead of popping from the list during
    # iteration, which skipped the element following every removal.
    topics = []
    for entry in soup.findAll("div", {"class": "bqLn"}):
        link = entry.find('a')
        if link is None:
            # No anchor inside this div: nothing to record.
            continue
        url = link.get('href', None)
        if url is None or not url.startswith(topic_prefix):
            # Anchors without an href, or pointing elsewhere, are not topics.
            continue
        topics.append({'url': url, 'name': link.get_text()})

    for topic in topics:
        print("")
        print(topic['name'])
        print(topic['url'])
        print("################")

    # The original condition `i%2==1 and i==1` is true only for i == 1, so
    # exactly one topic (the second, when present) is fetched.
    # NOTE(review): looks like a debugging limit - confirm before widening.
    for topic in topics[1:2]:
        html = getPage("http://www.brainyquote.com" + topic['url'])
        sp = BeautifulSoup(html, 'html.parser')
        topic['quotes'] = []
        for span in sp.findAll("span", {"class": "bqQuoteLink"}):
            quote_link = span.find('a')
            if quote_link is not None:
                topic['quotes'].append(quote_link.get_text())
        print(topic)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment