Skip to content

Instantly share code, notes, and snippets.

@HereIsJade
Last active April 30, 2017 19:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HereIsJade/77e6630ac5f9f79489e855f066a19ee2 to your computer and use it in GitHub Desktop.
Save HereIsJade/77e6630ac5f9f79489e855f066a19ee2 to your computer and use it in GitHub Desktop.
RWET Final Project: Decent Dialogues
import requests
from bs4 import BeautifulSoup
import sys
from selenium import webdriver #open webdriver for specific browser
from selenium.webdriver.common.keys import Keys # for necessary browser action
from selenium.webdriver.common.by import By # For selecting html code
import time
import os.path
def getQuotes(driver,quotes):
    """Append the text of every quote element on the current page to *quotes*.

    Args:
        driver: Selenium WebDriver (or any object exposing
            ``find_elements_by_class_name``) positioned on the page to read.
        quotes: Collection mutated in place; the ``.text`` of each element
            carrying the CSS class ``b-qt`` is appended, in the order the
            driver returns the elements.

    Returns:
        None. The result is delivered through *quotes*.
    """
    # NOTE: find_elements_by_class_name was removed in Selenium 4.3; when
    # upgrading, use driver.find_elements(By.CLASS_NAME, "b-qt") instead.
    for element in driver.find_elements_by_class_name("b-qt"):
        quotes.append(element.text)
def getFilename(nameStr):
    """Return the relative path of the text file holding quotes for *nameStr*.

    The name is lower-cased and every space becomes an underscore, then the
    result is wrapped as ``txt/<slug>.txt`` (e.g. "Mark Twain" ->
    "txt/mark_twain.txt").
    """
    slug = "_".join(nameStr.lower().split(" "))
    return "txt/%s.txt" % slug
def scrape(nameStr):
    """Save an author's BrainyQuote quotes to a local text file.

    If the file returned by ``getFilename(nameStr)`` already exists, nothing
    is downloaded. Otherwise a Firefox WebDriver session opens the author's
    page, scrolls to the bottom 20 times (one second apart), collects the
    quotes via ``getQuotes`` and writes them one per line.

    Args:
        nameStr: Author name, e.g. "Mark Twain".

    Returns:
        The path of the quotes file, whether it was just written or already
        existed.
    """
    # Local import keeps this function self-contained; io.open gives an
    # explicit-encoding text file on both Python 2 and Python 3.
    import io

    filename = getFilename(nameStr)
    if os.path.exists(filename):
        print("File already exists")
        return filename

    # getFilename() yields "txt/<slug>.txt": index 4 is the slug's first
    # character and [4:-4] strips the "txt/" prefix and ".txt" suffix.
    url = ("https://www.brainyquote.com/quotes/authors/"
           + filename[4] + "/" + filename[4:-4] + ".html")

    quotes = []
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        # Repeated scroll-to-bottom with a pause; presumably this gives the
        # page time to load additional quotes -- confirm against the site.
        for _ in range(20):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(1)
        getQuotes(driver, quotes)
    finally:
        # Always shut the browser down, even if loading or scraping failed.
        driver.quit()

    # Make sure the output directory exists before opening the file.
    directory = os.path.dirname(filename)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    # The context manager guarantees the file is flushed and closed; UTF-8
    # avoids UnicodeEncodeError on quotes containing non-ASCII characters.
    with io.open(filename, "w", encoding="utf-8") as txtFile:
        for item in quotes:
            txtFile.write(u"%s\n" % item)
    return filename
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment