Skip to content

Instantly share code, notes, and snippets.

@shinysu
Created August 10, 2020 23:31
Show Gist options
  • Save shinysu/1a5a91dd3bb21910c9a1bb1291815081 to your computer and use it in GitHub Desktop.
Save shinysu/1a5a91dd3bb21910c9a1bb1291815081 to your computer and use it in GitHub Desktop.
Script to extract questions from Quora search results.
import csv
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Number of PAGE_DOWN key presses sent to the page before the results are collected.
TOTAL_PAGES = 100
# Filesystem path of the chromedriver binary handed to Selenium.
CHROME_DRIVER_PATH = '/Users/shinysuresh/Documents/chromedriver-5'
# Quora search-results page that gets scraped.
url = 'https://www.quora.com/search?q=engineering+colleges+hostel+Chennai'
# CSV file the collected question texts are appended to.
OUTPUT_FILE = '/Users/shinysuresh/Projects/StudentQuestions/quora_questions.csv'
def write_csv_file(rows, output_file=None):
    """Append each item of *rows* to a CSV file as a one-column row.

    Args:
        rows: Iterable of values; every value becomes a single-cell CSV row.
        output_file: Path of the CSV file to append to. When omitted, the
            module-level ``OUTPUT_FILE`` is used.
    """
    if output_file is None:
        output_file = OUTPUT_FILE
    # Explicit UTF-8 so non-ASCII text is written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(output_file, 'a', newline='', encoding='utf-8') as file:
        csv.writer(file).writerows([row] for row in rows)
def scrap_page(url):
    """Scroll a Quora search page and save the question texts found on it.

    Opens *url* in Chrome, sends ``TOTAL_PAGES`` PAGE_DOWN key presses (with
    a short pause after each) so more results can load, collects the
    non-empty text of every matching question span, prints the list and
    appends it to the CSV file via ``write_csv_file``.

    Args:
        url: Address of the search-results page to scrape.
    """
    # Imported locally so this function carries its own dependency; the
    # ``find_element(By..., ...)`` form works where the ``find_element_by_*``
    # helpers are no longer available.
    from selenium.webdriver.common.by import By

    # NOTE(review): ``executable_path`` is only accepted by older Selenium
    # releases -- confirm against the installed version.
    browser = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH)
    try:
        browser.get(url)
        html = browser.find_element(By.TAG_NAME, 'html')
        for _ in range(TOTAL_PAGES):
            html.send_keys(Keys.PAGE_DOWN)
            # Pause between key presses before scrolling further.
            time.sleep(0.5)
        all_ans = browser.find_elements(
            By.XPATH, "//span[@class='ui_qtext_rendered_qtext']"
        )
        # Read each element's text once; every ``.text`` access is a
        # separate request to the driver.
        texts = (element.text for element in all_ans)
        questions = [text for text in texts if text]
    finally:
        # Always shut the browser down, even when scraping fails, so no
        # Chrome/chromedriver process is left behind.
        browser.quit()
    print(questions)
    write_csv_file(questions)
# Script entry point: scrape the configured search URL when this file runs.
scrap_page(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment