Skip to content

Instantly share code, notes, and snippets.

@kraravind
Created March 31, 2016 21:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kraravind/43feb4963136b0d95770fb9628dcf9a3 to your computer and use it in GitHub Desktop.
Save kraravind/43feb4963136b0d95770fb9628dcf9a3 to your computer and use it in GitHub Desktop.
#Author - Aravind Kr
import time
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import unicodedata
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
# Start a Firefox-driven browser session and load the first page of
# the rental listings.
driver = webdriver.Firefox()
driver.get('http://xyz.com/for-rent/nyc')

# Four independent, initially empty result lists for the scrape.
# NOTE(review): presumably populated by the page loop that follows -- confirm.
name, rent, type1, url = [], [], [], []
# This portion scrapes the rental listing pages one by one and collects details of rent, name and URLs for 18000+ listings
for i in range(1587):
delay = 5 # seconds
try:
wait = WebDriverWait(driver, 5)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'next')))
print "Page is ready!"
# WebDriverWait(driver, delay).until(EC.presence_of_element_located(driver.find_element_by_class_name('next')))
html_doc = driver.page_source
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment