Skip to content

Instantly share code, notes, and snippets.

@francoisstamant
Last active May 28, 2020 15:30
Show Gist options
  • Save francoisstamant/43e70b42253ea37219172ae90100783b to your computer and use it in GitHub Desktop.
Save francoisstamant/43e70b42253ea37219172ae90100783b to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from time import sleep
from random import randint
###########################
# GET STARTING DATAFRAME
###########################
# Scrape the Kijiji apartment/condo result pages for Montreal, collect the
# text of every element whose CSS class is 'price', 'distance' or 'details'
# into one flat list, then split that list into three DataFrame columns.

# Loop to go over all pages (page numbers 1 through 79).
pages = np.arange(1, 80, 1)
data = []
for page_number in pages:
    # The call is parenthesized so the URL can span two lines without a
    # line-continuation character.
    # NOTE(review): the page number is inserted as a bare path segment
    # (".../ville-de-montreal/2/c37l..."); confirm whether the site expects
    # a "page-2" style segment instead.
    page = requests.get(
        "https://www.kijiji.ca/b-appartement-condo/ville-de-montreal/" + str(page_number)
        + "/c37l1700281?radius=10.0&ad=offering&address=Montr%C3%A9al%2C+QC+H2W+1S8&ll=45.503905,-73.570856",
        timeout=30,  # do not hang forever on a stalled connection
    )
    soup = BeautifulSoup(page.text, 'html.parser')
    my_table = soup.find_all(class_=['price', 'distance', 'details'])
    # Random 2-10 second pause between requests.
    sleep(randint(2, 10))
    # Accumulate across pages: this must stay inside the page loop,
    # otherwise only the last page's tags would be kept.
    data.extend(tag.text.strip() for tag in my_table)

# Creating columns
# The flat list is de-interleaved with a stride of 3.
# NOTE(review): this assumes every listing contributes exactly one price,
# one distance and one details element, in that order; a listing missing
# any of them shifts every later value into the wrong column. If the three
# slices end up with different lengths, the DataFrame constructor raises
# ValueError.
price = data[0::3]
location = data[1::3]
size = data[2::3]
df = pd.DataFrame({
    'size': size,
    'distance_center': location,
    'price': price,
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment