Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Created July 17, 2019 03:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sergiolucero/d8ab6f5430c3e0d24e47aa06da8ac4e4 to your computer and use it in GitHub Desktop.
Save sergiolucero/d8ab6f5430c3e0d24e47aa06da8ac4e4 to your computer and use it in GitHub Desktop.
Scraping CNTV
import requests, pandas as pd
from bs4 import BeautifulSoup
# Default search-results URL on yapo.cl (path /chile/inmuebles, query q=casa);
# used as the default `url` argument of scrape_yapo.
CLIO_DEF = 'https://www.yapo.cl/chile/inmuebles?ca=15_s&l=0&q=casa&cmn=&st=a'
def text_search(bs, classname):
    """Return the stripped text of every ``<span>`` carrying a given class.

    Args:
        bs: Parsed document (or tag) exposing ``find_all``, e.g. a
            ``BeautifulSoup`` object.
        classname: Value of the ``class`` attribute to match.

    Returns:
        A list of strings, one per matching span, in the order ``find_all``
        yields them, with leading and trailing whitespace removed.
    """
    return [p0.text.strip()
            for p0 in bs.find_all('span', attrs={'class': classname})]
def scrape_yapo(url=CLIO_DEF, max_rows=47):
    """Scrape one yapo.cl listing page into a DataFrame.

    Args:
        url: Page to download; defaults to ``CLIO_DEF``.
        max_rows: Maximum number of listings to keep per column (the value
            that used to be hard-coded as 47). ``None`` disables the cap.

    Returns:
        A ``pandas.DataFrame`` with the columns ``descripción``, ``region``,
        ``comuna``, ``precio``, ``UF``, ``m2`` and ``UF_m2``. ``UF_m2`` is a
        string of the form ``"<uf>/<m2>"`` built from the ``UF`` and ``m2``
        columns; it is not a computed ratio.

    Raises:
        ValueError: Raised by pandas if the scraped columns end up with
            different lengths.
        IndexError: If a ``UF`` value contains no space-separated second token.
    """
    bs = BeautifulSoup(requests.get(url).text, 'lxml')

    # All per-listing columns share the same cap so they stay aligned.
    precios = text_search(bs, 'price')[:max_rows]
    cprecios = text_search(bs, 'convertedPrice')[:max_rows]
    regiones = text_search(bs, 'region')[:max_rows]
    comunas = text_search(bs, 'commune')[:max_rows]

    # The icon labels mix several attributes; keep only those mentioning
    # 'm2' and cap them at the number of communes found (the original
    # author's acknowledged shortcut, "U CHEAT").
    metros = text_search(bs, 'icons__element-text')
    metros = [m for m in metros if 'm2' in m][:len(comunas)]

    # Titles were previously left uncapped, which made the DataFrame
    # constructor fail whenever the page had more titles than the cap.
    descs = [p0.text
             for p0 in bs.find_all('a', attrs={'class': 'title'})][:max_rows]

    # Debug aid: show how many values each scraped column has.
    print([len(x) for x in [precios, cprecios, regiones, comunas, metros]])

    df = pd.DataFrame(dict(descripción=descs, region=regiones, comuna=comunas,
                           precio=precios, UF=cprecios, m2=metros))

    # Second space-separated token of UF, cut at the first ',' with '.'
    # removed, joined by '/' to the first token of m2.
    # NOTE(review): presumably UF text looks like "UF 3.500,00" — confirm.
    df['UF_m2'] = [uf.split(' ')[1].split(',')[0].replace('.', '') + '/' +
                   m2.split(' ')[0]
                   for uf, m2 in zip(df['UF'], df['m2'])]
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment