Skip to content

Instantly share code, notes, and snippets.

@sillasgonzaga
Created January 11, 2019 20:42
Show Gist options
  • Save sillasgonzaga/c5b2ff5ea9c0c7f6149988493298385c to your computer and use it in GitHub Desktop.
Save sillasgonzaga/c5b2ff5ea9c0c7f6149988493298385c to your computer and use it in GitHub Desktop.
library(rvest)
library(stringr)
# Search-results page for apartment rentals in Sao Paulo (per the URL path).
url_zap <- "https://www.zapimoveis.com.br/aluguel/apartamentos/sp+sao-paulo/?__zt=ad:a"
# Download the results page and parse it into an HTML document.
# stop_for_status() turns an HTTP 4xx/5xx response into an R error, so an
# error page is never parsed as if it were a listing page.
html_obj <- httr::GET(url_zap) %>%
  httr::stop_for_status() %>%
  read_html()
# Last page: read the pagination label (".num-of") to find the page count.
num_paginas <- html_obj %>%
  html_nodes(".num-of") %>%
  html_text()
# Fail with a clear message when the label is missing (for example after a
# layout change) instead of an opaque "subscript out of bounds" error below.
if (length(num_paginas) == 0) {
  stop("Pagination element '.num-of' not found in the page", call. = FALSE)
}
# Take the digit groups of the first label and glue them into one number.
# NOTE(review): every digit group is concatenated, so this assumes the label
# holds a single number (possibly with a separator) -- confirm on the page.
num_paginas <- str_extract_all(num_paginas[1], "[0-9]+")[[1]] %>%
  paste0(collapse = "") %>%
  as.numeric()
### Data to extract
# Summary card (".minificha") of each listing found in the parsed page
fichas <- html_nodes(html_obj, ".minificha")
# url (no extraction code is written for this field here)
# Neighbourhood of the first card: text of the <strong> element(s) under
# ".pull-right", with whitespace trimmed and collapsed
str_squish(html_text(html_nodes(fichas[1], ".pull-right strong")))
# Street address of the first card: texts of the <span> elements inside <h2>
endereco <- html_text(html_nodes(fichas[1], "h2 span"))
# Presumably street, city and state, in that order -- TODO confirm on the page
rua <- endereco[1]
cidade <- endereco[2]
uf <- endereco[3]
# Rent price of the first card: keep only the digit groups of the
# ".preco strong" text and join them into a single number
html_text(html_nodes(fichas[1], ".preco strong")) %>%
  str_extract_all("\\d+") %>%
  unlist() %>%
  str_c(collapse = "") %>%
  as.numeric()
# IPTU (property tax) + condominium fee | total amount: raw text of the
# <span> element(s) under ".preco" in the first card
html_text(html_nodes(fichas[1], ".preco span"))
# Bedrooms of the first card: digit groups found in the ".icone-quartos" text
as.numeric(
  unlist(
    str_extract_all(
      html_text(html_nodes(fichas[1], ".icone-quartos")),
      "\\d+"
    )
  )
)
# Parking spaces of the first card: digit groups found in the ".icone-vagas"
# text
as.numeric(
  unlist(
    str_extract_all(
      html_text(html_nodes(fichas[1], ".icone-vagas")),
      "\\d+"
    )
  )
)
# Area of the first card: drop the first "m2" from the ".icone-area" text and
# convert what is left to a number.
# NOTE(review): assumes the text is a plain number followed by "m2"; a
# thousands separator (e.g. "1.200") would be read as a decimal -- confirm.
as.numeric(
  str_remove(
    html_text(html_nodes(fichas[1], ".icone-area")),
    "m2"
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment