Skip to content

Instantly share code, notes, and snippets.

@MarcinKosinski
Created May 3, 2016 22:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MarcinKosinski/d197195d849977d9d16eb3ec68425fb9 to your computer and use it in GitHub Desktop.
Save MarcinKosinski/d197195d849977d9d16eb3ec68425fb9 to your computer and use it in GitHub Desktop.
library(stringi)
library(stringr)
library(rvest)
#' Collect the offer URLs listed on a single Gumtree results page.
#'
#' Reads the page at `link`, takes the `href` attribute of every node matching
#' the `.href-link` CSS selector and prefixes each one with the site address.
#'
#' @param link URL of one listing page.
#' @return A character vector of offer URLs with the first three entries
#'   removed.
aktualne_oferty <- function(link) {
  strona <- read_html(link)
  wezly <- html_nodes(strona, ".href-link")
  sciezki <- html_attr(wezly, "href")
  adresy_ofert <- paste0("http://www.gumtree.pl", sciezki)
  # Drop the first three offers: they are sponsored and change on every
  # page refresh.
  adresy_ofert[-seq_len(3)]
}
library(pbapply)
# Build the URL of every listing page by appending the page number to the
# base search address.
# NOTE(review): `liczba_stron` (the number of listing pages) is not defined in
# this script; it must already exist in the calling environment.
stopifnot(
  is.numeric(liczba_stron),
  length(liczba_stron) == 1,
  liczba_stron >= 1
)
linki <- paste0(
  "http://www.gumtree.pl/s-pokoje-do-wynajecia/warszawa/v1c9000l3200008p",
  seq_len(liczba_stron)
)

# Scrape every listing page (with a progress bar) and flatten the per-page
# results into one plain character vector. lapply + unlist is used instead of
# sapply because sapply returns a list whenever pages yield different numbers
# of links, which would make `adresy[1]` a list rather than a URL.
adresy <- unlist(pblapply(linki, aktualne_oferty), use.names = FALSE)
if (length(adresy) == 0) {
  stop("No offer links were found on the listing pages.", call. = FALSE)
}

# Download the first offer page.
x <- adresy[1]
web <- read_html(x, encoding = "UTF-8")

# Date the offer was added: text of the matching node(s) with newlines and
# tabs stripped.
data_dodania <- web %>%
  html_nodes("li:nth-child(1) .value") %>%
  html_text() %>%
  str_replace_all("[\n\t]", "")
# When nothing matched, fall back to an empty string so that as.Date() below
# yields a single NA instead of a zero-length Date vector.
if (length(data_dodania) == 0) {
  data_dodania <- ""
}
data_dodania <- as.Date(data_dodania, format = "%d/%m/%Y")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment