Skip to content

Instantly share code, notes, and snippets.

@AlexSJ
Forked from jtrecenti/universal.R
Created April 13, 2016 18:05
Show Gist options
  • Save AlexSJ/df0bfa69b2e3716f6f69403915f868f8 to your computer and use it in GitHub Desktop.
Save AlexSJ/df0bfa69b2e3716f6f69403915f868f8 to your computer and use it in GitHub Desktop.
scraper igreja universal
library(httr)
library(rvest)
library(dplyr)
library(stringr)
library(tidyr)
#' Scrape the Brazilian address listing of universal.org.
#'
#' Reads the paginated address search, discovers the number of pages from the
#' pagination widget, downloads each page and parses every listed location.
#'
#' @return A tibble with one row per listed location and the columns `pag`
#'   (page number), `nome`, `endereco`, `tel` (`""` when the entry has no
#'   `.tel` node) and `link`. A page or entry that fails to download or parse
#'   is kept as a row whose `nome` is `NA`, and a warning reports the cause.
universal_pags <- function() {
  base_url <- "http://www.universal.org/enderecos/busca/brasil/"
  failed_row <- tibble(nome = NA_character_)

  # Wrap `f` so an error produces `default` (plus a warning) instead of
  # aborting the whole scrape. Stands in for the deprecated dplyr::failwith().
  with_fallback <- function(f, default) {
    function(...) {
      tryCatch(
        f(...),
        error = function(e) {
          warning(
            "universal_pags: step failed: ", conditionMessage(e),
            call. = FALSE
          )
          default
        }
      )
    }
  }

  # Turn one `li.thumb-box.home` node into a one-row tibble.
  parse_item <- function(node) {
    has_tel <- length(html_nodes(node, ".tel")) > 0
    tibble(
      nome = node %>% html_node(".link-titulo") %>% html_text(),
      endereco = node %>% html_node("h4") %>% html_text() %>% str_trim(),
      tel = if (has_tel) html_text(html_node(node, ".tel")) else "",
      link = node %>% html_node(".btn-url") %>% html_attr("href")
    )
  }
  safe_item <- with_fallback(parse_item, failed_row)

  # Download result page number `pag` and parse all of its entries.
  universal_pag <- function(pag) {
    resp <- GET(sprintf("%stodas/%04d/", base_url, pag))
    # Treat HTTP error statuses like any other download failure.
    stop_for_status(resp)
    resp %>%
      read_html() %>%
      html_nodes("li.thumb-box.home") %>%
      lapply(safe_item) %>%
      bind_rows()
  }
  safe_pag <- with_fallback(universal_pag, failed_row)

  # Fetch one page and tag its rows with the page number (first column).
  scrape_page <- function(p) {
    rows <- safe_pag(p)
    rows$pag <- rep(p, nrow(rows))
    select(rows, pag, everything())
  }

  # The highest number shown in the pagination widget is the page count.
  index <- GET(base_url)
  stop_for_status(index)
  labels <- index %>%
    read_html() %>%
    html_nodes(".pagination li") %>%
    html_text()
  # Same extraction the deprecated tidyr::extract_numeric() performed;
  # non-numeric labels become NA (the coercion warning is expected).
  page_numbers <- suppressWarnings(as.numeric(gsub("[^0-9.-]+", "", labels)))
  page_numbers <- page_numbers[!is.na(page_numbers)]
  if (length(page_numbers) == 0 || max(page_numbers) < 1) {
    stop(
      "universal_pags: could not determine the number of result pages",
      call. = FALSE
    )
  }
  pags <- as.integer(max(page_numbers))

  seq_len(pags) %>%
    lapply(scrape_page) %>%
    bind_rows()
}
# Run the full scrape, then export the resulting table as an Excel workbook
# (relative path, so it lands in the current working directory).
d_universal_pags <- universal_pags()
openxlsx::write.xlsx(
  x = d_universal_pags,
  file = "d_universal_pags_brasil.xlsx"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment