This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A funcao readPDF depende da instalacao de pdftotext no seu computador.
# No Windows - fazer download do xpdf em http://www.foolabs.com/xpdf/download.html
# e deixar os arquivos executaveis no working directory.
# No Linux (Ubuntu) - voce fez a escolha certa para a vida e nao precisa fazer nada.
# No Mac - nao sei.
# install.packages("tm") | |
library(tm) | |
pastaIn <- "Caminho da pasta onde estao os pdfs" | |
pastaOut <- "Caminho da pasta para onde irao os txts" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(bRasilLegis) | |
obterVotacoes <- function(tipo, numero, ano) { | |
output <- data.frame() | |
for (i in 1:length(tipo)){ | |
if (tipo[i] == "REQ"){ | |
print("Requerimento - pular") | |
} | |
else{ | |
print(paste(i,"- Capturando votacao da proposicao", tipo[i], numero[i], "de", ano[i])) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
getTse<-function(link){ | |
# Cria um nome temporario que o arquivo baixado recebera | |
pasta.temporaria = file.path(getwd(), "/temp_folder") | |
dir.create(pasta.temporaria) | |
nome.temporario = file.path(pasta.temporaria, "temp") | |
# Faz o donwload do link e armazena arquivo temporario | |
download.file(link, nome.temporario, quiet = T) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
getwd() | |
#' Extract the text of a PDF file as a character vector.
#'
#' Builds a PDF reader with `readPDF()` (text option "-layout"), applies it to
#' `arquivo` with language "pt" and id "id1", and converts the resulting
#' document with `as.character()`.
#'
#' @param arquivo Path to the PDF file to read (a single, non-missing string).
#' @return The result of `as.character()` applied to the document returned by
#'   the reader.
pdfToText <- function(arquivo) {
  # Fail early with a clear message instead of letting the external PDF
  # tooling fail later with a less helpful one.
  if (!is.character(arquivo) || length(arquivo) != 1L || is.na(arquivo)) {
    stop("'arquivo' deve ser o caminho de um unico arquivo PDF", call. = FALSE)
  }
  if (!file.exists(arquivo)) {
    stop("Arquivo PDF nao encontrado: ", arquivo, call. = FALSE)
  }

  # readPDF() returns a reader function; "-layout" is forwarded as the text
  # extraction option (presumably the pdftotext flag that keeps the page
  # layout -- confirm against the installed tm version).
  leitor <- readPDF(control = list(text = "-layout"))
  texto <- leitor(elem = list(uri = arquivo), language = "pt", id = "id1")
  as.character(texto)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
# Page whose text is scraped and saved to disk.
url <- "http://www2.planalto.gov.br/acompanhe-o-planalto/discursos/discurso-do-presidente-da-republica-michel-temer-durante-cerimonia-de-posse-dos-novos-ministros-de-estado-palacio-do-planalto"
# Download the raw HTML lines and parse them into a document tree.
pagina <- xmlRoot(htmlParse(readLines(url)))
# Extract the text content of the <div> with id 'parent-fieldname-text'.
texto <- xpathSApply(pagina, "//div[@id = 'parent-fieldname-text']", xmlValue)
# Create the output folder in the same place the file is written to. The
# folder used to be created relative to the working directory while the file
# was written under the home directory, so the write failed unless the
# working directory happened to be the home directory.
if (!dir.exists("~/posse_interino")) {
  dir.create("~/posse_interino")
}
writeLines(texto, "~/posse_interino/posse_interino.txt")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
getwd() | |
#' Extract the text of a PDF file as a character vector.
#'
#' Builds a PDF reader with `readPDF()` (text option "-layout"), applies it to
#' `arquivo` with language "pt" and id "id1", and converts the resulting
#' document with `as.character()`.
#'
#' @param arquivo Path to the PDF file to read (a single, non-missing string).
#' @return The result of `as.character()` applied to the document returned by
#'   the reader.
pdfToText <- function(arquivo) {
  # Fail early with a clear message instead of letting the external PDF
  # tooling fail later with a less helpful one.
  if (!is.character(arquivo) || length(arquivo) != 1L || is.na(arquivo)) {
    stop("'arquivo' deve ser o caminho de um unico arquivo PDF", call. = FALSE)
  }
  if (!file.exists(arquivo)) {
    stop("Arquivo PDF nao encontrado: ", arquivo, call. = FALSE)
  }

  # readPDF() returns a reader function; "-layout" is forwarded as the text
  # extraction option (presumably the pdftotext flag that keeps the page
  # layout -- confirm against the installed tm version).
  leitor <- readPDF(control = list(text = "-layout"))
  texto <- leitor(elem = list(uri = arquivo), language = "pt", id = "id1")
  as.character(texto)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
url <- "http://jota.uol.com.br/o-discurso-de-dilma-rousseff-no-julgamento-impeachment-no-senado" | |
pagina <- xmlRoot(htmlParse(readLines(url))) | |
texto_paragrafos <- xpathSApply(pagina, "//div[@class = 'first-letter']/p", xmlValue) | |
texto <- c() | |
for (i in 1:length(texto_paragrafos)){ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
url <- "http://www.otempo.com.br/capa/pol%C3%ADtica/leia-na-%C3%ADntegra-o-discurso-de-dilma-ap%C3%B3s-aprova%C3%A7%C3%A3o-do-impeachment-1.1363896" | |
pagina <- xmlRoot(htmlParse(readLines(url))) | |
texto_paragrafos <- xpathSApply(pagina, "//span[@class = 'texto-artigo']/p", xmlValue) | |
texto <- c() | |
for (i in 1:length(texto_paragrafos)){ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Primeiramente: Fora Temer | |
library(XML) | |
library(tm) | |
library(SnowballC) | |
library(wordcloud) | |
url <- "http://noticias.uol.com.br/politica/ultimas-noticias/2016/08/31/em-pronunciamento-a-tv-temer-defende-reformas-da-previdencia-e-trabalhista.htm" | |
pagina <- xmlRoot(htmlParse(readLines(url))) | |
texto_paragrafos <- xpathSApply(pagina, "//div[@id = 'texto']/p", xmlValue) | |
texto <- c() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list=ls()) | |
library(XML) | |
# Page that is parsed for links.
urlTSE <- "http://www.tse.jus.br/hotSites/pesquisas-eleitorais/eleitorado_anos/perfil_eleitorado_anos/2016.html"
# Download the raw HTML lines and parse them into a document tree.
pagina <- xmlRoot(htmlParse(readLines(urlTSE)))
# Collect the href attribute of every <a> element whose target is '_blank'.
links.zip <- xpathSApply(pagina, "//a[@target = '_blank']", xmlGetAttr, name = 'href')
# Create the output folder only when it is missing, so re-running the script
# does not warn about an already existing directory.
if (!dir.exists("~/perfil_eleitorado")) {
  dir.create("~/perfil_eleitorado")
}
pasta <- "~/perfil_eleitorado/"
# NOTE(review): the working directory is changed for the rest of the session;
# kept because the remainder of the script (not visible here) may rely on it.
setwd(pasta)
OlderNewer