Skip to content

Instantly share code, notes, and snippets.

@leobarone
Created August 31, 2016 23:35
Show Gist options
  • Save leobarone/b078fbce5bad320ae3b038b48180ea80 to your computer and use it in GitHub Desktop.
Save leobarone/b078fbce5bad320ae3b038b48180ea80 to your computer and use it in GitHub Desktop.
Nuvem de Palavras - Pronunciamento TV Temer 31/08/2016
#Primeiramente: Fora Temer
library(XML)
library(tm)
library(SnowballC)
library(wordcloud)
# Build a word cloud from the text of a news article: download the page,
# extract the article paragraphs, store them as a one-document corpus on
# disk, clean the corpus and plot the most frequent terms.

# --- Download and parse the article ----
url <- "http://noticias.uol.com.br/politica/ultimas-noticias/2016/08/31/em-pronunciamento-a-tv-temer-defende-reformas-da-previdencia-e-trabalhista.htm"
# readLines() returns one element per line of HTML; htmlParse() receives the
# page content itself rather than a file name.
# NOTE(review): the page encoding is not declared here; if accented
# characters come out garbled, pass `encoding =` to htmlParse() -- confirm.
pagina <- xmlRoot(htmlParse(readLines(url, warn = FALSE)))

# One string per <p> directly inside <div id="texto">.
texto_paragrafos <- xpathSApply(pagina, "//div[@id = 'texto']/p", xmlValue)

# --- Assemble the text ----
# Paragraphs before this index are skipped.
# NOTE(review): presumably the first 8 paragraphs are article lead-in rather
# than the speech itself -- confirm against the page.
primeiro_paragrafo <- 9L
if (length(texto_paragrafos) < primeiro_paragrafo) {
  # Guard: `9:length(x)` would count backwards on a short result and
  # silently produce NA / duplicated text.
  stop(
    "Expected at least ", primeiro_paragrafo, " paragraphs in the page, found ",
    length(texto_paragrafos), "; the page layout may have changed.",
    call. = FALSE
  )
}
# Join with a space so that the last word of one paragraph is not fused with
# the first word of the next one once punctuation is stripped.
texto <- paste0(
  texto_paragrafos[primeiro_paragrafo:length(texto_paragrafos)],
  collapse = " "
)

# --- Write the corpus directory ----
# Create the directory at the same location that is written to and read from
# below (the home directory), not in the current working directory.
dir_corpus <- "~/temer_pos_golpe"
dir.create(dir_corpus, showWarnings = FALSE, recursive = TRUE)
# DirSource() below reads every file in the directory, so the file name has
# no effect on the result; it is kept unchanged.
writeLines(texto, file.path(dir_corpus, "dilma_senado.txt"))

# --- Build and clean the corpus ----
ponteCorpus <- VCorpus(
  DirSource(dir_corpus),
  readerControl = list(language = "por")
)
inspect(ponteCorpus)
ponteCorpus <- tm_map(ponteCorpus, stripWhitespace)
ponteCorpus <- tm_map(ponteCorpus, content_transformer(tolower))
# Stopwords are removed after lower-casing so capitalised forms match too.
ponteCorpus <- tm_map(ponteCorpus, removeWords, stopwords("portuguese"))
ponteCorpus <- tm_map(ponteCorpus, removePunctuation)
ponteCorpus <- tm_map(ponteCorpus, removeNumbers)

# --- Plot ----
# At most 100 words, each appearing at least twice, most frequent first.
wordcloud(ponteCorpus, max.words = 100, random.order = FALSE, min.freq = 2)
#End of code: Fora Temer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment