Skip to content

Instantly share code, notes, and snippets.

@leobarone
Created August 31, 2016 22:01
Show Gist options
  • Save leobarone/877c786edd05731d07828970ce75afaf to your computer and use it in GitHub Desktop.
Save leobarone/877c786edd05731d07828970ce75afaf to your computer and use it in GitHub Desktop.
Nuvem de Palavras pronunciamento presidenta Dilma 31/08/2016
library(XML)
library(tm)
library(SnowballC)
library(wordcloud)
# Word cloud of the speech given by Dilma Rousseff on 2016-08-31.
# Steps: download the article, extract the paragraphs of the speech, save the
# text to a one-document directory, build a tm corpus from that directory,
# clean the corpus and plot the 100 most frequent words.

url <- "http://www.otempo.com.br/capa/pol%C3%ADtica/leia-na-%C3%ADntegra-o-discurso-de-dilma-ap%C3%B3s-aprova%C3%A7%C3%A3o-do-impeachment-1.1363896"

# Directory that holds the text file and is later read by DirSource().
# The same path is used for creating, writing and reading, so the script no
# longer depends on the working directory being the home directory.
dir_corpus <- "~/dilma_pos_golpe"

# Download the page and parse it as HTML.
pagina <- xmlRoot(htmlParse(readLines(url)))

# Each <p> inside the article span becomes one element of the result.
texto_paragrafos <- xpathSApply(
  pagina,
  "//span[@class = 'texto-artigo']/p",
  xmlValue
)

# Fail early if the page layout changed and nothing was matched, instead of
# silently building a word cloud from an empty or garbage text.
if (length(texto_paragrafos) == 0) {
  stop("No paragraphs matched the XPath expression at ", url, call. = FALSE)
}

# Join the paragraphs into a single string. A space separator keeps the last
# word of one paragraph from being glued to the first word of the next.
texto <- paste(unlist(texto_paragrafos), collapse = " ")

# Create the corpus directory (no warning if it already exists) and save the
# speech as its only document.
dir.create(dir_corpus, showWarnings = FALSE, recursive = TRUE)
writeLines(texto, file.path(dir_corpus, "dilma_senado.txt"))

# Build the corpus from every file found in the directory.
ponteCorpus <- VCorpus(
  DirSource(dir_corpus),
  readerControl = list(language = "por")
)
inspect(ponteCorpus)

# Clean-up: lower-case first so the stop word list matches capitalised words,
# then drop stop words, punctuation and digits. Whitespace is collapsed last
# because the removals above leave gaps behind.
ponteCorpus <- tm_map(ponteCorpus, content_transformer(tolower))
ponteCorpus <- tm_map(ponteCorpus, removeWords, stopwords("portuguese"))
ponteCorpus <- tm_map(ponteCorpus, removePunctuation)
ponteCorpus <- tm_map(ponteCorpus, removeNumbers)
ponteCorpus <- tm_map(ponteCorpus, stripWhitespace)

# Plot the 100 most frequent words, most frequent in the centre.
wordcloud(ponteCorpus, max.words = 100, random.order = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment