library(tidyverse)
library(tidytext)
library(RColorBrewer)
library(wordcloud)
#install_github("romainfrancois/dplyr5000")
library(dplyr5000)
# Tokenization ----
# Keep only the author, branch and commit message columns, then split each
# commit message into one row per word (new column `word`).
# NOTE(review): `dplyr5000` is presumably the dataset exported by the
# dplyr5000 package loaded above -- confirm its column names.
tib_words <- unnest_tokens(
  select(dplyr5000, user, branch, commit_msg),
  word,
  commit_msg,
  token = "words"
)
# Remove stop words ----
# Keep only the tokens that have no match in the `stop_words` lexicon.
# Both tables are matched on their common `word` column, so the key is named
# exactly once: listing it twice (`c("word", "word")`) is redundant and can be
# rejected by dplyr as a duplicated join variable.
tib_words_signifiant <- tib_words %>%
  anti_join(stop_words, by = "word")
# Frequency table ----
# Count the occurrences of each remaining word, keep only the words seen more
# than 25 times, and list them from most to least frequent.
freq_words <- tib_words_signifiant %>%
  count(word) %>%
  rename(freq = n) %>%
  filter(freq > 25) %>%
  arrange(desc(freq))
# Word cloud ----
# Start from the 9-colour "Set1" Brewer palette and drop its 6th colour
# (yellow -- presumably because it is hard to read on a light background).
pal <- brewer.pal(9, "Set1")[-6]

# Draw the cloud from the frequency table; `random.order = FALSE` disables
# random plotting order for the words.
with(
  freq_words,
  wordcloud(
    words = word,
    freq = freq,
    min.freq = 2,
    colors = pal,
    random.order = FALSE
  )
)
# Created on 2018-10-13 by the reprex package (v0.2.1)