Skip to content

Instantly share code, notes, and snippets.

@cecilesauder
Created October 12, 2018 23:15
Show Gist options
  • Save cecilesauder/09cfd32f11c0f3ad257ec6bcc6b66a6e to your computer and use it in GitHub Desktop.
Save cecilesauder/09cfd32f11c0f3ad257ec6bcc6b66a6e to your computer and use it in GitHub Desktop.
Wordcloud of #dplyr5000 commits
library(tidyverse)
library(tidytext)
library(RColorBrewer)
library(wordcloud)
#install_github("romainfrancois/dplyr5000")
library(dplyr5000)

# Tokenization ----
# Keep the author, branch and message columns of `dplyr5000`, then split
# every `commit_msg` into word tokens stored in a new `word` column.
# NOTE(review): assumes `dplyr5000` is the data frame exported by the
# dplyr5000 package, with one row per commit -- confirm.
tib_words <- unnest_tokens(
  select(dplyr5000, user, branch, commit_msg),
  word,
  commit_msg,
  token = "words"
)


# Remove stop words ----
# Drop every token that also appears in the `word` column of `stop_words`
# (the stop-word lexicon shipped with tidytext), keeping only the
# meaningful words.
# The join key is given once: a duplicated key such as c("word", "word")
# is rejected by current dplyr, which requires join columns to be unique.
tib_words_signifiant <- tib_words %>%
  anti_join(stop_words, by = "word")


# Word frequencies ----
# Tally how many times each remaining word occurs (column `freq`), ordered
# from most to least frequent, and keep only words seen more than 25 times.
freq_words <- tib_words_signifiant %>%
  count(word, name = "freq", sort = TRUE) %>%
  filter(freq > 25)

# Word cloud ----
# Palette: the 9-colour "Set1" Brewer palette with its 6th entry removed.
# NOTE(review): the 6th colour is presumably dropped for legibility on a
# white background -- confirm.
pal <- brewer.pal(9, "Set1")[-6]

# Draw the cloud from the frequency table. With `random.order = FALSE` the
# words are plotted in decreasing frequency rather than in random order.
wordcloud(
  words = freq_words[["word"]],
  freq = freq_words[["freq"]],
  min.freq = 2,
  colors = pal,
  random.order = FALSE
)

Created on 2018-10-13 by the reprex package (v0.2.1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment