Skip to content

Instantly share code, notes, and snippets.

@sungiven
Created April 24, 2024 18:28
Show Gist options
  • Save sungiven/ff3b61488f44beda0811021567edc018 to your computer and use it in GitHub Desktop.
Save sungiven/ff3b61488f44beda0811021567edc018 to your computer and use it in GitHub Desktop.
Create wordclouds from tweets
library(readxl)
library(dplyr)
library(lubridate)
library(ggplot2)
library(sentimentr)
library(cowplot)
library(RColorBrewer)
library(wordcloud)
library(wordcloud2)
library(tidytext)
library(webshot)
# Install PhantomJS, the headless browser webshot() drives to take the
# screenshot of the wordcloud2 widget further down. This call is executed
# every time the script is sourced.
# NOTE(review): recent webshot releases appear to skip the download when a
# suitable PhantomJS is already present -- confirm for the installed version.
webshot::install_phantomjs()
# Strip URLs, @mentions, the HTML entity "&amp;", line breaks and punctuation
# from a character vector of tweets. NA elements are returned as NA.
.clean_tweet_text <- function(text) {
  # Links: cover plain http as well as https.
  text <- gsub("https?\\S*", "", text)
  # User mentions.
  text <- gsub("@\\S*", "", text)
  # Match the whole entity. A bare "amp" pattern also removes the middle of
  # ordinary words such as "example" or "campaign".
  text <- gsub("&amp;", " ", text, fixed = TRUE)
  # Turn line breaks into a space so the words on either side of the break
  # are not fused into one token.
  text <- gsub("[\r\n]+", " ", text)
  gsub("[[:punct:]]", "", text)
}

#' Create word clouds from a spreadsheet of tweets
#'
#' Reads `./<file_name>.xlsx`, cleans the `tweetText` column, counts the
#' remaining words after removing tidytext's `stop_words`, and renders the
#' counts twice: with `wordcloud()` into `./<file_name>_wordcloud.png` and
#' with `wordcloud2()` (screenshotted through `webshot()`) into
#' `./<file_name>_wordcloud2.png`.
#'
#' @param file_name Base name of the workbook, without directory or
#'   extension. The workbook must contain a `tweetText` column.
#' @return The value returned by `webshot()` for the second image.
get_wordclouds <- function(file_name) {
  input_file <- paste0("./", file_name, ".xlsx")
  output_file <- paste0("./", file_name, "_wordcloud.png")
  output_file2 <- paste0("./", file_name, "_wordcloud2.png")
  min_freq <- 1
  wc_seed <- 1234

  # Read XLSX file
  tweets <- read_excel(input_file)
  if (!"tweetText" %in% names(tweets)) {
    stop("`", input_file, "` has no `tweetText` column.", call. = FALSE)
  }

  # One row per distinct word with its frequency `n`, most frequent first.
  words <- tweets |>
    filter(!is.na(tweetText)) |>
    mutate(tweetText = .clean_tweet_text(tweetText)) |>
    unnest_tokens(word, tweetText) |>
    anti_join(tidytext::stop_words, by = "word") |>
    count(word, sort = TRUE) |>
    filter(n >= min_freq)

  if (nrow(words) == 0) {
    stop(
      "No words left to plot after cleaning `", input_file, "`.",
      call. = FALSE
    )
  }

  # Static word cloud. The seed fixes the layout between runs. `min.freq` is
  # not passed, so wordcloud()'s own default for it still applies.
  set.seed(wc_seed)
  png(output_file, width = 500, height = 500)
  tryCatch(
    wordcloud(
      words = words$word,
      freq = words$n,
      max.words = 500,
      random.order = FALSE,
      rot.per = 0.35,
      colors = brewer.pal(8, "Dark2"),
      scale = c(4, 0.4)
    ),
    # Close the PNG device even when wordcloud() fails.
    finally = dev.off()
  )

  # Interactive word cloud. Other wordcloud2() options worth trying:
  # `backgroundColor`, `minRotation` / `maxRotation` / `rotateRatio`, or
  # letterCloud() to draw the words in the shape of a letter or text.
  wc_graph <- wordcloud2(words, size = 4.6, color = "random-dark")

  # Render the widget in a private temporary directory instead of a
  # hard-coded project path that may not exist; remove it afterwards.
  widget_dir <- tempfile("wordcloud2_")
  dir.create(widget_dir)
  on.exit(unlink(widget_dir, recursive = TRUE), add = TRUE)
  widget_file <- file.path(widget_dir, "tmp.html")

  # NOTE(review): OPENSSL_CONF is pointed at /dev/null presumably as the usual
  # workaround for PhantomJS failing to load the system OpenSSL config --
  # confirm. The previous value is restored when the function exits.
  old_conf <- Sys.getenv("OPENSSL_CONF", unset = NA)
  on.exit(
    {
      if (is.na(old_conf)) {
        Sys.unsetenv("OPENSSL_CONF")
      } else {
        Sys.setenv(OPENSSL_CONF = old_conf)
      }
    },
    add = TRUE
  )
  Sys.setenv(OPENSSL_CONF = "/dev/null")

  htmlwidgets::saveWidget(wc_graph, widget_file, selfcontained = FALSE)
  # The delay (in seconds) is passed through unchanged from the original call.
  webshot(
    widget_file,
    output_file2,
    delay = 60,
    vwidth = 1600,
    vheight = 1200
  )
}
# Example run: reads ./iam_Uchenna.xlsx and writes
# ./iam_Uchenna_wordcloud.png and ./iam_Uchenna_wordcloud2.png.
get_wordclouds("iam_Uchenna")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment