Skip to content

Instantly share code, notes, and snippets.

@low-decarie
Created December 10, 2017 20:48
Show Gist options
  • Save low-decarie/fd80316783afe5e6c956120c00d8656e to your computer and use it in GitHub Desktop.
Save low-decarie/fd80316783afe5e6c956120c00d8656e to your computer and use it in GitHub Desktop.
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
#####
# Load ics data using code from
# https://stackoverflow.com/questions/43573982/how-to-import-ical-ics-file-in-r
###
# Unfold iCalendar content lines: a physical line that starts with a space or
# a tab continues the previous line (RFC 5545 line folding), so that leading
# character is dropped and the remainder is appended to the line before it.
#
# @param lines Character vector of physical lines, as returned by readLines().
# @return Character vector of unfolded content lines.
unfold_ics_lines <- function(lines) {
  if (length(lines) == 0L) {
    return(lines)
  }
  is_continuation <- grepl("^[ \t]", lines)
  # A continuation with nothing before it has no line to join; keep it as is.
  is_continuation[1L] <- FALSE
  pieces <- ifelse(is_continuation, substring(lines, 2L), lines)
  # Every physical line belongs to the most recent non-continuation line.
  owner <- cumsum(!is_continuation)
  vapply(split(pieces, owner), paste0, character(1), collapse = "",
         USE.NAMES = FALSE)
}

# Ask the user for an .ics file and read it as unfolded content lines.
file_path <- file.choose()
x <- unfold_ics_lines(readLines(file_path, warn = FALSE))

# Split every line at its first ":" into a (key, value) pair, one row per line.
keyval <- do.call(
  rbind,
  regmatches(x, regexpr(":", x, fixed = TRUE), invert = TRUE)
)
if (is.null(keyval) || ncol(keyval) < 2L) {
  stop("No KEY:VALUE content lines found in ", file_path, call. = FALSE)
}

# Keep only the rows from the first BEGIN:VEVENT to the last END:VEVENT.
is_event_start <- keyval[, 1] == "BEGIN" & keyval[, 2] == "VEVENT"
is_event_end <- keyval[, 1] == "END" & keyval[, 2] == "VEVENT"
if (!any(is_event_start) || !any(is_event_end)) {
  stop("No complete VEVENT entry found in ", file_path, call. = FALSE)
}
first_row <- which(is_event_start)[1L]
last_row <- tail(which(is_event_end), 1L)
if (first_row > last_row) {
  stop("Malformed calendar: END:VEVENT precedes the first BEGIN:VEVENT in ",
       file_path, call. = FALSE)
}
keyval <- keyval[first_row:last_row, , drop = FALSE]

# Number the events: the id increases by one at every BEGIN:VEVENT row.
# cbind.data.frame() leaves the two unnamed matrix columns named "1" (key) and
# "2" (value); the reshape below and the "2.SUMMARY" lookup rely on that.
keyval <- cbind.data.frame(
  keyval,
  id = cumsum(keyval[, 1] == "BEGIN" & keyval[, 2] == "VEVENT")
)

# One row per event, one "2.<KEY>" column per distinct key.
df <- reshape(keyval, timevar = "1", idvar = "id", direction = "wide")

# Preview columns 3, 4 and 9, restricted to those that actually exist so that
# calendars with few distinct keys do not abort the script here.
preview_cols <- intersect(c(3, 4, 9), seq_along(df))
head(df[, preview_cols, drop = FALSE])
#####
# Create word cloud using code from
# http://www.sthda.com/english/wiki/text-mining-and-word-cloud-fundamentals-in-r-5-simple-steps-you-should-know
####
# Build a text corpus from the event summaries, one document per event.
# Events without a SUMMARY line show up as NA in the reshaped table; they are
# dropped so they do not enter the corpus as documents. as.character() guards
# against the column being a factor (the data.frame default before R 4.0).
if (!("2.SUMMARY" %in% names(df))) {
  stop("The calendar contains no SUMMARY fields to build a word cloud from.",
       call. = FALSE)
}
summaries <- as.character(df[, "2.SUMMARY"])
summaries <- summaries[!is.na(summaries)]
if (length(summaries) == 0L) {
  stop("All event summaries are missing; nothing to build a word cloud from.",
       call. = FALSE)
}
docs <- Corpus(VectorSource(summaries))

# Replace separator characters with a space so the words around them do not
# get glued together when punctuation is stripped later on.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
for (separator in c("/", "@", "\\|")) {
  docs <- tm_map(docs, toSpace, separator)
}

# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove common English stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stopwords, given as a character vector
docs <- tm_map(docs, removeWords, c("lecture", "plenary", "can"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Collapse runs of white space
docs <- tm_map(docs, stripWhitespace)
# Text stemming (disabled)
# docs <- tm_map(docs, stemDocument)

# Count how often each term occurs across all summaries and sort the terms
# from most to least frequent.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
# Draw the word cloud into wordcloud.jpeg in the working directory.
# The fixed seed makes the random word placement reproducible between runs.
set.seed(22)
jpeg("wordcloud.jpeg")
cloud_device <- dev.cur()
# Close only the JPEG device opened above (graphics.off() would also close any
# other device the session has open), and close it even when wordcloud()
# fails so the device is not left open.
invisible(tryCatch(
  wordcloud(
    words = d$word, freq = d$freq, min.freq = 0, scale = c(6, 0.1),
    colors = brewer.pal(7, "Set2"), rot.per = 0.5
  ),
  finally = dev.off(cloud_device)
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment