Skip to content

Instantly share code, notes, and snippets.

@lc0
Created August 24, 2013 21:16
Show Gist options
  • Save lc0/6330460 to your computer and use it in GitHub Desktop.
Simple way to build twitter word cloud
# Fetch tweets for the #stylight hashtag and flatten the result list
# (one twitteR status object per element) into a single data frame.
twitter.results <- searchTwitter('#stylight', cainfo="cacert.pem")
df <- do.call("rbind", lapply(twitter.results, as.data.frame))
# Rename metadata columns from twitteR naming to Twitter REST API naming,
# so downstream code written against the API schema keeps working.
names.twitteR <- c("screenName", "created") # change from
names.api <- c("screen_name", "created_at") # change to
col.idx <- match(names(df), names.twitteR)
renamed <- !is.na(col.idx)
names(df)[renamed] <- names.api[col.idx[renamed]]
# Duplicate the sender column under the legacy search-API name.
df$from_user <- df$screen_name
And here is the base task we want to execute:
# Load the helper definitions (ConstructCorpus, MakeWordCloud) first.
source("semantic_analysis.R")
# Construct the corpus from the tweet texts, with regular preprocessing
# performed; additionally strip #hashtags and @user mentions.
corpus <- ConstructCorpus(df$text, removeTags = TRUE, removeUsers = TRUE)
# Render the word cloud from the preprocessed corpus.
MakeWordCloud(corpus)
# Twitter OAuth endpoints. All three MUST be HTTPS: the Twitter API rejects
# plain-HTTP OAuth calls, so the original http:// access/authorize URLs made
# the handshake fail.
reqURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# Placeholder credentials -- substitute the app's real key/secret.
# NOTE(review): avoid committing real credentials; read them from
# environment variables or a local config file instead.
consumerKey <- 'consumerKey'
consumerSecret <- 'consumerSecret'
twitCred <- OAuthFactory$new(consumerKey=consumerKey,
                             consumerSecret=consumerSecret,
                             requestURL=reqURL,
                             accessURL=accessURL,
                             authURL=authURL)
# Interactive step: opens the authorize URL and prompts for the PIN.
twitCred$handshake()
ConstructCorpus <- function(textVec,
                            toLower = TRUE,
                            removePunctuations = TRUE,
                            removeStopwords = TRUE,
                            removeNumbers = FALSE,
                            stemming = FALSE,
                            removeTags = FALSE,
                            removeUsers = FALSE) {
  # Construct a tm text corpus from a character vector, applying the
  # requested preprocessing steps in order.
  #
  # Args:
  #   textVec: character vector, one document per element
  #   toLower: lower-case all text?
  #   removePunctuations: strip punctuation?
  #   removeStopwords: drop English stopwords plus Twitter noise words?
  #   removeNumbers: strip digits?
  #   stemming: apply Snowball stemming (needs rJava + Snowball)?
  #   removeTags: strip #hashtags (TrimHashtags from utilities.R)?
  #   removeUsers: strip @mentions (TrimUsers from utilities.R)?
  #
  # Returns:
  #   A tm Corpus with the selected transformations applied.
  more.stopwords <- c("via", "rt", "mt", "amp")
  EnsurePackage("tm")
  # create a corpus object from the text vector
  corpus <- Corpus(VectorSource(textVec))
  if(toLower) corpus <- tm_map(corpus, tolower)
  if(removeTags) {
    source("utilities.R")
    corpus <- tm_map(corpus, TrimHashtags)
  }
  if(removeUsers) {
    source("utilities.R")
    corpus <- tm_map(corpus, TrimUsers)
  }
  if(removePunctuations) corpus <- tm_map(corpus, removePunctuation)
  # BUG FIX: the logical argument `removeNumbers` shadowed tm's
  # removeNumbers() transformation, so tm_map() was handed FALSE instead of
  # a function. Qualify with tm:: to reach the intended transformation.
  if(removeNumbers) corpus <- tm_map(corpus, tm::removeNumbers)
  if(removeStopwords) corpus <- tm_map(corpus, function(x)
    removeWords(x, append(stopwords("english"), more.stopwords)))
  if(stemming) {
    EnsurePackage("rJava")
    EnsurePackage("Snowball")
    corpus <- tm_map(corpus, stemDocument, language = "english")
  }
  return(corpus)
}
MakeWordCloud <- function(corpus) {
  # Make a word cloud from a preprocessed text corpus.
  #
  # Args:
  #   corpus: a tm Corpus (e.g. the result of ConstructCorpus)
  #
  # Returns:
  #   The plotted word cloud (drawn as a side effect via wordcloud()).
  EnsurePackage("tm")
  EnsurePackage("wordcloud")
  EnsurePackage("RColorBrewer")
  # Drop common retweet markers that survive earlier preprocessing.
  corpus <- tm_map(corpus, function(x) {
    removeWords(x, c("via", "rt", "mt"))
  })
  # Build term frequencies sorted from most to least frequent.
  ap.tdm <- TermDocumentMatrix(corpus)
  ap.m <- as.matrix(ap.tdm)
  ap.v <- sort(rowSums(ap.m), decreasing=TRUE)
  ap.d <- data.frame(word = names(ap.v), freq=ap.v)
  # (Removed a dead `table(ap.d$freq)` call: inside a function its value was
  # computed and silently discarded.)
  pal2 <- brewer.pal(8, "Dark2")
  wordcloud(ap.d$word, ap.d$freq,
            scale=c(8, .2), min.freq = 3,
            max.words = Inf, random.order = FALSE,
            rot.per = .15, colors = pal2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment