# Skip to content

# Instantly share code, notes, and snippets.

# Embed
# What would you like to do?
# Wordles for #BES12 tweets
#
# Dependencies ----
# Install only the packages that are not already available, so re-running the
# script does not download and reinstall everything each time.
required_pkgs <- c("twitteR", "wordcloud", "tm")
is_available <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (any(!is_available)) {
  install.packages(required_pkgs[!is_available])
}
library(twitteR)
library(wordcloud)
library(tm)

# Search for #bes12 tweets ----
# Up to 5000 tweets are requested; fewer may come back (see the count below).
bestweets <- searchTwitter("#bes12", n = 5000)
length(bestweets) # ended up with 1344 as of 21-Dec-12 at 17:00 London time

# Make into a data.frame
bestweets_df <- twListToDF(bestweets)
tail(bestweets_df)
# Words used ----
# Pattern of tweet "noise" to delete before counting words: double quotes,
# @mentions, RT/MT markers, "!", ":" or ";" followed by whitespace, URLs and
# #hashtags. The pieces are concatenated into one alternation.
# NOTE(review): inside the mention class, ".-_" is a character *range* (from
# "." to "_"), so in ASCII order it also matches digits and some punctuation.
# The pattern is kept exactly as written -- confirm whether "[A-Za-z0-9_]"
# was intended.
# NOTE(review): "(RT)" and "(MT)" are not anchored to word boundaries, so they
# also match inside longer upper-case words -- confirm this is acceptable.
tweet_noise <- paste0(
  "\"|",
  "@[A-Za-z.-_]+|",
  "(RT)|(MT)|",
  "[!:;]\\s+|",
  "http[s]?://[A-Za-z0-9]+\\.?[A-Za-z0-9]+/[A-Za-z0-9]+\\.?[A-Za-z0-9]+|",
  "#[A-Za-z0-9]+"
)

# gsub() is vectorised over the tweets, so no per-element apply is needed.
# trimws() is base R; the str_trim() used here before belongs to stringr,
# which this script never loads, so the call failed with
# "could not find function".
cleaned <- trimws(gsub(tweet_noise, "", bestweets_df$text), which = "both")

# Collapse all tweets into a single document and drop stop words
# (stopwords() is called with its default arguments).
cleaned_coll <- paste(cleaned, collapse = " ")
corpus <- Corpus(VectorSource(cleaned_coll))
bescorpus <- tm_map(corpus, function(x) removeWords(x, stopwords()))

# Term frequencies, most frequent first.
bes_ <- TermDocumentMatrix(bescorpus)
ap.m <- as.matrix(bes_)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
table(ap.d$freq) # how many words occur once, twice, ...

# Draw the word cloud into a PNG in the home directory.
# brewer.pal() is not provided by any package loaded explicitly in this
# script; presumably it is attached as a dependency of wordcloud -- verify.
pal2 <- brewer.pal(8, "Dark2")
png("~/beswordsused.png", width = 800, height = 600)
wordcloud(
  ap.d$word, ap.d$freq,
  scale = c(8, .2), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal2
)
dev.off()
# Users ----
# Count tweets per screen name directly. Screen names are identifiers, not
# prose: sending them through stop-word removal and a term-document matrix
# can alter them (tm's term counting lower-cases terms and drops short ones
# by default, and a handle equal to a stop word would be removed), so a plain
# frequency table is both simpler and exact.
tweets_per_user <- table(bestweets_df$screenName)

# Named count vector, most active users first.
ap.v <- sort(
  setNames(as.vector(tweets_per_user), names(tweets_per_user)),
  decreasing = TRUE
)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
table(ap.d$freq) # how many users tweeted once, twice, ...

# Draw the word cloud into a PNG in the home directory; min.freq = 2 is
# passed so that only users with at least two tweets are considered.
# brewer.pal() is not provided by any package loaded explicitly in this
# script; presumably it is attached as a dependency of wordcloud -- verify.
pal2 <- brewer.pal(8, "Dark2")
png("~/besusers.png", width = 800, height = 600)
wordcloud(
  ap.d$word, ap.d$freq,
  scale = c(8, .2), min.freq = 2, max.words = 100,
  random.order = FALSE, rot.per = .15, colors = pal2
)
dev.off()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment