@timelyportfolio, created May 16, 2012 14:21
require(twitteR)
require(ggplot2)
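#NOTE (addition, not in the original gist): newer versions of twitteR require
#OAuth before searchTwitter() will return anything. A minimal sketch, assuming
#an app registered with Twitter; the four values are placeholders, not real keys:
#setup_twitter_oauth(consumer_key = "YOUR_CONSUMER_KEY",
#                    consumer_secret = "YOUR_CONSUMER_SECRET",
#                    access_token = "YOUR_ACCESS_TOKEN",
#                    access_secret = "YOUR_ACCESS_SECRET")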
#get #cfa12 tweets for exploration
#the search API appears to limit n to 1500, so split the pull across date ranges
cfatweets.1 <- searchTwitter("#cfa12", n = 1500, since = "2012-05-04", until = "2012-05-07")
cfatweets.2 <- searchTwitter("#cfa12", n = 1500, since = "2012-05-07", until = "2012-05-08")
cfatweets.3 <- searchTwitter("#cfa12", n = 1500, since = "2012-05-08", until = "2012-05-15")
cfatweets.df <- rbind(
  twListToDF(cfatweets.1),
  twListToDF(cfatweets.2),
  twListToDF(cfatweets.3))
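#OPTIONAL (addition, not in the original gist): search results age out quickly,
#so caching the combined data frame lets the plots below be re-run offline.
#Plain base R; the file name is just an example:
#saveRDS(cfatweets.df, "cfa12_tweets.rds")
#cfatweets.df <- readRDS("cfa12_tweets.rds")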
tweeter <- as.data.frame(table(cfatweets.df$screenName), stringsAsFactors = FALSE)
colnames(tweeter) <- c("tweeter", "count")
#keep the 50 most prolific tweeters, ordered by tweet count
o <- order(tweeter$count, decreasing = TRUE)[1:50]
tweeter.ranked <- tweeter[o, ]
tweeter.ranked$tweeter <- reorder(x = tweeter.ranked$tweeter, X = tweeter.ranked$count)
#credit for these charts goes to
#http://isomorphismes.tumblr.com/post/20362455367/twitter
ggplot(data = tweeter.ranked,
       aes(y = count, x = tweeter, fill = tweeter)) +
  geom_bar(stat = "identity") + coord_flip() +
  geom_text(aes(y = 0, label = paste("@", tweeter, " ", count, sep = "")),
            size = 3, hjust = 0) +
  theme_bw() +
  labs(title = "Prolific #cfa12 Tweeters") +
  theme(legend.position = "none",
        axis.text.y  = element_blank(),
        axis.ticks   = element_blank(),
        axis.title.y = element_blank())
ggplot(data = tweeter.ranked,
       aes(y = count, x = tweeter, fill = tweeter)) +
  coord_polar() +
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(title = "Prolific #cfa12 Tweeters") +
  theme(legend.position = "none",
        axis.title.y = element_blank())
ggplot(data = cfatweets.df,
       aes(x = created, fill = format(created, "%Y-%m-%d"))) +
  geom_density() + theme_bw() +
  labs(title = "#cfa12 Tweets by Time") +
  theme(legend.position = "none",
        axis.title.y = element_blank())
######################## word cloud ########################
#all credit goes to http://blog.ouseful.info/2012/02/15/generating-twitter-wordclouds-in-r-prompted-by-an-open-learning-blogpost/
#thanks for the very fine example
#pull the tweet text out of the tweet data frame
tw.df <- cfatweets.df$text
##Note: there are some handy, basic Twitter related functions here:
##https://github.com/matteoredaelli/twitter-r-utils
#For example:
RemoveAtPeople <- function(tweet) {
  gsub("@\\w+", "", tweet)
}
RemoveHash <- function(tweet) {
  gsub("#\\w+", "", tweet)
}
#then, for example, strip @'d names and hashtags from the tweet text
tweets <- as.vector(RemoveAtPeople(tw.df))
tweets <- as.vector(RemoveHash(tweets))
##Wordcloud - scripts available from various sources; I used:
#http://rdatamining.wordpress.com/2011/11/09/using-text-mining-to-find-out-what-rdatamining-tweets-are-about/
#load the text mining library (install.packages("tm") first if needed)
require(tm)
#call with e.g.: twCorpus <- generateCorpus(tweets)
generateCorpus <- function(df, my.stopwords = c()) {
  #the following is cribbed and seems to do what it says on the can
  tw.corpus <- Corpus(VectorSource(df))
  #remove punctuation
  tw.corpus <- tm_map(tw.corpus, removePunctuation)
  #normalise case (newer tm versions want base functions wrapped in content_transformer())
  tw.corpus <- tm_map(tw.corpus, content_transformer(tolower))
  #remove stopwords
  tw.corpus <- tm_map(tw.corpus, removeWords, stopwords("english"))
  tw.corpus <- tm_map(tw.corpus, removeWords, my.stopwords)
  tw.corpus
}
twCorpus <- generateCorpus(tweets)
tdm <- TermDocumentMatrix(twCorpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
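#quick sanity check (addition, not in the original): glance at the most frequent terms
head(d, 10)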
require(wordcloud)
#wordcloud(d$word, d$freq, min.freq = 5)
##### with colors #####
if (require(RColorBrewer)) {
  pal <- brewer.pal(9, "BuGn")
  pal <- pal[-(1:4)]
  wordcloud(d$word, d$freq, scale = c(2, .5), min.freq = 10,
            random.order = FALSE, rot.per = .1, colors = pal)
}