# emhart/gist:3272216

## gistfile1.r
#include libraries
require(twitteR)
require(ggplot2)
require(tm)
require(wordcloud)
require(RColorBrewer)
require(reshape)

# Search Twitter for the conference hashtag.
# we can only access 1500 tweets without oauth
twit.data <- searchTwitter("#ESA2012", n = 1500)

# Extract the text of every tweet. vapply() (instead of sapply()) always
# yields a character vector, even for an empty input list.
twit.vec <- vapply(twit.data, function(x) x$getText(), character(1))

# Create a corpus and normalise it: strip punctuation, lower-case, then drop
# the conference-specific terms and the English stop words.
# NOTE(review): with tm >= 0.6 and a VCorpus, base functions such as tolower
# may need to be wrapped in content_transformer() -- confirm against the
# installed tm version.
esa.corpus <- Corpus(VectorSource(twit.vec))
esa.corpus <- tm_map(esa.corpus, removePunctuation)
esa.corpus <- tm_map(esa.corpus, tolower)
esa.corpus <- tm_map(
  esa.corpus,
  function(x) {
    removeWords(
      x,
      c("esa", "esaorg", "esa2012", "#esa2012", stopwords("english"))
    )
  }
)
esa.tdm <- TermDocumentMatrix(esa.corpus)

esa.mat <- as.matrix(esa.tdm)

# Row sums of the term-document matrix give the total count of each term
# across all tweets; sort so the most frequent terms come first.
word_freqs <- sort(rowSums(esa.mat), decreasing = TRUE)
dm <- data.frame(word = names(word_freqs), freq = word_freqs)

### Plot your word cloud
wordcloud(
  dm$word, dm$freq,
  scale = c(4, .5),
  min.freq = 5,
  max.words = Inf,
  random.order = FALSE,
  rot.per = .15,
  colors = brewer.pal(8, "Dark2")
)

#### Now I'll use some code to extract the user data
# Number of tweets retrieved by the search above.
d.size <- length(twit.data)

##### Build a data frame with one row per tweet
# The tweets live in `twit.data` (the original loop read from an undefined
# `twobj`). Each column is built in one pass so that it keeps its own type:
# filling rows with c(...) would coerce the timestamp to a character string.
# vapply() also copes with an empty result, unlike a `1:d.size` loop.
tweet.df <- data.frame(
  TweetID = vapply(
    twit.data, function(x) as.character(x$id), character(1)
  ),
  ScreenName = vapply(
    twit.data, function(x) as.character(x$screenName), character(1)
  ),
  Text = vapply(
    twit.data, function(x) as.character(x$text), character(1)
  ),
  # Go through seconds-since-epoch so the values can be collected with
  # vapply() and then turned back into a single POSIXct column in GMT.
  Timestamp = as.POSIXct(
    vapply(
      twit.data,
      function(x) as.numeric(as.POSIXct(x$created, tz = "GMT")),
      numeric(1)
    ),
    origin = "1970-01-01", tz = "GMT"
  ),
  stringsAsFactors = FALSE
)

# Count the number of tweets per user and keep only users with more than 3
tweet.count <- table(tweet.df$ScreenName)
tweet.count <- tweet.count[tweet.count > 3]
tc.df <- melt(tweet.count)
colnames(tc.df) <- c("ScreenName", "Count")
# Display the most active users first
tc.df[order(tc.df$Count, decreasing = TRUE), ]
	#include libraries
	require(twitteR)
	require(ggplot2)
	require(tm)
	require(wordcloud)
	require(RColorBrewer)
	require(reshape)

	# Search Twitter for the conference hashtag.
	# we can only access 1500 tweets without oauth
	twit.data <- searchTwitter("#ESA2012", n = 1500)

	# Extract the text of every tweet. vapply() (instead of sapply()) always
	# yields a character vector, even for an empty input list.
	twit.vec <- vapply(twit.data, function(x) x$getText(), character(1))

	# Create a corpus and normalise it: strip punctuation, lower-case, then drop
	# the conference-specific terms and the English stop words.
	# NOTE(review): with tm >= 0.6 and a VCorpus, base functions such as tolower
	# may need to be wrapped in content_transformer() -- confirm against the
	# installed tm version.
	esa.corpus <- Corpus(VectorSource(twit.vec))
	esa.corpus <- tm_map(esa.corpus, removePunctuation)
	esa.corpus <- tm_map(esa.corpus, tolower)
	esa.corpus <- tm_map(
	  esa.corpus,
	  function(x) {
	    removeWords(
	      x,
	      c("esa", "esaorg", "esa2012", "#esa2012", stopwords("english"))
	    )
	  }
	)
	esa.tdm <- TermDocumentMatrix(esa.corpus)

	esa.mat <- as.matrix(esa.tdm)

	# Row sums of the term-document matrix give the total count of each term
	# across all tweets; sort so the most frequent terms come first.
	word_freqs <- sort(rowSums(esa.mat), decreasing = TRUE)
	dm <- data.frame(word = names(word_freqs), freq = word_freqs)

	### Plot your word cloud
	wordcloud(
	  dm$word, dm$freq,
	  scale = c(4, .5),
	  min.freq = 5,
	  max.words = Inf,
	  random.order = FALSE,
	  rot.per = .15,
	  colors = brewer.pal(8, "Dark2")
	)

	#### Now I'll use some code to extract the user data
	# Number of tweets retrieved by the search above.
	d.size <- length(twit.data)

	##### Build a data frame with one row per tweet
	# The tweets live in `twit.data` (the original loop read from an undefined
	# `twobj`). Each column is built in one pass so that it keeps its own type:
	# filling rows with c(...) would coerce the timestamp to a character string.
	# vapply() also copes with an empty result, unlike a `1:d.size` loop.
	tweet.df <- data.frame(
	  TweetID = vapply(
	    twit.data, function(x) as.character(x$id), character(1)
	  ),
	  ScreenName = vapply(
	    twit.data, function(x) as.character(x$screenName), character(1)
	  ),
	  Text = vapply(
	    twit.data, function(x) as.character(x$text), character(1)
	  ),
	  # Go through seconds-since-epoch so the values can be collected with
	  # vapply() and then turned back into a single POSIXct column in GMT.
	  Timestamp = as.POSIXct(
	    vapply(
	      twit.data,
	      function(x) as.numeric(as.POSIXct(x$created, tz = "GMT")),
	      numeric(1)
	    ),
	    origin = "1970-01-01", tz = "GMT"
	  ),
	  stringsAsFactors = FALSE
	)

	# Count the number of tweets per user and keep only users with more than 3
	tweet.count <- table(tweet.df$ScreenName)
	tweet.count <- tweet.count[tweet.count > 3]
	tc.df <- melt(tweet.count)
	colnames(tc.df) <- c("ScreenName", "Count")
	# Display the most active users first
	tc.df[order(tc.df$Count, decreasing = TRUE), ]