# Skip to content

# Instantly share code, notes, and snippets.

# What would you like to do?
# Plot some figures for the #wiunion Twitter tag.
# @author: Michael J Bommarito II
# @date: Feb 21, 2011
# @email:
# @packages: ggplot2
# Attach the plotting package named in the header; the ggplot() calls further
# down cannot be resolved unless it is on the search path.
library(ggplot2)

# Load and pre-process #tweets_wiunion
# The file is read as tab-separated text without a header row. Quote and
# comment handling are switched off so that quote characters and '#' inside
# tweet text are kept literally. At most 200000 rows are read, and rows that
# are exact duplicates of an earlier row are dropped.
tweets <- unique(read.table(
  "data/tweets_wiunion.csv",
  sep = "\t", quote = "", comment.char = "",
  stringsAsFactors = FALSE, header = FALSE, nrows = 200000
))
names(tweets) <- c("id", "date", "user", "text")

# Convert the timestamp strings to POSIXct. strptime() returns NA for strings
# that do not match the format, and %a / %b are matched against day and month
# names of the current locale, so parse failures are reported instead of
# silently propagating into the plots.
tweets$date <- as.POSIXct(
  strptime(tweets$date, "%a, %d %b %Y %H:%M:%S %z", tz = "GMT")
)
if (any(is.na(tweets$date))) {
  warning(
    sum(is.na(tweets$date)),
    " tweet date(s) could not be parsed and are NA",
    call. = FALSE
  )
}
# Time-series figure: tweets are binned into 5-minute intervals (binwidth is
# given in seconds) and drawn as bars whose fill and outline colour both
# follow the bin count. The legend is hidden and the result is written to a
# PDF.
# NOTE(review): opts() and the major=/minor=/tz= arguments of
# scale_x_datetime() look like the older ggplot2 API -- confirm against the
# installed ggplot2 version before running.
ggplot(data = tweets, aes(date)) +
  geom_bar(
    aes(fill = ..count.., colour = ..count..),
    alpha = 0.5,
    size = 0.5,
    binwidth = 60 * 5
  ) +
  scale_x_datetime("Date", major = "1 day", minor = "6 hours", tz = "EST") +
  scale_y_continuous("Number of tweets") +
  opts(title = "#wiunion", legend.position = "none")

# No plot is passed, so ggsave() writes the most recently created plot.
ggsave(filename = "fig/ts_cwiunion.pdf", width = 12, height = 8)
# Now build the table of most frequent tweeters
numTop <- 30

# Count tweets per user with table() and turn the result into a two-column
# data frame (user name, number of tweets). Base R is used for the sort so
# that no package beyond ggplot2 is needed.
userFrequency <- as.data.frame(table(tweets$user), stringsAsFactors = FALSE)
names(userFrequency) <- c("Name", "Freq")

# Sort from most to least active; order() is stable, so users with equal
# counts keep the order in which table() listed them.
userFrequency <- userFrequency[order(-userFrequency$Freq), ]
rownames(userFrequency) <- NULL

# head() instead of [1:numTop, ] so that fewer than numTop distinct users
# does not pad the result with all-NA rows.
userFrequencyTop <- head(userFrequency, numTop)

# Fix the factor levels in sorted order so that a discrete axis shows the
# users by descending frequency rather than alphabetically.
userFrequencyTop$Name <- factor(
  userFrequencyTop$Name,
  levels = userFrequencyTop$Name
)
# Bar chart of tweet counts for the most active users, written to a PDF.
# Bars follow the level order of the Name factor.
# stat = "identity" makes the bar height equal to the Freq column; the default
# bar statistic counts rows instead, which conflicts with the y = Freq mapping.
# NOTE(review): opts() looks like the older ggplot2 API -- confirm against the
# installed ggplot2 version before running.
ggplot(data = userFrequencyTop, aes(x = Name, y = Freq)) +
  scale_x_discrete("User") +
  scale_y_continuous("Frequency") +
  geom_bar(stat = "identity") +
  opts(title = "Highest Frequency Users")

# ggsave() is a separate statement, not a plot component: it must run after
# the plot expression is complete. No plot is passed, so it writes the most
# recently created plot.
ggsave(filename = "fig/top_users.pdf", width = 12, height = 8)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment