Skip to content

Instantly share code, notes, and snippets.

View geoffjentry's full-sized avatar

Jeff Gentry geoffjentry

View GitHub Profile
# Read tweets back from the registered database backend (presumably ones
# stored by an earlier store_tweets_db() call).
from_db <- load_tweets_db()

# Search Twitter for the #rstats hashtag, asking for 500 results, and hand
# the results to the database backend.
tweets <- searchTwitter("#rstats", n = 500)
store_tweets_db(tweets)
# Three alternative ways of registering a database backend are shown below.
# NOTE(review): presumably only one of them is needed per session and the
# registered backend is what the tweet load/store helpers use - confirm
# against the twitteR documentation.
# Register an existing connection object; `dbi_connection` is not defined in
# this snippet and must be created beforehand.
register_db_backend(dbi_connection)
# or create a sqlite connection
register_sqlite_backend("/path/to/sqlite/file")
# or create a mysql connection
# NOTE(review): the four positional arguments look like database name, host,
# user and password - confirm the parameter order before relying on it.
register_mysql_backend("my_database", "hostname", "username", "password")
# Load the saved tweet collection; the file is expected to provide `code2013`.
load("code2013.rda") # 6028 tweets

# Drop the retweets before analysing the text.
filtered_tweets <- strip_retweets(code2013) # 5006 tweets

# Pull the text out of every remaining tweet object. vapply() is used instead
# of sapply() so the result is always a character vector: sapply() returns an
# empty list when there are no tweets, which breaks later string handling.
statuses <- vapply(filtered_tweets, function(x) x$getText(), character(1))
# Load the TIOBE listing from disk, keeping text columns as plain character
# vectors rather than factors.
tiobe <- read.csv(file = "tiobe.csv", stringsAsFactors = FALSE)

# Lower-cased copy of the language names.
tiobe_langs <- tolower(tiobe[, "lang"])
# Looking at the TIOBE listings and some of the tweet data, massage some of the entries
# Rank each language by its row position in the #code2013 table.
# seq_len() keeps this valid for a zero-row table, where 1:nrow() would
# produce c(1, 0) and fail the column assignment.
code2013_lang_table$code2013_rank <- seq_len(nrow(code2013_lang_table))

# Position of each language in the TIOBE listing (NA when it is not listed).
# Both sides are lower-cased so that names differing only in case still
# match; as.character() makes the comparison use the factor labels, if the
# column is a factor.
# NOTE(review): this treats a row's position in `tiobe` as its rank -
# confirm that tiobe.csv is ordered by rank.
code2013_lang_table$tiobe_rank <- match(
  tolower(as.character(code2013_lang_table$code2013_langs)),
  tolower(tiobe[, "lang"])
)
# Make a scatterplot of the ranking differences.
# Requires ggplot2 to be attached already (library(ggplot2)).
# The plot object is built before the graphics device is opened so that an
# error while constructing it cannot leave a PNG device open.
code2013_scatter <- ggplot(
  code2013_lang_table,
  aes(x = code2013_rank, y = tiobe_rank, color = code2013_tier)
) +
  geom_text(aes(label = code2013_langs), size = 3) +
  ylab("TIOBE Rank") +
  xlab("#code2013 rank") +
  ggtitle("#code2013 vs TIOBE rankings")

png(filename = "code2013_tiobe_scatter.png", width = 640, height = 640)
# ggplot objects are only drawn when printed. Auto-printing happens solely at
# the interactive top level, so print explicitly to get a non-empty file when
# this code is source()d or run inside a function.
print(code2013_scatter)
dev.off()
library(ggplot2)
# Horizontal bar chart of mention counts per language, filled by tier.
# The plot object is built before the graphics device is opened so that an
# error while constructing it cannot leave a PNG device open.
code2013_bars <- ggplot(
  code2013_lang_table,
  aes(x = code2013_langs, y = Count, fill = code2013_tier)
) +
  geom_bar(stat = "identity") +
  xlab("Language") +
  ylab("Count") +
  ggtitle("#code2013 Languages Sorted By TIOBE Rankings") +
  coord_flip()

png(filename = "code2013_tiobe.png", width = 640, height = 640)
# ggplot objects are only drawn when printed. Auto-printing happens solely at
# the interactive top level, so print explicitly to get a non-empty file when
# this code is source()d or run inside a function.
print(code2013_bars)
dev.off()
@geoffjentry
geoffjentry / gist:8226512
Created January 2, 2014 20:46
create data.frame
# Tokenize each status, splitting on a comma, a period or a run of whitespace.
status_tokens <- strsplit(statuses, ",|\\.|\\s+")

# Keep only the tokens that are TIOBE language names. lapply() always returns
# a list with one element per status, whereas sapply() may simplify to a
# vector or matrix depending on how many tokens happen to match. %in% never
# yields NA, so the logical mask can index directly without which().
matching_tokens <- lapply(status_tokens, function(tokens) {
  tokens[tokens %in% tiobe_langs]
})

# Now have the languages mentioned in #code2013 which are in TIOBE
code2013_langs <- unlist(matching_tokens)

# Count the mentions per language, most-mentioned first, as a data frame.
code2013_lang_table <- as.data.frame(
  sort(table(code2013_langs), decreasing = TRUE)
)
@geoffjentry
geoffjentry / gist:8226425
Created January 2, 2014 20:42
remove weird encodings
# I want to convert this all to lowercase but there are 67 with weird encodings
# Each status is lower-cased individually so that one failing string does not
# abort the whole conversion.
#   bad_statuses       - indices of statuses for which tolower() raised an error
#   lowercase_statuses - the successfully lower-cased strings
# NOTE(review): the closing brace of the for loop is not visible in this
# excerpt; the snippet appears to be truncated.
bad_statuses = numeric()
lowercase_statuses = character()
for (i in seq_along(statuses)) {
# try() with silent=TRUE returns a "try-error" object instead of stopping
tl = try(tolower(statuses[[i]]), silent=TRUE)
if (inherits(tl, "try-error")) {
# Remember which status could not be converted
bad_statuses = c(bad_statuses, i)
} else {
# NOTE(review): failed statuses are skipped here, so positions in
# lowercase_statuses do not line up with positions in statuses. Both vectors
# are also grown with c() on every iteration.
lowercase_statuses = c(lowercase_statuses, tl)
}
@geoffjentry
geoffjentry / gist:8226310
Created January 2, 2014 20:35
massage data
# Load the TIOBE listing from disk, keeping text columns as plain character
# vectors rather than factors.
tiobe <- read.csv(file = "tiobe.csv", stringsAsFactors = FALSE)

# Lower-cased copy of the language names.
tiobe_langs <- tolower(tiobe[, "lang"])
# Helper for nudging tweet text towards the spellings used in the TIOBE
# listing. It is a rough clean-up, not an exact normalisation.
#
# statuses: character vector of tweet texts
# was:      regular expression to look for (matched case-insensitively)
# is:       replacement text
# Returns `statuses` with every match of `was` replaced by `is`.
replace_statuses <- function(statuses, was, is) {
  gsub(pattern = was, replacement = is, x = statuses, ignore.case = TRUE)
}
@geoffjentry
geoffjentry / gist:8226180
Last active March 2, 2021 06:36
Remove retweets
# Load the saved tweet collection; the file is expected to provide `code2013`.
load("code2013.rda")

# Find/remove the tweets flagged as retweets.
# vapply() guarantees a logical vector even when `code2013` is empty, where
# sapply() would return an empty list and make which() fail.
retweet_flags <- vapply(code2013, function(x) x$getIsRetweet(), logical(1))
is_retweets <- which(retweet_flags)

# Select the positions to keep rather than negating the retweet positions:
# code2013[-integer(0)] would select nothing, so negative indexing needs a
# special case when no retweets are present. setdiff() handles both cases.
filtered_tweets <- code2013[setdiff(seq_along(code2013), is_retweets)]