Skip to content

Instantly share code, notes, and snippets.

Jeff Gentry geoffjentry

Block or report user

Report or block geoffjentry

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View gist:9178541
# Search for up to 500 tweets carrying the #rstats hashtag ...
tweets <- searchTwitter("#rstats", n = 500)
# ... and hand the result to store_tweets_db().
store_tweets_db(tweets)
View gist:9178523
# Three alternative ways to register a database backend; per the "or" comments
# below, pick one of them rather than running all three.
# Option 1: register a connection object that already exists.
# NOTE(review): `dbi_connection` is not defined in this snippet -- it must be
# created by the caller beforehand.
register_db_backend(dbi_connection)
# or create a sqlite connection
# (the argument is a placeholder path to the SQLite file)
register_sqlite_backend("/path/to/sqlite/file")
# or create a mysql connection
# NOTE(review): the four positional arguments look like database name, host,
# user and password (placeholder values) -- confirm against the function's
# signature before relying on the order.
register_mysql_backend("my_database", "hostname", "username", "password")
View gist:8620150
# Load the saved tweets; the .rda file is expected to define `code2013`,
# which is used on the next line.
load("code2013.rda") # 6028 tweets
filtered_tweets <- strip_retweets(code2013) # 5006 tweets
# Pull the text out of every remaining tweet. vapply() guarantees a character
# vector (character(0) for an empty input) and fails loudly if getText() ever
# returns something other than a single string; sapply() would silently change
# its return type in those cases.
statuses <- vapply(filtered_tweets, function(x) x$getText(), character(1))
# Read in the TIOBE data
tiobe <- read.csv("tiobe.csv", stringsAsFactors = FALSE)
# Lower-cased language names, used for case-insensitive comparisons.
tiobe_langs <- tolower(tiobe[, "lang"])
# Looking at the TIOBE listings and some of the tweet data, massage some of the entries
View gist:8230289
# Rank of each language within the #code2013 table: simply its row position.
# seq_len() (rather than 1:nrow()) stays correct for a zero-row table.
# NOTE(review): this assumes the table is already sorted by descending count.
code2013_lang_table$code2013_rank <- seq_len(nrow(code2013_lang_table))
# TIOBE rank of each language: its row position in `tiobe`, NA when absent.
# Both sides are lower-cased so the lookup is case-insensitive; this is a no-op
# when the TIOBE names are already stored in lower case.
# NOTE(review): assumes the rows of `tiobe` are ordered by TIOBE rank.
code2013_lang_table$tiobe_rank <- match(
  tolower(as.character(code2013_lang_table$code2013_langs)),
  tolower(tiobe[, "lang"])
)
# Make a scatterplot of the ranking differences
png(filename = "code2013_tiobe_scatter.png", width = 640, height = 640)
# print() explicitly: a ggplot object is only drawn when printed, and relying on
# top-level auto-printing yields an empty file under source() or in a function.
print(
  ggplot(
    code2013_lang_table,
    aes(x = code2013_rank, y = tiobe_rank, color = code2013_tier)
  ) +
    geom_text(aes(label = code2013_langs), size = 3) +
    ylab("TIOBE Rank") +
    xlab("#code2013 rank") +
    ggtitle("#code2013 vs TIOBE rankings")
)
dev.off()
View gist:8226975
library(ggplot2)
# Horizontal bar chart of mention counts per language, written to a 640x640 PNG.
png(filename = "code2013_tiobe.png", width = 640, height = 640)
# print() explicitly: a ggplot object is only drawn when printed, and relying on
# top-level auto-printing yields an empty file under source() or in a function.
print(
  ggplot(
    code2013_lang_table,
    aes(x = code2013_langs, y = Count, fill = code2013_tier)
  ) +
    # stat = "identity": bar length is the Count column itself, not a tally.
    geom_bar(stat = "identity") +
    xlab("Language") +
    ylab("Count") +
    ggtitle("#code2013 Languages Sorted By TIOBE Rankings") +
    # Flip the axes so the language names are on the vertical axis.
    coord_flip()
)
dev.off()
View gist:8226512
# Tokenize each status: split on a comma, a period, or a run of whitespace.
# Adjacent delimiters produce empty tokens, which are harmless here because
# only tokens found in `tiobe_langs` are kept.
status_tokens <- strsplit(statuses, ",|\\.|\\s+")
# Keep, per status, only the tokens that are TIOBE language names.
# lapply() always returns a list; sapply() could collapse the result into a
# vector or matrix when every status happens to match the same number of tokens.
matching_tokens <- lapply(status_tokens, function(tokens) {
  tokens[tokens %in% tiobe_langs]
})
# Now have the languages mentioned in #code2013 which are in TIOBE
code2013_langs <- unlist(matching_tokens)
# Tabulate the mentions, most frequent language first.
code2013_lang_table <- as.data.frame(
  sort(table(code2013_langs), decreasing = TRUE)
)
@geoffjentry
geoffjentry / gist:8226425
Created Jan 2, 2014
remove weird encodings
View gist:8226425
# I want to convert this all to lowercase but there are 67 with weird encodings
# Lower-case the statuses one at a time so that a status on which tolower()
# raises an error is recorded instead of aborting the whole conversion.
# bad_statuses collects the indices of the statuses that failed.
bad_statuses = numeric()
# lowercase_statuses collects the successfully lower-cased statuses; failed
# ones are skipped, so its positions do not line up with `statuses`.
lowercase_statuses = character()
for (i in seq_along(statuses)) {
# try(..., silent=TRUE) returns an object of class "try-error" instead of
# stopping when tolower() fails.
tl = try(tolower(statuses[[i]]), silent=TRUE)
if (inherits(tl, "try-error")) {
bad_statuses = c(bad_statuses, i)
} else {
lowercase_statuses = c(lowercase_statuses, tl)
}
# NOTE(review): the closing brace of the for loop is not visible in this
# excerpt; the snippet appears to be truncated here.
View gist:8226310
# Load the TIOBE listing from disk, keeping text columns as plain strings.
tiobe <- read.csv(file = "tiobe.csv", stringsAsFactors = FALSE)
# Lower-cased copy of the "lang" column.
tiobe_langs <- tolower(tiobe[, "lang"])
# Looking at the TIOBE listings and some of the tweet data, massage some of the
# entries here. This won't be perfect but will help a little bit.

#' Replace every match of a pattern in the statuses, ignoring case.
#'
#' @param statuses Character vector of status texts.
#' @param was Regular expression to search for (matched case-insensitively).
#' @param is Replacement text substituted for every match.
#' @return The result of `gsub()`: `statuses` with all matches replaced.
replace_statuses <- function(statuses, was, is) {
  gsub(pattern = was, replacement = is, x = statuses, ignore.case = TRUE)
}
@geoffjentry
geoffjentry / gist:8226180
Last active Jan 2, 2016
Remove retweets
View gist:8226180
# Load the saved tweets; the .rda file is expected to define `code2013`,
# which is used below.
load("code2013.rda")
# Find/remove the tweets flagged as retweets.
# vapply() always yields a logical vector (logical(0) for an empty tweet list);
# sapply() would return list() there and make which() fail.
is_retweets <- which(
  vapply(code2013, function(x) x$getIsRetweet(), logical(1))
)
# Guard the negative index: x[-integer(0)] selects nothing rather than
# everything, so only drop elements when at least one retweet was found.
if (length(is_retweets) > 0) {
  filtered_tweets <- code2013[-is_retweets]
} else {
  filtered_tweets <- code2013
}
You can’t perform that action at this time.