Skip to content

Instantly share code, notes, and snippets.

@gousiosg
Last active Dec 17, 2015
Embed
What would you like to do?
select a.month,
a.total_commits - b.commits_from_pull_reqs as direct,
b.commits_from_pull_reqs as pullreq
from
(select strftime("%Y-%m-01", substr(c.created_at, 0, 20)) as month,
p.id as prid, count(c.id) as total_commits
from commits c, projects p, project_commits pc
where p.id = pc.project_id
and c.id = pc.commit_id
group by month, p.id) as a,
(select strftime("%Y-%m-01", substr(c.created_at, 0, 20)) as month,
p.id as prid, count(prc.commit_id) as commits_from_pull_reqs
from projects p, pull_requests pr, pull_request_commits prc,
commits c, project_commits pc
where p.id = pr.base_repo_id
and prc.commit_id = c.id
and pc.project_id = p.id
and pc.commit_id = c.id
and pr.id = prc.pull_request_id
group by month, p.id) as b,
projects p, users u
where
a.prid = b.prid
and a.prid = p.id
and a.month = b.month
and u.id = p.owner_id
and u.login = 'Netflix'
and p.name = 'RxJava'
order by a.month desc
select strftime("%Y-%m-01", substr(p.created_at, 0, 20)) as month, count(*) as contributing
from projects p
where p.forked_from = (
select p.id
from projects p, users u
where p.name = 'RxJava'
and u.login = 'Netflix'
and p.owner_id = u.id
)
and exists (
select *
from pull_requests pr
where pr.head_repo_id = p.id
)
group by month
select strftime("%Y-%m-01", substr(p.created_at, 0, 20)) as month, count(*) as created
from projects p
where p.forked_from = (
select p.id
from projects p, users u
where p.name = 'RxJava'
and u.login = 'Netflix'
and p.owner_id = u.id
)
group by month
rm(list = ls(all = TRUE))
if (! "ggplot2" %in% installed.packages()) install.packages("ggplot2")
if (! "RSQLite" %in% installed.packages()) install.packages("RSQLite")
if (! "reshape" %in% installed.packages()) install.packages("reshape")
if (! "sqldf" %in% installed.packages()) install.packages("sqldf")
library(RSQLite)
library(ggplot2)
library(reshape)
library(sqldf)
# change those to the name of your repo
owner = "Netflix"
project = "RxJava"
# change dbname to whereever you stored your DB
con <- dbConnect("SQLite", dbname = "~/Desktop/rxjava.db")
store.png <- function(data, name, dir = "~/Desktop")
{
png(filename = paste(dir, name, sep="/"), width = 500, height = 500)
plot(data)
dev.off()
}
# Opened and merged pull reqs per month
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(prh.created_at, 0, 20)) as month, count(*) as opened from pull_requests pr, pull_request_history prh, projects p, users u where pr.id = prh.pull_request_id and p.id = pr.base_repo_id and p.owner_id = u.id and p.name = '%s' and u.login = '%s' and prh.action = 'opened' group by month", project, owner))
opened <- fetch(res, n = -1)
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(prh.created_at, 0, 20)) as month, count(*) as merged from pull_requests pr, pull_request_history prh, projects p, users u where pr.id = prh.pull_request_id and p.id = pr.base_repo_id and p.owner_id = u.id and p.name = '%s' and u.login = '%s' and prh.action = 'merged' group by month", project, owner))
merged <- fetch(res, n = -1)
df <- merge(merged, opened, by = 'month')
df$month <- as.POSIXct(df$month)
df <- melt(df, id=c('month'))
df <- rename(df, c("variable"="status"))
p <- ggplot(df) + aes(x = month, y = value) + scale_x_datetime() + geom_freqpoly(aes(group = status, colour = status), stat="identity", size = 2) + xlab("Date") + ylab("Number of pull requests")
store.png(p, "pull-req-stats.png")
# Opened and closed issues per month
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(i.created_at, 0, 20)) as month, count(*) as opened from issues i, projects p, users u where i.pull_request = 'f' and u.id = p.owner_id and p.id = i.repo_id and p.name ='%s' and u.login = '%s' group by month", project, owner))
opened <- fetch(res, n = -1)
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(ie.created_at, 0, 20)) as month, count(*) as closed from issues i, issue_events ie, projects p, users u where ie.issue_id = i.id and ie.action = 'closed' and p.id = i.repo_id and p.name ='%s' and u.login = '%s' group by month", project, owner))
closed <- fetch(res, n = -1)
df <- merge(closed, opened, by = 'month')
df$month <- as.POSIXct(df$month)
df <- melt(df, id=c('month'))
df <- rename(df, c("variable"="status"))
p <- ggplot(df) + aes(x = month, y = value, fill = status) + scale_x_datetime() + geom_freqpoly(aes(group = status, colour = status), stat="identity", size = 2) + xlab("Date") + ylab("Number of issues")
store.png(p, "issue-stats.png")
# Direct vs pullreq commits
res <- dbSendQuery(con, sprintf("select a.month, a.total_commits - b.commits_from_pull_reqs as direct, b.commits_from_pull_reqs as pullreq from (select strftime(\"%%Y-%%m-01\", substr(c.created_at, 0, 20)) as month, p.id as prid, count(c.id) as total_commits from commits c, projects p, project_commits pc where p.id = pc.project_id and c.id = pc.commit_id group by month, p.id) as a, (select strftime(\"%%Y-%%m-01\", substr(c.created_at, 0, 20)) as month, p.id as prid, count(prc.commit_id) as commits_from_pull_reqs from projects p, pull_requests pr, pull_request_commits prc, commits c, project_commits pc where p.id = pr.base_repo_id and prc.commit_id = c.id and pc.project_id = p.id and pc.commit_id = c.id and pr.id = prc.pull_request_id group by month, p.id) as b, projects p, users u where a.prid = b.prid and a.prid = p.id and a.month = b.month and u.id = p.owner_id and u.login = '%s' and p.name = '%s' order by a.month desc", owner, project))
df <- fetch(res, n = -1)
df$month <- as.POSIXct(df$month)
df$commit_source <- df$value
df <- melt(df, id=c('month'))
df <- rename(df, c("variable"="commit_source"))
p <- ggplot(df) + aes(x = month, y = value, fill = commit_source) + scale_x_datetime() + geom_bar(stat="identity") + xlab("Date") + ylab("Commits") + scale_colour_identity(name = "source")
store.png(p, "commit-source.png")
# Forks vs contributing forks
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(p.created_at, 0, 20)) as month, count(*) as created from projects p where p.forked_from = (select p.id from projects p, users u where p.name = '%s' and u.login = '%s' and p.owner_id = u.id) group by month", project, owner))
forks <- fetch(res, n = -1)
res <- dbSendQuery(con, sprintf("select strftime(\"%%Y-%%m-01\", substr(p.created_at, 0, 20)) as month, count(*) as contributing from projects p where p.forked_from = (select p.id from projects p, users u where p.name = '%s' and u.login = '%s' and p.owner_id = u.id) and exists (select * from pull_requests pr where pr.head_repo_id = p.id) group by month", project, owner))
contrib <- fetch(res, n = -1)
df <- merge(forks, contrib, by = 'month')
df$month <- as.POSIXct(df$month)
df <- melt(df, id=c('month'))
df <- rename(df, c("variable"="forks"))
p <- ggplot(df) + aes(x = month, y = value, fill = forks) + scale_x_datetime() + geom_freqpoly(aes(group = forks, colour = forks), stat="identity", size = 2) + xlab("Date") + ylab("Number of forks")
store.png(p, "fork-stats.png")
# Comments and commenters
res <- dbSendQuery(con, sprintf("select a.month as month, (select count(pm.user_id) from project_members pm where pm.user_id = a.user_id and pm.repo_id = a.p_id) as is_member, count(distinct user_id) as num_users, sum(a.cnt) as num_comments from (select strftime(\"%%Y-%%m-01\", substr(ic.created_at, 0, 20)) as month, pr.base_repo_id as p_id, ic.user_id as user_id, count(ic.comment_id) as cnt from projects p join pull_requests pr on p.id = pr.base_repo_id left outer join issues i on pr.pullreq_id = i.issue_id left outer join issue_comments ic on i.id = ic.issue_id where p.forked_from is null and p.id = (select p.id from projects p, users u where u.id = p.owner_id and u.login='%s' and p.name = '%s') and pr.base_repo_id = i.repo_id group by month, pr.base_repo_id, ic.user_id) as a, users u, projects p where p.owner_id = u.id and p.id = a.p_id group by month, is_member", owner, project))
df <- fetch(res, n = -1)
df$is_member <- factor(df$is_member)
df$month <- as.POSIXct(df$month)
df <- subset(df, !is.na(month))
df <- sqldf("select d.month, (select sum(df1.num_comments) from df df1 where df1.month = d.month and df1.is_member = 0) *100/sum(d.num_comments) as comments, (select sum(df1.num_users) from df df1 where df1.month = d.month and df1.is_member = 0) * 100/sum(d.num_users) as commenters from df d group by d.month")
df <- melt(df, 'month', na.rm = TRUE)
df$variable <- as.factor(df$variable)
df$value <- as.numeric(as.character(df$value))
p <- ggplot(df, aes(x = month, y = value, fill = variable)) + scale_x_datetime() + geom_bar(position = 'dodge', stat = "identity") + xlab("Date") + ylab("%") + facet_grid(. ~ variable) + theme(legend.position="none") + scale_y_continuous(limits = c(0, 100))
store.png(p, "comments-commenters-external.png")
select strftime("%Y-%m-01", substr(ie.created_at, 0, 20)) as month, count(*) as closed
from issues i, issue_events ie, projects p, users u
where ie.issue_id = i.id
and ie.action = 'closed'
and p.id = i.repo_id
and u.login = 'Netflix'
and p.name ='RxJava'
group by month
select strftime("%Y-%m-01", substr(i.created_at, 0, 20)) as month, count(*) as opened
from issues i, projects p, users u
where i.pull_request = 'f'
and u.id = p.owner_id
and p.id = i.repo_id
and u.login = 'Netflix'
and p.name ='RxJava'
group by month
select a.month as month,
(select count(pm.user_id) from project_members pm where pm.user_id = a.user_id and pm.repo_id = a.p_id) as is_member,
count(distinct user_id) as num_users,
sum(a.cnt) as num_comments
from (
select strftime("%Y-%m-01", substr(ic.created_at, 0, 20)) as month,
pr.base_repo_id as p_id, ic.user_id as user_id, count(ic.comment_id) as cnt
from projects p join pull_requests pr on p.id = pr.base_repo_id
left outer join issues i on pr.pullreq_id = i.issue_id
left outer join issue_comments ic on i.id = ic.issue_id
where p.forked_from is null
and p.id = (select p.id from projects p, users u where u.id = p.owner_id and u.login='Netflix' and p.name = 'RxJava')
and pr.base_repo_id = i.repo_id
group by month, pr.base_repo_id, ic.user_id
) as a, users u, projects p
where p.owner_id = u.id
and p.id = a.p_id
group by month, is_member
select strftime("%Y-%m-01", substr(prh.created_at, 0, 20)) as month,
count(*) as opened
from pull_requests pr, pull_request_history prh, projects p, users u
where pr.id = prh.pull_request_id
and p.id = pr.base_repo_id
and p.owner_id = u.id
and p.name = 'RxJava'
and u.login = 'Netflix'
and prh.action = 'merged'
group by month
select strftime("%Y-%m-01", substr(prh.created_at, 0, 20)) as month,
count(*) as opened
from pull_requests pr, pull_request_history prh, projects p, users u
where pr.id = prh.pull_request_id
and p.id = pr.base_repo_id
and p.owner_id = u.id
and p.name = 'RxJava'
and u.login = 'Netflix'
and prh.action = 'opened'
group by month
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment