|
#!/usr/bin/env ruby |
|
|
|
require 'bundler/setup' |
|
require 'yajl' |
|
require 'zlib' |
|
require 'uri' |
|
require 'time' |
|
require 'open-uri' |
|
require 'optparse' |
|
|
|
# Enumerates the hourly archive keys ("YYYY-MM-DD-H", hour not zero-padded)
# that cover a span of time.
#
# The span is treated as half-open: the hour containing +from+ is always
# included, and the hour containing +to+ is included only when +to+ lies
# strictly inside it. Both bounds are assumed to be Time instances.
class DateRange
  include Enumerable

  SECONDS_PER_HOUR = 3600

  attr_reader :from, :to

  # @param from [Time] start of the span
  # @param to [Time] end of the span
  def initialize(from, to)
    @from = from
    @to = to
  end

  # Keys of every clock hour touched by the span, in chronological order.
  #
  # A zero-length span yields the single hour containing +from+; a reversed
  # span (+to+ before +from+) yields an empty list.
  #
  # @return [Array<String>]
  def hour_blocks
    return [hour_block(from)] if seconds_between == 0

    @hour_blocks ||= build_hour_blocks
  end

  # @return [Float] length of the span in seconds (negative when reversed)
  def seconds_between
    to - from
  end

  # Formats the archive key for the clock hour containing +time+.
  #
  # @param time [Time]
  # @return [String] e.g. "2012-11-01-5"
  def hour_block(time)
    "#{time.strftime('%Y-%m-%d')}-#{time.hour}"
  end

  # Yields each hour key in order; returns an Enumerator without a block.
  def each(&block)
    return enum_for(:each) unless block_given?

    hour_blocks.each(&block)
  end

  private

  # Counts hours from the top of the hour containing +from+ rather than from
  # +from+ itself. Counting from the raw start dropped the final hour whenever
  # the span was not hour-aligned (13:30..14:10 has to cover hours 13 and 14).
  def build_hour_blocks
    return [] if to < from

    first_hour = from - (from.min * 60 + from.sec + from.subsec)
    count = ((to - first_hour) / SECONDS_PER_HOUR).ceil
    Array.new(count) { |i| hour_block(first_hour + (i * SECONDS_PER_HOUR)) }
  end
end
|
|
|
module Github
  module Events
    # Thin wrapper around one decoded event hash.
    class Event
      attr_reader :attrs

      # @param attrs [Hash] decoded JSON for a single event (string keys)
      def initialize(attrs)
        @attrs = attrs
      end

      # @return [String, nil] the value stored under 'type'
      def event_name
        attrs['type']
      end

      # @return [Time] the 'created_at' value parsed with Time.parse
      def created_at
        Time.parse(attrs['created_at'])
      end
    end

    # Mixin for events whose payload carries a 'repository' hash with
    # 'owner' and 'name' entries. Expects the includer to respond to #attrs.
    module Repository
      def repository
        attrs['repository']
      end

      def owner_name
        repository['owner']
      end

      def repo_name
        repository['name']
      end

      # @return [String] "owner/name", used as the grouping key in Index
      def key
        "#{owner_name}/#{repo_name}"
      end
    end

    class PushEvent < Event
      include Repository
    end

    class PullRequestEvent < Event
      include Repository
    end

    # Ideally there would be one Event subclass, responding to #key, for each
    # type listed at http://developer.github.com/v3/activity/events/types.
    # Why? Because, regardless of what Github claims, the JSON schema of each
    # event type is not all that similar. Since we only ever filter by one
    # event type, it's reasonable to expect different result types.

    # Groups events by their #key.
    # Each value is a hash of the form { :count => Integer, :events => Array }.
    class Index < Hash
      # Records +event+ under its key, creating the entry on first sight.
      #
      # @param event [#key]
      # @return [Index] self, so insertions can be chained like Array#<<
      def <<(event)
        entry = (self[event.key] ||= { :count => 0, :events => [] })
        entry[:count] += 1
        entry[:events] << event
        self
      end

      # Entries ordered by ascending event count. Overrides Enumerable#sort:
      # takes no comparator and ignores any block given.
      #
      # @return [Array<Array>] list of [key, entry] pairs
      def sort
        to_a.sort { |(_, a), (_, b)| a[:count] <=> b[:count] }
      end
    end
  end
end
|
|
|
# Downloads the hourly archive files covering a time window, indexes the
# events of one type by their key, and prints the keys with the most events.
#
# Reads these option keys: :from and :to (date strings for Time.parse),
# :event_type (name of a class under Github::Events) and :limit.
class Github::Stats
  # WAT - http://data.githubarchive.org expects keys in Mountain time zone,
  # and returns events in it as well.
  GITHUB_ARCHIVE_TOTALLY_ARBITRARY_TIMEZONE_WAT = "-07:00"

  attr_reader :options, :errors
  attr_writer :reporter

  # @param options [Hash] see the class comment for the keys that are read
  def initialize(options)
    @options = options
    @errors = []
  end

  # @return [Time] exclusive end of the window, shifted to the archive offset
  def to
    Time.parse(options[:to]).localtime(GITHUB_ARCHIVE_TOTALLY_ARBITRARY_TIMEZONE_WAT)
  end

  # @return [Time] inclusive start of the window, shifted to the archive offset
  def from
    Time.parse(options[:from]).localtime(GITHUB_ARCHIVE_TOTALLY_ARBITRARY_TIMEZONE_WAT)
  end

  # @return [Integer] number of rows to report (0 when :limit is absent,
  #   because nil.to_i is 0)
  def limit
    options[:limit].to_i
  end

  # @return [URI] a fresh URI each call, so callers may safely mutate it
  def base_uri
    URI.parse("http://data.githubarchive.org/")
  end

  def date_range
    DateRange.new(from, to)
  end

  def event_index
    @event_index ||= Github::Events::Index.new
  end

  # Fetches every hourly file in the window and indexes the events whose type
  # matches options[:event_type] and whose timestamp falls in [from, to).
  def gather
    # Parse the bounds once; the accessors re-run Time.parse on every call,
    # which previously happened for each matching event.
    window_start = from
    window_end = to
    wanted_type = options[:event_type]

    date_range.each do |block|
      each_archived_event(block) do |attrs|
        next unless attrs['type'] == wanted_type

        event = event_class.new(attrs)
        created = event.created_at
        event_index << event if window_start <= created && created < window_end
      end
    end
  end

  # Prints the +limit+ busiest keys, most events first, one per line.
  def report
    event_index.sort.reverse.take(limit).each do |key, entry|
      reporter.puts "#{key} - #{entry[:count]} events"
    end
  end

  # Output sink for #report; defaults to STDOUT and can be swapped through
  # the writer.
  def reporter
    @reporter ||= STDOUT
  end

  # Let's just say every input is valid, and if the program blows up, it's user error.
  def valid?
    true
  end

  # Resolves options[:event_type] to a class under Github::Events.
  #
  # The lookup does not inherit, so a name such as "String" raises NameError
  # instead of silently resolving to a top-level constant.
  def event_class
    @event_class ||= Github::Events.const_get(options[:event_type], false)
  end

  private

  # Downloads and gunzips the archive file for +block+, then yields each
  # decoded JSON object in it.
  #
  # Uses URI#open (provided by open-uri) rather than Kernel#open, whose URI
  # support was removed in Ruby 3.0. The block form closes the HTTP body.
  def each_archived_event(block, &handler)
    uri = base_uri
    uri.path = "/#{block}.json.gz"

    json = uri.open do |io|
      gz = Zlib::GzipReader.new(io)
      begin
        gz.read
      ensure
        # finish releases the reader without closing io; open-uri closes io.
        gz.finish
      end
    end

    Yajl::Parser.parse(json, &handler)
  end
end
|
|
|
# Raw command-line values, stored under the keys Github::Stats reads:
# :from, :to, :event_type and :limit. Nothing is converted or validated here.
options = {}

parser = OptionParser.new
parser.banner = "Usage gh_repo_stats --after 2012-11-01T13:00:00Z --before 2012-11-02T03:12:14-03:00 --event PushEvent --count 42"

parser.on("--after DATE", "Query dates on or after DATE, an ISO-8601 formatted date string") { |value| options[:from] = value }
parser.on("--before DATE", "Query dates before DATE, an ISO-8601 formatted date string") { |value| options[:to] = value }
parser.on("--event TYPE", "Filter event type; see http://developer.github.com/v3/activity/events/types/#gistevent for details") { |value| options[:event_type] = value }
parser.on("--count N", "Report on the top (n) results") { |value| options[:limit] = value }

# Consumes the recognised switches from ARGV in place.
parser.parse!
|
|
|
# Script entry point: gather and report when the options pass validation,
# otherwise print each recorded error on its own line.
stats = Github::Stats.new(options)

if stats.valid?
  stats.gather
  stats.report
else
  stats.errors.each { |message| puts message }
end