Skip to content

Instantly share code, notes, and snippets.

@rizwanreza
Created June 24, 2013 20:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rizwanreza/5853561 to your computer and use it in GitHub Desktop.
Save rizwanreza/5853561 to your computer and use it in GitHub Desktop.
Assuming all files are in the same folder, you can run the command like this: ./gh_repo_stats.rb --after 2012-11-01T13:00:00Z --before 2012-11-01T16:12:14Z --event PushEvent --count 42
#!/usr/bin/env ruby
# encoding: utf-8
require_relative 'github_archive'
require 'optparse'
# gh_repo_stats --after 2012-11-01T13:00:00Z --before 2012-11-02T03:12:14-03:00 --event PushEvent --count 42
# Command-line entry point: collect the search window, event filter and row
# limit from ARGV, then print the per-repository event counts.
options = {}
parser = OptionParser.new do |opts|
  # Name this script (not the github_archive.rb library) in the usage banner.
  opts.banner = "Usage: gh_repo_stats.rb [options]"
  opts.on("-a", "--after DATETIME", "Start searching from this time") do |after|
    options[:from] = after
  end
  opts.on("-b", "--before DATETIME", "Stop searching at this time") do |before|
    options[:to] = before
  end
  opts.on("-e", "--event EVENT", "Filter by event name") do |event_name|
    options[:event] = event_name
  end
  # Integer coercion makes OptionParser reject non-numeric input instead of
  # String#to_i silently turning it into 0.
  opts.on("-c", "--count COUNT", Integer, "Restrict the number of entries") do |count|
    options[:count] = count
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Show the problem plus the usage text rather than a backtrace.
  abort "#{e.message}\n#{parser}"
end

# Both ends of the time window are needed to build the archive URLs; fail
# early with a readable message when either is missing.
missing = [["--after", :from], ["--before", :to]].reject { |_flag, key| options[key] }.map(&:first)
abort "Missing required option(s): #{missing.join(', ')}\n#{parser}" unless missing.empty?

puts GithubArchive::Displayer.new(
  event: options[:event],
  count: options[:count],
  uri: GithubArchive::URI.new(from: options[:from], to: options[:to])).display
# encoding: utf-8
require 'date'
require 'open-uri'
require 'yajl'
require 'zlib'
# Downloads hourly GitHub Archive dumps for a time window and counts how many
# events of a given type each repository received.
module GithubArchive
  # Converts a from/to time window into the list of hourly archive URLs
  # that cover it.
  class URI
    URL = "http://data.githubarchive.org/"
    SECONDS_PER_HOUR = 3600

    attr_reader :from, :to

    # @param options [Hash] :from and :to, date/time strings accepted by
    #   DateTime.parse; both are stored as UTC Time values.
    def initialize(options)
      @from = DateTime.parse(options[:from]).to_time.utc
      @to = DateTime.parse(options[:to]).to_time.utc
    end

    # Lists one URL per clock hour that overlaps the window from...to
    # (the end is exclusive), oldest first.
    #
    # @return [Array<String>] URLs shaped like URL + "YYYY-MM-DD-H.json.gz"
    def build_uri
      uris = []
      # Archives are per clock hour, so step from the top of the starting
      # hour; stepping from a mid-hour start would skip the final partial
      # hour of the window.
      hour = Time.utc(from.year, from.month, from.day, from.hour)
      while hour < to
        # Month and day are zero-padded, the hour is not.
        uris << "#{URL}#{hour.strftime('%Y-%m-%d')}-#{hour.hour}.json.gz"
        hour += SECONDS_PER_HOUR
      end
      uris
    end
  end

  # Downloads the archives named by a URI object, tallies the matching
  # events per repository and renders the ranking as text.
  class Displayer
    attr_reader :event, :count, :uri
    attr_accessor :json

    # @param options [Hash]
    #   :event - pattern matched against each activity's "type"
    #            (interpolated into a regexp; nil matches every type)
    #   :count - maximum number of rows to show (nil shows all rows)
    #   :uri   - object whose #build_uri returns the archive URLs
    def initialize(options)
      @event = options[:event]
      @count = options[:count]
      @uri = options[:uri]
      @json = ''
    end

    # @return [String] one "owner/repo - N events" line per repository,
    #   busiest repository first
    def display
      parse.map { |repo, total| "#{repo} - #{total} events" }.join("\n")
    end

    private

    # Downloads the archives, then counts matching events per repository.
    #
    # @return [Array<Array(String, Integer)>] [repo, total] pairs sorted by
    #   descending total and limited to `count` rows when a count is set
    def parse
      download_all
      puts "Parsing"
      event_pattern = /#{event}/ # built once instead of once per activity
      event_counts = Hash.new(0)
      Yajl::Parser.new.parse(json) do |activity|
        next unless activity["type"] =~ event_pattern

        # Skip activities that carry no repository URL instead of raising.
        repository = activity["repository"]
        next unless repository && repository["url"]

        event_counts[repository_owner_and_name(repository["url"])] += 1
      end
      ranked = event_counts.sort_by { |_repo, total| total }.reverse
      return ranked unless count

      # first(count) yields exactly `count` rows; a 0..count slice would
      # return one row too many.
      ranked.first(count)
    end

    # Fetches every archive and appends its decompressed content to `json`.
    def download_all
      uri.build_uri.each do |archive_url|
        puts "Downloading and unzipping #{archive_url}"
        # ::URI is the standard library module (GithubArchive::URI shadows the
        # bare name here). Its open-uri #open works on every Ruby version,
        # unlike Kernel#open with a URL, and the block form closes the body.
        ::URI.parse(archive_url).open do |io|
          gz = Zlib::GzipReader.new(io)
          begin
            json << gz.read
          ensure
            # finish releases the gzip reader without closing `io`, which
            # the surrounding block takes care of.
            gz.finish
          end
        end
      end
    end

    # @param url [String] repository URL ending in ".../owner/name"
    # @return [String] the trailing "owner/name" part
    def repository_owner_and_name(url)
      url.split('/').last(2).join('/')
    end
  end
end
# encoding: utf-8
require_relative 'github_archive'
# Exercises Displayer#display against a canned archive fixture; the download
# step is replaced by a message expectation so no network access happens.
describe GithubArchive::Displayer do
  describe "#display" do
    it "reports per-repository counts for the matching event type" do
      displayer = GithubArchive::Displayer.new(
        event: "PushEvent",
        count: 42,
        uri: GithubArchive::URI.new(from: "2012-11-01T13:00:00Z", to: "2012-11-01T15:12:14Z"))
      # The download must be requested, but the data comes from the fixture.
      displayer.should_receive(:download_all)
      # File.read closes the file handle; File.open(...).read leaves it open.
      displayer.json = File.read('fixtures/github_archive.json')
      expect(displayer.display).to match("antage/formtastic-plus-bootstrap - 2 events")
    end
  end
end
# Checks which hourly archive URLs GithubArchive::URI#build_uri produces for
# date-only input, for input with a UTC offset, and for a multi-day window.
describe GithubArchive::URI do
  context "when from and to are different in days" do
    subject(:archive) { GithubArchive::URI.new(from: "2012-11-01", to: "2012-11-02") }

    it "builds up the correct URI" do
      uris = archive.build_uri
      # First and last hour of the day.
      expect(uris).to include('http://data.githubarchive.org/2012-11-01-0.json.gz')
      expect(uris).to include('http://data.githubarchive.org/2012-11-01-23.json.gz')
    end
  end

  context "when time in from and to aren't equal" do
    subject(:archive) do
      GithubArchive::URI.new(from: "2012-11-01T13:00:00Z", to: "2012-11-01T16:12:14-03:00")
    end

    it "builds up the correct string given parameters" do
      expect(archive.build_uri).to include('http://data.githubarchive.org/2012-11-01-15.json.gz')
    end
  end

  context "when day in from and to aren't equal" do
    subject(:archive) do
      GithubArchive::URI.new(from: "2012-11-01T13:00:00Z", to: "2012-11-03T16:12:14-03:00")
    end

    it "builds up the correct string given parameters" do
      expect(archive.build_uri).to include('http://data.githubarchive.org/2012-11-03-13.json.gz')
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment