Skip to content

Instantly share code, notes, and snippets.

@chrisdobler
Last active January 13, 2016 13:46
Show Gist options
  • Save chrisdobler/bfb74742d03f07d064f6 to your computer and use it in GitHub Desktop.
Save chrisdobler/bfb74742d03f07d064f6 to your computer and use it in GitHub Desktop.
gh_archive_challenge
#!/usr/bin/env ruby
#
# Gist Leaderboard Challenge
# By Chris Dobler 2016
#
# TODO: add defaults for when no args are sent, add storage of event type for multiple event types,
# use BigQuery to increase speed, move the request to a separate thread.
require 'optparse'
require 'open-uri'
require 'zlib'
require 'json'
require 'yajl'
require 'time'
##
## Gh_archive
##
## This object can connect to the github archive, and store a list of events. This object can also
## display a list of formatted events.
##################################
class Gh_archive
  # Number of seconds between two consecutive hourly archive files.
  SECONDS_PER_HOUR = 3600

  # options - Hash of parsed command-line switches (:after, :before,
  #           :event_name, :count); values are the raw Strings from ARGV.
  # counter - number of lines printed so far by #display.
  # gz      - handle of the most recently downloaded archive (already closed
  #           once #connect has finished reading it).
  # combos  - Array of [year, month, day, hour] slots visited by #connect.
  attr_reader :options, :counter, :gz, :combos

  # Builds an empty archive reader and parses the command-line switches.
  #
  # ARGV is consumed destructively (OptionParser#parse!), so switches that
  # were recognised are removed from it.
  def initialize
    @combos = []
    @events = []
    @gz = nil
    @counter = 0
    @options = {}
    opt_parser = OptionParser.new do |opt|
      opt.banner = "Usage: gh_repo_stats [--after DATETIME] [--before DATETIME] [--event EVENT_NAME] [-n COUNT]"
      opt.separator ""
      opt.separator "Options"
      opt.separator ""
      opt.on("--after DATETIME","The starting DATETIME") do |datetime|
        @options[:after] = datetime
      end
      opt.on("--before DATETIME","The ending DATETIME") do |datetime|
        @options[:before] = datetime
      end
      opt.on("--event EVENT_NAME","The event name") do |event_name|
        @options[:event_name] = event_name
      end
      opt.on("-n","--count COUNT","The number of records to retrieve") do |count|
        @options[:count] = count
      end
      opt.on("-h","--help","help") do
        puts opt_parser
      end
    end
    opt_parser.parse!
  end

  # Downloads every hourly archive between the --after and --before hours
  # (both inclusive) and tallies, per repository, the events whose type equals
  # the --event switch.
  #
  # The two bounds are ordered before iterating, so the range is walked
  # correctly whichever switch carries the earlier timestamp. Only the hour
  # each bound falls in matters; minutes and seconds are dropped.
  #
  # Raises ArgumentError when either bound is missing or is not a valid
  # timestamp of the form 2015-01-11T16:00:00Z.
  def connect
    puts "Retrieving archives for the selected range. This could take a while..."
    bounds = [hour_floor(@options[:after], '--after'), hour_floor(@options[:before], '--before')]
    @combos.concat(hourly_slots(*bounds.minmax))

    # Name => EventObject index, so each incoming event is a hash lookup
    # instead of a scan over everything stored so far.
    known = @events.each_with_object({}) { |stored, index| index[stored.name] ||= stored }

    total = @combos.count
    @combos.each_with_index do |(year, month, day, hour), position|
      puts "[#{(position.to_f / total * 100).to_i}%]..."
      each_archived_event(archive_url(year, month, day, hour)) do |event|
        next unless event['type'] == @options[:event_name]

        tally(known, event['repo']['name'])
      end
    end
  end

  # Orders the collected repositories from most to fewest events, in place.
  def sort
    @events.sort_by! { |entry| entry.event_count }
    @events.reverse!
  end

  # Prints one "<repo> - <n> events" line per repository, in stored order,
  # until +counter+ reaches the --count switch. A missing --count is treated
  # as 0 (nil.to_i), so nothing is printed.
  def display
    limit = @options[:count].to_i
    @events.each do |event|
      break if @counter >= limit

      puts "#{event.name} - #{event.event_count} events"
      @counter += 1
    end
  end

  private

  # Parses a command-line timestamp and returns the start of its hour as a
  # UTC Time.
  # NOTE(review): the UTC offset in the input is parsed but not applied (the
  # wall-clock hour is used as written, as before) - confirm that is intended.
  def hour_floor(text, switch)
    parts = text && Date._strptime(text, '%FT%T%z')
    unless parts && Date.valid_date?(parts[:year], parts[:mon], parts[:mday])
      raise ArgumentError, "#{switch} must be a DATETIME like 2015-01-11T16:00:00Z, got #{text.inspect}"
    end

    Time.utc(parts[:year], parts[:mon], parts[:mday], parts[:hour])
  end

  # Lists every hour from first_hour to last_hour (inclusive) as
  # [year, month, day, hour]. Integer second arithmetic keeps each slot
  # exactly on the hour, however long the range is.
  def hourly_slots(first_hour, last_hour)
    first_hour.to_i.step(last_hour.to_i, SECONDS_PER_HOUR).map do |epoch|
      slot = Time.at(epoch).utc
      [slot.year, slot.month, slot.day, slot.hour]
    end
  end

  # Builds the download URL of one hourly archive. Month and day are
  # zero-padded; the hour is not, matching the published file names
  # (e.g. 2015-01-01-5.json.gz).
  # NOTE(review): the service now documents data.gharchive.org as its host -
  # confirm this older host name still resolves.
  def archive_url(year, month, day, hour)
    format('http://data.githubarchive.org/%d-%02d-%02d-%d.json.gz', year, month, day, hour)
  end

  # Downloads and gunzips one archive, yielding each JSON document in it.
  # The download handle is kept in @gz and is always closed before returning.
  def each_archived_event(url, &block)
    # URI.open, because a bare Kernel#open no longer opens URLs on Ruby 3.0+.
    io = @gz = URI.open(url)
    payload = Zlib::GzipReader.wrap(io, &:read)
    Yajl::Parser.parse(payload, &block)
  ensure
    io.close if io && !io.closed?
  end

  # Adds one occurrence for repo_name, creating its entry on first sight.
  def tally(known, repo_name)
    entry = known[repo_name]
    unless entry
      entry = EventObject.new
      entry.name = repo_name
      @events.push(entry)
      known[repo_name] = entry
    end
    # EventObject#event_count= accumulates, so assigning 1 increments.
    entry.event_count = 1
  end
end
##
## The Project's Event Type Object
##################################
class EventObject
  # name        - repository name this tally belongs to (read/write).
  # event_count - running total of events seen (read via attr_reader;
  #               written through the accumulating setter below).
  attr_accessor :name
  attr_reader :event_count

  # Starts with an empty name and a zero tally.
  def initialize
    @event_count = 0
    @name = ""
  end

  # Accumulating setter: the assigned value is ADDED to the running total
  # rather than replacing it, so `obj.event_count = 1` increments by one.
  def event_count=(additional_count)
    @event_count += additional_count
  end
end
##
## Main application section
##################################
# Entry point: runs unless this file's own base name appears among the
# command-line arguments. The download/sort/print pipeline only starts when a
# --before switch was supplied; otherwise the options are parsed and nothing
# else happens.
unless ARGV.include?(File.basename(__FILE__))
  gh = Gh_archive.new
  if gh.options[:before]
    %i[connect sort display].each { |step| gh.public_send(step) }
  end
end
require_relative 'gh_repo_stats'
describe Gh_archive do
  # Every example works on a reader that has been built from a fixed set of
  # command-line switches and then run through connect, sort and display.
  before do
    ARGV.concat(
      [
        "--event", "PushEvent",
        "--count", "30",
        "--before", "2015-01-11T16:00:00Z",
        "--after", "2015-01-11T16:00:00Z"
      ]
    )
    @gh = Gh_archive.new
    @gh.connect
    @gh.sort
    @gh.display
  end

  describe "parameters" do
    it "are correctly set for event type" do
      parsed = @gh.options
      expect(parsed).to include(:event_name)
      expect(parsed[:event_name]).to eq("PushEvent")
    end

    it "are correctly set for count" do
      parsed = @gh.options
      expect(parsed).to include(:count)
      expect(parsed[:count]).to eq("30")
    end

    it "are correctly set for before_date" do
      parsed = @gh.options
      expect(parsed).to include(:before)
      expect(parsed[:before]).to eq("2015-01-11T16:00:00Z")
    end

    it "are correctly set for after_date" do
      parsed = @gh.options
      expect(parsed).to include(:after)
      expect(parsed[:after]).to eq("2015-01-11T16:00:00Z")
    end
  end

  describe "combinations" do
    it "should contain at least one datetime reference" do
      slots = @gh.instance_variable_get(:@combos)
      expect(slots.count).to be > 0
    end
  end

  describe "output" do
    it "quantity of output should be equal to the count parameter." do
      requested = @gh.options[:count].to_i
      expect(@gh.counter).to be >= requested
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment