Created
October 21, 2012 15:13
-
-
Save brainopia/3927237 to your computer and use it in GitHub Desktop.
github crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'delegate'
require 'forwardable'
# Base class for one unit of crawler work, backed by a single document.
#
# Each subclass is bound to a collection (see .collection_name) and to an
# "indicator" field (see .indicator) that records the processing state of a
# document for that kind of task. Subclasses implement #perform.
#
# NOTE(review): `mongodb`, `Github` and `debug` are not defined in this file;
# the semantics of `without`, `modify`, `upsert`, `find`, `update` and `one`
# are assumed from their names - confirm against their definitions.
class Task
  extend Forwardable

  # Instances reuse the class-level collection handle and update builder.
  def_delegators 'self.class', :collection, :indication

  class << self
    # Task classes in the order in which they are polled for work.
    def queues
      [OrgRepos, Members, Orgs, Projects, Users, Stars, Contributions]
    end

    # Returns a task from the first queue whose .fetch yields one, or nil
    # when every queue comes back empty. Later queues are not polled once a
    # task has been found.
    def next
      queues.each do |queue|
        task = queue.fetch
        return task if task
      end
      nil
    end

    # Calls `modify` on the not-yet-marked documents with an update that sets
    # the indicator to "process"; wraps a truthy result in a new instance and
    # returns nil otherwise.
    # NOTE(review): `modify` presumably claims one document atomically - confirm.
    def fetch
      doc = new_tasks.modify(indication(:process))
      new(doc) if doc
    end

    # Documents of this task's collection selected by `without indicator`
    # (presumably those where the indicator field is not set yet).
    def new_tasks
      collection.without(indicator)
    end

    # Collection handle obtained by sending the collection name to `mongodb`.
    def collection
      mongodb.send(collection_name)
    end

    # Class-level macro and reader in one.
    #
    # With a truthy +value+ it records the collection name for this class;
    # without one it returns the recorded name, defaulting to the downcased
    # last segment of the class name (Task::Projects -> "projects").
    def collection_name(value = nil)
      if value
        @collection_name = value
      else
        @collection_name ||= name.split('::').last.downcase
      end
    end

    # Class-level macro and reader for the state field of this task type.
    # Defaults to :state when never set.
    def indicator(value = nil)
      if value
        @indicator = value
      else
        @indicator ||= :state
      end
    end

    # Builds a '$set' update that writes +value+ (as a string) into the
    # indicator field, together with any +additional+ fields. Keys in
    # +additional+ win over the indicator entry on collision.
    def indication(value, additional = {})
      { '$set' => { indicator => value.to_s }.merge(additional) }
    end
  end

  # doc - the document this task works on; its '_id' is the task target.
  def initialize(doc)
    @doc = doc
  end

  # Identifier of the document being processed.
  def target
    @doc['_id']
  end

  # Update marking the task as "ready", optionally storing +info+ alongside.
  def finished(info = {})
    indication(:ready, info)
  end

  # Returns +object+ itself when it is an Array, otherwise a one-element
  # array containing it (nil becomes [nil]).
  def wrap_array(object)
    object.is_a?(Array) ? object : [object]
  end

  # Query on this task's collection restricted to the target document.
  def scope
    collection.find(_id: target)
  end

  # Marks the target document as "pause".
  def pause
    scope.update(indication(:pause))
  end

  # Calls Github.<type>(target) and upserts the 'full_name' of every entry
  # into the projects collection. Returns the wrapped list of entries.
  def extract_repos(type = :repos)
    repos = wrap_array(Github.send(type, target))
    repos.each do |repo|
      mongodb.projects.upsert(repo['full_name'])
    end
  end

  # Stores the repository info on the target document and upserts the
  # parent's 'full_name' into the same collection when a parent is present.
  class Projects < Task
    def perform
      info = Github.repo(target)
      scope.update(finished(info))
      collection.upsert(info['parent']['full_name']) if info['parent']
    end
  end

  # Queues the target's repositories, then stores the user info.
  class Users < Task
    def perform
      extract_repos
      info = Github.user(target)
      scope.update(finished(info))
    end
  end

  # Queues the entries returned by Github.starred for a users document;
  # progress is tracked in :star_state.
  class Stars < Task
    collection_name :users
    indicator :star_state

    def perform
      extract_repos(:starred)
      scope.update(finished)
    end
  end

  # Records contributors of a projects document; progress is tracked in
  # :contribute_state.
  class Contributions < Task
    collection_name :projects
    indicator :contribute_state

    def perform
      # Marked "wait" until the document's own 'state' is "ready".
      return scope.update(indication(:wait)) unless @doc['state'] == 'ready'
      # Documents with a truthy 'fork' field are marked "fork" and skipped.
      return scope.update(indication(:fork)) if @doc['fork']

      contributors = wrap_array(Github.contributors(target))
      debug "count: #{contributors.size}"

      contributors.each do |contributor|
        next unless contributor['login']
        # Skip when a users document already has a commits entry for this target.
        next if mongodb.users.find(_id: contributor['login'], 'commits.to' => target).one

        commits = { commits: { to: target, count: contributor['contributions'] } }
        mongodb.users.upsert contributor['login'], '$push' => commits
      end

      scope.update(finished)
    end
  end

  # Stores organisation info together with its member list and upserts
  # every member login into the users collection.
  class Orgs < Task
    def perform
      info = Github.org(target)
      members = extract_members
      scope.update(finished(info.merge!(members: members)))
    end

    private

    # Upserts each member's 'login' into users and returns the wrapped
    # member list (Array#each returns its receiver).
    def extract_members
      members = wrap_array(Github.org_members(target))
      members.each do |member|
        mongodb.users.upsert(member['login'])
      end
    end
  end

  # Upserts the 'login' of every entry returned by Github.orgs for a users
  # document into the orgs collection; progress is tracked in :member_state.
  class Members < Task
    collection_name :users
    indicator :member_state

    def perform
      orgs = wrap_array(Github.orgs(target))
      orgs.each do |org|
        mongodb.orgs.upsert(org['login'])
      end
      scope.update(finished)
    end
  end

  # Queues the entries returned by Github.org_repos for an orgs document;
  # progress is tracked in :repo_state.
  class OrgRepos < Task
    collection_name :orgs
    indicator :repo_state

    def perform
      extract_repos(:org_repos)
      scope.update(finished)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs crawler tasks one after another in an endless loop.
#
# NOTE(review): `retryable_debug`, `sleep_debug` and `debug` are defined
# outside this file; their exact behaviour is assumed from their names.
class Worker
  # Builds a worker and enters its run loop.
  def self.start
    new.run
  end

  # Calls #perform_task repeatedly; only leaves the loop when an exception
  # propagates out of it (Kernel#loop itself swallows StopIteration).
  def run
    loop { perform_task }
  end

  # Takes the next task and performs it inside `retryable_debug`; when no
  # task is available it calls `sleep_debug(1)` instead.
  #
  # Octokit and Faraday client errors are rescued and the current task is
  # paused. The pause is skipped when there is no task, so a rescued error
  # can no longer turn into a NoMethodError on nil.
  def perform_task
    task = Task.next
    retryable_debug do
      if task
        task.debug.perform
      else
        sleep_debug(1)
      end
    end
  rescue Octokit::Error, Faraday::Error::ClientError
    task.pause if task
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment