Last active
November 19, 2015 22:17
-
-
Save jpmckinney/56632f96808ef1ee326d to your computer and use it in GitHub Desktop.
Prints the top users who contribute to the most civic tech repositories outside their home organization(s).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prints the top 10 users who contribute to the most civic tech repositories
# outside their home organization(s). If you'd like to print the top 15, run:
#
#     ruby stefan_baack.rb 15
#
# To see the top 3 organizations to which each user contributed, run:
#
#     ruby stefan_baack.rb 10 3
#
# @see http://sbaack.com/2015/11/19/scraping-the-global-civic-tech-community-on-github-part-2.html
# @see https://www.dropbox.com/sh/y2dcr9dy96xg7uc/AAAobZh8G36C4gtuJfCt7z08a?dl=0
require 'set'
require 'nokogiri'
# Additional "home" organizations per user, keyed by GitHub login.
#
# Some real organizations have multiple GitHub organizations, and some users
# have been employed at multiple organizations over time, so the single
# organization a user contributed to most is not always their only home.
secondary = {
  'chrismytton' => ['everypolitician'],
  'danmelton' => ['Granicus'],
  'davewhiteland' => ['everypolitician'],
  'duncanparkes' => ['everypolitician'],
  'pudo' => ['CodeForAfrica', 'okfde'],
  'sebbacon' => ['mysociety'],
  'stefanw' => ['okfn'],
  'tmtmtmtm' => ['mysociety'],
  'zarino' => ['everypolitician'],
}
# Read and parse the GEXF contributor network. The path is relative to the
# current working directory.
data = File.read('github_scrape_civic-tech_2015-11-19/contributor-network_2015-11-19_11:38:55.gexf')
document = Nokogiri::XML(data)
# Strip XML namespaces so XPath queries can use bare element names such as
# `//edge`.
document.remove_namespaces!
# Get the number of repositories contributed to per user by organization, as
# { source => { organization => number of edges } }.
counts_by_organization = {}
document.xpath('//edge').each do |edge|
  source = edge[:source]

  # NOTE(review): this takes the first <attvalue> found under the edge and
  # treats its value as the organization name — confirm against the GEXF file.
  organization_attribute = edge.at_xpath('.//attvalue/@value')

  # at_xpath returns nil when nothing matches; report and skip such an edge
  # rather than failing with NoMethodError on nil.
  if organization_attribute.nil?
    warn "Skipping edge from #{source.inspect}: no attvalue found"
    next
  end

  counts_by_organization[source] ||= Hash.new(0)
  counts_by_organization[source][organization_attribute.value] += 1
end
# Eliminate the organization to which the user has contributed the most (e.g.
# day job), along with any organizations listed for the user in `secondary`,
# then total the contributions that remain.
#
# The deletions mutate the per-user hashes inside counts_by_organization in
# place; the report printed afterwards reads those pruned hashes.
counts_excluding_home_organization = {}
counts_by_organization.each do |source, organizations|
  # max_by yields a [organization, count] pair, or nil for an empty hash; the
  # destructuring turns nil into a nil key, which delete ignores.
  home_organization, _count = organizations.max_by { |_, count| count }
  organizations.delete(home_organization)

  secondary.fetch(source, []).each do |organization|
    organizations.delete(organization)
  end

  counts_excluding_home_organization[source] = organizations.values.reduce(0, :+)
end
# Command-line arguments: ARGV[0] is how many users to print (default 10) and
# ARGV[1] is how many organizations to list under each user (default 0).
# Both are parsed up front so that a non-numeric argument raises ArgumentError
# before any output is written.
user_limit = Integer(ARGV[0] || 10)
organization_limit = Integer(ARGV[1] || 0)

# Print the top contributors by repositories outside their home organization(s).
puts 'By the number of repositories contributed to outside their home organization(s):'
top_by_repositories = counts_excluding_home_organization.sort_by { |_, total| -total }.first(user_limit)
top_by_repositories.each do |source, total|
  puts '%3d %s' % [total, source]

  # The user's organizations with the most contributions, largest first.
  top_organizations = counts_by_organization[source].sort_by { |_, count| -count }.first(organization_limit)
  top_organizations.each do |organization, count|
    puts ' %3d %s' % [count, organization]
  end
end

puts '---'

# Print the top contributors by number of distinct organizations.
puts 'By the number of organizations contributed to besides their home organization(s):'
top_by_organizations = counts_by_organization.sort_by { |_, organizations| -organizations.size }.first(user_limit)
top_by_organizations.each do |source, organizations|
  puts '%3d %s' % [organizations.size, source]
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment