Skip to content

Instantly share code, notes, and snippets.

@k9ert
Forked from gcmurphy/github_pom.rb
Last active August 29, 2015 14:10
Show Gist options
  • Save k9ert/931b5ee8412549e28ceb to your computer and use it in GitHub Desktop.
Save k9ert/931b5ee8412549e28ceb to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'pp'
require 'uri'
require 'net/http'
require 'octokit'
require 'nokogiri'
require 'celluloid'
require 'mongo'
require 'logger'
# Script-wide logger; every message is written to standard output.
logger = Logger.new($stdout)
module Enumerable
  # Variant of +map+ that hands every element to its own Celluloid::Future
  # and then gathers the futures' values.
  #
  # All futures are created before the first value is requested, and the
  # results come back in the same order as the elements were enumerated.
  #
  # @yield [elem] computation to run for each element
  # @return [Array] the value of each future, in enumeration order
  def pmap(&block)
    map { |item| Celluloid::Future.new(item, &block) }.map(&:value)
  end
end
# Resolves the raw-content URL of a file shown on a GitHub page.
#
# Downloads the page at +uri+, looks up the anchor with id +raw-url+,
# requests "https://github.com" + that anchor's href, and returns the href of
# the first anchor found in the second response.
# NOTE(review): presumably the second response is GitHub's small redirect
# page whose only link is the final raw URL - confirm against the live site.
#
# @param uri [URI] address of the GitHub page showing the file
# @return [String, nil] href of the first link on the second page, or nil
#   when either page lacks the expected anchor
def fetch_raw(uri)
  page = Nokogiri::HTML(Net::HTTP.get(uri))
  raw_link = page.at_css('a#raw-url')
  # Without an href the follow-up request would hit the bare github.com
  # front page and return an unrelated link, so give up instead.
  return nil unless raw_link && raw_link['href']

  redirect = Nokogiri::HTML(Net::HTTP.get(URI("https://github.com#{raw_link['href']}")))
  target = redirect.at_css('a')
  # at_css yields nil when the response has no anchor; report "not found"
  # rather than raising NoMethodError on nil.
  target && target['href']
end
# Locates a pom.xml link on the page at +uri+ and resolves it via fetch_raw.
#
# The first anchor whose href ends in "/pom.xml" wins; its href is appended
# to "https://github.com" and passed to fetch_raw.
#
# @param uri [URI] address of the page to scan for links
# @return [Object, nil] whatever fetch_raw returns for the first matching
#   link, or nil when no anchor on the page points at a pom.xml
def fetch_pom(uri)
  document = Nokogiri::HTML(Net::HTTP.get(uri))
  pom_anchor = document.css('a').find do |anchor|
    # to_s guards against anchors that carry no href attribute at all
    anchor['href'].to_s.end_with?('/pom.xml')
  end
  return nil unless pom_anchor

  fetch_raw(URI("https://github.com#{pom_anchor['href']}"))
end
# Runs a GitHub repository search through a freshly created Octokit client.
#
# @param file [String] search query handed to Octokit unchanged
# @param pg [Integer] page number, forwarded as the :start_page option
# @return [Object] whatever Octokit::Client#search_repositories returns
def search_github(file, pg)
  paging = { start_page: pg }
  Octokit::Client.new.search_repositories(file, paging)
end
# Main script: page through GitHub search results for 'pom.xml', resolve each
# repository's pom, and store the Maven coordinates found in it in MongoDB
# (database 'github', collection 'poms').

# Element names copied from <dependency> children into the stored document.
gav = ['groupId', 'artifactId', 'version']

# The driver's classes live in the Mongo namespace; the bare MongoClient
# constant is undefined unless Mongo has been included.
@mongo = Mongo::MongoClient.new('localhost', 27017)
@db = @mongo['github']
@collection = @db['poms']

pg = 1
loop do
  repos = search_github('pom.xml', pg)
  # An empty result is still truthy in Ruby, so test for it explicitly;
  # otherwise the loop would keep requesting pages forever.
  break if repos.nil? || repos.to_a.empty?

  logger.info("Processing search result page #{pg}")
  # fetch_pom returns nil for repositories without a pom link; drop those.
  urls = repos.pmap { |repo| fetch_pom(URI(repo.url)) }.compact
  urls.each do |url|
    logger.info("Found pom at #{url}")
    pom = Nokogiri::XML(Net::HTTP.get(URI(url)))
    pom.remove_namespaces!
    entry = { :url => url }
    # NOTE(review): every <dependency> writes to the same three keys, so only
    # the last dependency's coordinates end up in the document - confirm that
    # this is intended.
    pom.xpath('//dependency').each do |dep|
      dep.children.each do |child|
        entry[child.name] = child.content if gav.include?(child.name)
      end
    end
    # Hash has no & operator; intersect the keys to see whether any
    # coordinate was actually captured before inserting.
    @collection.insert(entry) if (entry.keys & gav).any?
  end
  pg += 1
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment