Skip to content

Instantly share code, notes, and snippets.

@SunDi3yansyah
Last active November 29, 2018 17:51
Show Gist options
  • Save SunDi3yansyah/1446a7a259eb3926c4df4e73b3cc0cdd to your computer and use it in GitHub Desktop.
Save SunDi3yansyah/1446a7a259eb3926c4df4e73b3cc0cdd to your computer and use it in GitHub Desktop.
Find Gems on Repository with Scraping

Find Gems on Repository with Scraping

Get the list of gems installed locally

~/ruby via 💎 v2.5.3 
➜ gem list --no-versions

Then put the gem names into the GEMS array in gems.rb

~/ruby via 💎 v2.5.3 
➜ ruby get-repo-gems.rb

screenshot for Find Gems on Repository with Scraping

# Names of the gems whose repositories should be looked up.
# Frozen so the shared constant cannot be mutated by accident.
GEMS = %w[
  active_median active_model_serializers annotate api-pagination awesome_print
  aws-sdk barby bcrypt betterlorem bigdecimal
  bootsnap bootstrap-sass bundler-audit byebug capistrano-rails
  capybara chartkick chromedriver-helper clipboard-rails cloudinary
  cocoon coderay coffee-rails data-confirm-modal data-confirm-modal-semantic-ui
  device_detector devise dotenv-rails exception_notification faker
  faye font-awesome-sass friendly_id groupdate httparty
  jbuilder jquery-rails jwt kaminari listen
  mechanize meta_request mini_magick mini_racer mysql2
  omniauth omniauth-facebook omniauth-github omniauth-gitlab omniauth-google-oauth2
  omniauth-linkedin omniauth-oauth2 omniauth-twitter paperclip pg
  puma pusher rack-cache rack-cors rails
  rails-controller-testing rails-i18n rails-timeago redcarpet redis
  render_sync request-log-analyzer resque resque-web rest-client
  rqrcode rspec-rails sass-rails sdoc seed_dump
  selenium-webdriver sentry-raven simplecov sitemap_generator slack-notifier
  spring spring-watcher-listen sprockets-rails sqlite3 telegram-bot
  therubyracer thin toastr-rails turbolinks turnout
  tzinfo-data uglifier validates_email_format_of veritrans web-console
  webpacker whenever wicked_pdf will_paginate wkhtmltopdf-binary
].freeze
# = Find Gems on Repository
# Author: Cahyadi Triyansyah <sundi3yansyah@gmail.com>
# Wall-clock start time; subtracted from the end time below to print the elapsed duration.
beginning_time = Time.now
require 'nokogiri'
require 'open-uri'
require 'uri'
require 'cgi'
require 'terminal-table'
require 'awesome_print'
require_relative 'gems'
# Wraps +text+ in an ANSI escape sequence so terminals render it with the
# given attribute, then resets formatting.
#
# @param text [#to_s] content to decorate
# @param color_code [#to_s] numeric code placed between "\e[" and "m"
# @return [String] the decorated string
def colorize(text, color_code)
  format("\e[%sm%s\e[0m", color_code, text)
end
# Renders +text+ in red by delegating to colorize.
#
# @param text [#to_s] content to decorate
# @return [String] whatever colorize returns for code 31
def red(text)
  ansi_red = 31 # ANSI code for a red foreground
  colorize(text, ansi_red)
end
# Looks up every gem named in GEMS on rubygems.org and prints a table of the
# GitHub repositories found for it.
#
# For each gem, the rubygems.org page is fetched and every
# 'iframe.gem__ghbtn' element is read; the 'user' and 'repo' parameters of the
# iframe's src query string form the GitHub URL. When rubygems.org answers
# with an HTTP error, GitHub search is used instead and each result link is
# listed as a "(maybe)" candidate. When that search fails too, both errors are
# shown in red.
#
# @return [nil] the result of +puts+; the table is written to standard output
def find_gems
  result = []
  GEMS.each do |g|
    rubygems_url = "https://rubygems.org/gems/#{g}"
    begin
      # URI.open replaces Kernel#open for URLs, which was deprecated in
      # Ruby 2.7 and removed in Ruby 3.0.
      rubygems = Nokogiri::HTML(URI.open(rubygems_url))
      rubygems.css('iframe.gem__ghbtn').each do |button|
        # Parse the query string once and read both parameters from it.
        params = CGI.parse(URI(button.attributes['src'].value).query)
        user = params['user'].join
        repo = params['repo'].join
        result << [g, rubygems_url, "https://github.com/#{user}/#{repo}"]
      end
    rescue OpenURI::HTTPError => exception
      begin
        # Escape the gem name so it is always a valid query-string value.
        search_url = "https://github.com/search?q=#{CGI.escape(g)}"
        find_on_github = Nokogiri::HTML(URI.open(search_url))
        find_on_github.css('a.v-align-middle').each do |anchor|
          result << [g, red('404 Not Found'), "https://github.com#{anchor.attributes['href'].value} (maybe)"]
        end
      rescue OpenURI::HTTPError => exception_github
        result << [red(g), red(exception), red(exception_github)]
      end
    end
  end
  puts Terminal::Table.new title: 'Find Gems on Repository', headings: ['Name', 'Rubygems Repository', 'GitHub Repository'], rows: result
end
# Run the lookup, then report how long the script has been running.
find_gems
end_time = Time.now
# Format the duration explicitly with two decimals. Slicing the first four
# characters of Float#to_s produced output such as "123." for long runs or
# "5.0e" for very short ones.
puts "Time elapsed (#{format('%.2f', end_time - beginning_time)} sec)"
# = Find Gems Specification on Repository (local version)
# Author: Cahyadi Triyansyah <sundi3yansyah@gmail.com>
# Wall-clock start time; subtracted from the end time below to print the elapsed duration.
beginning_time = Time.now
require 'terminal-table'
require 'awesome_print'
# Surrounds +text+ with an ANSI escape prefix built from +color_code+ and the
# reset sequence, so the styling does not leak into later output.
#
# @param text [#to_s] content to decorate
# @param color_code [#to_s] numeric code placed between "\e[" and "m"
# @return [String] the decorated string
def colorize(text, color_code)
  start_sequence = "\e[#{color_code}m"
  reset_sequence = "\e[0m"
  "#{start_sequence}#{text}#{reset_sequence}"
end
# Shortcut for colorize with the red foreground code.
#
# @param text [#to_s] content to decorate
# @return [String] whatever colorize returns for code 31
def red(text)
  red_foreground = 31
  colorize(text, red_foreground)
end
# Prints a table listing every locally installed gem next to its homepage.
#
# The data is read in-process through Gem::Specification instead of spawning
# `gem spec NAME | grep homepage:` once per gem. That avoids one subprocess
# per gem and the text parsing of the YAML output, which could pick up
# unrelated lines containing "homepage:".
#
# A gem whose homepage is missing or blank falls back to the 'homepage_uri'
# metadata entry; when that is absent too, a red '404 Not Found' is shown.
#
# @return [nil] the result of +puts+; the table is written to standard output
def find_gems
  # A gem may be installed in several versions; report only the newest one.
  newest_specs = Gem::Specification.group_by(&:name).map do |_name, specs|
    specs.max_by(&:version)
  end

  result = newest_specs.sort_by { |spec| spec.name.downcase }.map do |spec|
    candidates = [spec.homepage, spec.metadata['homepage_uri']]
    homepage = candidates.find { |url| url && !url.strip.empty? }
    [spec.name, homepage || red('404 Not Found')]
  end

  puts Terminal::Table.new title: 'Find Gems Specification on Repository (local version)', headings: ['Name', 'Homepage or Source GitHub Repository'], rows: result
end
# Run the lookup, then report how long the script has been running.
find_gems
end_time = Time.now
# Format the duration explicitly with two decimals. Slicing the first four
# characters of Float#to_s produced output such as "123." for long runs or
# "5.0e" for very short ones.
puts "Time elapsed (#{format('%.2f', end_time - beginning_time)} sec)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment