Skip to content

Instantly share code, notes, and snippets.

@periode
Last active March 9, 2021 18:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save periode/b7240e5797933d2dbae2dea30716a841 to your computer and use it in GitHub Desktop.
Save periode/b7240e5797933d2dbae2dea30716a841 to your computer and use it in GitHub Desktop.
Uses the GitHub API to get issues data from the JS styleguide repositories
#!/usr/bin/ruby
require 'octokit'
require 'json'
require 'date'
# Repositories (in "owner/name" form) whose issues are fetched and analysed.
# Frozen so the list cannot be mutated by accident while the script runs.
REPOS = %w[airbnb/javascript standard/standard prettier/prettier].freeze
# Handed to fetch_issues; when nil, fetch_issues authenticates using ARGV.
client = nil
# Issues created strictly before this instant are counted by parse_issues.
TIME_LIMIT = DateTime.new(2020, 9, 1, 0, 0)
# Minimum number of comments for an issue to count as "highly commented".
COMMENTS_THRESHOLD = 15
# Reads the cached issues of +repo+ from "<owner>_<name>.json" in the current
# directory, prints a short report and writes the same report (followed by
# "EOF") to "<owner>_<name>_results.txt".
#
# @param repo [String] repository in "owner/name" form
# @param top [Integer] how many of the most commented issues to list
# @return [Integer] the value returned by File.write
def parse_issues(repo, top: 10)
  puts "\nparsing issues for #{repo}:"
  basename = repo.sub('/', '_')
  issues = JSON.parse(File.read("#{basename}.json"))

  issues_until_time_limit = issues.count do |issue|
    DateTime.parse(issue['created_at']) < TIME_LIMIT
  end

  # The comment count is coerced with to_i everywhere so that a missing or
  # null value is treated as 0 instead of raising.
  total_comments = issues.sum { |issue| issue['comments'].to_i }
  highly_commented = issues.select { |issue| issue['comments'].to_i >= COMMENTS_THRESHOLD }

  # NOTE(review): the total covers every issue while the divisor only counts
  # issues created before TIME_LIMIT; kept as-is, confirm this is intended.
  # Guarded so a file without any issue before the limit does not divide by zero.
  comments_per_issue =
    issues_until_time_limit.zero? ? 0 : total_comments / issues_until_time_limit

  # Only list as many entries as exist; indexing a fixed range would hit nil
  # when fewer issues reach the threshold.
  most_commented = highly_commented.sort_by { |issue| -issue['comments'].to_i }.first(top)

  lines = [
    "- found #{issues_until_time_limit} issues up to #{TIME_LIMIT.strftime('%d/%m/%Y')}.",
    "- found #{total_comments} total comments. that's about #{comments_per_issue} comments per issue.",
    "- found #{highly_commented.length} highly commented issues (#{COMMENTS_THRESHOLD} comments or more)",
    "- the top #{top} most commented issues are:"
  ]
  most_commented.each do |issue|
    lines << "- - ##{issue['number']} - #{issue['comments']} comments - #{issue['title']} - (permalink: #{issue['url']})"
  end

  final_data = "#{lines.join("\n")}\n"
  puts final_data
  File.write("#{basename}_results.txt", "#{final_data}EOF")
end
# Downloads every issue of +repo+ through the GitHub API, stores a trimmed
# copy as "<owner>_<name>.json" in the current directory and then runs
# parse_issues on it.
#
# @param repo [String] repository in "owner/name" form
# @param client [#issues, nil] ready-to-use API client; when nil a new
#   authenticated Octokit client is built from ARGV
# @return the result of parse_issues(repo)
def fetch_issues(repo, client)
  # NOTE(review): the client built here is local to this call, so a caller
  # that keeps passing nil re-authenticates for every repository.
  client ||= authenticated_client

  puts "fetching #{repo}..."

  # we get issues, since the api treats PRs as a subset of issues
  issues = client.issues repo, query: { per_page: 100, state: :all }

  # need to build another data structure,
  # since github doesn't make it easy to manipulate their data offline
  # (they return an array of Sawyer::Resource objects)
  serialized = issues.map do |i|
    issue = {
      id: i.id,
      created_at: i.created_at,
      url: i.html_url,
      json_url: i.url,
      number: i.number,
      state: i.state,
      title: i.title,
      # safe navigation: an issue without an attached user must not abort the run
      user_id: i.user&.id,
      user_name: i.user&.login,
      user_type: i.user&.type,
      comments: i.comments.to_i,
      author_association: i.author_association
    }
    puts issue
    issue
  end

  File.write("#{repo.sub('/', '_')}.json", serialized.to_json, mode: 'w+')
  parse_issues(repo)
end

# Builds an auto-paginating Octokit client from the two command-line
# arguments (USERNAME ACCESS_TOKEN). Prints usage to stderr and exits with a
# non-zero status when they are missing.
def authenticated_client
  if ARGV.length != 2
    abort "github credentials are required to increase the rate limit of their API! please provide a username and a password:\n\n\n./fetcher.rb USERNAME ACCESS_TOKEN"
  end

  username, token = ARGV
  # never echo the access token: stdout may end up in logs or shell history
  puts "authenticating as #{username}"
  client = Octokit::Client.new(login: username, password: token)
  client.user.login # performs a request, so bad credentials fail here
  puts "...success!"
  client.auto_paginate = true
  client
end
# Entry point: for each tracked repository, analyse the cached JSON dump when
# it already exists on disk, otherwise download the issues first.
REPOS.each do |repo|
  cached = File.exist?("#{repo.sub '/', '_'}.json")
  cached ? parse_issues(repo) : fetch_issues(repo, client)
end
# written by pierre depaz in 2020
# no rights reserved whatsoever
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment