Last active
March 9, 2021 18:27
-
-
Save periode/b7240e5797933d2dbae2dea30716a841 to your computer and use it in GitHub Desktop.
Uses the GitHub API to get issues data from the JS styleguide repositories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
require 'octokit' | |
require 'json' | |
require 'date' | |
# Repositories ("owner/name" slugs) whose issues are fetched and analysed.
# Frozen so the list cannot be mutated by accident at runtime.
REPOS = %w[airbnb/javascript standard/standard prettier/prettier].freeze
# Starts out unset; it is handed to fetch_issues, which builds a client when given nil.
client = nil
# Issues created strictly before this instant are counted as "up to" the time limit.
TIME_LIMIT = DateTime.new(2020, 9, 1, 0, 0)
# Minimum number of comments for an issue to be reported as highly commented.
COMMENTS_THRESHOLD = 15
# Summarises the cached issue dump of +repo+ and writes a plain-text report.
#
# Reads "<owner>_<name>.json" from the current directory, prints the report to
# stdout and saves it, followed by an "EOF" marker, to
# "<owner>_<name>_results.txt".
#
# @param repo [String] repository slug in "owner/name" form
# @param top [Integer] how many of the most commented issues to list
# @return [Integer] number of bytes written to the results file
def parse_issues(repo, top: 10)
  puts "\nparsing issues for #{repo}:"
  basename = repo.sub '/', '_'
  issues = JSON.parse(File.read("#{basename}.json"))

  number_of_issues_until_time_limit = issues.count do |issue|
    DateTime.parse(issue['created_at']) < TIME_LIMIT
  end
  # to_i keeps a missing/null comment count from raising, matching the threshold test below.
  number_of_total_comments = issues.sum { |issue| issue['comments'].to_i }
  highly_commented_issues = issues.select do |issue|
    issue['comments'].to_i >= COMMENTS_THRESHOLD
  end

  # NOTE(review): the numerator covers every issue in the dump while the
  # denominator only counts issues created before TIME_LIMIT — confirm which
  # population the average is meant to describe.
  # Integer division, guarded so a dump with no issue before the limit does not raise.
  comments_per_issue =
    if number_of_issues_until_time_limit.zero?
      0
    else
      number_of_total_comments / number_of_issues_until_time_limit
    end

  # Most commented first; `first` copes with fewer than `top` candidates.
  most_commented = highly_commented_issues
                   .sort_by { |issue| -issue['comments'].to_i }
                   .first(top)

  lines = [
    "- found #{number_of_issues_until_time_limit} issues up to #{TIME_LIMIT.strftime('%d/%m/%Y')}.",
    "- found #{number_of_total_comments} total comments. that's about #{comments_per_issue} comments per issue.",
    "- found #{highly_commented_issues.length} highly commented issues (#{COMMENTS_THRESHOLD} comments or more)",
    "- the top #{top} most commented issues are:"
  ]
  most_commented.each do |issue|
    lines << "- - ##{issue['number']} - #{issue['comments']} comments - #{issue['title']} - (permalink: #{issue['url']})"
  end
  final_data = lines.join("\n") + "\n"

  puts final_data
  # The EOF marker only goes into the file, not to stdout.
  File.write("#{basename}_results.txt", "#{final_data}EOF")
end
# Builds an authenticated, auto-paginating Octokit client from the command line
# (./fetcher.rb USERNAME ACCESS_TOKEN). Aborts with a usage message when the
# two arguments are not supplied.
def client_from_argv
  unless ARGV.length == 2
    # abort prints to stderr and exits with a non-zero status.
    abort "github credentials are required to increase the rate limit of their API! " \
          "please provide a username and an access token:\n\n" \
          "./fetcher.rb USERNAME ACCESS_TOKEN"
  end

  username, token = ARGV
  # The token is a secret, so only the username is echoed.
  puts "authenticating as #{username}"
  client = Octokit::Client.new(login: username, password: token)
  # Requests the authenticated user so that bad credentials fail here rather than mid-fetch.
  client.user.login
  puts "...success!"
  client.auto_paginate = true
  client
end

# Copies the fields of interest out of one issue object returned by the API
# into a plain Hash that can be dumped as JSON.
def serialize_issue(i)
  {
    id: i.id,
    created_at: i.created_at,
    url: i.html_url,
    json_url: i.url,
    number: i.number,
    state: i.state,
    title: i.title,
    user_id: i.user.id,
    user_name: i.user.login,
    user_type: i.user.type,
    comments: i.comments.to_i,
    author_association: i.author_association
  }
end

# Downloads the issues of +repo+ from the GitHub API, caches a trimmed-down
# copy as "<owner>_<name>.json" in the current directory, then runs
# parse_issues on it.
#
# When +client+ is nil, one is built from the command-line arguments. That
# client is local to this call: the caller's variable is not updated, so every
# call that receives nil authenticates again.
#
# @param repo [String] repository slug in "owner/name" form
# @param client [Octokit::Client, nil] an already configured client, or nil
# @return whatever parse_issues returns
def fetch_issues(repo, client)
  client ||= client_from_argv

  puts "fetching #{repo}..."
  # we get issues, since the api treats PRs as a subset of issues
  issues = client.issues repo, query: { per_page: 100, state: :all }

  # need to build another data structure,
  # since github doesn't make it easy to manipulate their data offline
  # (they return an array of Sawyer::Resource objects)
  serialized = issues.map do |i|
    issue = serialize_issue(i)
    puts issue
    issue
  end

  File.write("#{repo.sub '/', '_'}.json", serialized.to_json)
  parse_issues(repo)
end
# Entry point: reuse the JSON dump already on disk for a repository when there
# is one, otherwise download the issues first.
REPOS.each do |repo|
  cached_dump = "#{repo.sub '/', '_'}.json"
  File.exist?(cached_dump) ? parse_issues(repo) : fetch_issues(repo, client)
end
# written by pierre depaz in 2020
# no rights reserved whatsoever
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment