periode/fetch.rb

## fetch.rb
#!/usr/bin/ruby

require 'octokit'
require 'json'
require 'date'

# Repositories ("owner/name" slugs) whose issues are fetched and analysed.
REPOS = %w[airbnb/javascript standard/standard prettier/prettier]

# Issues created strictly before this instant are counted in the report.
TIME_LIMIT = DateTime.new(2020, 9, 1, 0, 0)
# Minimum number of comments for an issue to count as "highly commented".
COMMENTS_THRESHOLD = 15

# API client; stays nil here and is handed to fetch_issues as-is.
client = nil

# Builds a short report from the issues previously saved for +repo+.
#
# Reads "<owner>_<name>.json" from the current directory, prints the report
# and writes it, followed by "EOF", to "<owner>_<name>_results.txt".
#
# @param repo [String] repository slug such as "airbnb/javascript"
# @param top [Integer] how many of the most commented issues to list
# @return [Integer] whatever File.write returns for the results file
def parse_issues(repo, top: 10)
  puts "\nparsing issues for #{repo}:"

  basename = repo.sub('/', '_')
  issues = JSON.parse(File.read("#{basename}.json"))

  number_of_issues_until_time_limit = issues.count do |issue|
    DateTime.parse(issue['created_at']) < TIME_LIMIT
  end

  # The comment count is coerced with to_i everywhere (threshold, sum and
  # sort), so a missing count is consistently read as 0.
  number_of_total_comments = issues.sum { |issue| issue['comments'].to_i }
  highly_commented_issues = issues.select do |issue|
    issue['comments'].to_i >= COMMENTS_THRESHOLD
  end

  # NOTE(review): the total covers every saved issue while the divisor only
  # counts issues created before TIME_LIMIT; kept as in the original report.
  # Guard against a repository with no issue before the time limit.
  comments_per_issue =
    if number_of_issues_until_time_limit.zero?
      0
    else
      number_of_total_comments / number_of_issues_until_time_limit
    end

  # Most commented first; `first` copes with fewer than `top` candidates.
  top_issues = highly_commented_issues
               .sort_by { |issue| -issue['comments'].to_i }
               .first(top)

  report = [
    "- found #{number_of_issues_until_time_limit} issues up to #{TIME_LIMIT.strftime('%d/%m/%Y')}.",
    "- found #{number_of_total_comments} total comments. that's about #{comments_per_issue} comments per issue.",
    "- found #{highly_commented_issues.length} highly commented issues (#{COMMENTS_THRESHOLD} comments or more)",
    "- the top #{top} most commented issues are:"
  ]
  top_issues.each do |issue|
    report << "- - ##{issue['number']} - #{issue['comments']} comments - #{issue['title']} - (permalink: #{issue['url']})"
  end

  final_data = "#{report.join("\n")}\n"
  puts final_data

  File.write("#{basename}_results.txt", "#{final_data}EOF", mode: 'w+')
end

# Downloads the issues of +repo+ through +client+, saves a trimmed-down copy
# to "<owner>_<name>.json" in the current directory and then runs
# parse_issues on it.
#
# @param repo [String] repository slug such as "airbnb/javascript"
# @param client [Object, nil] an object responding to `issues`; when nil, an
#   Octokit client is built from the two command-line arguments
def fetch_issues(repo, client)
  if client.nil?
    if ARGV.length != 2
      # abort writes to stderr and exits with a non-zero status.
      abort "github credentials are required to increase the rate limit of their API! " \
            "please provide a username and an access token:\n\n  #{$PROGRAM_NAME} USERNAME ACCESS_TOKEN"
    end

    username, token = ARGV

    # The token is a secret: only the username is echoed.
    puts "authenticating with #{username}"

    # NOTE: this assignment is local to the method. A caller that keeps
    # passing nil gets a fresh, re-authenticated client on every call.
    client = Octokit::Client.new(:login => username, :password => token)
    client.user.login # fails early if the credentials are rejected

    puts "...success!"

    client.auto_paginate = true
  end

  puts "fetching #{repo}..."

  # we get issues, since the api treats PRs as a subset of issues
  issues = client.issues(repo, query: { per_page: 100, state: :all })

  # need to build another data structure,
  # since github doesn't make it easy to manipulate their data offline
  # (they return an array of Sawyer::Resource objects)
  serialized = issues.map do |i|
    author = i.user # may be nil; the user_* fields are then stored as null
    issue = {
      id: i.id,
      created_at: i.created_at,
      url: i.html_url,
      json_url: i.url,
      number: i.number,
      state: i.state,
      title: i.title,
      user_id: author&.id,
      user_name: author&.login,
      user_type: author&.type,
      comments: i.comments.to_i,
      author_association: i.author_association
    }
    puts issue
    issue
  end

  File.write("#{repo.sub '/', '_'}.json", serialized.to_json, mode: 'w+')

  parse_issues(repo)
end

# For every repository: reuse the saved "<owner>_<name>.json" when it exists,
# otherwise fetch the issues first (fetch_issues parses them afterwards).
REPOS.each do |repo|
  saved_issues = "#{repo.sub '/', '_'}.json"
  next parse_issues(repo) if File.exist?(saved_issues)

  fetch_issues(repo, client)
end

# written by pierre depaz in 2020
# no rights reserved whatsoever
	#!/usr/bin/ruby

	require 'octokit'
	require 'json'
	require 'date'

	# Repositories ("owner/name" slugs) whose issues are fetched and analysed.
	REPOS = %w[airbnb/javascript standard/standard prettier/prettier]

	# Issues created strictly before this instant are counted in the report.
	TIME_LIMIT = DateTime.new(2020, 9, 1, 0, 0)
	# Minimum number of comments for an issue to count as "highly commented".
	COMMENTS_THRESHOLD = 15

	# API client; stays nil here and is handed to fetch_issues as-is.
	client = nil

	# Builds a short report from the issues previously saved for +repo+.
	#
	# Reads "<owner>_<name>.json" from the current directory, prints the report
	# and writes it, followed by "EOF", to "<owner>_<name>_results.txt".
	#
	# @param repo [String] repository slug such as "airbnb/javascript"
	# @param top [Integer] how many of the most commented issues to list
	# @return [Integer] whatever File.write returns for the results file
	def parse_issues(repo, top: 10)
	  puts "\nparsing issues for #{repo}:"

	  basename = repo.sub('/', '_')
	  issues = JSON.parse(File.read("#{basename}.json"))

	  number_of_issues_until_time_limit = issues.count do |issue|
	    DateTime.parse(issue['created_at']) < TIME_LIMIT
	  end

	  # The comment count is coerced with to_i everywhere (threshold, sum and
	  # sort), so a missing count is consistently read as 0.
	  number_of_total_comments = issues.sum { |issue| issue['comments'].to_i }
	  highly_commented_issues = issues.select do |issue|
	    issue['comments'].to_i >= COMMENTS_THRESHOLD
	  end

	  # NOTE(review): the total covers every saved issue while the divisor only
	  # counts issues created before TIME_LIMIT; kept as in the original report.
	  # Guard against a repository with no issue before the time limit.
	  comments_per_issue =
	    if number_of_issues_until_time_limit.zero?
	      0
	    else
	      number_of_total_comments / number_of_issues_until_time_limit
	    end

	  # Most commented first; `first` copes with fewer than `top` candidates.
	  top_issues = highly_commented_issues
	               .sort_by { |issue| -issue['comments'].to_i }
	               .first(top)

	  report = [
	    "- found #{number_of_issues_until_time_limit} issues up to #{TIME_LIMIT.strftime('%d/%m/%Y')}.",
	    "- found #{number_of_total_comments} total comments. that's about #{comments_per_issue} comments per issue.",
	    "- found #{highly_commented_issues.length} highly commented issues (#{COMMENTS_THRESHOLD} comments or more)",
	    "- the top #{top} most commented issues are:"
	  ]
	  top_issues.each do |issue|
	    report << "- - ##{issue['number']} - #{issue['comments']} comments - #{issue['title']} - (permalink: #{issue['url']})"
	  end

	  final_data = "#{report.join("\n")}\n"
	  puts final_data

	  File.write("#{basename}_results.txt", "#{final_data}EOF", mode: 'w+')
	end

	# Downloads the issues of +repo+ through +client+, saves a trimmed-down copy
	# to "<owner>_<name>.json" in the current directory and then runs
	# parse_issues on it.
	#
	# @param repo [String] repository slug such as "airbnb/javascript"
	# @param client [Object, nil] an object responding to `issues`; when nil, an
	#   Octokit client is built from the two command-line arguments
	def fetch_issues(repo, client)
	  if client.nil?
	    if ARGV.length != 2
	      # abort writes to stderr and exits with a non-zero status.
	      abort "github credentials are required to increase the rate limit of their API! " \
	            "please provide a username and an access token:\n\n  #{$PROGRAM_NAME} USERNAME ACCESS_TOKEN"
	    end

	    username, token = ARGV

	    # The token is a secret: only the username is echoed.
	    puts "authenticating with #{username}"

	    # NOTE: this assignment is local to the method. A caller that keeps
	    # passing nil gets a fresh, re-authenticated client on every call.
	    client = Octokit::Client.new(:login => username, :password => token)
	    client.user.login # fails early if the credentials are rejected

	    puts "...success!"

	    client.auto_paginate = true
	  end

	  puts "fetching #{repo}..."

	  # we get issues, since the api treats PRs as a subset of issues
	  issues = client.issues(repo, query: { per_page: 100, state: :all })

	  # need to build another data structure,
	  # since github doesn't make it easy to manipulate their data offline
	  # (they return an array of Sawyer::Resource objects)
	  serialized = issues.map do |i|
	    author = i.user # may be nil; the user_* fields are then stored as null
	    issue = {
	      id: i.id,
	      created_at: i.created_at,
	      url: i.html_url,
	      json_url: i.url,
	      number: i.number,
	      state: i.state,
	      title: i.title,
	      user_id: author&.id,
	      user_name: author&.login,
	      user_type: author&.type,
	      comments: i.comments.to_i,
	      author_association: i.author_association
	    }
	    puts issue
	    issue
	  end

	  File.write("#{repo.sub '/', '_'}.json", serialized.to_json, mode: 'w+')

	  parse_issues(repo)
	end

	# For every repository: reuse the saved "<owner>_<name>.json" when it exists,
	# otherwise fetch the issues first (fetch_issues parses them afterwards).
	REPOS.each do |repo|
	  if File.exist?("#{repo.sub '/', '_'}.json")
	    parse_issues(repo)
	  else
	    fetch_issues(repo, client)
	  end
	end

	# written by pierre depaz in 2020
	# no rights reserved whatsoever