Backport Cycle Times
#!/usr/bin/env ruby
# vim: set expandtab sts=2 ts=2 sw=2:
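#
# Walks merged pull requests across a set of Skillshare repos and derives six
# cycle-time timestamps per PR: first commit, opened/ready for review, first
# review, approved, deployed to prod, and merged. Each PR becomes one CSV row
# on stdout (formatted for pasting into a Google Sheet); progress and skip
# reasons go to stderr.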
$stdout.sync = true
$stderr.sync = true
require 'octokit'
require 'dalli'
require 'json'
require 'time'
require 'csv'
require 'pp'
require 'irb'
require 'forwardable'
GOOGLE_SHEET_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
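# Timeline event types the script cares about. The timeline is filtered down
# to these before any searching, so every event looked up later (including
# 'ready_for_review') must appear in this list.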
CYCLE_TIME_EVENTS = [
  'head_ref_force_pushed',
  'committed',
  'ready_for_review',
  'reviewed',
  'review_requested',
  'deployed',
  'merged'
]
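# Slimmed-down PR record. The raw Octokit resources are too large to cache
# sensibly, so only the fields the script actually uses are kept (see the
# "drastically smaller object" note below).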
PullRequest = Struct.new(:number, :html_url, :base, :head, :user, :merged_at, :merge_commit_sha, :title, :created_at, keyword_init: true) do
  def merged?
    !!merged_at
  end
end
class Stats
  extend Forwardable

  def_delegators :stats, :to_s, :inspect

  attr_reader :stats

  def initialize
    @stats = { }
  end

  def increment(key)
    @stats[key] = @stats.fetch(key, 0) + 1
  end
end
stats = Stats.new
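# Cache GitHub API responses in a local memcached so repeated runs don't
# re-fetch everything. Assumes memcached is listening on localhost:11211.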
cache = Dalli::Client.new('localhost:11211', {
  namespace: :cycle_time,
  compress: true
})
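# Authenticated GitHub client; ENV.fetch raises if
# SKILLSHARE_GITHUB_ACCESS_TOKEN is not exported.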
github = Octokit::Client.new({
  access_token: ENV.fetch('SKILLSHARE_GITHUB_ACCESS_TOKEN')
})
repos = [
  'skillshare',
  # 'chatops',
  'nginx-router',
  'api-gateway',
  'skillshare-api',
  'skillshare-web',
  'analytics-router'
]
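# Main loop: for each repo, list closed PRs against master, then walk each
# merged PR's timeline in chronological order, collecting one timestamp per
# cycle stage. PRs that can't produce a full set of timestamps are skipped
# and tallied in the stats.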
repos.each do |name|
  # Cache the PR list for four hours; re-runs hit memcached instead of the API
  pull_requests = cache.fetch([ name, :pulls, :v6 ], 60 * 60 * 4) do
    $stderr.puts("Fetching PRs for #{name}")
    list = github.pull_requests("skillshare/#{name}", {
      state: 'closed',
      base: 'master',
      per_page: 100
    })
    # Follow Octokit's pagination links until every page is consumed
    while github.last_response.rels[:next] do
      $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
      list.concat(github.get(github.last_response.rels[:next].href))
    end
    # XXX: Create a drastically smaller object to fit into Memcached
    list.map do |item|
      PullRequest.new({
        title: item.title,
        number: item.number,
        html_url: item.html_url,
        base: item.base.label.split(':')[1],
        head: item.head.label.split(':')[1],
        user: item.user.login,
        created_at: item.created_at,
        merged_at: item.merged_at,
        merge_commit_sha: item.merge_commit_sha
      })
    end
  end
  pull_requests.each do |pr|
    stats.increment(:processed)

    if !pr.merged?
      stats.increment(:skip_unmerged)
      $stderr.puts("Skipping unmerged #{pr.title} #{pr.html_url}")
      next
    end

    # if pr.title =~ /no merge/i
    #   stats.increment(:skip_no)
    #   $stderr.puts("Skipping NO MERGE #{pr.title} #{pr.html_url}")
    #   next
    # end

    $stderr.puts("Processing #{pr.title} #{pr.html_url}")

    timeline = cache.fetch([ name, pr.number, :timeline ]) do
      $stderr.puts("Loading timeline for #{pr.html_url}")
      list = github.issue_timeline("skillshare/#{name}", pr.number, accept: 'application/vnd.github.mockingbird-preview')
      while github.last_response.rels[:next] do
        $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
        list.concat(github.get(github.last_response.rels[:next].href, accept: 'application/vnd.github.mockingbird-preview'))
      end
      list
    end

    # XXX: Sanitize the timeline by removing the stuff we don't need and
    # sorting it. It's surprising that the data is not sorted out of the
    # API...ya know, because it's a timeline.
    timeline = timeline.select do |entry|
      CYCLE_TIME_EVENTS.include?(entry.event)
    end.sort do |a, b|
      (a.submitted_at || a.created_at || a.author.date) <=> (b.submitted_at || b.created_at || b.author.date)
    end
    timestamps = [ ]

    # XXX: Find the first event that relates to a commit
    commit_event = timeline.find do |entry|
      entry.event == 'committed' || entry.event == 'head_ref_force_pushed'
    end
    commit_timestamp = commit_event.event == 'committed' ? commit_event.author.date : commit_event.created_at

    # XXX: Commit events may land chronologically after the PR is opened,
    # likely due to force pushes and rebases. Either way, the first timestamp
    # must be the chronologically earliest point in the timeline, so pick the
    # earlier of the PR and commit timestamps.
    if pr.created_at < commit_timestamp
      $stderr.puts("Preferring PR timestamp over commit timestamp for #{pr.html_url}")
      stats.increment(:warn_commit_timestamp_out_of_order)
      timestamps << pr.created_at
    else
      timestamps << commit_timestamp
    end
# XXX: find "ready for review" or "review requested" event after the first commit
ready_for_review = timeline.find do |entry|
entry.event == 'ready_for_review' || entry.event == 'review_requested' && entry.created_at >= timestamps.last
end
# XXX: If the PR was opened as a draft then it will have a ready for review event,
# otherwise use the first commit timestamps since there's no way to determine
# when the code entered the ready state.
if ready_for_review
timestamps << ready_for_review.created_at
else
timestamps << timestamps.last
end
    # XXX: Now find the first review after the "ready for review" timestamp.
    # It's key to filter on events after the previous event because code reviews may
    # happen before they're explicitly requested. So, we care about the first code
    # review _after_ the code is marked as ready for review.
    first_review_event = timeline.find do |entry|
      entry.event == 'reviewed' && entry.submitted_at >= timestamps.last
    end
    if first_review_event
      timestamps << first_review_event.submitted_at
    else
      $stderr.puts("#{pr.html_url} no review events. Skipping.")
      stats.increment(:skip_no_reviews)
      next
    end
    required_reviews = 2

    # XXX: Find all the approved reviews later in the timeline
    approved_review_events = timeline.select do |entry|
      entry.event == 'reviewed' && entry.state == 'approved' && entry.submitted_at >= timestamps.last
    end
    if approved_review_events.length < required_reviews
      $stderr.puts("#{pr.html_url} did not receive the required approvals. Skipping.")
      stats.increment(:skip_no_approved_reviews)
      next
    else
      # The Nth approval marks the moment the PR became mergeable
      timestamps << approved_review_events[required_reviews - 1].submitted_at
    end
    # XXX: Find all the deployments after the approved reviews
    deploy_events = timeline.select do |entry|
      entry.event == 'deployed' && entry.created_at >= timestamps.last
    end
    if deploy_events.empty?
      $stderr.puts("#{pr.html_url} was never deployed. Skipping.")
      stats.increment(:skip_never_deployed)
      next
    end
    # Cross-check against the Deployments API for prod deploys of this PR's head ref
    deployments = cache.fetch([ name, pr.head, :deployments, :v2 ]) do
      $stderr.puts("Fetching deployments for #{name} #{pr.head}")
      list = github.deployments("skillshare/#{name}", {
        ref: pr.head,
        environment: :prod,
      })
      while github.last_response.rels[:next] do
        $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
        list.concat(github.get(github.last_response.rels[:next].href))
      end
      list.sort_by(&:created_at)
    end
    if deployments.empty?
      $stderr.puts("No prod deployments found for #{pr.html_url}")
      stats.increment(:skip_no_prod_deploys)
      next
    end
    deployment = deployments.find do |candidate|
      candidate.created_at >= timestamps.last
    end
    if deployment.nil?
      $stderr.puts("No prod deployment found for #{pr.html_url}")
      stats.increment(:skip_no_matching_prod)
      next
    end
    timestamps << deployment.created_at

    merge_event = timeline.find do |entry|
      entry.event == 'merged'
    end
    timestamps << merge_event.created_at

    # Sanity checks: all six stages present and in chronological order
    raise "#{timestamps.length} calculated. 6 required." unless timestamps.length == 6
    in_order = timestamps.reject(&:nil?).sort
    calculated = timestamps.reject(&:nil?)
    if calculated != in_order
      $stderr.puts("Broken timeline: #{timeline.map(&:event)}")
      raise "#{timestamps} are non-chronological for #{pr.html_url}"
    end
    stats.increment(:ok)
    $stdout.puts([
      name,
      pr.base,
      pr.head,
      pr.user,
      timestamps[0].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # First Commit
      timestamps[1].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # Opened
      timestamps[2] ? timestamps[2].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # First Review
      timestamps[3] ? timestamps[3].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # Approved
      timestamps[4] ? timestamps[4].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # Deployed
      timestamps[5].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # Merged
      pr.html_url
    ].to_csv)
  end
end
$stderr.puts(stats)
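Usage sketch (assumes a local memcached is running and the token has read
access to the skillshare org; the filename is arbitrary):

SKILLSHARE_GITHUB_ACCESS_TOKEN=<token> ruby cycle_times.rb > cycle_times.csv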