@justinabrahms
Created May 27, 2022 15:44

github pull request querying

I’m querying review statuses for my team so they can get a sense of who is doing all of the PR approvals and how long reviews are taking.

URL: https://github-url-here/api/graphql
Header: Authorization: Bearer <token>

query {
  search(query: "org:ORGHERE is:pr created:>2021-01-01", type: ISSUE, last: 50) {
    edges {
      node {
        ... on PullRequest {
          url
          title
          createdAt
          author { login }

          reviewDecision

          reviews(first:100) {
            edges {
              node {
                author { login }
                publishedAt
                state

              }
            }
          }
        }
      }
    }
  }
}
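
The analysis script below reads the raw GraphQL response from /tmp/data.json, so the query has to be run and saved first. Here is a minimal sketch of doing that with the requests library; the hostname is the same placeholder as above, the token is assumed to live in a GITHUB_TOKEN environment variable, and query.graphql is a hypothetical file holding the query verbatim. Note the query only fetches the last 50 matching PRs and the first 100 reviews on each, so a larger org would need cursor-based pagination.

import json
import os

import requests

# Hypothetical file containing the GraphQL query above, verbatim.
with open('query.graphql') as qf:
    query = qf.read()

resp = requests.post(
    'https://github-url-here/api/graphql',  # placeholder host from above
    json={'query': query},
    headers={'Authorization': f"Bearer {os.environ['GITHUB_TOKEN']}"},
)
resp.raise_for_status()

# Save the raw response where the analysis script expects it.
with open('/tmp/data.json', 'w') as out:
    json.dump(resp.json(), out)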

Script to analyze the results (it expects the GraphQL response saved at /tmp/data.json)

from datetime import datetime, timedelta
from collections import defaultdict
import json

def toDatetime(s):
    # GitHub returns ISO-8601 timestamps like "2021-06-01T12:34:56Z";
    # %z accepts the trailing "Z" on Python 3.7+.
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")


reviewers = defaultdict(list)
oldestPr = None

with open('/tmp/data.json') as f:
    data = json.load(f)


    for pr in data['data']['search']['edges']:
        created = toDatetime(pr['node']['createdAt'])
        if oldestPr is None:
            oldestPr = created
        oldestPr = min(oldestPr, created)
        oldestByPerson = defaultdict(list)
        for review in pr['node']['reviews']['edges']:
            person = review['node']['author']['login']
            reviewedAt = toDatetime(review['node']['publishedAt'])
            # how long after the PR was opened this review landed
            oldestByPerson[person].append(reviewedAt - created)
        oldestByPerson = dict(oldestByPerson)
        for person, times in oldestByPerson.items():
            # if min(times).total_seconds() > 250000:
            #     print(f"WARNING: Something looks off. Should {pr['node']['title']} really have taken {min(times).total_seconds()} to review?")
            # min(times) is this person's earliest review on the PR, i.e. their time to first review
            reviewers[person].append(min([x.total_seconds() for x in times]))
    print(f"Total data set is {len(data['data']['search']['edges'])} PRs since {oldestPr}")


import statistics as stats
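# `reviewers` maps each login to a list of per-PR time-to-first-review values
# (in seconds); the summary below averages them per reviewer.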
sorted_reviewers = sorted(reviewers.items(), key=lambda x: -len(x[1]))
def print_time(td):
    # Format a timedelta as a short "1d 3h 5m"-style string (despite the
    # name, it returns the string rather than printing it).
    t = td.total_seconds()
    days = int(t/(60*60*24))
    t -= days * 60 * 60 * 24
    hours = int(t/(60*60))
    t -= hours * 60 * 60
    minutes = int(t/60)
    out = []

    if days != 0:
        out.append('%sd' % days)
    if days != 0 or hours != 0:
        out.append('%sh' % hours)
    if days != 0 or hours != 0 or minutes != 0:
        out.append('%sm' % minutes)

    # Fall back to '0m' for durations under a minute, which would otherwise format as an empty string.
    return ' '.join(out) or '0m'
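
# Sanity check (hypothetical values): print_time(timedelta(days=1, hours=3, minutes=5)) == '1d 3h 5m'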

for reviewer, times in sorted_reviewers:
    if len(times) < 2:
        avg = 'N/A'
        stdev = 'N/A'
    else:
        avg = print_time(timedelta(seconds=stats.mean(times)))
        stdev = print_time(timedelta(seconds=stats.stdev(times)))
    print(f"{reviewer}:  {len(times)} reviews.  Time to first review: (avg: {avg}; stddev {stdev})")