Skip to content

Instantly share code, notes, and snippets.

@j450h1
Forked from alysonla/active-issues.sql
Created April 28, 2017 07:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j450h1/fc1b1b7b843130a1e79eecc12ac6b6b5 to your computer and use it in GitHub Desktop.
Save j450h1/fc1b1b7b843130a1e79eecc12ac6b6b5 to your computer and use it in GitHub Desktop.
Queries that power the open source section of the 2016 Octoverse report https://octoverse.github.com/
-- Active issues
-- Count of total active issues in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT JSON_EXTRACT_SCALAR(events.payload, '$.issue.id')) AS events_issue_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 10,723,492 active issues
-- Active Orgs
-- Count of total orgs in which any event was triggered (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.org.id) AS events_org_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 331,797 total active organizations
-- Active repositories
-- Count of total repositories in which any event was triggered (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.repo.id) AS events_repo_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 19,410,242 total active repositories
-- Active users
-- Count of total users which triggered an event (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 5,865,497 total active users
--- Total Language Count
--- Count of all recognized programming languages on github
--- Source: GitHub Public Data Set https://cloud.google.com/bigquery/public-data/github
SELECT
COUNT(DISTINCT language.name)
FROM
[bigquery-public-data:github_repos.languages]
-- 316 total languages
-- Most Popular Languages by Pull requests
-- Top programming languages by number of opened pull requests in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') AS events_repo_language,
COUNT(CASE WHEN (events.type = 'PullRequestEvent') THEN 1 ELSE NULL END) AS events_count_pull_request
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
JSON_EXTRACT_SCALAR(events.payload, '$.action') = 'opened'
AND JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
25
-- events_repo_language events_count_pull_request
-- JavaScript 1604219
-- Java 763783
-- Python 744045
-- Ruby 740610
-- HTML 546211
-- PHP 478153
-- C++ 330259
-- CSS 271782
-- C# 229985
-- C 202295
-- Go 188121
-- Shell 143071
-- Objective-C 75478
-- Scala 70216
-- Swift 62284
-- TypeScript 55587
-- CoffeeScript 46216
-- Rust 36874
-- PowerShell 32207
-- Perl 32134
-- Lua 25742
-- R 24909
-- Makefile 24803
-- DM 23531
-- TeX 21782
SELECT
JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') AS events_repo_language,
COUNT(CASE WHEN (events.type = 'PullRequestEvent') THEN 1 ELSE NULL END) AS events_count_pull_request
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2014-09-01'),TIMESTAMP('2015-08-31'))) AS events
WHERE
JSON_EXTRACT_SCALAR(events.payload, '$.action') = 'opened'
AND JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
25
-- Repository Language Events Count Pull Request
-- JavaScript 813911
-- Python 483174
-- Java 467928
-- Ruby 447256
-- PHP 335357
-- C++ 223951
-- CSS 199677
-- HTML 159518
-- C 137242
-- C# 122476
-- Go 97420
-- Shell 81393
-- Objective-C 55272
-- Scala 45729
-- CoffeeScript 39161
-- Perl 29687
-- Rust 29199
-- Lua 18466
-- Swift 17193
-- TypeScript 15876
-- R 15576
-- DM 15423
-- Clojure 15046
-- Haskell 14912
-- TeX 13797
-- Top licenses used on open source projects
-- List of the most commonly used licenses on GitHub open source projects
-- Source: GitHub Public Data Set https://cloud.google.com/bigquery/public-data/github
SELECT
license,
COUNT(*)
FROM
[bigquery-public-data:github_repos.licenses]
GROUP BY
1
ORDER BY
2 DESC
LIMIT
5
--- Organizations with the most Contributors
--- List of organizations that have the most unique users contributing via commits or opening or commenting on an issue or PR in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.org.login AS events_org_login,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'CommitCommentEvent'
OR events.type = 'IssueCommentEvent'
OR events.type = 'IssueEvent'
OR events.type = 'PullRequestEvent'
OR events.type = 'PullRequestReviewCommentEvent'
OR events.type = 'PushEvent'
AND events.org.login IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_org_login events_actor_count
-- Microsoft 16419
-- facebook 15682
-- docker 14059
-- angular 12841
-- google 12140
-- atom 9698
-- FortAwesome 9617
-- elastic 7220
-- apache 6999
-- npm 6815
--- Repositories with the most Contributors
--- List of repositories that have the most unique users contributing via commits or opening or commenting on an issue or PR in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'CommitCommentEvent'
OR events.type = 'PushEvent'
OR events.type = 'IssueCommentEvent'
OR events.type = 'PullRequestEvent'
OR events.type = 'PullRequestReviewCommentEvent'
OR events.type = 'IssuesEvent'
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- FortAwesome/Font-Awesome 10654
-- docker/docker 8253
-- npm/npm 7041
-- jlord/patchwork 6806
-- facebook/react-native 6250
-- Microsoft/vscode 5855
-- atom/atom 5745
-- FreeCodeCamp/FreeCodeCamp 5622
-- angular/material 4355
-- angular/angular 4217
-- Repositories with the most Forks
--- List of repositories that have the most Forks in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'ForkEvent'
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- jtleek/datasharing 38020
-- octocat/Spoon-Knife 28738
-- rdpeng/ProgrammingAssignment2 27499
-- twbs/bootstrap 15669
-- tensorflow/tensorflow 14070
-- rdpeng/ExData_Plotting1 12458
-- udacity/frontend-nanodegree-resume 11553
-- LarryMad/recipes 10229
-- barryclark/jekyll-now 10070
-- angular/angular.js 9334
--- Repositories with the most code reviewers
--- List of repositories that have the most unique users reviewing code via PullRequestReviewCommentEvent in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'PullRequestReviewCommentEvent'
AND events.org.login) IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- Homebrew/homebrew 523
-- kubernetes/kubernetes 503
-- apache/spark 478
-- nodejs/node 358
-- rails/rails 341
-- NixOS/nixpkgs 331
-- docker/docker 330
-- rust-lang/rust 329
-- symfony/symfony 323
-- facebook/react-native 307
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment