Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Queries that power the open source section of the 2016 Octoverse report https://octoverse.github.com/2016/
-- Active issues
-- Count of total active issues in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT JSON_EXTRACT_SCALAR(events.payload, '$.issue.id')) AS events_issue_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 10,723,492 active issues
-- Active Orgs
-- Count of total orgs in which any event was triggered (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.org.id) AS events_org_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 331,797 total active organizations
-- Active repositories
-- Count of total repositories in which any event was triggered (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.repo.id) AS events_repo_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 19,410,242 total active repositories
-- Active users
-- Count of total users which triggered an event (https://developer.github.com/v3/activity/events/types/) in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (SELECT * FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31')))
AS events
-- 5,865,497 total active users
--- Total Language Count
--- Count of all recognized programming languages on github
--- Source: GitHub Public Data Set https://cloud.google.com/bigquery/public-data/github
SELECT
COUNT(DISTINCT language.name)
FROM
[bigquery-public-data:github_repos.languages]
-- 316 total languages
-- Most Popular Languages by Pull requests
-- Top programming languages by number of opened pull requests in the specified time frame
-- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') AS events_repo_language,
COUNT(CASE WHEN (events.type = 'PullRequestEvent') THEN 1 ELSE NULL END) AS events_count_pull_request
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
JSON_EXTRACT_SCALAR(events.payload, '$.action') = 'opened'
AND JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
25
-- events_repo_language events_count_pull_request
-- JavaScript 1604219
-- Java 763783
-- Python 744045
-- Ruby 740610
-- HTML 546211
-- PHP 478153
-- C++ 330259
-- CSS 271782
-- C# 229985
-- C 202295
-- Go 188121
-- Shell 143071
-- Objective-C 75478
-- Scala 70216
-- Swift 62284
-- TypeScript 55587
-- CoffeeScript 46216
-- Rust 36874
-- PowerShell 32207
-- Perl 32134
-- Lua 25742
-- R 24909
-- Makefile 24803
-- DM 23531
-- TeX 21782
SELECT
JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') AS events_repo_language,
COUNT(CASE WHEN (events.type = 'PullRequestEvent') THEN 1 ELSE NULL END) AS events_count_pull_request
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2014-09-01'),TIMESTAMP('2015-08-31'))) AS events
WHERE
JSON_EXTRACT_SCALAR(events.payload, '$.action') = 'opened'
AND JSON_EXTRACT_SCALAR(events.payload, '$.pull_request.base.repo.language') IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
25
-- Repository Language Events Count Pull Request
-- JavaScript 813911
-- Python 483174
-- Java 467928
-- Ruby 447256
-- PHP 335357
-- C++ 223951
-- CSS 199677
-- HTML 159518
-- C 137242
-- C# 122476
-- Go 97420
-- Shell 81393
-- Objective-C 55272
-- Scala 45729
-- CoffeeScript 39161
-- Perl 29687
-- Rust 29199
-- Lua 18466
-- Swift 17193
-- TypeScript 15876
-- R 15576
-- DM 15423
-- Clojure 15046
-- Haskell 14912
-- TeX 13797
-- Top licenses used on open source projects
-- List of the most commonly used licenses on GitHub open source projects
-- Source: GitHub Public Data Set https://cloud.google.com/bigquery/public-data/github
SELECT
license,
COUNT(*)
FROM
[bigquery-public-data:github_repos.licenses]
GROUP BY
1
ORDER BY
2 DESC
LIMIT
5
--- Organizations with the most Contributors
--- List of organizations that have the most unique users contributing via commits or opening or commenting on an issue or PR in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.org.login AS events_org_login,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'CommitCommentEvent'
OR events.type = 'IssueCommentEvent'
OR events.type = 'IssueEvent'
OR events.type = 'PullRequestEvent'
OR events.type = 'PullRequestReviewCommentEvent'
OR events.type = 'PushEvent'
AND events.org.login IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_org_login events_actor_count
-- Microsoft 16419
-- facebook 15682
-- docker 14059
-- angular 12841
-- google 12140
-- atom 9698
-- FortAwesome 9617
-- elastic 7220
-- apache 6999
-- npm 6815
--- Repositories with the most Contributors
--- List of repositories that have the most unique users contributing via commits or opening or commenting on an issue or PR in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'CommitCommentEvent'
OR events.type = 'PushEvent'
OR events.type = 'IssueCommentEvent'
OR events.type = 'PullRequestEvent'
OR events.type = 'PullRequestReviewCommentEvent'
OR events.type = 'IssuesEvent'
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- FortAwesome/Font-Awesome 10654
-- docker/docker 8253
-- npm/npm 7041
-- jlord/patchwork 6806
-- facebook/react-native 6250
-- Microsoft/vscode 5855
-- atom/atom 5745
-- FreeCodeCamp/FreeCodeCamp 5622
-- angular/material 4355
-- angular/angular 4217
-- Repositories with the most Forks
--- List of repositories that have the most Forks in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'ForkEvent'
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- jtleek/datasharing 38020
-- octocat/Spoon-Knife 28738
-- rdpeng/ProgrammingAssignment2 27499
-- twbs/bootstrap 15669
-- tensorflow/tensorflow 14070
-- rdpeng/ExData_Plotting1 12458
-- udacity/frontend-nanodegree-resume 11553
-- LarryMad/recipes 10229
-- barryclark/jekyll-now 10070
-- angular/angular.js 9334
--- Repositories with the most code reviewers
--- List of repositories that have the most unique users reviewing code via PullRequestReviewCommentEvent in the specified time frame
--- Source: githubarchive public data set via Google BigQuery http://githubarchive.org/
SELECT
events.repo.name AS events_repo_name,
COUNT(DISTINCT events.actor.id) AS events_actor_count
FROM (
SELECT
*
FROM TABLE_DATE_RANGE([githubarchive:day.],TIMESTAMP('2015-09-01'),TIMESTAMP('2016-08-31'))) AS events
WHERE
events.type = 'PullRequestReviewCommentEvent'
AND events.org.login) IS NOT NULL
GROUP BY
1
ORDER BY
2 DESC
LIMIT
10
-- events_repo_name events_actor_count
-- Homebrew/homebrew 523
-- kubernetes/kubernetes 503
-- apache/spark 478
-- nodejs/node 358
-- rails/rails 341
-- NixOS/nixpkgs 331
-- docker/docker 330
-- rust-lang/rust 329
-- symfony/symfony 323
-- facebook/react-native 307
@bordz11
Copy link

bordz11 commented Oct 24, 2016

9a5de882-78b8-46b2-b7c0-8b6dae3bc2d8

@mansheik
Copy link

mansheik commented Apr 26, 2022

that is fantastic knowledge about GitHub

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment