Skip to content

Instantly share code, notes, and snippets.

@eriwen
Last active June 24, 2017 03:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eriwen/397916232694cded92ed13c96a068c6c to your computer and use it in GitHub Desktop.
Save eriwen/397916232694cded92ed13c96a068c6c to your computer and use it in GitHub Desktop.
Gradle BigQueries for analyzing usage
-- Save to build-scripts table
SELECT
f.id id,
f.repo_name repo,
f.path path
FROM
[bigquery-public-data:github_repos.files] f
WHERE
f.path LIKE '%.gradle%'
-- save to imports table
SELECT
id,
content
FROM
[bigquery-public-data:github_repos.contents]
WHERE
content CONTAINS 'import org.gradle'
SELECT
class,
COUNT(*) count
FROM (
SELECT
REGEXP_REPLACE(line, r'import |;', '') AS class,
c.id
FROM (
SELECT
SPLIT(c.content, '\n') line,
c.id
FROM
[gradle.gradle_imports] c
JOIN
[bigquery-public-data:github_repos.files] f
ON
c.id = f.id
WHERE
f.path LIKE '%.java'
OR f.path LIKE '%.groovy'
OR f.path LIKE '%.gradle'
HAVING
LEFT(line, 17)='import org.gradle' )
GROUP BY
class,
c.id )
WHERE
NOT REGEXP_MATCH(class, r'org.gradle.(api|authentication|buildinit|caching|external|javadoc|ide|ivy|jvm|language|maven|nativeplatform|platform|play|plugin.devel|plugin.repository|plugin.use|plugins|process|testfixtures|testing.jacoco|tooling|model|testkit|testing).')
OR class CONTAINS '.internal'
GROUP BY
1
ORDER BY
count DESC
LIMIT
1000;
-- save to languages table
SELECT
repos.repo,
langs.language.name AS language_name,
langs.language.bytes AS language_bytes
FROM
builds.repos repos
LEFT JOIN
[bigquery-public-data.github_repos.languages] langs
ON
repos.repo = langs.repo_name
WHERE
langs.language.name IS NOT NULL
SELECT
STRING(YEAR(created_date)) + '-' + RIGHT('00' + STRING(MONTH(created_date)), 2) + '-15' AS month,
COUNT(DISTINCT project_name) AS projects
FROM
[builds.repos_20170614]
WHERE
created_date > '2016-01-01 00:00'
AND created_date < '2017-06-01 00:00'
AND build_tool = 'Gradle'
AND repo IN (
SELECT
repo
FROM
[gradle.contents_20170614]
WHERE
path = 'build.gradle'
AND content CONTAINS 'com.android'
GROUP BY
repo)
GROUP BY
1
ORDER BY
1 ASC
-- save to repos table
SELECT
repo_name AS repo,
LAST(SPLIT(repo_name, '/')) AS project_name,
created_date,
'Gradle' AS build_tool
FROM (
SELECT
repo_name,
MIN(committer.date) AS created_date
FROM
FLATTEN([bigquery-public-data:github_repos.commits],repo_name)
WHERE
repo_name IN (
SELECT
repo_name
FROM
[bigquery-public-data:github_repos.files]
WHERE
path = 'build.gradle'
GROUP BY
repo_name)
GROUP BY
repo_name
HAVING
COUNT(committer.date) >= 5) AS t
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment