Created
March 24, 2020 19:19
-
-
Save pepetox/10e65713f4f6008e25f67a64b2622b92 to your computer and use it in GitHub Desktop.
Queries Webinar BigQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT count(repo_name) as numberOfRepos, lang_data.name | |
FROM `bigquery-public-data.github_repos.languages`, UNNEST(language) AS lang_data | |
WHERE lang_data.name IN ("JavaScript","Java","Python","Ruby","PHP","C","Go") AND | |
repo_name IN ( | |
SELECT repo_name | |
FROM `bigquery-public-data.github_repos.sample_repos` | |
WHERE watch_count>10 | |
ORDER BY watch_count | |
) | |
GROUP BY lang_data.name | |
ORDER BY count(lang_data.bytes) DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT avg(lang_data.bytes)/count(repo_name) as size, lang_data.name | |
FROM `bigquery-public-data.github_repos.languages`, UNNEST(language) AS lang_data | |
WHERE lang_data.name IN ("JavaScript","Java","Python","Ruby","PHP","C","Go") | |
GROUP BY lang_data.name | |
ORDER BY size ASC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT COUNT(commit) as commits_count, Table_langs.lang_data.name, COUNT (DISTINCT Table_langs.repo_name) as repo_count, COUNT(commit) / COUNT (DISTINCT Table_langs.repo_name) as commits_pre_repo | |
FROM `bigquery-public-data.github_repos.commits` as Table_commits | |
INNER JOIN (SELECT repo_name, lang_data FROM `bigquery-public-data.github_repos.languages`, UNNEST( language ) AS lang_data) as Table_langs ON Table_commits.repo_name[ORDINAL(1)] = Table_langs.repo_name | |
WHERE lang_data.name IN ("JavaScript","Java","Python","Ruby","PHP","C","Go") | |
GROUP BY Table_langs.lang_data.name | |
ORDER BY commits_pre_repo ASC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT REGEXP_EXTRACT(path, r'\.([^\.]*)$') as extension, REGEXP_EXTRACT(content, r"\w+") as word, COUNT(REGEXP_EXTRACT(content, r"\w+")) as word_count | |
FROM ( | |
SELECT id, max(path) as path, max(repo_name) as repo_name | |
FROM `bigquery-public-data.github_repos.sample_files` | |
WHERE REGEXP_EXTRACT(path, r'\.([^\.]*)$') IN ('java','h','js','c','php','py','cpp','rb','cc','go') | |
GROUP BY id | |
) a | |
JOIN `bigquery-public-data.github_repos.sample_contents` b | |
ON a.id=b.id | |
WHERE REGEXP_EXTRACT(content, r"\w+") IN ('happy', 'perfect', 'easy') | |
GROUP BY extension, word | |
Order by word_count DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment