You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
SELECTCount(*) count
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
REGEXP_MATCH(content, r'\sasync\s|\sawait\s')
Brace_Placement
SELECTSUM(CASE WHEN (REGEXP_MATCH(line, r'^\s*{\s*$')) THEN 1 ELSE 0 END) seperate_line,
SUM(CASE WHEN (REGEXP_MATCH(line, r'^\s*{\s*$')) THEN 0 ELSE 1 END) same_line,
## Catch object initializers, i.e. 'new MyClass { }', new { ... }, or 'new [] { 1, 3 }'SUM(CASE WHEN (REGEXP_MATCH(line, r'^.*\b(new\s?{|new\s+\S*\s*{).*$')) THEN 1 ELSE 0 END) same_line_initializer,
COUNT(*) total
FROM (
SELECT*FROM (
SELECT
SPLIT(content, '\n') line,
FROM
[fh-bigquery:github_extracts.contents_net_cs] )
WHERE
REGEXP_MATCH(line, r'.*{.*') ) ## only lines containing '{'
Class_Names
SELECT
class,
COUNT(class) count,
FROM (
SELECT
REGEXP_EXTRACT(line, r'(\bclass\s+[@_a-zA-Z][^ \t]*)') class,
FROM (
SELECT
SPLIT(content, '\n') line,
size,
binary,
sample_path
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
binary = FALSE
AND size >100AND REGEXP_MATCH(sample_path, r"^wp-content.*") = FALSE
AND REGEXP_MATCH(sample_path, r".*AssemblyInfo.cs$") == FALSE )
WHERE
REGEXP_MATCH(line, r'(\bclass\s+[@_a-zA-Z][^ \t]*)')
AND REGEXP_MATCH(line, r'^\s*\/\/') == FALSE ## comment lineANDSUBSTRING(line, 0, 2) !='@"' ) ## Roslyn testsGROUP BY
class
ORDER BY
count DESC
CSharp-Most-Popular-Filenames
SELECT filename, COUNT(*) as count
FROM (
SELECT LAST(SPLIT(sample_path, '/')) as filename
FROM [fh-bigquery:github_extracts.contents_net_cs]
)
GROUP BY filename
HAVING count >1ORDER BY count DESCLIMIT1000
CSharp-Using-Statements
SELECTline,
COUNT(*) AS count,
FROM (
SELECT
id,
SPLIT(content, "\n") ASlineFROM
[fh-bigquery:github_extracts.contents_net_cs] )
WHERE
REGEXP_MATCH(line, r"^\w*using.*;\w*$")
GROUP BYlineORDER BY
count DESCLIMIT10000
FileLength-Quantiles
SELECT## See https://cloud.google.com/bigquery/docs/reference/legacy-sql#quantiles## 101, gives percentiles (including Min/Max)
QUANTILES(lines, 101) AS Quantiles
FROM (
SELECTCOUNT(SPLIT(content, '\n')) AS lines,
size,
binary
FROM
[fh-bigquery:github_extracts.contents_net_cs] )
WHERE## try and filter our any 'binary' files that sneak in
binary = false AND
(lines >1OR (lines =1 && size <250))
FileLength-Longest
SELECT*FROM (
SELECTCOUNT(SPLIT(content, '\n')) lines,
size,
binary,
CONCAT('https://github.com/', sample_repo_name, '/blob/master/', sample_path) url
FROM
[fh-bigquery:github_extracts.contents_net_cs] )
WHERE## try and filter our any 'binary' files that sneak in
FileLength-Longest
(lines >1OR (lines =1 && size <250))
ORDER BY
lines DESC, size DESCLIMIT1000
Files_Per_Repo_Most
SELECT*FROM (
SELECT
CONCAT("https://github.com/", sample_repo_name) repo,
COUNT(*) num_cs_files
FROM (
SELECT
binary,
sample_repo_name,
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
binary = FALSE )
GROUP BY
repo )
ORDER BY
num_cs_files DESCLIMIT1000
Files_Per_Repo_Quantiles
SELECT## See https://cloud.google.com/bigquery/docs/reference/legacy-sql#quantiles## 101, gives percentiles (including Min/Max)
QUANTILES(num_cs_files, 101) AS Quantiles
FROM (
SELECT
sample_repo_name,
COUNT(*) num_cs_files
FROM (
SELECT
binary,
sample_repo_name,
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
binary = FALSE )
GROUP BY
sample_repo_name )
Lambdas
SELECT## See https://cloud.google.com/bigquery/docs/reference/legacy-sql#quantiles## 101, gives percentiles (including Min/Max)
QUANTILES(lambdas / num_lines *100.0, 101) AS Quantiles
FROM (
SELECTSUM(REGEXP_MATCH(line, r'^.*=>.*\b')) WITHIN RECORD lambdas,
COUNT(line) WITHIN RECORD num_lines,
*FROM (
SELECT
SPLIT(content, '\n') line,
binary,
sample_repo_name,
sample_path
FROM
[fh-bigquery:github_extracts.contents_net_cs] )
WHERE
binary = FALSE)
WHERE
lambdas >0
Most-Popular-Execeptions
SELECT
exception,
COUNT(exception) count
FROM (
SELECT
sample_path,
sample_repo_name,
REGEXP_EXTRACT(line, r'(throw\snew\s\w+Exception)(?:\()') exception,
FROM (
SELECT
SPLIT(content, '\n') line,
sample_path,
sample_repo_name
FROM
[fh-bigquery:github_extracts.contents_net_cs]
HAVING
REGEXP_MATCH(line, r'throw\snew\s\w+Exception(?:\()') ) )
GROUP BY
exception
ORDER BY
count DESC
Most_Popular_C#_Repos
SELECT*FROM (
SELECT
CONCAT("https://github.com/", sample_repo_name) repo,
sample_stars stars,
COUNT(*) files ## as "C# files"FROM (
SELECT
binary,
sample_repo_name,
sample_stars,
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
binary = FALSE )
GROUP BY
repo,
stars
HAVING
files >50 )
WHERE
stars >1000ORDER BY
stars DESCLIMIT1000
Regions
SELECTCOUNT(*) files_with_a_region
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE## new-line, zero or more spaces, '#region', word-boundary
REGEXP_MATCH(content, r'(?:\n|\r\n)\s*(#region)\b')
Tabs_v_Spaces
SELECTSUM(best='tab') tabs,
SUM(best='space') spaces,
COUNT(*) total
FROM (
SELECT
sample_path,
sample_repo_name,
COUNT(lineStart) WITHIN RECORD lineCount,
IF(SUM(lineStart='') >SUM(lineStart='\t'), 'space', 'tab') WITHIN RECORD best,
FROM (
SELECT
LEFT(SPLIT(content, '\n'), 1) lineStart,
sample_path,
sample_repo_name
FROM
[fh-bigquery:github_extracts.contents_net_cs]
HAVING
REGEXP_MATCH(lineStart, r'[ \t]') ) # only lines with a space or tab in themHAVING
lineCount >10 ) # at least 10 lines that start with space or tab sample_repo_name, sample_repo_name
var-Keyword-Usage
SELECTCount(*) count
FROM
[fh-bigquery:github_extracts.contents_net_cs]
WHERE
REGEXP_MATCH(content, r'[\t ]*var\s\w*\s*=\s*(\d|\d.\d)+.*;')
I'm not sure I undertand the right part of the
var
regex.I would expect a line like that to match:
But it doesn't (see https://regex101.com/r/EjbeMj/1)
Why not using something simpler at the right of the
=
? Something like:(I'm not sure we care about what's on the right of the
=
)Which in that case works (see https://regex101.com/r/EjbeMj/2)