Skip to content

Instantly share code, notes, and snippets.

@sAbakumoff
Created August 16, 2016 13:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sAbakumoff/33aa5b8911d5437d3aa6e4cd19e4b8c4 to your computer and use it in GitHub Desktop.
Save sAbakumoff/33aa5b8911d5437d3aa6e4cd19e4b8c4 to your computer and use it in GitHub Desktop.
function extract_package_name(r, emit) {
var output = Object.assign(r);
try{
var package=JSON.parse(r.content);
emit(Object.assign({}, r, {name : package.name}));
}
catch(ex){
emit(Object.assign({}, r, {name : 'JSON_ERROR'}));
}
}
bigquery.defineFunction(
'extract_package_name', // Name of the function exported to SQL
['content', 'copies', 'url', 'id'], // Names of input columns
[{'name': 'copies', 'type': 'integer'},
{'name' : 'name', 'type' : 'string'},
{'name' : 'url', 'type' : 'string'},
{'name' : 'id', 'type' : 'string'}], // Output schema
extract_package_name // Reference to JavaScript UDF
);
SELECT
id,
copies,
name,
url
FROM
extract_package_name(
SELECT
contents.id as id,
CONCAT("https://github.com/", files.repo_name, "/blob/", REGEXP_EXTRACT(files.ref, r"refs/heads/(.*)$"), "/", files.path) AS url,
contents.content as content,
contents.copies as copies
FROM
[githubdataqueries:NpmStat.package_json_content] AS contents
INNER JOIN
[githubdataqueries:NpmStat.package_json_files] AS files
ON
files.id = contents.id
WHERE
contents.copies > 100 )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment