Skip to content

Instantly share code, notes, and snippets.

@lucemia
Last active October 14, 2015 23:28
Show Gist options
  • Save lucemia/4a2e7a854caa7d6ec81f to your computer and use it in GitHub Desktop.
Save lucemia/4a2e7a854caa7d6ec81f to your computer and use it in GitHub Desktop.
Run MapReduce in BigQuery
/**
 * Map step of a word count: emits one {keyword, count: 1} record for every
 * space-separated token found in row.comment.
 *
 * @param {Object} row - Input record; only row.comment is read.
 * @param {function(Object)} emit - Callback invoked once per output record.
 */
function mapper(row, emit) {
  // Rows whose comment is missing or empty produce no output.
  if (!row.comment) {
    return;
  }
  // Declared with `var`: assigning to an undeclared name leaks a global in
  // sloppy mode and throws a ReferenceError in strict mode.
  var keywords = row.comment.split(' ');
  for (var i = 0; i < keywords.length; i++) {
    // Leading, trailing or repeated spaces make split(' ') yield empty
    // strings; skip them so '' is never counted as a keyword.
    if (keywords[i] === '') {
      continue;
    }
    emit({keyword: keywords[i], count: 1});
  }
}
/**
 * Reduce step of a word count: adds up every entry of row.count and emits a
 * single {keyword, total} record for the row.
 *
 * @param {Object} row - Input record; row.keyword and row.count are read,
 *     with row.count indexed like an array.
 * @param {function(Object)} emit - Callback invoked once with the result.
 */
function reducer(row, emit) {
  var counts = row.count;
  var sum = 0;
  var idx = 0;
  // Accumulate in ascending index order.
  while (idx < counts.length) {
    sum += counts[idx];
    idx += 1;
  }
  emit({keyword: row.keyword, total: sum});
}
// Register the map step so SQL can call it as mapper(...).
bigquery.defineFunction(
  'mapper',      // function name exported to SQL
  ['comment'],   // input column names
  [              // output schema
    {name: 'keyword', type: 'string'},
    {name: 'count', type: 'integer'}
  ],
  mapper         // JavaScript UDF that implements it
);
// Register the reduce step so SQL can call it as reducer(...).
bigquery.defineFunction(
  'reducer',             // function name exported to SQL
  ['keyword', 'count'],  // input column names
  [                      // output schema
    {name: 'keyword', type: 'string'},
    {name: 'total', type: 'integer'}
  ],
  reducer                // JavaScript UDF that implements it
);
-- Word count over 1000 sample Wikipedia rows: mapper emits (keyword, count)
-- pairs, the counts are nested per keyword, and reducer totals them.
SELECT keyword, total
FROM reducer(
  SELECT keyword, NEST(count) AS count
  FROM mapper(
    SELECT comment
    FROM [publicdata:samples.wikipedia]
    LIMIT 1000
  )
  GROUP BY keyword
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment