Skip to content

Instantly share code, notes, and snippets.

@greenido
Last active March 29, 2018 15:55
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save greenido/6068994 to your computer and use it in GitHub Desktop.
Save greenido/6068994 to your computer and use it in GitHub Desktop.
function runQueryThirdPartyUsage(startRow, startCol) {
if (startRow === undefined) startRow = 25;
if (startCol === undefined) startCol = 1;
var projectNumber = 'bq-httparchive-1';
var sheet = SpreadsheetApp.getActiveSheet();
var dates = ['2013_06','2013_05','2013_04','2013_03','2013_02','2013_01',
'2012_12','2012_11','2012_10','2012_09','2012_08','2012_07',
'2012_06','2012_05','2012_04','2012_03','2012_02','2012_01'];
var sql = 'SELECT date, third_party, num_requests FROM ';
for (var i=0; i<dates.length; i++) {
sql += '(SELECT "'+dates[i]+'" date, DOMAIN(req.url) third_party, COUNT(*) num_requests FROM [httparchive:runs.'+dates[i]+'_01_requests] as req JOIN ( \
SELECT DOMAIN(url) self, pageid \
FROM [httparchive:runs.'+dates[i]+'_01_pages] \
) as pages ON pages.pageid = req.pageid \
WHERE DOMAIN(req.url) != pages.self \
GROUP BY third_party ORDER BY num_requests desc LIMIT 10),'
}
var queryResults;
// Inserts a Query Job
try {
Logger.log("Our SQL: " + sql);
queryResults = BigQuery.Jobs.query(projectNumber, sql);
}
catch (err) {
Logger.log(err);
Browser.msgBox(err);
return;
}
// Check on status of the Query Job
while (queryResults.getJobComplete() == false) {
try {
queryResults = BigQuery.Jobs.getQueryResults(projectNumber, queryJob.getJobReference().getJobId());
}
catch (err) {
Logger.log(err);
Browser.msgBox(err);
return;
}
}
// Update the amount of results
var resultCount = queryResults.getTotalRows();
var resultSchema = queryResults.getSchema();
var resultValues = new Array(resultCount);
var tableRows = queryResults.getRows();
// Iterate through query results
for (var i = 0; i < tableRows.length; i++) {
var cols = tableRows[i].getF();
resultValues[i] = new Array(cols.length);
// For each column, add values to the result array
for (var j = 0; j < cols.length; j++) {
resultValues[i][j] = cols[j].getV();
}
}
// Update the Spreadsheet with data from the resultValues array, starting from cell A1
sheet.getRange(startRow, startCol, resultCount, tableRows[0].getF().length).setValues(resultValues);
Browser.msgBox("Done! All the answers should be on the sheet");
}
SELECT pages.pageid, url, cnt, libs, pages.rank rank FROM [httparchive:runs.2013_06_01_pages] as pages JOIN (
SELECT pageid, count(distinct(type)) cnt, GROUP_CONCAT(type) libs FROM (
SELECT REGEXP_EXTRACT(url, r'(dojo|angular|prototype|backbone|emberjs|sencha|scriptaculous).*\.js') type, pageid
FROM [httparchive:runs.2013_06_01_requests]
WHERE REGEXP_MATCH(url, r'dojo|angular|prototype|backbone|emberjs|sencha|scriptaculous.*\.js')
GROUP BY pageid, type
)
GROUP BY pageid
HAVING cnt >= 2
) as lib ON lib.pageid = pages.pageid
WHERE rank IS NOT NULL
ORDER BY rank asc
SELECT DOMAIN(req.url) third_party, COUNT(*) num_requests
FROM [httparchive:runs.2013_06_01_requests] as req JOIN (
SELECT DOMAIN(url) self, pageid
FROM [httparchive:runs.2013_06_01_pages]
) as pages ON pages.pageid = req.pageid
WHERE DOMAIN(req.url) != pages.self
GROUP BY third_party
ORDER BY num_requests desc LIMIT 10;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment