Skip to content

Instantly share code, notes, and snippets.

@tunetheweb
Last active May 23, 2020 14:08
Show Gist options
  • Save tunetheweb/0b894e7e645e077431c67dc744431224 to your computer and use it in GitHub Desktop.
Save tunetheweb/0b894e7e645e077431c67dc744431224 to your computer and use it in GitHub Desktop.
Desktop pages that include link[rel=stylesheet][media=print][onload="*this.media*"]
#standardSQL
# Desktop pages that include link[rel=stylesheet][media=print][onload="*this.media*"]
# Warning: this query processes 250GB of BigQuery data, so it can be expensive to run multiple times!
# Returns TRUE when the page's `_almanac` custom metric lists at least one
# link node with rel "stylesheet", media "print" and an onload handler that
# contains "this.media" (all compared case-insensitively).
#   payload: JSON string for the page; its `_almanac` field is itself a
#            JSON-encoded string and is parsed a second time.
# Returns FALSE when the payload cannot be parsed or has no `link-nodes` array.
CREATE TEMP FUNCTION hasPrintCSSWithOnload(payload STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
  var $ = JSON.parse(payload);
  var almanac = JSON.parse($._almanac);
  var linkNodes = almanac['link-nodes'];
  if (!Array.isArray(linkNodes)) {
    return false;
  }

  // Lower-cased attribute value, or null when the attribute is absent or not a
  // string. Without this guard a single link node lacking rel/media/onload
  // threw a TypeError that the catch below turned into "false" for the whole
  // page, even when a later node matched.
  var attr = function (node, name) {
    var value = node ? node[name] : undefined;
    return typeof value === 'string' ? value.toLowerCase() : null;
  };

  return linkNodes.some(function (node) {
    var onload = attr(node, 'onload');
    return attr(node, 'rel') === 'stylesheet' &&
      attr(node, 'media') === 'print' &&
      onload !== null &&
      onload.includes('this.media');
  });
} catch (e) {
  // Malformed or missing JSON: treat the page as not matching.
  return false;
}
''';
# List the URL of every row in the `2020_05_01_desktop` pages table whose
# payload passes the hasPrintCSSWithOnload check.
SELECT pages.url
FROM `httparchive.pages.2020_05_01_desktop` AS pages
WHERE hasPrintCSSWithOnload(pages.payload);