Skip to content

Instantly share code, notes, and snippets.

@fike
Created December 15, 2014 20:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fike/4d1550650f5e50f81161 to your computer and use it in GitHub Desktop.
Save fike/4d1550650f5e50f81161 to your computer and use it in GitHub Desktop.
HTTP archive (Using Bigqueries) queries to know "What are TOP CDNs"
// These queries were used this HTTP archive (Bigqueries)
// http://bigqueri.es/t/whats-the-percent-of-http-request-that-are-outside-of-the-cdns/477
// HTTP requests with and without CDN
SELECT cdn, num, ROUND(ratio*100) as percent FROM (
SELECT cdn, COUNT(cdn) as num, RATIO_TO_REPORT(num) OVER() ratio FROM (
SELECT CASE
WHEN _cdn_provider IN ('')
THEN 'None'
ELSE 'CDN'
END as cdn
FROM httparchive:runs.2014_11_01_requests,
httparchive:runs.2014_11_15_requests,
httparchive:runs.2014_11_01_requests_mobile,
httparchive:runs.2014_11_15_requests_mobile
) GROUP BY cdn
) ORDER BY percent DESC
// TOP 10
SELECT provider, round(100*ratio) as percent, num
FROM (SELECT REGEXP_REPLACE(_cdn_provider,r'^$', 'None') as provider, count(*) as num, RATIO_TO_REPORT(num) OVER() ratio
FROM httparchive:runs.2014_11_01_requests,
httparchive:runs.2014_11_15_requests,
httparchive:runs.2014_11_01_requests_mobile,
httparchive:runs.2014_11_15_requests_mobile
WHERE _cdn_provider != ''
GROUP BY provider
)
ORDER BY num desc LIMIT 10
// TOP 20 hostnames appears with Google CDN
SELECT req_host, count(req_host) as num
FROM httparchive:runs.2014_11_01_requests,
httparchive:runs.2014_11_15_requests,
httparchive:runs.2014_11_01_requests_mobile,
httparchive:runs.2014_11_15_requests_mobile
WHERE _cdn_provider = 'Google'
GROUP BY req_host
ORDER BY num DESC
LIMIT 20
// CDN Ranking discard HTTP requests for Google CDN and without CDN
SELECT provider, round(100*ratio) as percent, num
FROM (SELECT REGEXP_REPLACE(_cdn_provider,r'^$', 'None') as provider, count(*) as num, RATIO_TO_REPORT(num) OVER() ratio
FROM httparchive:runs.2014_11_01_requests,
httparchive:runs.2014_11_15_requests,
httparchive:runs.2014_11_01_requests_mobile,
httparchive:runs.2014_11_15_requests_mobile
WHERE _cdn_provider != 'Google' AND _cdn_provider != ''
GROUP BY provider
)
ORDER BY num desc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment