SELECTfile.project,
COUNT(*) as total_downloads,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
GROUP BYfile.projectORDER BY
total_downloads DESCLIMIT100
OpenSSL versions
SELECTdetails.system.name,
REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) ") as openssl_version,
COUNT(*) as download_count,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHEREdetails.openssl_versionIS NOT NULLGROUP BYdetails.system.name,
openssl_version,
HAVING
download_count >=100ORDER BY
download_count DESCLIMIT100
Which python versions install cryptography
SELECT
REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version,
COUNT(*) as download_count,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHEREfile.project='cryptography'GROUP BY
python_version,
ORDER BY
download_count DESCLIMIT100
Which platforms are generating 2.6 downloads
SELECTdetails.distro.name,
details.distro.version,
details.system.release,
COUNT(*) as download_count,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHERE
REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ="2.6"GROUP BYdetails.distro.name,
details.distro.version,
details.system.release
ORDER BY
download_count DESCLIMIT100
Which packages are downloaded by Python 2.6 most often (percent)
SELECTfile.project,
ROUND(100*SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ="2.6" THEN 1 ELSE 0 END) /COUNT(*), 1) AS percent_26,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ="2.6" THEN 1 ELSE 0 END) as total_26_downloads,
COUNT(*) as total_downloads,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHEREdetails.pythonIS NOT NULLAND# Exclude things which are stdlib backports *for* Python 2.6file.project NOT IN ("argparse", "ordereddict")
GROUP BYfile.project,
HAVING
total_downloads >5000ORDER BY
percent_26 DESCLIMIT250
Which packages are downloaded by Python 2.6 most often (absolute)
SELECTfile.project,
COUNT(*) as total_downloads,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHERE
REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") =="2.6"AND# Exclude things which are stdlib backports *for* Python 2.6file.project NOT IN ("argparse", "ordereddict")
GROUP BYfile.project,
ORDER BY
total_downloads DESCLIMIT100
Most used installers
SELECTdetails.installer.name,
details.installer.version,
COUNT(*) as total_downloads
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
GROUP BYdetails.installer.name,
details.installer.version
ORDER BY
total_downloads DESCLIMIT100
Packages installed with OpenSSL 0.9.8 (excluding Windows and OS X)
SELECTfile.project,
ROUND(100*SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) =="0.9.8" THEN 1 ELSE 0 END) /COUNT(*), 1) as percent_098,
ROUND(100*SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) =="1.0.0" THEN 1 ELSE 0 END) /COUNT(*), 1) as percent_100,
ROUND(100*SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) =="1.0.1" THEN 1 ELSE 0 END) /COUNT(*), 1) as percent_101,
ROUND(100*SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) =="1.0.2" THEN 1 ELSE 0 END) /COUNT(*), 1) as percent_102,
COUNT(*) as total_downloads,
FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160114"),
CURRENT_TIMESTAMP()
)
WHEREdetails.openssl_versionIS NOT NULLAND# Exclude Windows and OS X where Cryptography ships a static wheeldetails.system.name NOT IN ("Windows", "Darwin")
GROUP BYfile.project,
HAVING
total_downloads >5000ORDER BY
percent_098 DESCLIMIT250
SELECT
STRFTIME_UTC_USEC(timestamp, "%Y-%m") AS month,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='2.7' THEN 1 ELSE 0 END) ASpy2.7,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.2' THEN 1 ELSE 0 END) ASpy3.2,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.3' THEN 1 ELSE 0 END) ASpy3.3,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.4' THEN 1 ELSE 0 END) ASpy3.4,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.5' THEN 1 ELSE 0 END) ASpy3.5,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.6' THEN 1 ELSE 0 END) ASpy3.6,
SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") ='3.7' THEN 1 ELSE 0 END) ASpy3.7FROM
TABLE_DATE_RANGE(
[the-psf:pypi.downloads],
TIMESTAMP("20160101"),
CURRENT_TIMESTAMP()
)
GROUP BY
month
ORDER BY
month
an example for 2 vs 3 usage (month-over-month, perhaps) for a given library would be awesome.