Skip to content

Instantly share code, notes, and snippets.

@alex alex/pypi.md
Last active Jul 9, 2019

Embed
What would you like to do?

Most downloaded projects

SELECT
  file.project,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
GROUP BY
  file.project
ORDER BY
  total_downloads DESC
LIMIT 100

OpenSSL versions

SELECT
  details.system.name,
  REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) ") as openssl_version,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.openssl_version IS NOT NULL
GROUP BY
  details.system.name,
  openssl_version,
HAVING
  download_count >= 100
ORDER BY
  download_count DESC
LIMIT 100

Which python versions install cryptography

SELECT
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  file.project = 'cryptography'
GROUP BY
  python_version,
ORDER BY
  download_count DESC
LIMIT 100

Which platforms are generating 2.6 downloads

SELECT
  details.distro.name,
  details.distro.version,
  details.system.release,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6"
GROUP BY
  details.distro.name,
  details.distro.version,
  details.system.release
ORDER BY
  download_count DESC
LIMIT 100

Which packages are downloaded by Python 2.6 most often (percent)

SELECT
  file.project,
  ROUND(100 * SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6" THEN 1 ELSE 0 END) / COUNT(*), 1) AS percent_26,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6" THEN 1 ELSE 0 END) as total_26_downloads,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.python IS NOT NULL AND
  # Exclude things which are stdlib backports *for* Python 2.6
  file.project NOT IN ("argparse", "ordereddict")
GROUP BY
  file.project,
HAVING
  total_downloads > 5000
ORDER BY
  percent_26 DESC
LIMIT 250

Which packages are downloaded by Python 2.6 most often (absolute)

SELECT
  file.project,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") == "2.6" AND
  # Exclude things which are stdlib backports *for* Python 2.6
  file.project NOT IN ("argparse", "ordereddict")
GROUP BY
  file.project,
ORDER BY
  total_downloads DESC
LIMIT 100

Most used installers

SELECT
  details.installer.name,
  details.installer.version,
  COUNT(*) as total_downloads
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
GROUP BY
  details.installer.name,
  details.installer.version
ORDER BY
  total_downloads DESC
LIMIT 100

Packages installed with OpenSSL 0.9.8 (excluding Windows and OS X)

SELECT
  file.project,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "0.9.8" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_098,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.0" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_100,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.1" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_101,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.2" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_102,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.openssl_version IS NOT NULL AND
  # Exclude Windows and OS X where Cryptography ships a static wheel
  details.system.name NOT IN ("Windows", "Darwin")
GROUP BY
  file.project,
HAVING
  total_downloads > 5000
ORDER BY
  percent_098 DESC
LIMIT 250
@kenneth-reitz

This comment has been minimized.

Copy link

commented Jun 11, 2016

an example for 2 vs 3 usage (month-over-month, perhaps) for a given library would be awesome.

@diimdeep

This comment has been minimized.

Copy link

commented Aug 6, 2016

this is awesome !

@linwoodc3

This comment has been minimized.

Copy link

commented Oct 30, 2016

Is there a way to distinguish mirror bots vs true user downloads? Just asking but I agree with poster above, THIS IS AWESOME!!

@browniebroke

This comment has been minimized.

Copy link

commented Jun 12, 2018

Total downloads by main Python version over time:

SELECT
  STRFTIME_UTC_USEC(timestamp, "%Y-%m") AS month,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '2.7' THEN 1 ELSE 0 END) AS py2.7,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.2' THEN 1 ELSE 0 END) AS py3.2,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.3' THEN 1 ELSE 0 END) AS py3.3,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.4' THEN 1 ELSE 0 END) AS py3.4,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.5' THEN 1 ELSE 0 END) AS py3.5,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.6' THEN 1 ELSE 0 END) AS py3.6,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = '3.7' THEN 1 ELSE 0 END) AS py3.7
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160101"),
    CURRENT_TIMESTAMP()
  )
GROUP BY
  month
ORDER BY
  month
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.