Skip to content

Instantly share code, notes, and snippets.

@yunruse
Last active January 29, 2023 12:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yunruse/326481cd75800c3824f9b63206d350a3 to your computer and use it in GitHub Desktop.
Save yunruse/326481cd75800c3824f9b63206d350a3 to your computer and use it in GitHub Desktop.
Python 3.x minor version usage stats, by proxy of PyPI downloads
'''
Python 3.x minor version usage stats, by proxy of PyPI downloads.
Placed into public domain by Mia yun Ruse.
(I wouldn't recommend using it though. It's a tad hacky.)
'''
from datetime import date as Date, timedelta
from itertools import accumulate
from json import loads

from requests import get
# pypistats.org endpoint template; `{package}` is substituted via str.format.
URL = "https://pypistats.org/api/packages/{package}/python_minor"
# Maps a version category string (e.g. '3.9') to a download count.
VersionData = dict[str, int]
def get_version_stats(package):
    """Fetch per-day download counts by Python minor version for a package.

    Requests the ``python_minor`` endpoint built from ``URL`` and regroups
    the flat list found under the response's ``data`` key by day.

    Args:
        package: Name of the PyPI package to look up.

    Returns:
        A dict mapping each date to a ``{category: downloads}`` dict, where
        both values are taken verbatim from each entry's ``category`` and
        ``downloads`` fields.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = get(URL.format(package=package), timeout=30)
    # Fail loudly on an HTTP error status instead of trying to parse an
    # error body as if it were the expected JSON payload.
    response.raise_for_status()
    entries: list[dict] = loads(response.text)['data']

    data: dict[Date, VersionData] = {}
    for entry in entries:
        day = Date.fromisoformat(entry['date'])
        data.setdefault(day, {})[entry['category']] = entry['downloads']
    return data
def average_version_states(data: dict[Date, dict[str, int]], start: Date = None, end: Date = None):
    """Total the downloads of each version over an inclusive date window.

    Despite the name, this sums rather than averages; for a fixed window the
    relative proportions between versions are the same either way.

    Args:
        data: Per-day ``{version: downloads}`` dicts keyed by date.
        start: First day to include, or ``None`` for no lower bound.
        end: Last day to include, or ``None`` for no upper bound.

    Returns:
        A dict mapping each version to its summed downloads across every
        day that falls within ``[start, end]``.
    """
    version_data: dict[str, int] = {}
    for day, entries in data.items():
        # Both bounds must hold. The bounds are checked separately because
        # `not (a) and (b)` negates only `a`, which left `end` unenforced.
        if start is not None and day < start:
            continue
        if end is not None and day > end:
            continue
        for version, downloads in entries.items():
            version_data[version] = version_data.get(version, 0) + downloads
    return version_data
# --- Configuration ----------------------------------------------------------
package = 'urllib3'  # PyPI package whose downloads serve as the usage proxy
end_date = Date.today()
N_days = 30  # length of the reporting window, in days

# Fetch the per-day stats, then collapse the window into per-version totals.
data = get_version_stats(package)
data = average_version_states(
    data, end_date - timedelta(days=N_days), end_date)

# Keep only the Python 3.x categories; anything else is dropped.
data = {k: v for k, v in data.items() if k.startswith('3.')}

# Newest version first, compared numerically so that 3.10 sorts above 3.9.
labels = sorted(data, reverse=True,
                key=lambda x: tuple(map(int, x.split('.'))))
usage = [data[i] for i in labels]
# Running total from the newest version downwards: the entry for version X
# is the number of downloads made from X or anything newer.
usage_cumulative = list(accumulate(usage))
N = sum(usage)
if not N:
    # Every percentage below divides by N, so stop with a readable message
    # instead of a ZeroDivisionError when the window holds no 3.x downloads.
    raise SystemExit(
        'No Python 3.x downloads found for {!r} in the last {} days'.format(
            package, N_days))

# --- Text report ------------------------------------------------------------
print('Version Downloads Cumulative (%)')
for i, u, U in zip(labels, usage, usage_cumulative):
    print(f'{i:<5} {u:10} {U:>12} {U/N*100:>6.2f}%')

# --- Plot -------------------------------------------------------------------
# Imported late so the text report is still printed if matplotlib is missing.
from matplotlib import pyplot # noqa
from matplotlib.ticker import PercentFormatter # noqa

pyplot.title('Python version usage (avg of last {} days, as of {})'.format(
    N_days, end_date))
pyplot.xlabel('Python version')
pyplot.xlim(0, len(labels)-1)
cumulative_share = [U/N for U in usage_cumulative]
pyplot.ylabel('Downloads for PyPI package {!r}'.format(package))
pyplot.locator_params(axis='y', nbins=11)
# Values are fractions of the total, so 1 corresponds to 100%.
pyplot.gca().yaxis.set_major_formatter(PercentFormatter(1))
pyplot.grid(linewidth=0.2)
# Draw the cumulative curve directly, one step per version. A histogram over
# the labels falls back to 10 default bins, which merges neighbouring
# versions as soon as there are more than 10 of them.
pyplot.step(labels, cumulative_share, where='post')
pyplot.savefig('usage.png')
@yunruse
Copy link
Author

yunruse commented Jan 29, 2023

Which version of Python should you support to ensure you are supporting the most users?

Version   Downloads    Cumulative (%)
3.12          17766        17766    0.01%
3.11        7112527      7130293    2.66%
3.10       18557639     25687932    9.57%
3.9        70130541     95818473   35.69%
3.8        53781913    149600386   55.73%
3.7        82474033    232074419   86.45%
3.6        34914607    266989026   99.45%
3.5         1116711    268105737   99.87%
3.4          347573    268453310  100.00%
3.3             303    268453613  100.00%
3.2              12    268453625  100.00%

The same stats in graph form

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment