Skip to content

Instantly share code, notes, and snippets.

@jvolkman
Last active February 2, 2023 07:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jvolkman/05658a8d6f67342ab49c55f707899038 to your computer and use it in GitHub Desktop.
Save jvolkman/05658a8d6f67342ab49c55f707899038 to your computer and use it in GitHub Desktop.
How many PyPI packages use different sets of requirements per wheel?
#standardSQL
-- Maps a version string to a sort key whose lexicographic order approximates
-- numeric version order, so versions can be ranked with a plain ORDER BY.
-- Adapted from https://stackoverflow.com/a/63364851
--
-- How the key is built:
--   1. The string is split into single characters and cut into chunks: every
--      non-digit character is its own chunk, every maximal run of digits is
--      one chunk.
--   2. Digit chunks are left-padded with '0' to 220 characters so that they
--      compare numerically ('9' < '10'); other chunks are kept verbatim.
--   3. Chunks are concatenated in their original order and the literal suffix
--      '..zzzzzzzzzzzzzz' is appended.
--
-- The pad width is 220 because the longest version string in the dataset is
-- 217 chars (someone dumped many digits of pi into the version string >:( ).
-- A digit run longer than 220 characters gives REPEAT a negative count,
-- which BigQuery rejects with an error.
--
-- NOTE(review): the suffix looks designed for SemVer-style '-' pre-releases
-- ('-' sorts before '.'). PEP 440 spellings such as '1.0rc1' compare by the raw
-- character after the shared prefix ('r' > '.'), so they rank ABOVE '1.0' and
-- '1.0.0' -- confirm this is acceptable for the caller.
CREATE TEMP FUNCTION normalizedSemanticVersion(semanticVersion STRING)
AS ((
  SELECT
    STRING_AGG(
      IF(is_digit, REPEAT('0', 220 - LENGTH(chunk)) || chunk, chunk),
      ''
      -- Order by where the chunk starts. A separator and the digit run that
      -- follows it share the same grp value, so ordering by grp alone leaves
      -- their relative order undefined.
      ORDER BY first_offset
    ) || '..zzzzzzzzzzzzzz'
  FROM (
    -- One row per chunk: a single non-digit character or a full digit run.
    SELECT
      is_digit,
      MIN(char_offset) AS first_offset,
      STRING_AGG(ch, '' ORDER BY char_offset) AS chunk
    FROM (
      -- grp is the running count of non-digit characters seen so far; it
      -- increases at every non-digit, so (grp, is_digit) identifies a chunk.
      SELECT
        char_offset,
        ch,
        is_digit,
        COUNTIF(NOT is_digit) OVER (ORDER BY char_offset) AS grp
      FROM (
        SELECT
          char_offset,
          ch,
          ch IN ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0') AS is_digit
        FROM UNNEST(SPLIT(semanticVersion, '')) AS ch WITH OFFSET AS char_offset
      )
    )
    GROUP BY grp, is_digit
  )
));
-- Step 1: for every project that ships at least one wheel, keep the single
-- version whose normalizedSemanticVersion key sorts highest.
WITH newest_release AS (
  SELECT
    name,
    version
  FROM `bigquery-public-data.pypi.distribution_metadata`
  WHERE packagetype = 'bdist_wheel'
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY name
      ORDER BY normalizedSemanticVersion(version) DESC
    ) = 1
),

-- Step 2: fetch every wheel row that belongs to that newest version.
newest_release_wheels AS (
  SELECT
    dist.name,
    dist.version,
    dist.requires_dist
  FROM `bigquery-public-data.pypi.distribution_metadata` AS dist
  INNER JOIN newest_release AS latest
    ON dist.name = latest.name
    AND dist.version = latest.version
  WHERE dist.packagetype = 'bdist_wheel'
),

-- Step 3: canonicalize each wheel's requirements into one string. The
-- requires_dist array is joined with newlines and split again on newlines,
-- then the resulting lines are de-duplicated, sorted and re-joined, so the
-- result does not depend on the order or repetition of the entries.
canonical_requirements AS (
  SELECT
    name,
    version,
    ARRAY_TO_STRING(
      ARRAY(
        SELECT DISTINCT requirement
        FROM UNNEST(SPLIT(ARRAY_TO_STRING(requires_dist, '\n'), '\n')) AS requirement
        ORDER BY requirement
      ),
      '\n'
    ) AS requires_dist
  FROM newest_release_wheels
),

-- Step 4: collapse wheels that share an identical canonical requirement
-- string, leaving one row per distinct requirement set of each release.
requirement_variants AS (
  SELECT DISTINCT
    name,
    version,
    requires_dist
  FROM canonical_requirements
)

-- Step 5: report releases that have more than one distinct requirement set.
-- The count column is intentionally left unaliased (BigQuery labels it f0_).
SELECT
  name,
  version,
  COUNT(*)
FROM requirement_variants
GROUP BY name, version
HAVING COUNT(*) > 1
ORDER BY name
@jvolkman
Copy link
Author

jvolkman commented Feb 2, 2023

Results on 2023/02/01

name	version	f0_
AIFactoryDS	0.1.13	3
AbsGarden	0.0.1	2
CaChannel	3.2.0	2
CellStar	2.0.2	2
DkamSDK	1.4.87	3
DracoPy	1.2.0	2
GPy	1.10.0	2
InsurAutoML	0.2.3	2
KoiLang	0.2.0b4	2
LbNightlyTools	3.0.48	2
LightSim2Grid	0.7.1	2
MonoTools	0.2.0	2
MorphIO	3.3.4	2
NREL-PySAM	4.0.0	2
Nesquicc	0.0.0	2
OpenOPC-DA	1.5.0	2
PySC2	4.0.0	2
PySPX	0.5.0	2
SMUTHI	2.0.2	2
STT-tflite	0.10.0a4	5
Scarlet-ltl	0.0.1	2
Seeed-grove.py	0.3	2
TerPy	0.0.1	2
ZITPyVAT	1.0.20	2
aacgmv2	2.6.2	2
actinia-core	4.2.0	2
adidnsdump	1.3.0	2
af-metrics	0.1.0	2
agile-toolkit	0.6.0	2
aim	3.16.0rc2	2
aim-with-auth	3.14.4	2
alertadengue	3.2.0	2
aliopts	0.0.2	2
ams-dott-runtime	1.1.0	2
anuga	3.0.1	2
anvil-stubs	0.0.1	2
apache-beam	2.44.0rc1	3
arcor2-logger	0.1.0	2
asammdf	7.2.0	3
asgi-types	0.1.0	2
bed-reader	0.2.29	2
bigdl-nano	2.3.0b20230201	3
bigml-sensenet	0.7.0	2
block2	0.5.1	2
blspy	1.0.16	2
bpan	1.0.0	2
brotlipy	0.7.0	2
buffalo	2.0.0	2
capnpy-agates	0.9.2rc1	2
cavint	1.0.4	2
cctbx-base	2020.8	2
chinillablspy	1.0.16	2
clientcentral	12.0.1	2
cloudViewer	0.3.7	2
cmsisdsp	1.9.5	2
cntk	2.7.post2	2
cntk-gpu	2.7.post2	2
coqui-stt-ctcdecoder	1.4.0a6	7
cpprb	10.6.4	4
czf-env	1.0	2
daisykit	0.2.0.1	2
datetimeparse	0.5.6	2
dbscan	0.0.12	3
deciphon-api	0.3.2	2
deepspeech	0.10.0a3	9
deepspeech-gpu	0.10.0a3	8
deepspeech-tflite	0.10.0a3	9
dipy	1.6.0	2
dott-ng-runtime	1.1.0	2
dpcpp-cpp-rt	2023.0.0	2
ds-ctcdecoder	0.10.0a3	9
dune-fem	2.8.0.dev20210308	2
dynaphopy	1.17.14	2
eo-kit	0.1.1	2
errbot	6.1.9	2
eulerangles	1.0.2	2
exafunction	0.16.0	2
ezephys	0.4.3	2
fast-tokenizer-python	1.0.1	2
fastdeploy-python	1.0.3	2
fastdup	0.204	3
faster-tokenizer	0.2.0	2
feyn	3.0.5	3
fixturepy	0.5	2
flask-oidc-pp	1.4.2	2
fluent-compiler	0.3	2
fortran-rt	2021.2.0	5
freesas	0.9.0	4
fretwork	0.5.0	3
ganessa	2.3.5	4
gco-wrapper	3.0.8	2
gcvspline	0.4	2
gensim	4.3.0	4
geoai-GDAL	3.2.3	2
gm	3.0.148	2
gmtrade	3.0.4	2
grpcio	1.52.0rc1	2
gumnut-server	1.0.0rc1	2
hat-qt	0.5.1	2
helics	3.4.0	2
hget	1.0.5	3
hibiapi	0.7.7	2
higra	0.6.5	3
hooqu	0.1.0	2
imdb-tv-scraper	0.1.0	2
imread	0.7.4	2
intel-fortran-rt	2023.0.0	3
ip-locator	0.0.1	2
jiminy-py	1.7.14	3
kudio	1.1.3	2
lib-V2	0.0.0	2
libasd	1.5.5	2
librapid	0.5.8	2
lightguide	0.2.1	2
lime-stability	0.1.1	2
lmdb	1.4.0	2
loop-tool-py	0.0.9	2
magicinvoke	2.4.6	2
mapbuffer	0.5.1	2
matxscript	1.8.1	2
medaka	1.7.2	2
mera	1.3.0	2
metas-unclib	2.6.2	2
mitm6	0.3.0	2
mkl-fft	1.3.1	5
mkl-random	1.2.2	5
mkl-umath	0.1.1	7
mkr-format	0.0.13	2
mozilla-voice-stt	0.9.0a7	8
mozilla-voice-stt-cuda	0.9.0a7	7
mozilla-voice-stt-tflite	0.9.0a7	8
ms-ivy	1.8.23	2
mvs-ctcdecoder	0.9.0a7	8
myclang	0.1.3	2
nagisa	0.2.8	2
nanosimpy	0.0.1	2
natera-wes	1.0.0	2
neuralfit	0.1.6	3
neuronetlib	0.0.1	2
nfstream	6.5.3	2
nmslib	2.1.1	2
nnabla	1.33.0	5
nrlmsise00	0.1.1	2
oarphpy	0.1.0	2
omegaml	0.15.3	2
oneflow	0.9.0	4
onelogin-aws-assume-role	1.10.1	2
onnxruntime-openvino	1.13.1	2
onnxruntime-powerpc64le	1.12.0	2
open-aea	1.28.0.post1	2
open3d	0.16.1	2
opencanary	0.7.1	2
opencl-rt	2021.2.0	2
opencovis-contrib-python	4.5.1.84	4
opencovis-contrib-python-headless	4.5.1.84	4
opencovis-python	4.5.1.84	4
opencovis-python-headless	4.5.1.84	4
opencv-contrib-python-asenyaev	4.5.2	4
opencv-contrib-python-headless-asenyaev	4.5.2	4
opencv-python-headless-asenyaev	4.5.2	4
openvino-python	2021.1	6
ort-nightly	1.11.0.dev20220320001	3
ort-nightly-directml	1.11.0.dev20220320001	2
oscar-test	0.1.0	2
ovito	3.8.0.dev126	2
p4p	4.1.5a1	12
paddle2onnx	1.0.5	2
paddleaudio	1.1.0	2
panda-robot	0.0.5	2
pandana	0.6.1	2
parquery	0.1.15	2
parqueryd	0.1.33	2
pau	0.0.17	3
pavlok	0.2.0	2
pdfo	1.2	2
pep-517-example	1	2
phoenixdb	1.2.1	2
pi-heif	0.9.3	2
pillow-heif	0.9.3	2
pod5-format	0.0.41	2
pointers.py	2.4.0	2
practicus	22.11.0	2
pyFAI	0.21.3	6
pyPSCF	0.1.3	2
pybcj	1.0.1	2
pybluez2	0.46	2
pydeform	0.5.1	2
pydex	0.0.9	3
pyhindsight	20211215	2
pyjls	0.4.3	2
pyjoulescope-driver	1.1.4	2
pylubridate	0.1.0	2
pymocky	1.2.0	2
pymongo	4.4.0b0	3
pyms-nist-search	0.5.0b3	2
pyodps	0.11.3b1	3
pypiprivate	0.5.0	2
pyresample	1.26.0.post0	2
pyscf	2.1.1	2
pyshtools	4.10.1	3
pystages	1.1.0	2
pytango	9.4.0rc2	2
python-iptp	0.1.0	2
python-snappy	0.6.1	2
python3-saml	1.15.0	2
qat-core	1.6.4	2
qtensor	0.1.2	2
rational-activations	0.2.1	2
raven-framework	2.2rc4	2
ray	2.2.0	3
rchitect	0.3.40	2
recency-frequency	0.0.1	2
risktools	0.2.8.1	2
rook	0.1.193	2
rqdatac	2.10.13.1	3
rsm-markup	0.2.4	2
runjob	2.10.2	2
s-gd2	1.8.1	4
sail-ml	0.0.2a0	2
scikit-ipp	1.2.0	2
scrapy-rss	0.3.1	3
secretflow-ray	2.2.0	2
seesaw2	0.10.3	2
seeta-dragon	0.3.0	2
sg2	2.3.1	4
shared-nearest-neighbors	0.1.0	2
sidekick-agent-python	0.1.2	2
silabs-mltk	0.14.0	4
silx	1.1.2	6
simplelayout-stc950502	1.0	2
sleap	1.2.9	2
snappy	3.0.3	2
spaudio	0.7.16	2
speechy	1.0.7	2
srautils	1.0.1	2
strucscan	0.post0.dev68	2
stt	1.4.0a6	7
stt-tflite	0.10.0a10	8
substreams-python	0.0.1	2
supercell	0.11.0	2
tagr	0.0.1	2
targeted	0.0.30	2
tcod	15.0.0	2
temporalio	1.0.0	2
temporalio-village	0.1b1	2
tensorflow	2.11.0rc2	2
tensorflow-cpu	2.11.0rc2	2
tensorflow-decision-forests	1.2.0	2
tensorflow-recommenders-addons	0.5.0	2
tensorflow-rocm-enhanced	2.4.3	2
test-project	0.1.0	2
tf-nightly	2.12.0.dev20230201	3
tf-nightly-cpu	2.12.0.dev20230201	3
tf1-tensorflow-object-detection-api	1.15.0	2
thundra-debugger	0.0.7	2
tidalapi	0.7.0rc1	2
tkextrafont	0.6.3	2
tksvg	0.7.4	2
tmap-viz	1.0.16	2
torch	1.13.1	2
torchaudio	0.13.1	2
torchtext	0.14.1	2
torchvision	0.14.1	2
truckle	0.1.2	2
turicreate	6.4.1	6
ufostroker	0.2.4	2
usa-csc-526-by-spc	1.0.0	2
visible-model-runner	0.0.2	2
vqc	0.0.1	3
xdis	6.0.5	2
xlwings	0.29.0	2
xpress	9.0.2	5
ylearn	0.2.0	2
zenutils	0.3.19	2
zhinst-core	22.8.36541	5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment