Created
July 9, 2022 11:13
-
-
Save jspeed-meyers/d309dc84c561845f8e3d77a7fbdf8de3 to your computer and use it in GitHub Desktop.
Measure number of dependencies for each version of most depended upon packages using deps.dev data - SQL Query
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DECLARE LatestSnapshot TIMESTAMP; | |
SET LatestSnapshot = (SELECT MAX(Time) FROM `bigquery-public-data.deps_dev_v1.Snapshots`); | |
WITH | |
-- Releases includes every release of every package. | |
Releases AS ( | |
SELECT | |
System, | |
Name, | |
Version, | |
VersionInfo | |
FROM | |
`bigquery-public-data.deps_dev_v1.PackageVersions` | |
WHERE | |
SnapshotAt = LatestSnapshot | |
AND VersionInfo.IsRelease | |
AND System IS NOT NULL ), | |
-- OrderedReleases has every version of every package, sorted by the version (descending). | |
OrderedReleases AS ( | |
SELECT | |
System, | |
Name, | |
Version, | |
VersionInfo.Ordinal AS VersionOrdinal, | |
ROW_NUMBER() OVER (PARTITION BY Name ORDER BY VersionInfo.Ordinal DESC) AS RowNumber | |
FROM | |
Releases ), | |
HighestReleases AS ( | |
SELECT | |
System, | |
Name, | |
Version | |
FROM | |
OrderedReleases | |
WHERE | |
RowNumber = 1 ), | |
-- PackagesByPopularity has every package along with its popularity rank *within* each system, calculated as: | |
-- How many packages depend on this one, directly or indirectly, in their latest version? | |
PackagesByPopularity AS ( | |
SELECT | |
D.System, | |
D.Dependency.Name AS Name, | |
COUNT(*) AS NDependents, | |
ROW_NUMBER() OVER (PARTITION BY D.System ORDER BY COUNT(*) DESC) AS Rank | |
FROM | |
`bigquery-public-data.deps_dev_v1.Dependencies` AS D | |
JOIN | |
HighestReleases H | |
ON | |
D.System = H.System | |
AND D.Version = H.Version | |
AND D.Name = H.Name | |
WHERE | |
D.SnapshotAt = LatestSnapshot | |
GROUP BY | |
D.System, | |
D.Dependency.Name ), | |
-- TopPopularPackages contains the top 100 packages for each system. | |
TopPopularPackages AS ( | |
SELECT | |
P.System, | |
P.Name, | |
P.NDependents, | |
P.Rank | |
FROM | |
PackagesByPopularity P | |
WHERE | |
P.Rank <= 100 | |
AND P.System IS NOT NULL) | |
SELECT | |
O.System, | |
O.Name, | |
T.Rank, | |
O.VersionOrdinal, | |
O.Version, | |
COUNT(*) AS NTotalDependencies, | |
COUNT(CASE WHEN E.System IS NOT NULL THEN 1 END) AS NDirectDependencies | |
FROM | |
TopPopularPackages T | |
JOIN | |
OrderedReleases O | |
ON | |
O.System = T.System | |
AND O.Name = T.Name | |
LEFT JOIN | |
`bigquery-public-data.deps_dev_v1.Dependencies` D | |
ON | |
D.System = O.System | |
AND D.Name = O.Name | |
AND D.Version = O.Version | |
-- all conditions need to be in ON for LEFT JOIN to include packages with no dependencies | |
AND D.SnapshotAt = LatestSnapshot | |
LEFT JOIN | |
`bigquery-public-data.deps_dev_v1.DependencyGraphEdges` E | |
ON | |
-- Matching this packages | |
E.System = O.System | |
AND E.Name = O.Name | |
AND E.Version = O.Version | |
-- Matching this dependencny | |
AND E.To.Name = D.Dependency.Name | |
AND E.To.Version = D.Dependency.Version | |
-- Only direct dependencies | |
AND E.From.Name = O.Name | |
-- all conditions need to be in ON for LEFT JOIN to include packages with no dependencies | |
AND E.SnapshotAt = LatestSnapshot | |
GROUP BY | |
O.System, | |
O.Name, | |
O.Version, | |
O.VersionOrdinal, | |
T.Rank | |
ORDER BY | |
O.System ASC, | |
T.Rank ASC, | |
O.VersionOrdinal ASC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment