Skip to content

Instantly share code, notes, and snippets.

@sgammon
Created April 5, 2024 00:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sgammon/de8e3a2e0b8e4ec95176936d39097cf6 to your computer and use it in GitHub Desktop.
Save sgammon/de8e3a2e0b8e4ec95176936d39097cf6 to your computer and use it in GitHub Desktop.
Maven Central snapshot from multiple sources
WITH
Snapshots AS (
SELECT TIMESTAMP_TRUNC(MAX(SnapshotAt), DAY) AS Latest
FROM `bigquery-public-data.deps_dev_v1.PackageVersionsLatest`
WHERE system = 'MAVEN'
LIMIT 1
),
ReadyArtifacts AS (
SELECT
LOWER(Name) AS Name,
Version,
SnapshotAt,
VersionInfo,
Licenses,
Hashes,
Advisories,
Registries,
SLSAProvenance,
FROM
`bigquery-public-data.deps_dev_v1.PackageVersionsLatest`
WHERE
TIMESTAMP_TRUNC(SnapshotAt, DAY) = (SELECT Latest FROM Snapshots)
AND
system = 'MAVEN'
AND
VersionInfo.IsRelease = true
AND
DependenciesProcessed = true
),
RelevantProjects AS (
SELECT DISTINCT
Name,
ProjectName,
ProjectType,
RelationProvenance,
RelationType
FROM
`bigquery-public-data.deps_dev_v1.PackageVersionToProjectLatest`
WHERE
ProjectName IS NOT NULL
AND
ProjectType = "GITHUB"
),
Dependencies AS (
SELECT
Name,
Version,
ARRAY_AGG(STRUCT(`To`.Name as Name, `To`.Version as Version, Requirement)) as Dependencies,
FROM
`bigquery-public-data.deps_dev_v1.DependencyGraphEdgesLatest`
WHERE
TIMESTAMP_TRUNC(SnapshotAt, DAY) = (SELECT Latest FROM Snapshots)
AND
system = 'MAVEN'
GROUP BY
SnapshotAt,
Name,
Version
),
Dependents AS (
SELECT
Name,
Version,
NDependents,
FROM
`java-modules-419217.javamodules_v1.deps_package_top_dependents_v1`
),
Commits AS (
SELECT
Name,
ProjectType,
ProjectName,
commitHash,
authorName,
authorEmail,
pushedAt
FROM
`java-modules-419217.javamodules_v1.javamodules_projects_latest_git_v1`
)
SELECT
ra.Name AS Name,
CONCAT("pkg:maven/", ra.Name, "@", ra.Version) AS Purl,
ra.Version AS Version,
ra.SnapshotAt AS SnapshotAt,
ra.VersionInfo AS VersionInfo,
ra.Hashes AS Hashes,
ra.Licenses AS Licenses,
ra.SLSAProvenance AS SLSAProvenance,
ARRAY_LENGTH(ra.Advisories) AS AdvisoryCount,
ra.Advisories AS Advisories,
ra.Registries AS Registries,
pvtpl.ProjectType,
pvtpl.ProjectName,
pvtpl.RelationProvenance,
pvtpl.RelationType,
commits.commitHash AS LatestCommitHash,
commits.authorName AS LatestCommitAuthorName,
commits.authorEmail AS LatestCommitAuthorEmail,
commits.pushedAt AS LatestCommitPushedAt,
dependents.NDependents AS DependentCount,
ARRAY_LENGTH(dependencies.Dependencies) AS DependencyCount,
dependencies.Dependencies,
FROM
ReadyArtifacts ra
LEFT JOIN
RelevantProjects AS pvtpl
ON
ra.Name = pvtpl.Name
LEFT JOIN
Dependencies AS dependencies
ON
ra.Name = dependencies.Name
AND
ra.Version = dependencies.Version
LEFT JOIN
Dependents AS dependents
ON
ra.Name = dependents.Name
AND
ra.Version = dependents.Version
LEFT JOIN
Commits AS commits
ON
ra.Name = commits.Name
ORDER BY
ra.SnapshotAt DESC,
ra.Name ASC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment