Skip to content

Instantly share code, notes, and snippets.

@Strum355
Last active February 28, 2023 13:07
Show Gist options
  • Save Strum355/339ffa07f2eeff085d9665ec49015227 to your computer and use it in GitHub Desktop.
Save Strum355/339ffa07f2eeff085d9665ec49015227 to your computer and use it in GitHub Desktop.
BEGIN;
CREATE OR REPLACE FUNCTION glob_to_regex(pat text) RETURNS text AS $$
DECLARE
res text[] := array[]::text[];
i int := 0;
n int := length(pat) + 1;
BEGIN
WHILE i < n LOOP
DECLARE
c text := substring(pat, i + 1, 1);
BEGIN
i := i + 1;
IF c = '*' THEN
-- compress consecutive `*` into one
IF (array_length(res, 1) IS NULL OR res[array_length(res, 1)] IS NOT NULL) THEN
res := array_append(res, NULL);
END IF;
ELSIF c = '?' THEN
res := array_append(res, '\.');
ELSIF c = '[' THEN
DECLARE
j int := i;
BEGIN
IF substring(pat, j + 1, 1) = '!' THEN
j := j + 1;
END IF;
IF substring(pat, j + 1, 1) = ']' THEN
j := j + 1;
END IF;
WHILE substring(pat, j + 1, 1) <> ']' LOOP
j := j + 1;
END LOOP;
IF j >= n THEN
res := array_append(res, '\[');
ELSE
DECLARE
stuff text := substring(pat, i, j - i + 1);
BEGIN
IF position('-' IN stuff) = 0 THEN
stuff := replace(stuff, '\\', '\\\\');
ELSE
DECLARE
chunks text[] := '{}';
k int := i + 2;
BEGIN
IF substring(pat, i + 1, 1) = '!' THEN
k := k + 1;
END IF;
WHILE TRUE LOOP
k := position('-' in substring(pat, k, j - k + 1));
IF k = 0 THEN
EXIT;
END IF;
chunks := array_append(chunks, substring(pat, i, k - i));
i := k + 1;
k := k + 3;
END LOOP;
DECLARE
chunk text := substring(pat, i, j - i + 1);
BEGIN
IF chunk <> '' THEN
chunks := array_append(chunks, chunk);
ELSE
chunks[array_length(chunks, 1)] := chunks[array_length(chunks, 1)] || '-';
END IF;
-- Remove empty ranges -- invalid in RE.
FOR k IN REVERSE 1 .. array_length(chunks, 1) - 1 LOOP
IF substring(chunks[k - 1], array_length(chunks[k - 1], 1), 1) > substring(chunks[k], 1, 1) THEN
chunks[k - 1] := substring(chunks[k - 1], 1, array_length(chunks[k - 1], 1) - 1) || substring(chunks[k], 2);
chunks := array_remove(chunks, k);
END IF;
END LOOP;
-- Escape backslashes and hyphens for set difference (--).
-- Hyphens that create ranges shouldn't be escaped.
stuff := array_to_string(chunks, '-', true);
END;
END;
END IF;
-- Escape set operations (&&, ~~ and ||).
stuff := replace(stuff, '([&~|])', '\\\\\1');
i := j + 1;
IF stuff = '' THEN
-- Empty range: never match.
res := array_append(res, '(?!)');
ELSIF stuff = '!' THEN
-- Negated empty range: match any character.
res := array_append(res, '\.');
ELSE
IF substring(stuff, 1, 1) = '!' THEN
stuff := '^' || substring(stuff, 2);
ELSIF substring(stuff, 1, 1) IN ('^', '[') THEN
stuff := '\\' || stuff;
END IF;
res := array_append(res, format('[%s]', stuff));
END IF;
END;
END IF;
END;
ELSE
res := array_append(res, replace(c, '\', '\\\\'));
END IF;
END;
END LOOP;
ASSERT i = n, format('"%s" %s != %s', pat, i, n);
-- Deal with STARs.
DECLARE
inp text[] := res;
res text[] := array[]::text[];
i int := 1;
n int := array_length(inp, 1)+1;
BEGIN
-- Fixed pieces at the start?
WHILE i < n AND inp[i] IS NOT NULL LOOP
res := array_append(res, inp[i]);
i := i + 1;
END LOOP;
-- Now deal with STAR fixed STAR fixed ...
-- For an interior `STAR fixed` pairing, we want to do a minimal
-- .*? match followed by `fixed`, with no possibility of backtracking.
-- Atomic groups ("(?>...)") allow us to spell that directly.
-- Note: people rely on the undocumented ability to join multiple
-- translate() results together via "|" to build large regexps matching
-- "one of many" shell patterns.
WHILE i < n LOOP
ASSERT inp[i] IS NULL, format('%s at %s IS NOT NULL', inp[i], i);
i := i + 1;
IF i = n THEN
res := array_append(res, '.*');
EXIT;
END IF;
ASSERT inp[i] IS NOT NULL, format('%s IS NULL', i);
DECLARE
fixed text[] := '{}';
fixed1 text := '';
BEGIN
WHILE i < n AND inp[i] IS NOT NULL LOOP
fixed := array_append(fixed, inp[i]);
i := i + 1;
END LOOP;
fixed1 := array_to_string(fixed, '');
IF i = n THEN
res := array_append(res, '.*');
res := array_append(res, fixed1);
ELSE
res := array_append(res, format('(.*?%s)', fixed1));
END IF;
END;
END LOOP;
ASSERT i = n, format('%s != %s', i, n);
RETURN format('^%s$', array_to_string(res, ''));
END;
END
$$ LANGUAGE plpgsql;
CREATE TEMPORARY TABLE IF NOT EXISTS package_repo_filters (
id SERIAL PRIMARY KEY NOT NULL,
behaviour TEXT NOT NULL,
scheme TEXT NOT NULL,
matcher JSONB NOT NULL,
internal_regex TEXT NOT NULL
) ON COMMIT DROP;
CREATE OR REPLACE FUNCTION func_package_repo_filters_globtoregex() RETURNS TRIGGER AS $$
BEGIN
IF NEW.matcher ? 'VersionGlob' THEN
NEW.internal_regex := glob_to_regex(NEW.matcher->>'VersionGlob');
ELSE
NEW.internal_regex := glob_to_regex(NEW.matcher->>'PackageGlob');
END IF;
RETURN NEW;
END $$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS trigger_package_repo_filters_globtoregex ON package_repo_filters;
CREATE TRIGGER trigger_package_repo_filters_globtoregex
BEFORE INSERT ON package_repo_filters
FOR EACH ROW
EXECUTE PROCEDURE func_package_repo_filters_globtoregex();
ALTER TABLE package_repo_filters
DROP CONSTRAINT IF EXISTS package_repo_filters_is_pkgrepo_scheme,
ADD CONSTRAINT package_repo_filters_is_pkgrepo_scheme CHECK (
scheme = ANY('{"semanticdb","npm","go","python","rust-analyzer","scip-ruby"}')
);
ALTER TABLE package_repo_filters
DROP CONSTRAINT IF EXISTS package_repo_filters_behaviour_is_allow_or_block,
ADD CONSTRAINT package_repo_filters_behaviour_is_allow_or_block CHECK (
behaviour = ANY('{"BLOCK","ALLOW"}')
);
ALTER TABLE package_repo_filters
DROP CONSTRAINT IF EXISTS package_repo_filters_emptystr_version_glob,
ADD CONSTRAINT package_repo_filters_emptystr_version_glob CHECK (
NOT(matcher ? 'VersionGlob') OR (matcher->>'VersionGlob' <> '')
);
ALTER TABLE package_repo_filters
DROP CONSTRAINT IF EXISTS package_repo_filters_oneof,
ADD CONSTRAINT package_repo_filters_oneof CHECK (
matcher ? 'VersionGlob' OR matcher ? 'PackageGlob'
);
CREATE OR REPLACE FUNCTION is_unversioned_package_allowed(package text, pkgscheme text) RETURNS boolean AS $$
DECLARE
blocked boolean;
allowed boolean;
BEGIN
blocked := (
SELECT COALESCE(bool_or(m.matches), FALSE)
FROM (
SELECT TRUE AS matches
FROM package_repo_filters f
WHERE f.behaviour = 'BLOCK'
AND f.scheme = pkgscheme
AND (
(
f.matcher ? 'PackageGlob'
AND package ~ f.internal_regex
) OR (
-- non-all-encompassing version globs don't apply to unversioned packages,
-- likely we're at too-early point in the syncing process to know, but also
-- we may still want the package to browse versions that _dont_ match this
f.matcher ? 'VersionGlob'
AND f.matcher->>'PackageName' = package
AND f.matcher->>'VersionGlob' = '*'
)
)
) as m
);
-- blacklist takes priority, can't poke holes out
IF blocked = TRUE THEN
RETURN FALSE;
END IF;
-- default allow if no allowlist and not blocked so far
allowed := (
SELECT COUNT(*) = 0
FROM package_repo_filters f
WHERE f.behaviour = 'ALLOW'
AND f.scheme = pkgscheme
);
allowed := allowed OR (
SELECT COALESCE(bool_or(m.matches), FALSE)
FROM (
SELECT TRUE AS matches
FROM package_repo_filters f
WHERE f.behaviour = 'ALLOW'
AND f.scheme = pkgscheme
AND (
(
f.matcher ? 'PackageGlob'
AND package ~ f.internal_regex
) OR (
f.matcher ? 'VersionGlob'
AND f.matcher->>'PackageName' = package
AND f.matcher->>'VersionGlob' = '*'
)
)
) AS m
);
RETURN allowed;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION is_versioned_package_allowed(package text, version text, pkgscheme text, dontcheckpkg bool) RETURNS boolean AS $$
DECLARE
blocked boolean;
allowed boolean;
BEGIN
blocked := (
SELECT COALESCE(bool_or(m.matches), FALSE)
FROM (
SELECT TRUE AS matches
FROM package_repo_filters f
WHERE f.behaviour = 'BLOCK'
AND f.scheme = pkgscheme
AND
(
(
-- for when we know that the input packages have already been checked against this
NOT(dontcheckpkg)
AND f.matcher ? 'PackageGlob'
AND package ~ f.internal_regex
) OR (
f.matcher ? 'VersionGlob'
AND f.matcher->>'PackageName' = package
AND version ~ f.internal_regex
)
)
) as m
);
-- blacklist takes priority, can't poke holes out
IF blocked = TRUE THEN
RETURN FALSE;
END IF;
-- default allow if no allowlist and not blocked so far
allowed := (
SELECT COUNT(*) = 0
FROM package_repo_filters f
WHERE behaviour = 'ALLOW'
AND f.scheme = pkgscheme
);
allowed := allowed OR (
SELECT COALESCE(bool_or(m.matches), FALSE)
FROM (
SELECT TRUE AS matches
FROM package_repo_filters f
WHERE f.behaviour = 'ALLOW'
AND f.scheme = pkgscheme
AND
(
(
NOT(dontcheckpkg)
AND f.matcher ? 'PackageGlob'
AND package ~ f.internal_regex
) OR (
f.matcher ? 'VersionGlob'
AND f.matcher->>'PackageName' = package
AND version ~ f.internal_regex
)
)
) AS m
);
RETURN allowed;
END
$$ LANGUAGE plpgsql;
INSERT INTO package_repo_filters (behaviour, scheme, matcher)
VALUES ('BLOCK', 'rust-analyzer', '{"PackageGlob": "*tokio*"}'::jsonb)
ON CONFLICT DO NOTHING;
SELECT * FROM package_repo_filters;
-- LIST
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT lr.id, lr.scheme, lr.name, array_agg(prv.id) as vid, array_agg(prv.version) as version
FROM (
SELECT l.id, l.scheme, l.name, is_unversioned_package_allowed(l.name, l.scheme) as allowed
FROM lsif_dependency_repos l
WHERE
l.scheme = 'rust-analyzer'
) lr
JOIN package_repo_versions prv ON lr.id = prv.package_id
WHERE
lr.allowed AND
is_versioned_package_allowed(lr.name, prv.version, lr.scheme, true)
GROUP BY lr.id, lr.scheme, lr.name -- group by
ORDER BY lr.id ASC -- order by
LIMIT 100; -- limit
-- COUNT
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT COUNT(DISTINCT(lr.id))
FROM (
SELECT l.id, l.scheme, l.name, is_unversioned_package_allowed(l.name, l.scheme) as allowed
FROM lsif_dependency_repos l
WHERE
l.scheme = 'rust-analyzer'
) lr
JOIN package_repo_versions prv ON lr.id = prv.package_id
WHERE
lr.allowed AND
is_versioned_package_allowed(lr.name, prv.version, lr.scheme, true)
ROLLBACK;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment