-
-
Save kez/17638bade0382f820280dafa46277435 to your computer and use it in GitHub Desktop.
CREATE EXTENSION IF NOT EXISTS "unaccent" | |
CREATE OR REPLACE FUNCTION slugify("value" TEXT) | |
RETURNS TEXT AS $$ | |
-- removes accents (diacritic signs) from a given string -- | |
WITH "unaccented" AS ( | |
SELECT unaccent("value") AS "value" | |
), | |
-- lowercases the string | |
"lowercase" AS ( | |
SELECT lower("value") AS "value" | |
FROM "unaccented" | |
), | |
-- remove single and double quotes | |
"removed_quotes" AS ( | |
SELECT regexp_replace("value", '[''"]+', '', 'gi') AS "value" | |
FROM "lowercase" | |
), | |
-- replaces anything that's not a letter, number, hyphen('-'), or underscore('_') with a hyphen('-') | |
"hyphenated" AS ( | |
SELECT regexp_replace("value", '[^a-z0-9\\-_]+', '-', 'gi') AS "value" | |
FROM "removed_quotes" | |
), | |
-- trims hyphens('-') if they exist on the head or tail of the string | |
"trimmed" AS ( | |
SELECT regexp_replace(regexp_replace("value", '\-+$', ''), '^\-', '') AS "value" | |
FROM "hyphenated" | |
) | |
SELECT "value" FROM "trimmed"; | |
$$ LANGUAGE SQL STRICT IMMUTABLE; |
CREATE FUNCTION public.set_slug_from_name() RETURNS trigger | |
LANGUAGE plpgsql | |
AS $$ | |
BEGIN | |
NEW.slug := slugify(NEW.name); | |
RETURN NEW; | |
END | |
$$; | |
CREATE TRIGGER "trg_slug_insert" | |
BEFORE INSERT ON "my_table_name" | |
FOR EACH ROW | |
WHEN (NEW.name IS NOT NULL AND NEW.slug IS NULL) | |
EXECUTE PROCEDURE set_slug_from_name(); |
I've rewritten this in PL/PGSQL, making it more readable to me. Also added some german specific umlaut replacement rules, and declared it as PARALLEL SAFE which allows for a substantially neater and faster query plan in my case.
CREATE EXTENSION IF NOT EXISTS "unaccent";
CREATE OR REPLACE FUNCTION slugify(t text) RETURNS text
AS $$
BEGIN
t := regexp_replace(t, '[Ää]', 'ae', 'g');
t := regexp_replace(t, '[Öö]', 'oe', 'g');
t := regexp_replace(t, '[Üü]', 'ue', 'g');
t := unaccent(t);
t := lower(t);
t := regexp_replace(t, '[''"]+', '', 'gi');
t := regexp_replace(t, '[^a-z0-9\\-_]+', '-', 'gi');
t := regexp_replace(t, '\-+$', '');
t := regexp_replace(t, '^\-', '');
RETURN t;
END;
$$ LANGUAGE plpgsql STRICT IMMUTABLE PARALLEL SAFE;
Thanks for sharing this modified version of @ianks function, the removal of the leading and trailing -
didn't work for me. So I replaced it with the trim
string function.
"trimmed" AS (
SELECT trim(both '-' from "value") AS "value"
FROM "hyphenated"
)
the following chars are all not being filtered out:
\ backslash
] right square bracket
^ caret
_ underscore
The issue seems to be a slash too many, \\-_
will match characters from range "\" to "_" (char code 92 to 95).
Removing one of the slashes made it work as intended, i.e. :
-- replaces anything that's not a letter, number, hyphen('-'), or underscore('_') with a hyphen('-')
"hyphenated" AS (
SELECT regexp_replace("value", '[^a-z0-9\-_]+', '-', 'gi') AS "value"
FROM "removed_quotes"
),
the following chars are all not being filtered out:
\ backslash ] right square bracket ^ caret _ underscore [...]
Thank you @NielsRenard! With your fix the slugify
function is now more robust for corner cases like "the title [something]".
@kez: can you please update the code in the gist please? This is a useful utility.
If you don't care that the slug changes when the name changes, you can also make it a generated column since
slugify(value text)
is immutable.