Skip to content

Instantly share code, notes, and snippets.

@UrsaDK
Last active April 16, 2020 17:47
Show Gist options
  • Save UrsaDK/bdf771d89b39e1873e6c7c5c818dde7f to your computer and use it in GitHub Desktop.
Save UrsaDK/bdf771d89b39e1873e6c7c5c818dde7f to your computer and use it in GitHub Desktop.
Replace musicbrainz_unaccent with pgsql's unaccent extension
/**
* MusicBrainz (http://musicbrainz.org) comes with a custom unaccent pgsql extension (musicbrainz_unaccent).
* However, the AWS PGSQL service does not include this extension, and it cannot be easily added.
*
* The following code duplicates the musicbrainz_unaccent functionality using the built-in unaccent extension.
*/
-- Install the unaccent extension; IF NOT EXISTS keeps the script re-runnable
-- on databases where the extension is already present.
CREATE EXTENSION IF NOT EXISTS unaccent;
-- musicbrainz_unaccent(txt): thin wrapper that delegates to unaccent().
--   txt      text to strip accents from
--   returns  the result of unaccent(txt); NULL when txt is NULL
-- STRICT lets PostgreSQL return NULL for NULL input without running the body.
-- NOTE(review): unaccent() itself is not declared IMMUTABLE; the IMMUTABLE
-- marking is kept from the original definition, presumably so the function can
-- be used in index expressions -- confirm before relying on it.
CREATE OR REPLACE FUNCTION musicbrainz_unaccent(txt text) RETURNS text AS $$
BEGIN
    RETURN unaccent(txt);
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
-- Text search dictionary named musicbrainz_unaccentdict, built from the
-- unaccent template and loading the 'unaccent' rules file.
CREATE TEXT SEARCH DICTIONARY musicbrainz_unaccentdict (
    TEMPLATE = unaccent,
    RULES    = 'unaccent'
);
/**
* Tests for the musicbrainz_unaccent extension.
* (The following code is not required to use the extension)
*/
-- Smoke tests: each statement is expected to return true.

-- Accented Latin letters are replaced by their base letters.
SELECT musicbrainz_unaccent('Hôtel') = 'Hotel';
SELECT musicbrainz_unaccent('ľščťžýáí') = 'lsctzyai';
-- Dash punctuation is expected to pass through unchanged.
-- NOTE(review): this depends on the unaccent.rules file shipped with the
-- server; some rule sets may map these dashes to '-' -- confirm on the target version.
SELECT musicbrainz_unaccent('foo—bar‒baz') = 'foo—bar‒baz';
-- Plain ASCII, empty and NULL inputs come back as they went in.
SELECT musicbrainz_unaccent('nonunicode') = 'nonunicode';
SELECT musicbrainz_unaccent('') = '';
SELECT musicbrainz_unaccent(null) IS NULL;
-- Large input (65536 characters).
SELECT musicbrainz_unaccent(repeat('ä', 65536)) = repeat('a', 65536);
-- The dictionary yields the same unaccented text when used through ts_lexize.
-- (The closing quote of the literal was a typographic quote in the original, a syntax error.)
SELECT ts_lexize('musicbrainz_unaccentdict', 'ľščťžýáí foo—bar‒baz nonunicode') = E'{"lsctzyai foo—bar‒baz nonunicode"}';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment