Skip to content

Instantly share code, notes, and snippets.

@frozenspider
Last active November 20, 2021 06:19
Show Gist options
  • Save frozenspider/677290625162db87ef0b8c27d2527aa8 to your computer and use it in GitHub Desktop.
Save frozenspider/677290625162db87ef0b8c27d2527aa8 to your computer and use it in GitHub Desktop.
text_search_configuration.sql, optimized
SET LOCAL yb_non_ddl_txn_for_sys_tables_allowed TO true;
DO $$
DECLARE
template_snowball_oid oid;
proc_oids oid array;
noop int;
BEGIN
IF NOT EXISTS (
SELECT FROM pg_catalog.pg_ts_template WHERE tmplname = 'snowball'
) THEN
-- pg_proc
--
-- Unfortunately we can't put this into the CTE below because then
-- yb_non_ddl_txn_for_sys_tables_allowed prevents us from resolving regproc reference in
-- pg_ts_template
WITH proc_oids_from_query (oid) AS (
INSERT INTO pg_catalog.pg_proc (
proname, pronamespace, proowner, prolang, procost, prorows, provariadic, protransform,
prokind, prosecdef, proleakproof, proisstrict, proretset, provolatile, proparallel, pronargs,
pronargdefaults, prorettype, proargtypes, proallargtypes, proargmodes, proargnames,
proargdefaults, protrftypes, prosrc, probin, proconfig, proacl
) VALUES
('dsnowball_init', 11, 10, 13, 1, 0, 0, '-', 'f', false, false, true, false,
'v', 'u', 1, 0, 2281, '2281', NULL, NULL, NULL,
NULL, NULL, 'dsnowball_init', '$libdir/dict_snowball', NULL, NULL),
('dsnowball_lexize', 11, 10, 13, 1, 0, 0, '-', 'f', false, false, true, false,
'v', 'u', 4, 0, 2281, '2281 2281 2281 2281', NULL, NULL, NULL,
NULL, NULL, 'dsnowball_lexize', '$libdir/dict_snowball', NULL, NULL)
RETURNING oid
)
SELECT array_agg(oid) FROM proc_oids_from_query INTO proc_oids;
-- pg_ts_template
INSERT INTO pg_catalog.pg_ts_template (
tmplname, tmplnamespace, tmplinit, tmpllexize
) VALUES
('snowball', 11, 'dsnowball_init', 'dsnowball_lexize')
RETURNING oid
INTO template_snowball_oid;
-- Use one huge CTE block to aggregate everything text search related.
-- This is used because yb_non_ddl_txn_for_sys_tables_allowed doesn't let us use temp tables.
WITH
-- Raw text search config values without auto-generated OIDs.
-- Note that Russian uses english_stem as ASCII dictionary (see
-- src/postgres/src/backend/snowball/Makefile for detials).
ts_raw_values (
row_number, cfgname, nconfigs, configs_with_own_dict, configs_with_english_dict
) AS (
VALUES
(1, 'danish', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(2, 'dutch', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(3, 'english', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(4, 'finnish', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(5, 'french', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(6, 'german', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(7, 'hungarian', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(8, 'italian', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(9, 'norwegian', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(10, 'portuguese', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(11, 'romanian', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(12, 'russian', 22, ARRAY[2, 10, 17], ARRAY[1, 11, 16]),
(13, 'spanish', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(14, 'swedish', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[]),
(15, 'turkish', 22, ARRAY[1, 2, 10, 11, 16, 17], ARRAY[]::int[])
),
-- pg_ts_config
ts_config (ts_oid, cfgname) AS (
INSERT INTO pg_catalog.pg_ts_config (
cfgname, cfgnamespace, cfgowner, cfgparser
) SELECT cfgname, 11, 10, 3722 FROM ts_raw_values
RETURNING oid, cfgname
),
-- pg_ts_dict
ts_dict_oids (ts_dict_oid) AS (
INSERT INTO pg_catalog.pg_ts_dict (
dictname, dictnamespace, dictowner, dicttemplate, dictinitoption
)
SELECT
cfgname || '_stem', 11, 10, template_snowball_oid,
'language = ''' || cfgname || ''', stopwords = ''' || cfgname || ''''
FROM ts_raw_values
RETURNING oid
),
-- Aggregate everything we know about text seach configs
ts_full (
ts_oid, cfgname, ts_dict_oid, nconfigs, configs_with_own_dict, configs_with_english_dict
) AS (
SELECT
ts_values_rn.ts_oid,
ts_values_rn.cfgname,
ts_dict_rn.ts_dict_oid,
ts_raw_values.nconfigs,
ts_raw_values.configs_with_own_dict,
ts_raw_values.configs_with_english_dict
FROM (
SELECT
ROW_NUMBER() OVER (ORDER BY ts_dict_oid) AS row_number,
ts_dict_oid
FROM ts_dict_oids
) ts_dict_rn
INNER JOIN (
SELECT
ROW_NUMBER() OVER (ORDER BY ts_oid) AS row_number,
ts_oid,
cfgname
FROM ts_config
) ts_values_rn ON ts_values_rn.row_number = ts_dict_rn.row_number
INNER JOIN ts_raw_values ON ts_raw_values.row_number = ts_dict_rn.row_number
),
ts_dict_english_oid AS (
SELECT ts_dict_oid FROM ts_full WHERE cfgname = 'english'
),
ts_configs (ts_oid, cfgname, idx, dict_oid_to_use) AS (
SELECT
ts_full.ts_oid,
ts_full.cfgname,
s.idx,
CASE
WHEN s.idx = ANY(ts_full.configs_with_own_dict) THEN ts_full.ts_dict_oid
WHEN s.idx = ANY(ts_full.configs_with_english_dict) THEN (TABLE ts_dict_english_oid)
ELSE 3765::oid -- "simple" dictionary
END
FROM ts_full
INNER JOIN (SELECT * FROM generate_series(1, 100)) s (idx) ON s.idx <= ts_full.nconfigs
ORDER BY ts_full.ts_oid, s.idx
),
-- pg_ts_config_map
_ignored_1 AS (
INSERT INTO pg_catalog.pg_ts_config_map (
mapcfg, maptokentype, mapseqno, mapdict
) SELECT ts_oid, idx, 1, dict_oid_to_use FROM ts_configs
),
-- pg_description
_ignored_2 AS (
INSERT INTO pg_catalog.pg_description (
objoid, classoid, objsubid, description
) SELECT objoid, classoid, 0, description FROM (
-- pg_ts_config
SELECT ts_oid, 3602, 'configuration for ' || cfgname || ' language' FROM ts_config
UNION ALL
-- pg_ts_dict
SELECT ts_dict_oid, 3600, 'snowball stemmer for ' || cfgname || ' language' FROM ts_full
UNION ALL
-- pg_ts_template
VALUES (template_snowball_oid, 3764, 'snowball stemmer')
-- pg_ts_template
) t (objoid, classoid, description)
),
-- pg_depend
_ignored_3 AS (
INSERT INTO pg_catalog.pg_depend (
classid, objid, objsubid, refclassid, refobjid, refobjsubid, deptype
) SELECT classid, objid, 0, refclassid, refobjid, 0, 'n' FROM (
-- pg_ts_dict --> pg_ts_template
SELECT 3600, ts_dict_oids.ts_dict_oid, 3764, template_snowball_oid FROM ts_dict_oids
UNION ALL
-- pg_ts_config --> pg_ts_dict
SELECT 3602, ts_full.ts_oid, 3600, ts_full.ts_dict_oid FROM ts_full
UNION ALL
-- pg_ts_config --> pg_ts_dict (Russian)
SELECT 3602, ts_full.ts_oid, 3600, (TABLE ts_dict_english_oid) FROM ts_full
WHERE ts_full.cfgname = 'russian'
UNION ALL
-- pg_ts_template --> pg_proc
SELECT 3764, template_snowball_oid, 1255, p.oid FROM unnest(proc_oids) p (oid)
) t (classid, objid, refclassid, refobjid)
)
SELECT 1 INTO noop; -- For consistency, everything was put into CTE expressions.
END IF;
END $$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment