Created
November 26, 2019 22:20
-
-
Save Mijyuoon/02e7647ad65d58b9fb6ba5f8e43f7a0c to your computer and use it in GitHub Desktop.
Markov chain in PostgreSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION | |
markov_learn(a_chain BIGINT, a_source TEXT) RETURNS VOID | |
AS $$ | |
DECLARE | |
t_depth INTEGER; | |
t_index INTEGER; | |
t_words TEXT[]; | |
BEGIN | |
t_depth := a_chain & 7; | |
t_words := array_remove(regexp_split_to_array(a_source, E'\\s+'), ''); | |
IF coalesce(array_length(t_words, 1), 0) < t_depth THEN | |
RETURN; | |
END IF; | |
INSERT INTO markov_start VALUES (a_chain, t_words[1 : t_depth]) | |
ON CONFLICT DO NOTHING; | |
FOR t_index IN | |
1 .. (array_length(t_words, 1) - t_depth + 1) | |
LOOP | |
INSERT INTO markov_cont VALUES (a_chain, | |
t_words[t_index + 0 : t_index + t_depth - 1], coalesce(t_words[t_index + t_depth], ''), 1) | |
ON CONFLICT (chain, prev, curr) DO UPDATE SET count = markov_cont.count + 1; | |
END LOOP; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION | |
markov_getword(a_chain BIGINT, a_prev TEXT[]) RETURNS TEXT | |
AS $$ | |
DECLARE | |
t_depth INTEGER; | |
t_final INTEGER; | |
t_accum INTEGER := 0; | |
t_row markov_cont%ROWTYPE; | |
BEGIN | |
t_depth := a_chain & 7; | |
IF t_depth <> array_length(a_prev, 1) THEN | |
RETURN NULL; | |
END IF; | |
SELECT round(sum(count) * random()) INTO t_final FROM markov_cont WHERE chain = a_chain AND prev = a_prev; | |
FOR t_row IN | |
SELECT * FROM markov_cont WHERE chain = a_chain AND prev = a_prev | |
LOOP | |
t_accum := t_accum + t_row.count; | |
if t_final <= t_accum THEN | |
RETURN nullif(t_row.curr, ''); | |
END IF; | |
END LOOP; | |
RETURN NULL; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION | |
markov_getstart(a_chain BIGINT, a_words TEXT[]) RETURNS TEXT[] | |
AS $$ | |
DECLARE | |
t_length INTEGER; | |
t_loops INTEGER := 2; | |
t_row1 markov_start%ROWTYPE; | |
t_row2 markov_cont%ROWTYPE; | |
BEGIN | |
a_words := array_remove(a_words, ''); | |
t_length := coalesce(array_length(a_words, 1), 0); | |
IF t_length < (a_chain & 7) THEN | |
FOR t_loops IN 1 .. t_loops | |
LOOP | |
FOR t_row1 IN | |
SELECT * FROM markov_start WHERE chain = a_chain AND prev[1 : t_length] = a_words | |
LOOP | |
EXIT WHEN random() < 0.02; | |
END LOOP; | |
END LOOP; | |
IF t_row1 IS NOT NULL THEN | |
RETURN t_row1.prev; | |
END IF; | |
FOR t_loops IN 1 .. t_loops | |
LOOP | |
FOR t_row2 IN | |
SELECT * FROM markov_cont WHERE chain = a_chain | |
AND prev[1 : t_length] = a_words | |
LOOP | |
EXIT WHEN random() < 0.02; | |
END LOOP; | |
END LOOP; | |
IF t_row2 IS NOT NULL THEN | |
RETURN t_row2.prev; | |
END IF; | |
END IF; | |
RETURN a_words; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION | |
markov_generate(a_chain BIGINT, a_start TEXT) RETURNS TEXT | |
AS $$ | |
DECLARE | |
t_depth INTEGER; | |
t_offset INTEGER; | |
t_word TEXT; | |
t_words TEXT[]; | |
BEGIN | |
t_depth := a_chain & 7; | |
t_words := markov_getstart(a_chain, regexp_split_to_array(a_start, E'\\s+')); | |
t_offset := array_length(t_words, 1) - t_depth + 1; | |
LOOP | |
t_word := markov_getword(a_chain, t_words[t_offset + 0 : t_offset + t_depth - 1]); | |
EXIT WHEN t_word IS NULL; | |
t_words := array_append(t_words, t_word); | |
t_offset := t_offset + 1; | |
END LOOP; | |
RETURN array_to_string(t_words, ' '); | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION | |
markov_from_messages(a_chain BIGINT, a_depth INTEGER) RETURNS BIGINT | |
AS $$ | |
DECLARE | |
t_chain BIGINT; | |
t_row messages%ROWTYPE; | |
BEGIN | |
t_chain = (a_chain & ~255) | (a_depth & 7); | |
FOR t_row IN | |
SELECT * FROM messages | |
LOOP | |
PERFORM markov_learn(t_chain, t_row.content); | |
END LOOP; | |
RETURN t_chain; | |
END; | |
$$ LANGUAGE plpgsql; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment