Skip to content

Instantly share code, notes, and snippets.

@Mijyuoon
Created November 26, 2019 22:20
Show Gist options
  • Save Mijyuoon/02e7647ad65d58b9fb6ba5f8e43f7a0c to your computer and use it in GitHub Desktop.
Save Mijyuoon/02e7647ad65d58b9fb6ba5f8e43f7a0c to your computer and use it in GitHub Desktop.
Markov chain in PostgreSQL
CREATE OR REPLACE FUNCTION
markov_learn(a_chain BIGINT, a_source TEXT) RETURNS VOID
AS $$
DECLARE
t_depth INTEGER;
t_index INTEGER;
t_words TEXT[];
BEGIN
t_depth := a_chain & 7;
t_words := array_remove(regexp_split_to_array(a_source, E'\\s+'), '');
IF coalesce(array_length(t_words, 1), 0) < t_depth THEN
RETURN;
END IF;
INSERT INTO markov_start VALUES (a_chain, t_words[1 : t_depth])
ON CONFLICT DO NOTHING;
FOR t_index IN
1 .. (array_length(t_words, 1) - t_depth + 1)
LOOP
INSERT INTO markov_cont VALUES (a_chain,
t_words[t_index + 0 : t_index + t_depth - 1], coalesce(t_words[t_index + t_depth], ''), 1)
ON CONFLICT (chain, prev, curr) DO UPDATE SET count = markov_cont.count + 1;
END LOOP;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION
markov_getword(a_chain BIGINT, a_prev TEXT[]) RETURNS TEXT
AS $$
DECLARE
t_depth INTEGER;
t_final INTEGER;
t_accum INTEGER := 0;
t_row markov_cont%ROWTYPE;
BEGIN
t_depth := a_chain & 7;
IF t_depth <> array_length(a_prev, 1) THEN
RETURN NULL;
END IF;
SELECT round(sum(count) * random()) INTO t_final FROM markov_cont WHERE chain = a_chain AND prev = a_prev;
FOR t_row IN
SELECT * FROM markov_cont WHERE chain = a_chain AND prev = a_prev
LOOP
t_accum := t_accum + t_row.count;
if t_final <= t_accum THEN
RETURN nullif(t_row.curr, '');
END IF;
END LOOP;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION
markov_getstart(a_chain BIGINT, a_words TEXT[]) RETURNS TEXT[]
AS $$
DECLARE
t_length INTEGER;
t_loops INTEGER := 2;
t_row1 markov_start%ROWTYPE;
t_row2 markov_cont%ROWTYPE;
BEGIN
a_words := array_remove(a_words, '');
t_length := coalesce(array_length(a_words, 1), 0);
IF t_length < (a_chain & 7) THEN
FOR t_loops IN 1 .. t_loops
LOOP
FOR t_row1 IN
SELECT * FROM markov_start WHERE chain = a_chain AND prev[1 : t_length] = a_words
LOOP
EXIT WHEN random() < 0.02;
END LOOP;
END LOOP;
IF t_row1 IS NOT NULL THEN
RETURN t_row1.prev;
END IF;
FOR t_loops IN 1 .. t_loops
LOOP
FOR t_row2 IN
SELECT * FROM markov_cont WHERE chain = a_chain
AND prev[1 : t_length] = a_words
LOOP
EXIT WHEN random() < 0.02;
END LOOP;
END LOOP;
IF t_row2 IS NOT NULL THEN
RETURN t_row2.prev;
END IF;
END IF;
RETURN a_words;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION
markov_generate(a_chain BIGINT, a_start TEXT) RETURNS TEXT
AS $$
DECLARE
t_depth INTEGER;
t_offset INTEGER;
t_word TEXT;
t_words TEXT[];
BEGIN
t_depth := a_chain & 7;
t_words := markov_getstart(a_chain, regexp_split_to_array(a_start, E'\\s+'));
t_offset := array_length(t_words, 1) - t_depth + 1;
LOOP
t_word := markov_getword(a_chain, t_words[t_offset + 0 : t_offset + t_depth - 1]);
EXIT WHEN t_word IS NULL;
t_words := array_append(t_words, t_word);
t_offset := t_offset + 1;
END LOOP;
RETURN array_to_string(t_words, ' ');
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION
markov_from_messages(a_chain BIGINT, a_depth INTEGER) RETURNS BIGINT
AS $$
DECLARE
t_chain BIGINT;
t_row messages%ROWTYPE;
BEGIN
t_chain = (a_chain & ~255) | (a_depth & 7);
FOR t_row IN
SELECT * FROM messages
LOOP
PERFORM markov_learn(t_chain, t_row.content);
END LOOP;
RETURN t_chain;
END;
$$ LANGUAGE plpgsql;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment