Last active
March 22, 2017 02:05
-
-
Save mumbleskates/7bcef4015d5b1422e88b to your computer and use it in GitHub Desktop.
Pure-SQL markov heaps (SQLite)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* powers of 2 utility view for heap traversal */ | |
CREATE VIEW IF NOT EXISTS power2 AS | |
WITH RECURSIVE doubling(n) AS ( | |
VALUES (2) | |
UNION ALL | |
SELECT n*2 FROM doubling LIMIT 62 | |
) | |
SELECT n twos FROM doubling; | |
/* generates random floating point values in interval [0, 1) */ | |
CREATE VIEW IF NOT EXISTS rand_real AS | |
SELECT 0.5 - RANDOM() / CAST(-9223372036854775808 AS REAL) / 2 rnd; | |
/* token to token-id mapping */ | |
CREATE TABLE IF NOT EXISTS tokens ( | |
id INTEGER PRIMARY KEY, | |
token TEXT UNIQUE | |
); | |
/* markov heap data */ | |
CREATE TABLE IF NOT EXISTS triples ( | |
a INTEGER NOT NULL REFERENCES tokens(id), | |
b INTEGER NOT NULL REFERENCES tokens(id), | |
c INTEGER NOT NULL REFERENCES tokens(id), | |
weight INTEGER NOT NULL, | |
idx INTEGER PRIMARY KEY, | |
total INTEGER NOT NULL, | |
forward_idx INTEGER NOT NULL, | |
forward_total INTEGER NOT NULL, | |
reverse_idx INTEGER NOT NULL, | |
reverse_total INTEGER NOT NULL, | |
center_idx INTEGER NOT NULL, | |
center_total INTEGER NOT NULL | |
); | |
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__unique ON triples(a, b, c); | |
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__forward_idx ON triples(a, b, forward_idx); | |
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__reverse_idx ON triples(b, c, reverse_idx); | |
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__center_idx ON triples(b, center_idx); | |
/* allows viewing and modifying (through insert) of triples weights */ | |
CREATE VIEW IF NOT EXISTS triples_simple AS SELECT a, b, c, weight FROM triples; | |
/* inserts triples weights as deltas while maintaining heap integrity */ | |
CREATE TRIGGER IF NOT EXISTS tg_triples__populate INSTEAD OF INSERT ON triples_simple | |
FOR EACH ROW BEGIN | |
/* ensure the triple is created */ | |
INSERT OR IGNORE INTO triples ( | |
a, b, c, weight, | |
idx, total, forward_idx, forward_total, | |
reverse_idx, reverse_total, center_idx, center_total | |
) | |
VALUES ( | |
new.a, new.b, new.c, | |
0, | |
COALESCE((SELECT MAX(t.idx) + 1 FROM triples t), 1), | |
0, | |
COALESCE((SELECT MAX(t.forward_idx) + 1 FROM triples t | |
WHERE t.a = new.a AND t.b = new.b), 1), | |
0, | |
COALESCE((SELECT MAX(t.reverse_idx) + 1 FROM triples t | |
WHERE t.b = new.b AND t.c = new.c), 1), | |
0, | |
COALESCE((SELECT MAX(t.center_idx) + 1 FROM triples t | |
WHERE t.b = new.b), 1), | |
0 | |
); | |
/* update totals in own row */ | |
UPDATE triples | |
SET | |
weight = weight + new.weight, | |
total = total + new.weight, | |
forward_total = forward_total + new.weight, | |
reverse_total = reverse_total + new.weight, | |
center_total = center_total + new.weight | |
WHERE a = new.a AND b = new.b AND c = new.c; | |
/* update totals of ancestors in main heap */ | |
UPDATE triples | |
SET total = total + new.weight | |
WHERE idx IN (SELECT t.idx / twos FROM triples t, power2 | |
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.idx >= twos); | |
/* update totals of ancestors in forward heap */ | |
UPDATE triples | |
SET forward_total = forward_total + new.weight | |
WHERE a = new.a AND b = new.b | |
AND forward_idx IN (SELECT t.forward_idx / twos FROM triples t, power2 | |
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.forward_idx >= twos); | |
/* update totals of ancestors in reverse heap */ | |
UPDATE triples | |
SET reverse_total = reverse_total + new.weight | |
WHERE b = new.b AND c = new.c | |
AND reverse_idx IN (SELECT t.reverse_idx / twos FROM triples t, power2 | |
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.reverse_idx >= twos); | |
/* update totals of ancestors in center heap */ | |
UPDATE triples | |
SET center_total = center_total + new.weight | |
WHERE b = new.b | |
AND center_idx IN (SELECT t.center_idx / twos FROM triples t, power2 | |
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.center_idx >= twos); | |
END; | |
/* allows viewing and adding triples weights with raw tokens instead of ids. | |
* weight is optional and defaults to 1. */ | |
CREATE VIEW IF NOT EXISTS triples_tokens AS | |
SELECT ta.token a, tb.token b, tc.token c, t.weight | |
FROM | |
triples t | |
INNER JOIN tokens ta ON t.a = ta.id | |
INNER JOIN tokens tb ON t.b = tb.id | |
INNER JOIN tokens tc ON t.c = tc.id; | |
CREATE TRIGGER IF NOT EXISTS tg_triples_tokens__populate INSTEAD OF INSERT ON triples_tokens | |
FOR EACH ROW BEGIN | |
/* ensure tokens are created */ | |
INSERT OR IGNORE INTO tokens(token) VALUES (new.a), (new.b), (new.c); | |
/* hand off to triples_simple */ | |
INSERT INTO triples_simple(a, b, c, weight) | |
VALUES ( | |
(SELECT id FROM tokens WHERE token = new.a), | |
(SELECT id FROM tokens WHERE token = new.b), | |
(SELECT id FROM tokens WHERE token = new.c), | |
COALESCE(new.weight, 1) | |
); | |
END; | |
/* chooses a random token triple */ | |
CREATE VIEW IF NOT EXISTS choose_triple AS | |
WITH RECURSIVE traverse(i, gas) AS ( | |
SELECT | |
1, | |
CAST(rnd * (SELECT total FROM triples WHERE idx = 1) AS INTEGER) | |
FROM rand_real | |
UNION ALL | |
SELECT | |
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0) | |
THEN i*2 + 1 | |
ELSE i*2 | |
END, | |
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0) | |
THEN gas - cur.weight - COALESCE(child1.total, 0) | |
ELSE gas - cur.weight | |
END | |
FROM | |
traverse | |
LEFT JOIN triples cur ON cur.idx = i | |
LEFT JOIN triples child1 ON child1.idx = i*2 | |
WHERE gas >= cur.weight | |
) | |
SELECT t.a, t.b, t.c | |
FROM triples t | |
WHERE t.idx = (SELECT MAX(i) FROM traverse); | |
CREATE VIEW IF NOT EXISTS choose_forward_triple AS | |
WITH RECURSIVE traverse(a, b, i, gas) AS ( | |
SELECT | |
triples.a, triples.b, | |
1, | |
CAST(rnd * (SELECT total FROM triples t | |
WHERE t.forward_idx = 1 AND t.a = a AND t.b = b) AS INTEGER) | |
FROM rand_real, triples | |
WHERE triples.forward_idx = 1 | |
UNION ALL | |
SELECT | |
traverse.a, traverse.b, | |
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0) | |
THEN i*2 + 1 | |
ELSE i*2 | |
END, | |
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0) | |
THEN gas - cur.weight - COALESCE(child1.total, 0) | |
ELSE gas - cur.weight | |
END | |
FROM | |
traverse | |
LEFT JOIN triples cur ON cur.forward_idx = i | |
AND cur.a = a AND cur.b = b | |
LEFT JOIN triples child1 ON child1.forward_idx = i | |
AND child1.a = a AND child1.b = b | |
WHERE gas >= cur.weight | |
) | |
SELECT t.a, t.b, t.c | |
FROM triples t | |
WHERE t.forward_idx = (SELECT MAX(i) FROM traverse tr WHERE tr.a = a AND tr.b = b); | |
/* TODO: create views for choosing random forward, reverse, and center predicated triples */ | |
CREATE VIEW IF NOT EXISTS integrity_check AS | |
SELECT ( | |
/* weights should all be positive (could be user error) */ | |
(SELECT COUNT(*) FROM triples WHERE weight < 0) = 0 | |
/* triples heap integrity */ | |
AND (SELECT COUNT(*) FROM triples) = | |
(SELECT COUNT(*) FROM triples parent | |
WHERE ( | |
SELECT parent.weight + COALESCE(SUM(child.total), 0) FROM triples child | |
WHERE child.idx IN (parent.idx * 2, parent.idx * 2 + 1) | |
) = parent.total) | |
/* forward heap integrity */ | |
AND (SELECT COUNT(*) FROM triples) = | |
(SELECT COUNT(*) FROM triples parent | |
WHERE ( | |
SELECT parent.weight + COALESCE(SUM(child.forward_total), 0) FROM triples child | |
WHERE child.a = parent.a AND child.b = parent.b | |
AND child.forward_idx IN (parent.forward_idx * 2, parent.forward_idx * 2 + 1) | |
) = parent.forward_total) | |
/* reverse heap integrity */ | |
AND (SELECT COUNT(*) FROM triples) = | |
(SELECT COUNT(*) FROM triples parent | |
WHERE ( | |
SELECT parent.weight + COALESCE(SUM(child.reverse_total), 0) FROM triples child | |
WHERE child.b = parent.b AND child.c = parent.c | |
AND child.reverse_idx IN (parent.reverse_idx * 2, parent.reverse_idx * 2 + 1) | |
) = parent.reverse_total) | |
/* center heap integrity */ | |
AND (SELECT COUNT(*) FROM triples) = | |
(SELECT COUNT(*) FROM triples parent | |
WHERE ( | |
SELECT parent.weight + COALESCE(SUM(child.center_total), 0) FROM triples child | |
WHERE child.b = parent.b | |
AND child.center_idx IN (parent.center_idx * 2, parent.center_idx * 2 + 1) | |
) = parent.center_total) | |
) integrity; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment