Skip to content

Instantly share code, notes, and snippets.

@mumbleskates
Last active March 22, 2017 02:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mumbleskates/7bcef4015d5b1422e88b to your computer and use it in GitHub Desktop.
Save mumbleskates/7bcef4015d5b1422e88b to your computer and use it in GitHub Desktop.
Pure-SQL markov heaps (SQLite)
/* powers of 2 utility view for heap traversal */
CREATE VIEW IF NOT EXISTS power2 AS
WITH RECURSIVE doubling(n) AS (
VALUES (2)
UNION ALL
SELECT n*2 FROM doubling LIMIT 62
)
SELECT n twos FROM doubling;
/* generates random floating point values in interval [0, 1) */
CREATE VIEW IF NOT EXISTS rand_real AS
SELECT 0.5 - RANDOM() / CAST(-9223372036854775808 AS REAL) / 2 rnd;
/* token to token-id mapping */
CREATE TABLE IF NOT EXISTS tokens (
id INTEGER PRIMARY KEY,
token TEXT UNIQUE
);
/* markov heap data */
CREATE TABLE IF NOT EXISTS triples (
a INTEGER NOT NULL REFERENCES tokens(id),
b INTEGER NOT NULL REFERENCES tokens(id),
c INTEGER NOT NULL REFERENCES tokens(id),
weight INTEGER NOT NULL,
idx INTEGER PRIMARY KEY,
total INTEGER NOT NULL,
forward_idx INTEGER NOT NULL,
forward_total INTEGER NOT NULL,
reverse_idx INTEGER NOT NULL,
reverse_total INTEGER NOT NULL,
center_idx INTEGER NOT NULL,
center_total INTEGER NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__unique ON triples(a, b, c);
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__forward_idx ON triples(a, b, forward_idx);
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__reverse_idx ON triples(b, c, reverse_idx);
CREATE UNIQUE INDEX IF NOT EXISTS ix_triples__center_idx ON triples(b, center_idx);
/* allows viewing and modifying (through insert) of triples weights */
CREATE VIEW IF NOT EXISTS triples_simple AS SELECT a, b, c, weight FROM triples;
/* inserts triples weights as deltas while maintaining heap integrity */
CREATE TRIGGER IF NOT EXISTS tg_triples__populate INSTEAD OF INSERT ON triples_simple
FOR EACH ROW BEGIN
/* ensure the triple is created */
INSERT OR IGNORE INTO triples (
a, b, c, weight,
idx, total, forward_idx, forward_total,
reverse_idx, reverse_total, center_idx, center_total
)
VALUES (
new.a, new.b, new.c,
0,
COALESCE((SELECT MAX(t.idx) + 1 FROM triples t), 1),
0,
COALESCE((SELECT MAX(t.forward_idx) + 1 FROM triples t
WHERE t.a = new.a AND t.b = new.b), 1),
0,
COALESCE((SELECT MAX(t.reverse_idx) + 1 FROM triples t
WHERE t.b = new.b AND t.c = new.c), 1),
0,
COALESCE((SELECT MAX(t.center_idx) + 1 FROM triples t
WHERE t.b = new.b), 1),
0
);
/* update totals in own row */
UPDATE triples
SET
weight = weight + new.weight,
total = total + new.weight,
forward_total = forward_total + new.weight,
reverse_total = reverse_total + new.weight,
center_total = center_total + new.weight
WHERE a = new.a AND b = new.b AND c = new.c;
/* update totals of ancestors in main heap */
UPDATE triples
SET total = total + new.weight
WHERE idx IN (SELECT t.idx / twos FROM triples t, power2
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.idx >= twos);
/* update totals of ancestors in forward heap */
UPDATE triples
SET forward_total = forward_total + new.weight
WHERE a = new.a AND b = new.b
AND forward_idx IN (SELECT t.forward_idx / twos FROM triples t, power2
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.forward_idx >= twos);
/* update totals of ancestors in reverse heap */
UPDATE triples
SET reverse_total = reverse_total + new.weight
WHERE b = new.b AND c = new.c
AND reverse_idx IN (SELECT t.reverse_idx / twos FROM triples t, power2
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.reverse_idx >= twos);
/* update totals of ancestors in center heap */
UPDATE triples
SET center_total = center_total + new.weight
WHERE b = new.b
AND center_idx IN (SELECT t.center_idx / twos FROM triples t, power2
WHERE t.a = new.a AND t.b = new.b AND t.c = new.c AND t.center_idx >= twos);
END;
/* allows viewing and adding triples weights with raw tokens instead of ids.
* weight is optional and defaults to 1. */
CREATE VIEW IF NOT EXISTS triples_tokens AS
SELECT ta.token a, tb.token b, tc.token c, t.weight
FROM
triples t
INNER JOIN tokens ta ON t.a = ta.id
INNER JOIN tokens tb ON t.b = tb.id
INNER JOIN tokens tc ON t.c = tc.id;
CREATE TRIGGER IF NOT EXISTS tg_triples_tokens__populate INSTEAD OF INSERT ON triples_tokens
FOR EACH ROW BEGIN
/* ensure tokens are created */
INSERT OR IGNORE INTO tokens(token) VALUES (new.a), (new.b), (new.c);
/* hand off to triples_simple */
INSERT INTO triples_simple(a, b, c, weight)
VALUES (
(SELECT id FROM tokens WHERE token = new.a),
(SELECT id FROM tokens WHERE token = new.b),
(SELECT id FROM tokens WHERE token = new.c),
COALESCE(new.weight, 1)
);
END;
/* chooses a random token triple */
CREATE VIEW IF NOT EXISTS choose_triple AS
WITH RECURSIVE traverse(i, gas) AS (
SELECT
1,
CAST(rnd * (SELECT total FROM triples WHERE idx = 1) AS INTEGER)
FROM rand_real
UNION ALL
SELECT
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0)
THEN i*2 + 1
ELSE i*2
END,
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0)
THEN gas - cur.weight - COALESCE(child1.total, 0)
ELSE gas - cur.weight
END
FROM
traverse
LEFT JOIN triples cur ON cur.idx = i
LEFT JOIN triples child1 ON child1.idx = i*2
WHERE gas >= cur.weight
)
SELECT t.a, t.b, t.c
FROM triples t
WHERE t.idx = (SELECT MAX(i) FROM traverse);
CREATE VIEW IF NOT EXISTS choose_forward_triple AS
WITH RECURSIVE traverse(a, b, i, gas) AS (
SELECT
triples.a, triples.b,
1,
CAST(rnd * (SELECT total FROM triples t
WHERE t.forward_idx = 1 AND t.a = a AND t.b = b) AS INTEGER)
FROM rand_real, triples
WHERE triples.forward_idx = 1
UNION ALL
SELECT
traverse.a, traverse.b,
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0)
THEN i*2 + 1
ELSE i*2
END,
CASE WHEN gas >= cur.weight + COALESCE(child1.total, 0)
THEN gas - cur.weight - COALESCE(child1.total, 0)
ELSE gas - cur.weight
END
FROM
traverse
LEFT JOIN triples cur ON cur.forward_idx = i
AND cur.a = a AND cur.b = b
LEFT JOIN triples child1 ON child1.forward_idx = i
AND child1.a = a AND child1.b = b
WHERE gas >= cur.weight
)
SELECT t.a, t.b, t.c
FROM triples t
WHERE t.forward_idx = (SELECT MAX(i) FROM traverse tr WHERE tr.a = a AND tr.b = b);
/* TODO: create views for choosing random forward, reverse, and center predicated triples */
CREATE VIEW IF NOT EXISTS integrity_check AS
SELECT (
/* weights should all be positive (could be user error) */
(SELECT COUNT(*) FROM triples WHERE weight < 0) = 0
/* triples heap integrity */
AND (SELECT COUNT(*) FROM triples) =
(SELECT COUNT(*) FROM triples parent
WHERE (
SELECT parent.weight + COALESCE(SUM(child.total), 0) FROM triples child
WHERE child.idx IN (parent.idx * 2, parent.idx * 2 + 1)
) = parent.total)
/* forward heap integrity */
AND (SELECT COUNT(*) FROM triples) =
(SELECT COUNT(*) FROM triples parent
WHERE (
SELECT parent.weight + COALESCE(SUM(child.forward_total), 0) FROM triples child
WHERE child.a = parent.a AND child.b = parent.b
AND child.forward_idx IN (parent.forward_idx * 2, parent.forward_idx * 2 + 1)
) = parent.forward_total)
/* reverse heap integrity */
AND (SELECT COUNT(*) FROM triples) =
(SELECT COUNT(*) FROM triples parent
WHERE (
SELECT parent.weight + COALESCE(SUM(child.reverse_total), 0) FROM triples child
WHERE child.b = parent.b AND child.c = parent.c
AND child.reverse_idx IN (parent.reverse_idx * 2, parent.reverse_idx * 2 + 1)
) = parent.reverse_total)
/* center heap integrity */
AND (SELECT COUNT(*) FROM triples) =
(SELECT COUNT(*) FROM triples parent
WHERE (
SELECT parent.weight + COALESCE(SUM(child.center_total), 0) FROM triples child
WHERE child.b = parent.b
AND child.center_idx IN (parent.center_idx * 2, parent.center_idx * 2 + 1)
) = parent.center_total)
) integrity;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment