Skip to content

Instantly share code, notes, and snippets.

@Rillke
Created November 11, 2018 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Rillke/c2da0921f8f2a047615f41fab8781c11 to your computer and use it in GitHub Desktop.
Save Rillke/c2da0921f8f2a047615f41fab8781c11 to your computer and use it in GitHub Desktop.
MySQL stored procedure for calculating the compartments of a Venn-Diagram created from 4 sets
-- Given the 4 tables, each containing items and representing one set,
-- I'd like to get the count of the items in each compartment required
-- to draw a Venn diagram as shown below. The calculation should take
-- place in the MySQL server since I do not want to transmit the single
-- items to the application server.
-- https://stackoverflow.com/q/53234943/2683737
-- Copyright 2018 Rainer Rillke
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-- of the Software, and to permit persons to whom the Software is furnished to do
-- so, subject to the following conditions:
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
DROP PROCEDURE IF EXISTS venn;
DELIMITER $$
CREATE PROCEDURE venn()
BEGIN
DECLARE compart_sum INT DEFAULT 0;
DROP TEMPORARY TABLE IF EXISTS tmp_venn;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s1;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s2;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s3;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s4;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_counts;
CREATE TEMPORARY TABLE tmp_venn ENGINE=MEMORY
SELECT item, 's1' AS Origin
FROM adata_venn_s1
UNION
SELECT item, 's2' AS Origin
FROM adata_venn_s2
UNION
SELECT item, 's3' AS Origin
FROM adata_venn_s3
UNION
SELECT item, 's4' AS Origin
FROM adata_venn_s4;
-- Looks stupid but is required due to
-- Error Code: 1137. Can't reopen table: 'tmp_venn'
-- I4 other words: You can't refer to one table twice in one query
-- https://stackoverflow.com/q/343402/2683737
CREATE TEMPORARY TABLE tmp_venn_s1 ENGINE=MEMORY
SELECT *
FROM tmp_venn
WHERE Origin = 's1';
CREATE TEMPORARY TABLE tmp_venn_s2 ENGINE=MEMORY
SELECT *
FROM tmp_venn
WHERE Origin = 's2';
CREATE TEMPORARY TABLE tmp_venn_s3 ENGINE=MEMORY
SELECT *
FROM tmp_venn
WHERE Origin = 's3';
CREATE TEMPORARY TABLE tmp_venn_s4 ENGINE=MEMORY
SELECT *
FROM tmp_venn
WHERE Origin = 's4';
-- This query allows checking the data for sanity.
-- There should be no duplicates in a set.
-- SELECT count(item), item, Origin
-- FROM tmp_venn
-- GROUP BY item, Origin
-- HAVING count(item) > 1;
-- All relevant items from all sets
-- SELECT * FROM tmp_venn;
-- Count by origin
-- SELECT count(*), Origin FROM tmp_venn
-- GROUP BY Origin;
-- Legend:
-- Operations: I: INTERSECT, E: EXCEPT
-- Sets: 3: s1, 5: s2, c: s3, n: s4
-- Note: Using prefix notation.
-- Type |s1 ∩ s2 ∩ s3 ∩ s4| (center of Venn-Diagram)
SELECT count(*)
INTO @I1234
FROM (
SELECT item
FROM tmp_venn
GROUP BY item
HAVING count(item) = 4
) AS I1234;
-- Type |(s1 ∩ s2 ∩ s4) \ s3| (4 diagonal values around the center)
SELECT count(*)
INTO @I234E1
FROM(
SELECT item
FROM tmp_venn
WHERE Origin <> 's1'
GROUP BY item
HAVING count(item) = 3
AND item NOT IN (
SELECT item
FROM tmp_venn_s1
)
) AS I234E1;
SELECT count(*)
INTO @I134E2
FROM(
SELECT item
FROM tmp_venn
WHERE Origin <> 's2'
GROUP BY item
HAVING count(item) = 3
AND item NOT IN (
SELECT item
FROM tmp_venn_s2
)
) AS I134E2;
SELECT count(*)
INTO @I124E3
FROM(
SELECT item
FROM tmp_venn
WHERE Origin <> 's3'
GROUP BY item
HAVING count(item) = 3
AND item NOT IN (
SELECT item
FROM tmp_venn_s3
)
) AS I124E3;
SELECT count(*)
INTO @I123E4
FROM(
SELECT item
FROM tmp_venn
WHERE Origin <> 's4'
GROUP BY item
HAVING count(item) = 3
AND item NOT IN (
SELECT item
FROM tmp_venn_s4
)
) AS I123E4;
-- Type |(s1 ∩ s4) \ (s2 ∪ s3)|
-- (4 values in North, East, Sout, West direction respectively)
-- (values on top of two parallel elipses)
SELECT count(*)
INTO @I12E34
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s1', 's2')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s3
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s4
)
) AS I12E34;
SELECT count(*)
INTO @I13E24
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s1', 's3')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s2
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s4
)
) AS I13E24;
SELECT count(*)
INTO @I14E23
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s1', 's4')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s2
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s3
)
) AS I14E23;
SELECT count(*)
INTO @I23E14
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s2', 's3')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s1
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s4
)
) AS I23E14;
SELECT count(*)
INTO @I24E13
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s2', 's4')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s1
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s3
)
) AS I24E13;
SELECT count(*)
INTO @I34E12
FROM(
SELECT item
FROM tmp_venn
WHERE Origin IN ('s3', 's4')
GROUP BY item
HAVING count(item) = 2
AND item NOT IN (
SELECT item
FROM tmp_venn_s1
)
AND item NOT IN (
SELECT item
FROM tmp_venn_s2
)
) AS I34E12;
-- Type |s1 \ (s2 ∪ s3 ∪ s4)|
-- the upper corners of the elipses
SELECT count(*)
INTO @I1E234
FROM(
SELECT item
FROM tmp_venn_s1
WHERE item NOT IN (
SELECT item
FROM tmp_venn
WHERE Origin <> 's1'
)
) AS I1E234;
SELECT count(*)
INTO @I2E134
FROM(
SELECT item
FROM tmp_venn_s2
WHERE item NOT IN (
SELECT item
FROM tmp_venn
WHERE Origin <> 's2'
)
) AS I2E134;
SELECT count(*)
INTO @I3E124
FROM(
SELECT item
FROM tmp_venn_s3
WHERE item NOT IN (
SELECT item
FROM tmp_venn
WHERE Origin <> 's3'
)
) AS I3E124;
SELECT count(*)
INTO @I4E123
FROM(
SELECT item
FROM tmp_venn_s4
WHERE item NOT IN (
SELECT item
FROM tmp_venn
WHERE Origin <> 's4'
)
) AS I4E123;
-- Create result set
CREATE TEMPORARY TABLE tmp_venn_counts ENGINE=MEMORY
SELECT 'I1234' AS compartment, @I1234 AS count
UNION
SELECT 'I234E1' AS compartment, @I234E1 AS count
UNION
SELECT 'I134E2' AS compartment, @I134E2 AS count
UNION
SELECT 'I124E3' AS compartment, @I124E3 AS count
UNION
SELECT 'I123E4' AS compartment, @I123E4 AS count
UNION
SELECT 'I12E34' AS compartment, @I12E34 AS count
UNION
SELECT 'I13E24' AS compartment, @I13E24 AS count
UNION
SELECT 'I14E23' AS compartment, @I14E23 AS count
UNION
SELECT 'I23E14' AS compartment, @I23E14 AS count
UNION
SELECT 'I24E13' AS compartment, @I24E13 AS count
UNION
SELECT 'I34E12' AS compartment, @I34E12 AS count
UNION
SELECT 'I1E234' AS compartment, @I1E234 AS count
UNION
SELECT 'I2E134' AS compartment, @I2E134 AS count
UNION
SELECT 'I3E124' AS compartment, @I3E124 AS count
UNION
SELECT 'I4E123' AS compartment, @I4E123 AS count;
-- sum-up the compartment powers for sanity checking
SELECT sum(count)
INTO compart_sum
FROM tmp_venn_counts;
-- sum and set_power should always equal, otherwise we made a mistake
-- or the source data is insane
SELECT compartment, count FROM tmp_venn_counts
UNION
SELECT 'sum', cast(compart_sum as UNSIGNED) AS count
UNION
SELECT 'set_power', (SELECT count(DISTINCT item) FROM tmp_venn) AS count;
-- clean up
DROP TEMPORARY TABLE IF EXISTS tmp_venn;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s1;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s2;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s3;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s4;
DROP TEMPORARY TABLE IF EXISTS tmp_venn_counts;
END $$
DELIMITER ;
@Rillke
Copy link
Author

Rillke commented Nov 11, 2018

Note that you might not want to create the tmp_venn* temporary tables the way I did. The reason they are created as shown is that they are actually the result of another MySQL operation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment