Created
November 11, 2018 14:36
-
-
Save Rillke/c2da0921f8f2a047615f41fab8781c11 to your computer and use it in GitHub Desktop.
MySQL stored procedure for calculating the compartments of a Venn-Diagram created from 4 sets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Given the 4 tables, each containing items and representing one set, | |
-- I'd like to get the count of the items in each compartment required | |
-- to draw a Venn diagram as shown below. The calculation should take | |
-- place in the MySQL server since I do not want to transmit the single | |
-- items to the application server. | |
-- https://stackoverflow.com/q/53234943/2683737 | |
-- Copyright 2018 Rainer Rillke | |
-- Permission is hereby granted, free of charge, to any person obtaining a copy | |
-- of this software and associated documentation files (the "Software"), to deal | |
-- in the Software without restriction, including without limitation the rights | |
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |
-- of the Software, and to permit persons to whom the Software is furnished to do | |
-- so, subject to the following conditions: | |
-- The above copyright notice and this permission notice shall be included in all | |
-- copies or substantial portions of the Software. | |
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | |
-- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |
-- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
-- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
-- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
DROP PROCEDURE IF EXISTS venn; | |
DELIMITER $$ | |
CREATE PROCEDURE venn() | |
BEGIN | |
DECLARE compart_sum INT DEFAULT 0; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s1; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s2; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s3; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s4; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_counts; | |
CREATE TEMPORARY TABLE tmp_venn ENGINE=MEMORY | |
SELECT item, 's1' AS Origin | |
FROM adata_venn_s1 | |
UNION | |
SELECT item, 's2' AS Origin | |
FROM adata_venn_s2 | |
UNION | |
SELECT item, 's3' AS Origin | |
FROM adata_venn_s3 | |
UNION | |
SELECT item, 's4' AS Origin | |
FROM adata_venn_s4; | |
-- Looks stupid but is required due to | |
-- Error Code: 1137. Can't reopen table: 'tmp_venn' | |
-- I4 other words: You can't refer to one table twice in one query | |
-- https://stackoverflow.com/q/343402/2683737 | |
CREATE TEMPORARY TABLE tmp_venn_s1 ENGINE=MEMORY | |
SELECT * | |
FROM tmp_venn | |
WHERE Origin = 's1'; | |
CREATE TEMPORARY TABLE tmp_venn_s2 ENGINE=MEMORY | |
SELECT * | |
FROM tmp_venn | |
WHERE Origin = 's2'; | |
CREATE TEMPORARY TABLE tmp_venn_s3 ENGINE=MEMORY | |
SELECT * | |
FROM tmp_venn | |
WHERE Origin = 's3'; | |
CREATE TEMPORARY TABLE tmp_venn_s4 ENGINE=MEMORY | |
SELECT * | |
FROM tmp_venn | |
WHERE Origin = 's4'; | |
-- This query allows checking the data for sanity. | |
-- There should be no duplicates in a set. | |
-- SELECT count(item), item, Origin | |
-- FROM tmp_venn | |
-- GROUP BY item, Origin | |
-- HAVING count(item) > 1; | |
-- All relevant items from all sets | |
-- SELECT * FROM tmp_venn; | |
-- Count by origin | |
-- SELECT count(*), Origin FROM tmp_venn | |
-- GROUP BY Origin; | |
-- Legend: | |
-- Operations: I: INTERSECT, E: EXCEPT | |
-- Sets: 3: s1, 5: s2, c: s3, n: s4 | |
-- Note: Using prefix notation. | |
-- Type |s1 ∩ s2 ∩ s3 ∩ s4| (center of Venn-Diagram) | |
SELECT count(*) | |
INTO @I1234 | |
FROM ( | |
SELECT item | |
FROM tmp_venn | |
GROUP BY item | |
HAVING count(item) = 4 | |
) AS I1234; | |
-- Type |(s1 ∩ s2 ∩ s4) \ s3| (4 diagonal values around the center) | |
SELECT count(*) | |
INTO @I234E1 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's1' | |
GROUP BY item | |
HAVING count(item) = 3 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s1 | |
) | |
) AS I234E1; | |
SELECT count(*) | |
INTO @I134E2 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's2' | |
GROUP BY item | |
HAVING count(item) = 3 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s2 | |
) | |
) AS I134E2; | |
SELECT count(*) | |
INTO @I124E3 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's3' | |
GROUP BY item | |
HAVING count(item) = 3 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s3 | |
) | |
) AS I124E3; | |
SELECT count(*) | |
INTO @I123E4 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's4' | |
GROUP BY item | |
HAVING count(item) = 3 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s4 | |
) | |
) AS I123E4; | |
-- Type |(s1 ∩ s4) \ (s2 ∪ s3)| | |
-- (4 values in North, East, Sout, West direction respectively) | |
-- (values on top of two parallel elipses) | |
SELECT count(*) | |
INTO @I12E34 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s1', 's2') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s3 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s4 | |
) | |
) AS I12E34; | |
SELECT count(*) | |
INTO @I13E24 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s1', 's3') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s2 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s4 | |
) | |
) AS I13E24; | |
SELECT count(*) | |
INTO @I14E23 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s1', 's4') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s2 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s3 | |
) | |
) AS I14E23; | |
SELECT count(*) | |
INTO @I23E14 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s2', 's3') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s1 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s4 | |
) | |
) AS I23E14; | |
SELECT count(*) | |
INTO @I24E13 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s2', 's4') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s1 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s3 | |
) | |
) AS I24E13; | |
SELECT count(*) | |
INTO @I34E12 | |
FROM( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin IN ('s3', 's4') | |
GROUP BY item | |
HAVING count(item) = 2 | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s1 | |
) | |
AND item NOT IN ( | |
SELECT item | |
FROM tmp_venn_s2 | |
) | |
) AS I34E12; | |
-- Type |s1 \ (s2 ∪ s3 ∪ s4)| | |
-- the upper corners of the elipses | |
SELECT count(*) | |
INTO @I1E234 | |
FROM( | |
SELECT item | |
FROM tmp_venn_s1 | |
WHERE item NOT IN ( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's1' | |
) | |
) AS I1E234; | |
SELECT count(*) | |
INTO @I2E134 | |
FROM( | |
SELECT item | |
FROM tmp_venn_s2 | |
WHERE item NOT IN ( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's2' | |
) | |
) AS I2E134; | |
SELECT count(*) | |
INTO @I3E124 | |
FROM( | |
SELECT item | |
FROM tmp_venn_s3 | |
WHERE item NOT IN ( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's3' | |
) | |
) AS I3E124; | |
SELECT count(*) | |
INTO @I4E123 | |
FROM( | |
SELECT item | |
FROM tmp_venn_s4 | |
WHERE item NOT IN ( | |
SELECT item | |
FROM tmp_venn | |
WHERE Origin <> 's4' | |
) | |
) AS I4E123; | |
-- Create result set | |
CREATE TEMPORARY TABLE tmp_venn_counts ENGINE=MEMORY | |
SELECT 'I1234' AS compartment, @I1234 AS count | |
UNION | |
SELECT 'I234E1' AS compartment, @I234E1 AS count | |
UNION | |
SELECT 'I134E2' AS compartment, @I134E2 AS count | |
UNION | |
SELECT 'I124E3' AS compartment, @I124E3 AS count | |
UNION | |
SELECT 'I123E4' AS compartment, @I123E4 AS count | |
UNION | |
SELECT 'I12E34' AS compartment, @I12E34 AS count | |
UNION | |
SELECT 'I13E24' AS compartment, @I13E24 AS count | |
UNION | |
SELECT 'I14E23' AS compartment, @I14E23 AS count | |
UNION | |
SELECT 'I23E14' AS compartment, @I23E14 AS count | |
UNION | |
SELECT 'I24E13' AS compartment, @I24E13 AS count | |
UNION | |
SELECT 'I34E12' AS compartment, @I34E12 AS count | |
UNION | |
SELECT 'I1E234' AS compartment, @I1E234 AS count | |
UNION | |
SELECT 'I2E134' AS compartment, @I2E134 AS count | |
UNION | |
SELECT 'I3E124' AS compartment, @I3E124 AS count | |
UNION | |
SELECT 'I4E123' AS compartment, @I4E123 AS count; | |
-- sum-up the compartment powers for sanity checking | |
SELECT sum(count) | |
INTO compart_sum | |
FROM tmp_venn_counts; | |
-- sum and set_power should always equal, otherwise we made a mistake | |
-- or the source data is insane | |
SELECT compartment, count FROM tmp_venn_counts | |
UNION | |
SELECT 'sum', cast(compart_sum as UNSIGNED) AS count | |
UNION | |
SELECT 'set_power', (SELECT count(DISTINCT item) FROM tmp_venn) AS count; | |
-- clean up | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s1; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s2; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s3; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_s4; | |
DROP TEMPORARY TABLE IF EXISTS tmp_venn_counts; | |
END $$ | |
DELIMITER ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that you might not want to create the
tmp_venn*
temporary tables the way I did. The reason they are created as shown is that they are actually the result of another MySQL operation.