Last active
June 12, 2022 12:40
-
-
Save jjakob/41cb972ec71b4320314766764621b889 to your computer and use it in GitHub Desktop.
Quassel-IRC plpgsql procedure to delete senders that violate the unique index constraint. https://bugs.quassel-irc.org/issues/1245
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE PROCEDURE delete_conflict_senders() AS $$ | |
BEGIN | |
DROP INDEX IF EXISTS sender_sender_realname_avatarurl_uindex; | |
/* part 1: update columns */ | |
UPDATE sender | |
SET realname='' | |
WHERE realname IS NULL | |
; | |
UPDATE sender | |
SET avatarurl='' | |
WHERE avatarurl IS NULL | |
; | |
ALTER TABLE sender ALTER realname SET NOT NULL; | |
ALTER TABLE sender ALTER avatarurl SET NOT NULL; | |
/* part 2: delete conflict rows */ | |
CREATE /*TEMP*/ TABLE bad_senderids AS | |
SELECT good_senderid, bad_senderid | |
FROM ( | |
SELECT min(senderid) OVER ( | |
PARTITION BY | |
sender, | |
realname, | |
avatarurl | |
) AS good_senderid, | |
senderid AS bad_senderid | |
FROM sender | |
) AS x | |
WHERE bad_senderid > good_senderid | |
; | |
ALTER TABLE backlog DISABLE TRIGGER ALL; | |
ALTER TABLE sender DISABLE TRIGGER ALL; | |
UPDATE backlog | |
SET senderid=bsi.good_senderid | |
FROM bad_senderids bsi | |
WHERE backlog.senderid=bsi.bad_senderid | |
; | |
DELETE FROM sender | |
USING bad_senderids bsi | |
WHERE senderid = bsi.bad_senderid | |
; | |
ALTER TABLE backlog ENABLE TRIGGER ALL; | |
ALTER TABLE sender ENABLE TRIGGER ALL; | |
/* part 3: reindex (because triggers were disabled) */ | |
REINDEX TABLE sender; | |
REINDEX TABLE backlog; | |
CREATE UNIQUE INDEX sender_sender_realname_avatarurl_uindex | |
ON sender USING btree (sender, realname, avatarurl); | |
END; | |
$$ LANGUAGE plpgsql; | |
/* | |
* Returns the number of rows in the sender table that violate the | |
* index constraint and would get deleted in the real function. | |
*/ | |
CREATE OR REPLACE FUNCTION delete_conflict_senders_count( | |
OUT good_rows bigint, | |
OUT bad_rows bigint, | |
OUT percent_bad numeric | |
) | |
AS $$ | |
DECLARE | |
all_rows bigint; | |
BEGIN | |
bad_rows := COALESCE(0,( | |
SELECT sum(a.countof-1) | |
FROM ( | |
SELECT count(*) AS countof | |
FROM sender | |
GROUP BY | |
COALESCE(sender,''), | |
COALESCE(realname,''), | |
COALESCE(avatarurl,'') | |
HAVING count(*) > 1 | |
) a | |
)); | |
all_rows := ( | |
SELECT count(*) | |
FROM sender | |
); | |
good_rows := all_rows - bad_rows; | |
percent_bad := CAST((CAST(bad_rows AS double precision) / all_rows * 100) AS NUMERIC(5,2)); | |
END; | |
$$ LANGUAGE plpgsql; | |
/* | |
* Returns the rows in the sender table that violate the index constraint | |
* and would get deleted in the real function. | |
* The rows are grouped and a count of the number of duplicate | |
* rows is added to each row. | |
*/ | |
CREATE OR REPLACE FUNCTION delete_conflict_senders_summary() | |
RETURNS TABLE ( | |
sender text, | |
realname text, | |
avatarurl text, | |
countof bigint | |
) | |
AS $$ | |
BEGIN | |
RETURN QUERY ( | |
SELECT | |
COALESCE(s.sender,''), | |
COALESCE(s.realname,''), | |
COALESCE(s.avatarurl,''), | |
count(*) as countof | |
FROM sender s | |
GROUP BY | |
COALESCE(s.sender,''), | |
COALESCE(s.realname,''), | |
COALESCE(s.avatarurl,'') | |
HAVING count(*) > 1 | |
); | |
END; | |
$$ LANGUAGE plpgsql; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment