Skip to content

Instantly share code, notes, and snippets.

@jjakob
Last active June 12, 2022 12:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjakob/41cb972ec71b4320314766764621b889 to your computer and use it in GitHub Desktop.
Save jjakob/41cb972ec71b4320314766764621b889 to your computer and use it in GitHub Desktop.
Quassel-IRC plpgsql procedure to delete senders that violate the unique index constraint. https://bugs.quassel-irc.org/issues/1245
CREATE OR REPLACE PROCEDURE delete_conflict_senders() AS $$
BEGIN
DROP INDEX IF EXISTS sender_sender_realname_avatarurl_uindex;
/* part 1: update columns */
UPDATE sender
SET realname=''
WHERE realname IS NULL
;
UPDATE sender
SET avatarurl=''
WHERE avatarurl IS NULL
;
ALTER TABLE sender ALTER realname SET NOT NULL;
ALTER TABLE sender ALTER avatarurl SET NOT NULL;
/* part 2: delete conflict rows */
CREATE /*TEMP*/ TABLE bad_senderids AS
SELECT good_senderid, bad_senderid
FROM (
SELECT min(senderid) OVER (
PARTITION BY
sender,
realname,
avatarurl
) AS good_senderid,
senderid AS bad_senderid
FROM sender
) AS x
WHERE bad_senderid > good_senderid
;
ALTER TABLE backlog DISABLE TRIGGER ALL;
ALTER TABLE sender DISABLE TRIGGER ALL;
UPDATE backlog
SET senderid=bsi.good_senderid
FROM bad_senderids bsi
WHERE backlog.senderid=bsi.bad_senderid
;
DELETE FROM sender
USING bad_senderids bsi
WHERE senderid = bsi.bad_senderid
;
ALTER TABLE backlog ENABLE TRIGGER ALL;
ALTER TABLE sender ENABLE TRIGGER ALL;
/* part 3: reindex (because triggers were disabled) */
REINDEX TABLE sender;
REINDEX TABLE backlog;
CREATE UNIQUE INDEX sender_sender_realname_avatarurl_uindex
ON sender USING btree (sender, realname, avatarurl);
END;
$$ LANGUAGE plpgsql;
/*
* Returns the number of rows in the sender table that violate the
* index constraint and would get deleted in the real function.
*/
CREATE OR REPLACE FUNCTION delete_conflict_senders_count(
OUT good_rows bigint,
OUT bad_rows bigint,
OUT percent_bad numeric
)
AS $$
DECLARE
all_rows bigint;
BEGIN
bad_rows := COALESCE(0,(
SELECT sum(a.countof-1)
FROM (
SELECT count(*) AS countof
FROM sender
GROUP BY
COALESCE(sender,''),
COALESCE(realname,''),
COALESCE(avatarurl,'')
HAVING count(*) > 1
) a
));
all_rows := (
SELECT count(*)
FROM sender
);
good_rows := all_rows - bad_rows;
percent_bad := CAST((CAST(bad_rows AS double precision) / all_rows * 100) AS NUMERIC(5,2));
END;
$$ LANGUAGE plpgsql;
/*
* Returns the rows in the sender table that violate the index constraint
* and would get deleted in the real function.
* The rows are grouped and a count of the number of duplicate
* rows is added to each row.
*/
CREATE OR REPLACE FUNCTION delete_conflict_senders_summary()
RETURNS TABLE (
sender text,
realname text,
avatarurl text,
countof bigint
)
AS $$
BEGIN
RETURN QUERY (
SELECT
COALESCE(s.sender,''),
COALESCE(s.realname,''),
COALESCE(s.avatarurl,''),
count(*) as countof
FROM sender s
GROUP BY
COALESCE(s.sender,''),
COALESCE(s.realname,''),
COALESCE(s.avatarurl,'')
HAVING count(*) > 1
);
END;
$$ LANGUAGE plpgsql;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment