-- jometho/handling_duplicates_in_postgresql.sql

-- handling_duplicates_in_postgresql.sql

-- Find duplicates: catalog.coverage.name is supposed to be unique.
-- List every name that occurs more than once, together with the number of
-- rows that carry it.
SELECT
    c.name,
    COUNT(*)
FROM catalog.coverage AS c
GROUP BY c.name
HAVING COUNT(*) > 1;

-- Remove duplicates: for every name that occurs more than once, keep a single
-- row and delete the redundant rows. name is the column that should be unique.
-- ctid is a PostgreSQL system column giving the physical location of a row
-- within its table; it is used here only to tell rows that share a name apart.
-- The subquery picks the smallest ctid per duplicated name, and every other
-- row with that name is deleted.
-- Rows whose name is NULL are never deleted: GROUP BY collects them into one
-- group, but a.name = b.name is not true when name is NULL.
-- NOTE(review): MIN(ctid) needs a server whose min() aggregate accepts the tid
-- type -- confirm for the target PostgreSQL version.
DELETE FROM catalog.coverage AS a
USING (
    SELECT
        MIN(ctid) AS ctid,
        name
    FROM catalog.coverage
    GROUP BY name
    HAVING COUNT(*) > 1
) AS b
WHERE a.name = b.name
    AND a.ctid <> b.ctid;

	-- Find duplicates: catalog.coverage.name is supposed to be unique.
	-- List every name that occurs more than once, together with the number of
	-- rows that carry it.
	SELECT
	    c.name,
	    COUNT(*)
	FROM catalog.coverage AS c
	GROUP BY c.name
	HAVING COUNT(*) > 1;

	-- Remove duplicates: for every name that occurs more than once, keep a single
	-- row and delete the redundant rows. name is the column that should be unique.
	-- ctid is a PostgreSQL system column giving the physical location of a row
	-- within its table; it is used here only to tell rows that share a name apart.
	-- The subquery picks the smallest ctid per duplicated name, and every other
	-- row with that name is deleted.
	-- Rows whose name is NULL are never deleted: GROUP BY collects them into one
	-- group, but a.name = b.name is not true when name is NULL.
	-- NOTE(review): MIN(ctid) needs a server whose min() aggregate accepts the tid
	-- type -- confirm for the target PostgreSQL version.
	DELETE FROM catalog.coverage AS a
	USING (
	    SELECT
	        MIN(ctid) AS ctid,
	        name
	    FROM catalog.coverage
	    GROUP BY name
	    HAVING COUNT(*) > 1
	) AS b
	WHERE a.name = b.name
	    AND a.ctid <> b.ctid;