dvarrazzo/codice_catastale.sql

## codice_catastale.sql
-- Staging table for the raw CSV import.
-- The column order mirrors the field order of the CSV file loaded into this
-- table; keep the two in sync.
-- Only codice and comune are mandatory; every other field may be null.
create table codice_catastale_import (
    codice text not null,
    codice_belfiore text,
    provincia text,
    comune text not null,
    annotazione text,
    -- the *_var columns and data_variazione presumably describe a later
    -- change of the entry -- TODO confirm against the data source
    codice_var text,
    codice_belfiore_var text,
    provincia_var text,
    comune_var text,
    data_costituzione date,
    data_variazione date);

-- Load the CSV export into the staging table.
-- The first line of the file is a header and is skipped (header true).
-- The target columns are listed explicitly, in file order, so the load does
-- not silently depend on the column order of the table definition.
-- NOTE: "copy ... from 'file'" reads the path on the database server; when
-- the file lives on the client machine use psql's \copy with the same options.
copy codice_catastale_import (
    codice,
    codice_belfiore,
    provincia,
    comune,
    annotazione,
    codice_var,
    codice_belfiore_var,
    provincia_var,
    comune_var,
    data_costituzione,
    data_variazione
)
from '/home/piro/Downloads/codici_catastali.csv'
with (format csv, header true);

-- Lookup table built from the staging data.
--   comune    municipality name
--   provincia province
--   codice    cadastral code
--   removed   true for entries loaded as no longer active
-- NOTE(review): provincia is nullable in codice_catastale_import but not null
-- here, so a staging row without a province would make the inserts fail --
-- confirm the data never contains one.
create table codice_catastale (
    comune text not null,
    provincia text not null,
    codice text not null,
    removed bool not null default false);

-- Insert the active municipalities: the staging rows where both annotazione
-- and codice_var are null.
-- Note: some records have a null annotazione although they should carry one;
-- a non-null codice_var gives them away, hence the check on both columns.
-- The target columns are named explicitly; removed keeps its default (false).
insert into codice_catastale (comune, provincia, codice)
select comune, provincia, codice
from codice_catastale_import
where annotazione is null
and codice_var is null;

-- Insert the municipalities that are no longer active: every staging row that
-- does not qualify as active (annotazione or codice_var is set), loaded with
-- removed = true.
-- Rows whose (comune, provincia) pair is already present in codice_catastale
-- when the statement starts are skipped.
-- The target columns are named explicitly so the statement does not depend on
-- the column order of the table definition.
insert into codice_catastale (comune, provincia, codice, removed)
select src.comune, src.provincia, src.codice, true
from codice_catastale_import src
where not (src.annotazione is null and src.codice_var is null)
and not exists (
    select 1 from codice_catastale pre
    where (pre.comune, pre.provincia) = (src.comune, src.provincia));

-- Names containing an asterisk carry a synonym in another language after the
-- asterisk. Split each of them into two rows.
-- The insert must run before the update: the update removes the asterisk
-- that the insert looks for.

-- Add a row for the synonym (the part after the asterisk).
-- btrim drops whitespace on both sides of the extracted part.
insert into codice_catastale (comune, provincia, codice, removed)
select btrim(split_part(comune, '*', 2)),
    provincia, codice, removed
from codice_catastale
where comune ~ E'\\*';

-- Keep only the part before the asterisk in the original row.
-- btrim rather than ltrim: the whitespace to drop here is the one left
-- between the name and the asterisk, i.e. trailing whitespace.
update codice_catastale
set comune = btrim(split_part(comune, '*', 1))
where comune ~ E'\\*';

-- Drop the province changes that kept the same cadastral code: a removed row
-- is redundant when a non-removed row has the same name and the same code.
delete from codice_catastale stale
where stale.removed
and exists (
    select 1
    from codice_catastale live
    where live.comune = stale.comune
    and live.codice = stale.codice
    and not live.removed);

-- Create a fuzzy (trigram) index on the municipality name.
-- The pg_trgm extension is installed into a dedicated schema of the same
-- name, so its objects have to be schema-qualified (as the operator class
-- below is) unless that schema is added to the search_path.
-- Order matters: the schema must exist before the extension, and the
-- extension before the index that uses its operator class.
create schema pg_trgm;
create extension pg_trgm with schema pg_trgm;
create index codice_catastale_trgm
    on codice_catastale using gin (comune pg_trgm.gin_trgm_ops);

-- Example: fuzzy search on the municipality name, best matches first.
-- similarity() and the % operator live in the pg_trgm schema, which is not
-- on the default search_path, so both are schema-qualified here.
select
    comune,
    provincia,
    codice,
    removed,
    pg_trgm.similarity(comune, 'cassino') as sml
from codice_catastale
where comune operator(pg_trgm.%) 'cassino'
order by sml desc;

-- NOTE: a short string that is an exact prefix (or an exact substring) still
-- gets a low similarity, so similarity on its own may not be suitable for
-- autocompletion.
	-- Staging table for the raw CSV import.
	-- The column order mirrors the field order of the CSV file loaded into this
	-- table; keep the two in sync.
	-- Only codice and comune are mandatory; every other field may be null.
	-- NOTE(review): a table with this name is already created earlier in this
	-- file; running both copies in one session fails here because the relation
	-- already exists -- confirm whether this second copy is intentional.
	create table codice_catastale_import (
	codice text not null,
	codice_belfiore text,
	provincia text,
	comune text not null,
	annotazione text,
	-- the *_var columns and data_variazione presumably describe a later
	-- change of the entry -- TODO confirm against the data source
	codice_var text,
	codice_belfiore_var text,
	provincia_var text,
	comune_var text,
	data_costituzione date,
	data_variazione date);

	-- Load the CSV export into the staging table.
	-- The first line of the file is a header and is skipped (header true).
	-- The target columns are listed explicitly, in file order, so the load does
	-- not silently depend on the column order of the table definition.
	-- NOTE: "copy ... from 'file'" reads the path on the database server; when
	-- the file lives on the client machine use psql's \copy with the same options.
	copy codice_catastale_import (
	    codice,
	    codice_belfiore,
	    provincia,
	    comune,
	    annotazione,
	    codice_var,
	    codice_belfiore_var,
	    provincia_var,
	    comune_var,
	    data_costituzione,
	    data_variazione
	)
	from '/home/piro/Downloads/codici_catastali.csv'
	with (format csv, header true);

	-- Lookup table built from the staging data.
	--   comune    municipality name
	--   provincia province
	--   codice    cadastral code
	--   removed   true for entries loaded as no longer active
	-- NOTE(review): provincia is nullable in codice_catastale_import but not null
	-- here, so a staging row without a province would make the inserts fail --
	-- confirm the data never contains one.
	create table codice_catastale (
	comune text not null,
	provincia text not null,
	codice text not null,
	removed bool not null default false);

	-- Insert the active municipalities: the staging rows where both annotazione
	-- and codice_var are null.
	-- Note: some records have a null annotazione although they should carry one;
	-- a non-null codice_var gives them away, hence the check on both columns.
	-- The target columns are named explicitly; removed keeps its default (false).
	insert into codice_catastale (comune, provincia, codice)
	select comune, provincia, codice
	from codice_catastale_import
	where annotazione is null
	and codice_var is null;

	-- Insert the municipalities that are no longer active: every staging row that
	-- does not qualify as active (annotazione or codice_var is set), loaded with
	-- removed = true.
	-- Rows whose (comune, provincia) pair is already present in codice_catastale
	-- when the statement starts are skipped.
	-- The target columns are named explicitly so the statement does not depend on
	-- the column order of the table definition.
	insert into codice_catastale (comune, provincia, codice, removed)
	select src.comune, src.provincia, src.codice, true
	from codice_catastale_import src
	where not (src.annotazione is null and src.codice_var is null)
	and not exists (
	    select 1 from codice_catastale pre
	    where (pre.comune, pre.provincia) = (src.comune, src.provincia));

	-- Names containing an asterisk carry a synonym in another language after the
	-- asterisk. Split each of them into two rows.
	-- The insert must run before the update: the update removes the asterisk
	-- that the insert looks for.

	-- Add a row for the synonym (the part after the asterisk).
	-- btrim drops whitespace on both sides of the extracted part.
	insert into codice_catastale (comune, provincia, codice, removed)
	select btrim(split_part(comune, '*', 2)),
	    provincia, codice, removed
	from codice_catastale
	where comune ~ E'\\*';

	-- Keep only the part before the asterisk in the original row.
	-- btrim rather than ltrim: the whitespace to drop here is the one left
	-- between the name and the asterisk, i.e. trailing whitespace.
	update codice_catastale
	set comune = btrim(split_part(comune, '*', 1))
	where comune ~ E'\\*';

	-- Drop the province changes that kept the same cadastral code: a removed row
	-- is redundant when a non-removed row has the same name and the same code.
	delete from codice_catastale stale
	where stale.removed
	and exists (
	    select 1
	    from codice_catastale live
	    where live.comune = stale.comune
	    and live.codice = stale.codice
	    and not live.removed);

	-- Create a fuzzy (trigram) index on the municipality name.
	-- The pg_trgm extension is installed into a dedicated schema of the same
	-- name, so its objects have to be schema-qualified (as the operator class
	-- below is) unless that schema is added to the search_path.
	-- Order matters: the schema must exist before the extension, and the
	-- extension before the index that uses its operator class.
	create schema pg_trgm;
	create extension pg_trgm with schema pg_trgm;
	create index codice_catastale_trgm
	on codice_catastale using gin (comune pg_trgm.gin_trgm_ops);

	-- Example: fuzzy search on the municipality name, best matches first.
	-- similarity() and the % operator live in the pg_trgm schema, which is not
	-- on the default search_path, so both are schema-qualified here.
	select
	    comune,
	    provincia,
	    codice,
	    removed,
	    pg_trgm.similarity(comune, 'cassino') as sml
	from codice_catastale
	where comune operator(pg_trgm.%) 'cassino'
	order by sml desc;

	-- NOTE: a short string that is an exact prefix (or an exact substring) still
	-- gets a low similarity, so similarity on its own may not be suitable for
	-- autocompletion.