-
-
Save tomasgreif/6904830 to your computer and use it in GitHub Desktop.
Guess table definition. The function will create sql create table statement from sample data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create or replace function gtd(sample text) | |
returns varchar as $$ | |
declare outsql varchar; | |
begin | |
select tbl_definition into outsql from ( | |
with | |
source as (select $1::varchar instr) | |
,elems as ( | |
select elem[en], rn, en | |
from ( | |
select elem, rn, generate_subscripts(elem,1) en | |
from( | |
select string_to_array(arr[rn],';') as elem, rn | |
from ( | |
select *, generate_subscripts(arr, 1) as rn | |
from ( | |
select regexp_split_to_array(instr, E'\\n') as arr from source | |
) w | |
) x | |
) y | |
) z | |
) | |
, type_tests as ( | |
select | |
*, | |
case | |
when elem ~ '^$' then 'empty' | |
when elem ~ '^[-+]?[0-9]+$' then 'integer' | |
when elem ~ '^[-+]?[0-9]*\.?[0-9]*([eE][-+]?[0-9]+)?$' then 'numeric' | |
when elem ~ '^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2} [0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\.[0-9]*)?$' or | |
elem ~ '^[0-9]{4}/[0-9]{1,2}/[0-9]{1,2} [0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\.[0-9]*)?$' then 'timestamp' | |
when elem ~ '^[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]+)?)?$' then 'time' | |
when elem ~ '^[0-9]{4}/[0-9]{1,2}/[0-9]{1,2}$' or elem ~ '^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}$' then 'date' | |
else 'varchar' end as dtype | |
from | |
elems | |
) | |
, results_agg as ( | |
select | |
tt2.en, | |
tt2.elem, | |
count(*), | |
array_agg(tt.dtype order by tt.rn) as dtypes, | |
array_agg(tt.elem order by tt.rn) tmp | |
from | |
type_tests tt | |
join type_tests tt2 on (tt.en = tt2.en and tt2.rn=1 and tt.rn <> 1) | |
group by | |
tt2.en, | |
tt2.elem | |
order by | |
tt2.en | |
) | |
, type_determination as ( | |
select | |
*, | |
case | |
when dtypes @> array['integer'] and not(dtypes && array['numeric','date','time','timestamp','varchar']) then 'integer' | |
when dtypes @> array['timestamp'] and not(dtypes && array['numeric','time','varchar']) then 'timestamp' | |
when dtypes @> array['date'] and not(dtypes && array['numeric','time','varchar']) then 'date' | |
when dtypes @> array['time'] and not(dtypes && array['numeric','date','varchar','timestamp']) then 'time' | |
when dtypes @> array['numeric'] and not(dtypes && array['date','time','timestamp','varchar']) then 'numeric' | |
else 'varchar' end as result_type | |
from | |
results_agg | |
) | |
select | |
E'create table (\n' || array_to_string(array_agg(E'\t' || replace(lower(elem),' ','_') || | |
' ' || result_type order by en),E',\n') || E'\n);' as tbl_definition | |
from | |
type_determination | |
) a; | |
return outsql; | |
end; | |
$$ LANGUAGE plpgsql | |
; | |
select gtd('a a;b;c c c;d;e;f;g;h;i i;j;k | |
2012-01-01 13:04:04.321;test;2012-01-01;2012/01/01;abcd;321;-324;.1;-5.5;0;14:54 | |
2012/01/01 13:04:04.321;test;2012-01-01;2012/01/01;abcd;321;-324;.1;-5;0;14:54 | |
2012-01-01 13:04:04.321;test;2012-01-01;2012/01/01;.1;321;-324;.1;-5.5;0;14:54 | |
2012-01-01 13:04:04.321;test;2012-01-01;2012/01/01;abcd;321;-324;.1;-5.5;0;14:54 | |
2012-01-01 13:04:04.321;;2012-01-01;2012/01/01;1;321;-324;.1;-5.5;0;14:54'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment