Skip to content

Instantly share code, notes, and snippets.

@sfkeller
Last active April 6, 2021 07:43
Show Gist options
  • Save sfkeller/37c94d5b005ca42a531da4d9e877b957 to your computer and use it in GitHub Desktop.
Save sfkeller/37c94d5b005ca42a531da4d9e877b957 to your computer and use it in GitHub Desktop.
--
-- FUNCTION split_text_to_array(...)
-- Accepts a delimited text (string). Returns a sorted array of text.
--
--drop function if exists split_text_to_array(text, char(1));
create or replace function split_text_to_array(text_delimited text, delimiter text default ';')
returns text[]
as $$
select array(select regexp_split_to_table(lower(regexp_replace(text_delimited, '( )|'||delimiter||'$', '', 'g')), delimiter) order by 1);
$$ language sql immutable strict parallel safe;
-- Testing standalone with messy input data (mostly fom OpenStreetMap)
-- Search for 'overlap' of {italian,pizza} with values. Expecting all true escept of last 'italian_pizza'
-- The predicate && is usually used in WHERE clause.
with tbl (values_semicolon_delimited) as (
values
('pizza'),
('Pizza'),
('pizza '),
('pizza ;'),
('pizza;italian'),
('pizza; italian'),
('italian'),
('italian;pizza'),
('italian;Pizza'),
('italian;_pizzeria; swiss'),
('italian_pizza;pizza'),
('italian.pizza;pizza'),
('swiss;american_food;fast_food;pizza'),
('pizza;...'),
('italian_pizza')
)
select
'>'||values_semicolon_delimited||'<' as values_semicolon_delimited,
values_as_array,
values_as_array && '{italian,pizza}' as contains_italian_or_pizza
from (
select
values_semicolon_delimited,
-- Old solution
--(select array(select unnest(regexp_split_to_array(lower(regexp_replace(values_semicolon_delimited, '( )|;$', '', 'g')), ';')::text[]) order by 1)) as values_as_array
-- Better solution replacing unnest & regexp_split_to_array() with just regexp_split_to_table() Thx to david fetter!
--array(select regexp_split_to_table(lower(regexp_replace(values_semicolon_delimited, '( )|;$', '', 'g')), ';') order by 1) as values_as_array
split_text_to_array(values_semicolon_delimited) as values_as_array
from tbl
) as tbl_tmp;
--
-- Demo: List all OpenStreetMap objects with key cuisine tagged 'italian',
-- given table osm_point (used osm2pgsql with OSM country data from download.geofabrik.de).
--
select osm_id, name, cuisine_arr, st_astext(geom,7) as geom_wkt
from (
select *, split_text_to_array(tags->'cuisine') as cuisine_arr
from osm_point
where tags->'cuisine' is not null
) as tbl
where cuisine_arr && '{italian,pizza,italian_pizza}'
order by array_length(cuisine_arr,1) desc, cuisine_arr;
/*
osm_id | name | cuisine_arr | geom_wkt
------------+-------------------------------+------------------------------------+-----------------------------
4262942591 | Sunset Bar | {burger,coffee_shop,italian_pizza} | POINT(7.0758288 46.1046015)
5378024770 | Happy Chef | {burger,italian,kebab} | POINT(8.5351329 47.3679553)
6121323299 | Cakibi's | {burger,kebab,pizza} | POINT(8.8897094 47.5575453)
5591143159 | Linden Express | {burger,kebab,pizza} | POINT(7.5606657 47.5580886)
...
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment