Skip to content

Instantly share code, notes, and snippets.

@xtender
Created July 25, 2014 08:21
Show Gist options
  • Save xtender/129c0b9c1998841d2896 to your computer and use it in GitHub Desktop.
Save xtender/129c0b9c1998841d2896 to your computer and use it in GitHub Desktop.
some string splitting techniques
-- 1. xmltable-string-tokenize:
with v as (
select 1 id, 'Hello Brendan How are you today?' str from dual
union all
select 2 id, 'It is a hot and sunny day in Ireland.' from dual
union all
select 3 id, '$$$It is a hot and sunny day in #Ireland.' from dual
union all
select 4 id, '#####' from dual
)
select *
from v
,xmltable('ora:tokenize(.,$delim)[.]'
passing v.str, '\W+' as "delim"
columns
n for ordinality
,token varchar2(4000) path '.'
)(+)-- remove "(+)" - if you don't want to return strings without tokens.
/
-- 2. table/lateral + pivot_query(connect by/model, etc.)
with v as (
select 1 id, 'Hello Brendan How are you today?' str from dual
union all
select 2 id, 'It is a hot and sunny day in Ireland.' from dual
union all
select 3 id, '$$$It is a hot and sunny day in #Ireland.' from dual
union all
select 4 id, '#####' from dual
)
select v.id,v.str
,v_lateral.n
,regexp_substr(v.str,'\w+',1,v_lateral.n) token
from v
left join (select level n from dual connect by level<=2000) v_lateral
on v_lateral.n <= regexp_count(v.str,'\w+')
order by id,n
/
-- 3.1 model:
with v as (
select 1 id, 'Hello Brendan How are you today?' str from dual
union all
select 2 id, 'It is a hot and sunny day in Ireland.' from dual
union all
select 3 id, '$$$It is a hot and sunny day in #Ireland.' from dual
union all
select 4 id, '#####' from dual
)
select id,str,n,token
from v
model
partition by (id)
dimension by (1 n)
measures (str, cast(null as varchar2(4000)) token, nvl(regexp_count(str,'\w+'),0) tokens )
rules
(
token[for n from 1 to tokens[1] increment 1] = regexp_substr(str[1],'\w+',1,cv(n))
,str [for n from 1 to tokens[1] increment 1] = str[1]
)
order by id,n
/
-- 3.2 another model:
with v as (
select 1 id, 'Hello Brendan How are you today?' str from dual
union all
select 2 id, 'It is a hot and sunny day in Ireland.' from dual
union all
select 3 id, '$$$It is a hot and sunny day in #Ireland.' from dual
union all
select 4 id, '#####' from dual
)
select id,str,n,token
from v
model
partition by (id)
dimension by (1 n)
measures (str, cast(null as varchar2(4000)) token, nvl(regexp_count(str,'\w+'),0) tokens)
rules iterate(2000) until (iteration_number+1 >= tokens[1])
(
token[iteration_number+1] = regexp_substr(str[1],'\w+',1,cv(n))
,str [iteration_number+1] = str[1]
)
order by id,n
/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment