Skip to content

Instantly share code, notes, and snippets.

@carymrobbins
Last active October 21, 2021 23:18
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carymrobbins/b8e7340ce63a22e8d33c to your computer and use it in GitHub Desktop.
Save carymrobbins/b8e7340ce63a22e8d33c to your computer and use it in GitHub Desktop.
Parse CSV strings with PostgreSQL
create or replace function parse_csv(s text, raise_on_error bool default true) returns text[]
immutable strict language plpgsql as $$
declare
result text[] = '{}';
len int = char_length(s);
i int = 1;
pos int;
start_pos int;
c char;
begin
if len = 0 then
return result;
end if;
while i <= len + 1 loop
-- If this element starts with a quote, consume until the next unescaped quote.
if substr(s, i) like '"%' then
i := i + 1;
start_pos := i;
while true loop
-- Find the next quote, it could be an escaped quote or the end of this element.
pos := position('"' in substr(s, i));
-- If we can't find another quote, the csv is malformed.
if pos = 0 then
if raise_on_error then
raise exception 'Unable to parse csv, expected string terminator: %', s;
else
return null;
end if;
end if;
i := i + pos;
c := substr(s, i, 1);
-- If next char is a " then we are just escaping a quote.
if c = '"' then
i := i + 1;
continue;
else
-- Otherwise, we'd expect a comma, terminating this field.
if c in (',', '') then
-- Append this element, unescaping any quotes.
result := array_append(result,
regexp_replace(substr(s, start_pos, i - start_pos - 1), '""', '"', 'g')
);
i := i + 1;
exit;
else
if raise_on_error then
raise exception 'Unable to parse csv, expected comma or EOF, got ''%'' instead: %', c, s;
else
return null;
end if;
end if;
end if;
end loop;
else
start_pos := i;
pos := position(',' in substr(s, start_pos));
if pos = 0 then
result := array_append(result, substr(s, start_pos));
exit;
else
i := start_pos + pos;
result := array_append(result, substr(s, start_pos, greatest(i - start_pos - 1, 0)));
end if;
end if;
end loop;
return result;
end
$$;
-- Poor man's test suite.
with cte(s, expected) as (values
('foo,bar,baz', array['foo','bar','baz']),
('"foo",bar,baz', array['foo','bar','baz']),
('foo,bar,', array['foo','bar','']),
('foo,"bar,baz",quux', array['foo','bar,baz','quux']),
('"bar,baz",,quux', array['bar,baz','','quux']),
('"","foo ""bar"" baz",quux,"""spam"""', array['', 'foo "bar" baz', 'quux', '"spam"']),
('"""foo"""', array['"foo"']),
('', array[]::text[]),
-- Malformed csv
('foo,"bar', null),
('"foo,bar,baz', null),
('"foo,bar","""baz quux""', null)
), parsed as (
select s, parse_csv(s, false) as actual, expected from cte
), test_suite as (
select s as test_case, actual, expected, actual is not distinct from expected as passed
from parsed
)
select test_case,
case when passed then 'pass' else 'FAIL' end as result,
case when passed then null else expected end as expected,
case when passed then null else actual end as actual
from test_suite;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment