Skip to content

Instantly share code, notes, and snippets.

@rtraschke
Last active August 29, 2015 14:15
Show Gist options
  • Save rtraschke/5de05df116007a55e0eb to your computer and use it in GitHub Desktop.
Save rtraschke/5de05df116007a55e0eb to your computer and use it in GitHub Desktop.
CSV parser in Lua

The make_csv_parser{ separator="char", delimiter="char", escape=true or "char", trim=true } function builds a CSV line parser. make_csv_parser() returns a new function that takes a string (e.g. one line from a CSV file) as its argument and returns an array of field values. The returned function splits the string into fields according to the configured special CSV characters. By default, the separator is a comma, the string delimiter is a double quote, no escape character is used (i.e., a string delimiter inside a quoted string must be doubled, or "stuttered"), and whitespace around the separators is not trimmed. Setting escape to true uses the backslash character as the escape character. Setting trim to true enables trimming of whitespace around the separator characters.

For example:

> parse = make_csv_parser()
> T = parse [[a b,"a,b"," a,""b""c", hello "world"!,]]
> for i, v in ipairs(T) do print(i, v) end
1	a b
2	a,b
3	 a,"b"c
4	 hello "world"!
5	
>
> parse = make_csv_parser{ separator = ";", delimiter = "'", escape = true, trim = true }
> T = parse [[a b;a\;b;'a;b';' a,\'b\'c'; hello 'world'!;]]
> for i, v in ipairs(T) do print(i, v) end
1	a b
2	a;b
3	a;b
4	 a,'b'c
5	hello 'world'!
6	
> 
--- Build a parser for a single CSV line.
--
-- All special characters are matched literally, so pattern-magic characters
-- such as "." or "*" are safe to use as separator, delimiter or escape.
--
-- @param args optional table of settings:
--   separator: field separator (default ",")
--   delimiter: string delimiter used to quote fields (default '"')
--   escape:    false/nil (default) means a delimiter inside a quoted field is
--              written doubled ("stuttered"); true selects the backslash; a
--              string selects that string as the escape character
--   trim:      when true, whitespace before each field and at the end of
--              each field is removed (default false)
-- @return a function(s) that returns an array of field values, or
--   nil plus an error message when the line is malformed. A nil line is
--   treated as an empty line and yields a single empty field.
function make_csv_parser(args)
  args = args or {}
  local separator = tostring(args.separator or ',')
  local delimiter = tostring(args.delimiter or '"')
  local escape = args.escape or false
  local trim = args.trim or false

  if escape == true then
    escape = "\\"
  elseif escape then
    escape = tostring(escape)
  end
  -- An empty token would match everywhere and the scan could never advance.
  if separator == "" or delimiter == "" or escape == "" then
    error("separator, delimiter and escape must not be empty", 2)
  end

  -- Turn arbitrary text into a Lua pattern that matches it literally.
  local function quote_pattern(text)
    return (text:gsub("%W", "%%%0"))
  end

  -- True when `token` occurs in `s` exactly at position `pos`.
  local function starts_with(s, token, pos)
    return s:sub(pos, pos + #token - 1) == token
  end

  -- Skip whitespace from `pos`, but never step over the start of a
  -- separator (matters when the separator itself is whitespace, e.g. a tab).
  local function skip_blanks(s, pos)
    while s:find("^%s", pos) and not starts_with(s, separator, pos) do
      pos = pos + 1
    end
    return pos
  end

  local find_separator  -- (s, pos) -> start index of the next real separator
  local find_closing    -- (s, pos) -> start index of the closing delimiter
  local clean_quoted    -- field text of a quoted field -> value
  local clean_plain     -- field text of an unquoted field -> value

  if escape then
    -- Start index of the first `token` at or after `pos` that is not
    -- protected by the escape character. An escape always protects exactly
    -- the next character, so an escaped escape protects nothing after it.
    local function find_unescaped(s, token, pos)
      local at = s:find(token, pos, true)
      while at do
        local esc = s:find(escape, pos, true)
        if not esc or esc >= at then
          return at
        end
        pos = esc + #escape + 1 -- jump over the escape and what it protects
        if pos > at then
          at = s:find(token, pos, true)
        end
      end
      return nil
    end

    local unescape_pattern = quote_pattern(escape) .. '(.)'
    local function unescape(text)
      return (text:gsub(unescape_pattern, '%1'))
    end

    find_separator = function (s, pos) return find_unescaped(s, separator, pos) end
    find_closing = function (s, pos) return find_unescaped(s, delimiter, pos) end
    clean_quoted = unescape
    clean_plain = unescape
  else -- stutter
    local doubled = quote_pattern(delimiter .. delimiter)
    -- "%" is special in gsub replacement strings, so double it.
    local single = delimiter:gsub("%%", "%%%%")

    find_separator = function (s, pos)
      return (s:find(separator, pos, true))
    end
    -- A delimiter immediately followed by another one is a stuttered
    -- (literal) delimiter, not the end of the field.
    find_closing = function (s, pos)
      while true do
        local at = s:find(delimiter, pos, true)
        if not at then
          return nil
        end
        local after = at + #delimiter
        if not starts_with(s, delimiter, after) then
          return at
        end
        pos = after + #delimiter
      end
    end
    clean_quoted = function (text)
      return (text:gsub(doubled, single))
    end
    clean_plain = function (text)
      return text
    end
  end

  return function (s)
    s = (s or "") .. separator -- ending separator
    local fields = {} -- table to collect fields
    local pos = 1
    local last = #s
    repeat
      if trim then
        pos = skip_blanks(s, pos)
      end
      local value, sep
      if starts_with(s, delimiter, pos) then
        -- field is quoted (starts with delimiter)
        local first = pos + #delimiter
        local close = find_closing(s, first)
        if not close then
          return nil, 'unmatched delimiter (' .. delimiter .. ')'
        end
        value = clean_quoted(s:sub(first, close - 1))
        -- anything between the closing delimiter and the separator is dropped
        sep = s:find(separator, close + #delimiter, true)
      else
        sep = find_separator(s, pos)
        if sep then
          value = clean_plain(s:sub(pos, sep - 1))
        end
      end
      if not sep then
        return nil, 'missing separator (' .. separator .. ')'
      end
      if trim then
        -- NOTE: trailing whitespace is also removed from quoted fields;
        -- kept for backward compatibility. rtrim is the global helper
        -- defined later in this file.
        value = rtrim(value)
      end
      fields[#fields + 1] = value
      pos = sep + #separator
    until pos > last
    return fields
  end
end
--- Remove trailing whitespace from a string.
-- @param s the string to trim; nil is treated as the empty string
-- @return s without its trailing run of whitespace characters
function rtrim(s)
  local text = s or ""
  -- Assigning to a single local keeps only the string and drops gsub's
  -- substitution count.
  local stripped = text:gsub("%s+$", "")
  return stripped
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment