Skip to content

Instantly share code, notes, and snippets.

@leeonix
Created June 12, 2014 08:49
Show Gist options
  • Save leeonix/3820ae3a43f74845e6f0 to your computer and use it in GitHub Desktop.
Save leeonix/3820ae3a43f74845e6f0 to your computer and use it in GitHub Desktop.
Parse a CSV file using a state machine
-- vi: syntax=lua ts=4 sw=4 et:
--
-- FILE: csv.lua
-- AUTHOR: LeeoNix
-- DESCRIPTION: parse a CSV file using a state machine
-- NOTES: ---
--
--
-- Character classes: every input byte is reduced to one of these before it
-- is fed to the state machine. The numeric value is the column index used
-- in each row of the transition table.
--
local C_INV = 1   -- invalid character (NUL)
local C_COMMA = 2 -- ,
local C_DQUOT = 3 -- "
local C_LF = 4    -- \n
local C_CR = 5    -- \r
local C_ETC = 6   -- anything else
local C_MAX = 7   -- one past the last class
--
-- ascii_class[byte + 1] is the class of a byte in the range 0..63.
-- Bytes 64 and above have no entry here; they are all ordinary characters
-- and must be treated as C_ETC by the code that consults this table.
--
local ascii_class = {}
for code = 0, 63 do
    ascii_class[code + 1] = C_ETC
end
ascii_class[0 + 1] = C_INV    -- NUL
ascii_class[10 + 1] = C_LF    -- line feed
ascii_class[13 + 1] = C_CR    -- carriage return
ascii_class[34 + 1] = C_DQUOT -- double quote
ascii_class[44 + 1] = C_COMMA -- comma
--
-- Parser states. Each one is named after the input that leads into it.
--
local S_NLF = 1 -- just read a line feed
local S_NCR = 2 -- just read a carriage return
local S_NCL = 3 -- just read the line feed that follows a carriage return
local S_NST = 4 -- just read a comma
local S_STR = 5 -- inside an unquoted field
local S_QST = 6 -- inside a quoted field
local S_QT1 = 7 -- just read the opening quote of a field
local S_QT2 = 8 -- just read a quote while inside a quoted field
local S_MAX = 9 -- one past the last real state
local S_ERR = S_MAX -- error marker; it has no row in the table below
--
-- state_table[state][class] is the state reached from `state` after reading
-- a character of class `class`. Columns, in order:
--   invalid, comma, double quote, line feed, carriage return, the rest
--
local state_table = {
    [S_NLF] = { S_ERR, S_NST, S_QT1, S_NLF, S_NCR, S_STR },
    [S_NCR] = { S_ERR, S_NST, S_QT1, S_NCL, S_NCR, S_STR },
    [S_NCL] = { S_ERR, S_NST, S_QT1, S_NLF, S_NCR, S_STR },
    [S_NST] = { S_ERR, S_NST, S_QT1, S_NLF, S_NCR, S_STR },
    [S_STR] = { S_ERR, S_NST, S_ERR, S_NLF, S_NCR, S_STR },
    [S_QST] = { S_ERR, S_QST, S_QT2, S_QST, S_QST, S_QST },
    [S_QT1] = { S_ERR, S_QST, S_QT2, S_QST, S_QST, S_QST },
    [S_QT2] = { S_ERR, S_NST, S_QST, S_NLF, S_NCR, S_ERR },
}
--
-- The parser object. All parsing state lives in this single table:
--   state     current state of the state machine
--   cache     text carried over between calls to add()
--   cache_row row carried over between calls to add() (nil when there is none)
--   result    list of parsed rows; each row is a list of field strings
--
local csv = {}
csv.state = S_NLF
csv.cache = ''
csv.cache_row = nil
csv.result = {}
--
-- Reset the parser to its initial state: start-of-line state, nothing
-- carried over, and a fresh, empty result list.
--
function csv:clear()
    self.state, self.cache, self.cache_row = S_NLF, '', nil
    self.result = {}
end
--
-- Feed one chunk of CSV text to the parser.
--
-- The chunk may end anywhere (in the middle of a field, right after a comma
-- or a quote, between the CR and LF of a line ending); parsing continues
-- seamlessly with the next call. Completed fields are appended to the
-- current row, and a row is placed in self.result as soon as it receives
-- its first field.
--
-- Inside a quoted field a doubled quote ("") stands for one literal quote.
--
-- str: the text to parse.
-- Returns true on success, or false plus the message "Parse Error" when the
-- input is malformed or when the parser has already finished or failed
-- (call clear() before reusing it).
--
function csv:add(str)
    local state = self.state
    if state_table[state] == nil then
        -- S_ERR has no transition row; refuse the input instead of
        -- failing with an "attempt to index a nil value" error.
        return false, "Parse Error"
    end

    local result = self.result
    local row = self.cache_row or {}
    -- Text of the field in progress that came from earlier chunks (or from
    -- before an escaped quote); the rest of the field is str:sub(first, last).
    local pending = self.cache or ''
    local first, last = 1, 0 -- last < first means "no text in this chunk yet"

    for i = 1, #str do
        local c = str:byte(i)
        -- Only bytes 0..63 are in the table; everything above is ordinary.
        local asc = c < 64 and ascii_class[c + 1] or C_ETC
        local prev = state
        state = state_table[state][asc]

        if state == S_STR then
            last = i
        elseif state == S_QST then
            if prev == S_QT2 then
                -- Escaped quote: keep the text before the pair and restart
                -- the segment at this second quote, so one quote survives.
                pending = pending .. str:sub(first, last)
                first = i
            end
            last = i
        elseif state == S_NST or state == S_NLF or state == S_NCR then
            -- A comma or a line break ends the current field.
            if #row == 0 then
                result[#result + 1] = row
            end
            row[#row + 1] = pending .. str:sub(first, last)
            pending = ''
            first = i + 1
            if state ~= S_NST then
                row = {} -- a line break also ends the row
            end
        elseif state == S_NCL or state == S_QT1 then
            -- Skip the LF of a CRLF pair, or the opening quote of a field.
            first = i + 1
        elseif state == S_ERR then
            self.state = S_ERR
            return false, "Parse Error"
        end
        -- S_QT2 needs no action: the quote is either a closing quote or the
        -- first half of an escaped pair, and is never field text itself.
    end

    -- Carry the unfinished field and row over to the next call.
    self.cache = pending .. str:sub(first, last)
    self.cache_row = row
    self.state = state
    return true
end
--
-- Declare the end of the input and return the parsed rows.
--
-- The input is only complete when the parser sits right after a line break
-- or a comma (states S_NLF through S_NST). Any other state, for example
-- inside a quoted field that was never closed, is reported as an error.
--
-- Returns self.result on success, or nil plus the message "Finish Error".
-- After a successful call the parser is marked as finished (S_ERR).
--
function csv:finish()
    local state = self.state
    -- The two tests must be joined with `or`: no state can be below S_NLF
    -- and above S_NST at the same time, so `and` could never report an error.
    if state < S_NLF or state > S_NST then
        return nil, "Finish Error"
    end
    self.state = S_ERR
    return self.result
end
--
-- Parse a complete CSV document held in a string.
--
-- The parser is reset first, so every call starts from a clean state and
-- returns only the rows of `s`, even if the parser was used before.
-- A line break is appended when a non-empty input does not end with one,
-- so that the last row is terminated. An empty string yields no rows.
--
-- s: the CSV text.
-- Returns the list of rows on success; on failure returns a false value
-- plus an error message ("Parse Error" or "Finish Error").
--
function csv:parse_string(s)
    self:clear()
    if s ~= '' then
        local tail = s:byte(-1)
        if tail ~= 10 and tail ~= 13 then -- neither LF nor CR
            s = s .. '\n'
        end
    end
    local ok, err = self:add(s)
    if not ok then
        return ok, err
    end
    return self:finish()
end
--
-- Read the whole file `name` and parse its contents with parse_string().
-- Raises an error if the file cannot be opened or closed; otherwise returns
-- whatever parse_string() returns.
--
function csv:parse_file(name)
    local handle = assert(io.open(name, "r"))
    local content = handle:read('*a')
    assert(handle:close())
    return self:parse_string(content)
end
--
-- Write rows out, one line per row, with the fields joined by commas.
--
-- t:     list of rows to write (defaults to self.result)
-- write: function called with each piece of output (defaults to io.write)
-- nl:    line terminator written after each row (defaults to '\n')
--
function csv:dump(t, write, nl)
    local rows = t or self.result
    local emit = write or io.write
    local eol = nl or '\n'
    for _, fields in ipairs(rows) do
        local line = table.concat(fields, ',')
        emit(line)
        emit(eol)
    end
end
-- Command-line use: when a file name is given as the first argument, parse
-- that file and print the rows. Without an argument nothing is parsed, so
-- the file can also be loaded just to obtain the csv table.
local input = arg and arg[1]
if input then
    local rows, err = csv:parse_file(input)
    if not rows then
        -- Report the failure instead of silently discarding it.
        io.stderr:write(tostring(err), '\n')
    end
    -- Print whatever was parsed (possibly partial after an error).
    csv:dump()
end
return csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment