Skip to content

Instantly share code, notes, and snippets.

@kejadlen
Created December 24, 2010 20:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kejadlen/754490 to your computer and use it in GitHub Desktop.
Save kejadlen/754490 to your computer and use it in GitHub Desktop.
# An experiment with using the normal parser instead of the scanner.
# CSV parser expressed as a plain Ragel state machine (no scanner construct).
# This is Ragel input with Ruby as the host language: the machine section and
# the "write" directives are expanded by Ragel into Ruby code.
class AlphaCSV
  %%{
    machine alphacsv;

    # Character classes. textdata is printable ASCII minus the double quote
    # (0x22) and the comma (0x2c).
    textdata = 0x20..0x21 | 0x23..0x2b | 0x2d..0x7e;
    cr = 0x0d;
    lf = 0x0a;
    comma = 0x2c;
    dquote = 0x22;

    # The carriage return is optional, so a bare LF also ends a record.
    crlf = cr? lf;

    # Unquoted field: the entering action stores the start index in ts and the
    # leaving action appends data[ts...fpc] to the current row.
    non_escaped = textdata* > { ts = fpc } % { current_line << data[ts...fpc].pack('c*') };

    # Quoted field: ts is set on the text between the quotes and the leaving
    # action slices up to fpc-1 (presumably to drop the closing quote; confirm
    # against generated output). Every doubled quote is collapsed to a single
    # quote; gsub is required because sub would only unescape the first pair.
    escaped = dquote
              (textdata | comma | cr | lf | dquote{2})* > { ts = fpc }
              dquote % { current_line << data[ts...fpc-1].pack('c*').gsub('""', '"') };

    field = escaped | non_escaped;

    # Leaving a record pushes the collected row and starts a fresh one.
    record = (field (comma field)*) % { csv << current_line; current_line = [] };

    main := record (crlf record)* crlf?;
  }%%

  %% write data;

  class << self
    # Parses +data+ (a String) and returns an Array of rows, each row being an
    # Array of field Strings.
    def parse data
      # The machine works on integer character codes rather than a String.
      data = data.unpack('c*')
      csv = []
      current_line = []

      %% write init;
      # Setting eof to pe tells the machine the whole input is in this buffer.
      eof = pe
      %% write exec;

      # Drop a final row that consists of one empty field (presumably produced
      # when the input is empty or ends with a line break).
      csv.pop if csv.last == ['']
      csv
    end
  end
end
# line 1 "alphacsv.rl"
# file = [header CRLF] record *(CRLF record) [CRLF]
# header = name *(COMMA name)
# record = field *(COMMA field)
# name = field
# field = (escaped / non-escaped)
# escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
# non-escaped = *TEXTDATA
# COMMA = %x2C
# CR = %x0D ;as per section 6.1 of RFC 2234 [2]
# DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2]
# LF = %x0A ;as per section 6.1 of RFC 2234 [2]
# CRLF = CR LF ;as per section 6.1 of RFC 2234 [2]
# TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
# Table-driven CSV scanner. The "# line N" markers show this code was emitted
# by Ragel from alphacsv.rl; regenerating from the grammar overwrites manual
# edits, so the grammar's actions must be kept in sync with any change here.
#
# The only entry point is AlphaCSV.parse. The private _alphacsv_* arrays below
# encode the state machine and are read by the driver loop inside parse.
class AlphaCSV
  # line 35 "alphacsv.rl"

  # line 24 "alphacsv.rb"
  class << self
    attr_accessor :_alphacsv_actions
    private :_alphacsv_actions, :_alphacsv_actions=
  end
  # Action lists. Each list starts at some offset with a count, followed by
  # that many action ids; offset 0 is the empty list.
  self._alphacsv_actions = [
    0, 1, 2, 1, 5, 1, 6, 1,
    7, 1, 8, 1, 9, 2, 0, 1,
    2, 3, 4
  ]

  class << self
    attr_accessor :_alphacsv_key_offsets
    private :_alphacsv_key_offsets, :_alphacsv_key_offsets=
  end
  # Per state: index of the state's first key in _alphacsv_trans_keys.
  self._alphacsv_key_offsets = [
    0, 0, 1, 6, 12, 18
  ]

  class << self
    attr_accessor :_alphacsv_trans_keys
    private :_alphacsv_trans_keys, :_alphacsv_trans_keys=
  end
  # Character codes to match: for each state its single keys come first,
  # followed by its (low, high) range pairs.
  self._alphacsv_trans_keys = [
    10, 10, 13, 34, 32, 126, 10, 13,
    34, 44, 32, 126, 32, 33, 35, 43,
    45, 126, 34, 0
  ]

  class << self
    attr_accessor :_alphacsv_single_lengths
    private :_alphacsv_single_lengths, :_alphacsv_single_lengths=
  end
  # Per state: number of single-character keys.
  self._alphacsv_single_lengths = [
    0, 1, 3, 4, 0, 1
  ]

  class << self
    attr_accessor :_alphacsv_range_lengths
    private :_alphacsv_range_lengths, :_alphacsv_range_lengths=
  end
  # Per state: number of (low, high) range pairs.
  self._alphacsv_range_lengths = [
    0, 0, 1, 1, 3, 0
  ]

  class << self
    attr_accessor :_alphacsv_index_offsets
    private :_alphacsv_index_offsets, :_alphacsv_index_offsets=
  end
  # Per state: index of the state's first transition in the trans_* tables.
  self._alphacsv_index_offsets = [
    0, 0, 2, 7, 13, 17
  ]

  class << self
    attr_accessor :_alphacsv_trans_targs
    private :_alphacsv_trans_targs, :_alphacsv_trans_targs=
  end
  # Per transition: the state to move to.
  self._alphacsv_trans_targs = [
    3, 0, 2, 2, 5, 2, 3, 3,
    1, 2, 3, 4, 0, 4, 4, 4,
    3, 2, 3, 3, 3, 3, 0
  ]

  class << self
    attr_accessor :_alphacsv_trans_actions
    private :_alphacsv_trans_actions, :_alphacsv_trans_actions=
  end
  # Per transition: offset of its action list in _alphacsv_actions (0 = none).
  self._alphacsv_trans_actions = [
    5, 0, 0, 0, 16, 0, 11, 5,
    0, 0, 3, 0, 0, 0, 0, 0,
    9, 0, 7, 11, 9, 7, 0
  ]

  class << self
    attr_accessor :_alphacsv_to_state_actions
    private :_alphacsv_to_state_actions, :_alphacsv_to_state_actions=
  end
  # Per state: action list run after a transition has landed in that state.
  self._alphacsv_to_state_actions = [
    0, 0, 0, 13, 0, 0
  ]

  class << self
    attr_accessor :_alphacsv_from_state_actions
    private :_alphacsv_from_state_actions, :_alphacsv_from_state_actions=
  end
  # Per state: action list run before a character is examined in that state.
  self._alphacsv_from_state_actions = [
    0, 0, 0, 1, 0, 0
  ]

  class << self
    attr_accessor :_alphacsv_eof_trans
    private :_alphacsv_eof_trans, :_alphacsv_eof_trans=
  end
  # Per state: 1-based index of the transition taken when the input ends in
  # that state; 0 means there is none.
  self._alphacsv_eof_trans = [
    0, 0, 20, 0, 21, 22
  ]

  class << self
    attr_accessor :alphacsv_start
  end
  self.alphacsv_start = 3;

  class << self
    attr_accessor :alphacsv_first_final
  end
  self.alphacsv_first_final = 3;

  class << self
    attr_accessor :alphacsv_error
  end
  self.alphacsv_error = 0;

  class << self
    attr_accessor :alphacsv_en_main
  end
  self.alphacsv_en_main = 3;

  # line 38 "alphacsv.rl"
  class << self
    # Parses CSV text.
    #
    # data - a String; it is unpacked into signed 8-bit character codes.
    #
    # Returns an Array of rows, each row an Array of field Strings. Quoted
    # fields lose their surrounding quotes and every doubled quote inside them
    # becomes a single quote. When a character has no transition the machine
    # enters state 0 and scanning stops; whatever was collected so far is
    # returned and no exception is raised.
    def parse data
      data = data.unpack('c*')
      csv = []
      current_line = []

      # line 150 "alphacsv.rb"
      # Machine registers: p = read position, pe = end of input, cs = current
      # state, ts/te = start/end of the current token, act = id of the last
      # pattern that matched (0 = none).
      begin
        p ||= 0
        pe ||= data.length
        cs = alphacsv_start
        ts = nil
        te = nil
        act = 0
      end

      # line 47 "alphacsv.rl"
      # The whole input is in one buffer, so end-of-file is the buffer end.
      eof = pe

      # line 165 "alphacsv.rb"
      begin
        _klen, _trans, _keys, _acts, _nacts = nil
        # Ruby has no goto: _goto_level names the section at which the next
        # pass of the loop resumes; sections with a lower level are skipped.
        _goto_level = 0
        _resume = 10
        _eof_trans = 15
        _again = 20
        _test_eof = 30
        _out = 40
        while true
          _trigger_goto = false
          if _goto_level <= 0
            if p == pe
              _goto_level = _test_eof
              next
            end
            if cs == 0
              _goto_level = _out
              next
            end
          end
          if _goto_level <= _resume
            # From-state actions of the current state.
            _acts = _alphacsv_from_state_actions[cs]
            _nacts = _alphacsv_actions[_acts]
            _acts += 1
            while _nacts > 0
              _nacts -= 1
              _acts += 1
              case _alphacsv_actions[_acts - 1]
              when 2 then
                # line 1 "NONE"
                # A new token starts at the current position.
                begin
                  ts = p
                end
                # line 199 "alphacsv.rb"
              end # from state action switch
            end
            if _trigger_goto
              next
            end

            # Find the transition for data[p]; _trans ends up on the matching
            # entry, or on the state's default entry when nothing matches.
            _keys = _alphacsv_key_offsets[cs]
            _trans = _alphacsv_index_offsets[cs]
            _klen = _alphacsv_single_lengths[cs]
            _break_match = false

            begin
              if _klen > 0
                # Binary search over the single-character keys.
                _lower = _keys
                _upper = _keys + _klen - 1
                loop do
                  break if _upper < _lower
                  _mid = _lower + ( (_upper - _lower) >> 1 )
                  if data[p] < _alphacsv_trans_keys[_mid]
                    _upper = _mid - 1
                  elsif data[p] > _alphacsv_trans_keys[_mid]
                    _lower = _mid + 1
                  else
                    _trans += (_mid - _keys)
                    _break_match = true
                    break
                  end
                end # loop
                break if _break_match
                _keys += _klen
                _trans += _klen
              end
              _klen = _alphacsv_range_lengths[cs]
              if _klen > 0
                # Binary search over (low, high) pairs; "& ~1" keeps _mid on
                # the low element of a pair.
                _lower = _keys
                _upper = _keys + (_klen << 1) - 2
                loop do
                  break if _upper < _lower
                  _mid = _lower + (((_upper-_lower) >> 1) & ~1)
                  if data[p] < _alphacsv_trans_keys[_mid]
                    _upper = _mid - 2
                  elsif data[p] > _alphacsv_trans_keys[_mid+1]
                    _lower = _mid + 2
                  else
                    _trans += ((_mid - _keys) >> 1)
                    _break_match = true
                    break
                  end
                end # loop
                break if _break_match
                _trans += _klen
              end
            end while false
          end
          if _goto_level <= _eof_trans
            # Take the transition and run its actions.
            cs = _alphacsv_trans_targs[_trans]
            if _alphacsv_trans_actions[_trans] != 0
              _acts = _alphacsv_trans_actions[_trans]
              _nacts = _alphacsv_actions[_acts]
              _acts += 1
              while _nacts > 0
                _nacts -= 1
                _acts += 1
                case _alphacsv_actions[_acts - 1]
                when 3 then
                  # line 1 "NONE"
                  # Record a possible token end just past this character.
                  begin
                    te = p+1
                  end
                when 4 then
                  # line 30 "alphacsv.rl"
                  # Remember that pattern 1 (quoted field) was the last match.
                  begin
                    act = 1; end
                when 5 then
                  # line 32 "alphacsv.rl"
                  # Token consumed without emitting anything.
                  begin
                    te = p+1
                  end
                when 6 then
                  # line 33 "alphacsv.rl"
                  # Line break: finish the current row and start a new one.
                  begin
                    te = p+1
                    begin csv << current_line; current_line = [] end
                  end
                when 7 then
                  # line 30 "alphacsv.rl"
                  # Quoted field ended before the current character: step p
                  # back so that character is scanned again, strip the outer
                  # quotes and unescape every doubled quote (gsub, not sub,
                  # which would only unescape the first pair).
                  begin
                    te = p
                    p = p - 1; begin current_line << data[ts+1...te-1].pack('c*').gsub('""', '"') end
                  end
                when 8 then
                  # line 31 "alphacsv.rl"
                  # Unquoted field ended before the current character.
                  begin
                    te = p
                    p = p - 1; begin current_line << data[ts...te].pack('c*') end
                  end
                when 9 then
                  # line 1 "NONE"
                  # The scan cannot continue: fall back to the last match.
                  begin
                    case act
                    when 0 then
                      # Nothing matched: enter the error state and stop.
                      begin begin
                        cs = 0
                        _trigger_goto = true
                        _goto_level = _again
                        break
                      end
                      end
                    when 1 then
                      # Rewind to the end of the last complete quoted field
                      # and emit it, unescaping every doubled quote.
                      begin
                        begin p = ((te))-1; end
                        current_line << data[ts+1...te-1].pack('c*').gsub('""', '"')
                      end
                    end
                  end
                  # line 314 "alphacsv.rb"
                end # action switch
              end
            end
            if _trigger_goto
              next
            end
          end
          if _goto_level <= _again
            # To-state actions of the state just entered.
            _acts = _alphacsv_to_state_actions[cs]
            _nacts = _alphacsv_actions[_acts]
            _acts += 1
            while _nacts > 0
              _nacts -= 1
              _acts += 1
              case _alphacsv_actions[_acts - 1]
              when 0 then
                # line 1 "NONE"
                # Forget the previous token start.
                begin
                  ts = nil; end
              when 1 then
                # line 1 "NONE"
                # Forget the previously matched pattern.
                begin
                  act = 0
                end
                # line 339 "alphacsv.rb"
              end # to state action switch
            end
            if _trigger_goto
              next
            end
            if cs == 0
              _goto_level = _out
              next
            end
            p += 1
            if p != pe
              _goto_level = _resume
              next
            end
          end
          if _goto_level <= _test_eof
            # At end of input, take the state's EOF transition if it has one.
            if p == eof
              if _alphacsv_eof_trans[cs] > 0
                _trans = _alphacsv_eof_trans[cs] - 1;
                _goto_level = _eof_trans
                next;
              end
            end
          end
          if _goto_level <= _out
            break
          end
        end
      end

      # line 51 "alphacsv.rl"
      # Input that does not end with a line break leaves a pending row.
      csv << current_line unless current_line.empty?
      csv
    end
  end
end
# CSV tokenizer written with Ragel's scanner construct (Ruby host language).
# Ragel expands the machine section and the "write" directives into Ruby code.
class AlphaCSV
  %%{
    machine alphacsv;

    # Character classes. textdata is printable ASCII minus the double quote
    # (0x22) and the comma (0x2c).
    textdata = 0x20..0x21 | 0x23..0x2b | 0x2d..0x7e;
    cr = 0x0d;
    lf = 0x0a;
    comma = 0x2c;
    dquote = 0x22;

    # The carriage return is optional, so a bare LF also ends a row.
    crlf = cr? lf;

    non_escaped = textdata*;
    escaped = dquote (textdata | comma | cr | lf | dquote{2})* dquote;

    # Scanner: each pattern's action runs with ts/te delimiting the token.
    main := |*
      # Strip the surrounding quotes and unescape every doubled quote; gsub is
      # required because sub would only unescape the first pair in a field.
      escaped => { current_line << data[ts+1...te-1].pack('c*').gsub('""', '"') };
      non_escaped => { current_line << data[ts...te].pack('c*') };
      # Field separators carry no data of their own.
      comma;
      # A line break completes the current row.
      crlf => { csv << current_line; current_line = [] };
    *|;
  }%%

  %% write data;

  class << self
    # Parses +data+ (a String) and returns an Array of rows, each row being an
    # Array of field Strings.
    def parse data
      # The machine works on integer character codes rather than a String.
      data = data.unpack('c*')
      csv = []
      current_line = []

      %% write init;
      # Setting eof to pe tells the machine the whole input is in this buffer.
      eof = pe
      %% write exec;

      # Input that does not end with a line break leaves a pending row.
      csv << current_line unless current_line.empty?
      csv
    end
  end
end
require 'minitest/autorun'
require 'alphacsv'
# Tests for AlphaCSV.parse, covering the record shapes listed in the CSV RFC
# grammar.
#
# The superclass is resolved at load time: Minitest 5 names the base class
# Minitest::Test and current releases no longer define
# MiniTest::Unit::TestCase, while older Minitest versions only know the
# latter. Picking whichever exists keeps the file loadable on both.
class TestCSV < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  # Empty input yields no rows.
  def test_empty
    assert_equal [], AlphaCSV.parse('')
  end

  # One unquoted field gives one single-field row.
  def test_single_element
    assert_equal [%w[ aaa ]], AlphaCSV.parse('aaa')
  end

  # Commas separate fields within a row.
  def test_multiple_elements
    assert_equal [%w[ aaa bbb ]], AlphaCSV.parse('aaa,bbb')
  end

  # CR LF separates rows.
  def test_multiple_lines
    assert_equal [%w[ aaa ], %w[ bbb ]], AlphaCSV.parse("aaa\r\nbbb")
  end

  # Despite its name this covers a bare LF (no CR) as the row separator.
  def test_crlf
    assert_equal [%w[ aaa ], %w[ bbb ]], AlphaCSV.parse("aaa\nbbb")
  end

  # Trailing line break, no trailing line break, quoted fields, a line break
  # inside a quoted field, and an escaped quote inside a quoted field.
  def test_rfc
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse("aaa,bbb,ccc\r\nzzz,yyy,xxx\r\n")
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse("aaa,bbb,ccc\r\nzzz,yyy,xxx")
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","bbb","ccc"\r\nzzz,yyy,xxx))
    assert_equal [%W[ aaa b\r\nbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","b\r\nbb","ccc"\r\nzzz,yyy,xxx))
    assert_equal [%w[ aaa b"bb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","b""bb","ccc"\r\nzzz,yyy,xxx))
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment