Skip to content

Instantly share code, notes, and snippets.

@bryanhughes
Last active April 14, 2016 23:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bryanhughes/8fd49c3ef5f991975074cb65f4aac0e1 to your computer and use it in GitHub Desktop.
Save bryanhughes/8fd49c3ef5f991975074cb65f4aac0e1 to your computer and use it in GitHub Desktop.
Super simple parsing of CSV to Erlang terms
%% @author Bryan Hughes <bryan@go-factory.net>
%% @copyright 2016 GOFACTORY, Inc.
%%
%% Permission is hereby granted, free of charge, to any person obtaining a
%% copy of this software and associated documentation files (the "Software"),
%% to deal in the Software without restriction, including without limitation
%% the rights to use, copy, modify, merge, publish, distribute, sublicense,
%% and/or sell copies of the Software, and to permit persons to whom the
%% Software is furnished to do so, subject to the following conditions:
%%
%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.
%%
%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
%% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
%% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
%% DEALINGS IN THE SOFTWARE.
-module(csv_parser).
-author("bryan@go-factory.net").
%% API
-export([parse/1]).
%% @doc Parses CSV text into rows of Erlang terms.
%%
%% `String' is split on "\n" (empty lines are dropped by the tokenizer) and the
%% resulting lines are handed to `parse_line/2', whose result is returned
%% unchanged: `{ok, Rows}' on success, where every row is the list of terms read
%% from one line, or `{failed, Message}' when a line cannot be read.
-spec parse(string()) -> {ok, [list()]} | {failed, binary()}.
parse(String) ->
    parse_line(string:tokens(String, "\n"), []).
%% @doc Parses the remaining lines one at a time, accumulating one row (a list
%% of Erlang terms) per data line.
%%
%% Lines whose first non-blank character is `%' are treated as comments and
%% skipped. Returns `{ok, Rows}' with the rows in input order, or
%% `{failed, Message}' as soon as a line cannot be scanned or parsed.
-spec parse_line([string()], [list()]) -> {ok, [list()]} | {failed, binary()}.
parse_line([], Acc) ->
    {ok, lists:reverse(Acc)};
parse_line([Line | Rest], Acc) ->
    case skip_leading_blanks(Line) of
        [$% | _] ->
            %% Comment line (possibly indented): ignore it.
            parse_line(Rest, Acc);
        _DataLine ->
            parse_row(Line, Rest, Acc)
    end.

%% Scans and parses one data line as the Erlang list literal `[Line].' and, on
%% success, continues with the remaining lines.
%%
%% NOTE(review): erl_scan turns every bare word into an atom, so running
%% untrusted CSV through this parser can grow the atom table - confirm that the
%% input is trusted.
parse_row(Line, Rest, Acc) ->
    %% The newline before "]." terminates a trailing `% ...' comment on the
    %% line, so the comment cannot swallow the closing bracket and dot.
    case erl_scan:string("[" ++ Line ++ "\n].") of
        {ok, Tokens, _EndLocation} ->
            case erl_parse:parse_term(Tokens) of
                {ok, Row} ->
                    parse_line(Rest, [Row | Acc]);
                ParseError ->
                    io:format(standard_error, "Failed to parse CSV terms. Reason=~p~n", [ParseError]),
                    {failed, <<"Failed to parse CSV terms">>}
            end;
        ScanError ->
            io:format(standard_error, "Failed to scan CSV values to tokens. Reason=~p~n", [ScanError]),
            {failed, <<"Failed to parse CSV values to tokens">>}
    end.

%% Drops leading spaces and tabs so that indented comment lines are recognised.
skip_leading_blanks(Line) ->
    lists:dropwhile(fun(Char) -> Char =:= $\s orelse Char =:= $\t end, Line).
%%
%% Tests
%%
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
%% WARNING:
%% This solution does not allow for empty cells denoted by `,,,`. The Erlang parser will choke on these, so
%% you will need to use `undefined` in place of an empty cell.
%% Checks that a single line is parsed into one row, covering a float, quoted
%% strings (one containing a comma), an atom, an integer and a nested list.
simple_test() ->
    % Supports embedding terms into a column
    Line = "1.0, \"Hello World\", \"This is, a test\", true, 200, [[\"Field 1\", true], [\"Field 2\", false]]",
    % Call the function under test locally; this module is csv_parser, not nucleus_csv.
    {ok, Results} = parse(Line),
    ?assertEqual([[1.0, "Hello World", "This is, a test", true, 200, [["Field 1", true], ["Field 2", false]]]], Results),
    ok.
%% Checks a multi-line document: the leading `%' header line is skipped and
%% each of the three data lines becomes one row of terms.
complex_test() ->
    % Commented lines are ignored
    String = "% name,channel_label,lat,lng,radius,description,system_messages,lease_time,life_time\n"
             "\"Intersection #23\",\"Intersection\",37767037,-122388893,100,active,\"1725-1729 3rd Street, San Francisco\",false,0,0\n"
             "\"Jobsite #23\",\"Jobsite\",37762330,-122386382,100,active,\"550 20th Street, San Francisco\",false,0,0\n"
             "\"Bill and Linda\",\"Jobsite\",37758903,-122391199,100,active,\"895 Indiana Street, San Francisco\",false,0,0",
    % Call the function under test locally; this module is csv_parser, not nucleus_csv.
    {ok, Results} = parse(String),
    Expected = [["Intersection #23", "Intersection", 37767037, -122388893, 100, active, "1725-1729 3rd Street, San Francisco", false, 0, 0],
                ["Jobsite #23", "Jobsite", 37762330, -122386382, 100, active, "550 20th Street, San Francisco", false, 0, 0],
                ["Bill and Linda", "Jobsite", 37758903, -122391199, 100, active, "895 Indiana Street, San Francisco", false, 0, 0]],
    ?assertEqual(Expected, Results),
    ok.
-endif.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment