Skip to content

Instantly share code, notes, and snippets.

@BrianHicks
Created August 25, 2018 15:18
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BrianHicks/165554b033eb797e3ed851964ecb3a38 to your computer and use it in GitHub Desktop.
Save BrianHicks/165554b033eb797e3ed851964ecb3a38 to your computer and use it in GitHub Desktop.
module CSV exposing
( CSV(..), parse
, Separators, defaultSeparators
, Plain
, firstRowAreNames
)
{-| Parse CSV data.


# Turning strings into CSVs

@docs CSV, parse

@docs Separators, defaultSeparators


# Named fields

@docs Plain

@docs firstRowAreNames

-}
import Parser exposing ((|.), (|=), Parser)
{-| A single CSV row: the values parsed from one line, as strings.
-}
type alias Row =
    List String
{-| Tag carried by a `CSV` straight out of `parse`: no row has been given any
special meaning yet.
-}
type Plain
    = Plain
{-| Tag carried by a `CSV` returned from `firstRowAreNames`.

  - `WithNamedFields names` holds the first row of the original CSV.
  - `EmptyHeaders` is used when the original CSV had no rows at all.

-}
type WithNamedFields
    = WithNamedFields Row
    | EmptyHeaders
{-| Parsed CSV data: a tag value of type `a` (`Plain` or `WithNamedFields` in
this module) together with the list of rows.
-}
type CSV a
    = CSV a (List Row)
{-| Configuration for how a CSV document is delimited.

  - `value`: the character that separates one value from the next within a row.

-}
type alias Separators =
    { value : Char }
{-| Separators for comma-separated data: values are split on `','`.
-}
defaultSeparators : Separators
defaultSeparators =
    -- `Separators` is a record alias, so it doubles as a constructor whose
    -- single argument is the `value` field.
    Separators ','
-- PARSING
{-| Parse a raw string into a plain `CSV`, splitting values on the given
separators. Returns the parser's dead ends when the input cannot be parsed.
-}
parse : Separators -> String -> Result (List Parser.DeadEnd) (CSV Plain)
parse separators raw =
    raw
        |> Parser.run (rows separators)
{-| Parser for a whole document: collects rows until the end of input and
wraps them as a `CSV Plain`.
-}
rows : Separators -> Parser (CSV Plain)
rows separators =
    Parser.loop [] (rowsHelp separators)
        |> Parser.map (CSV Plain)
{-| One step of the document loop.

`revRows` is the list of rows parsed so far, most recent first. At the end of
input the rows are returned in input order; otherwise one more row is parsed
and pushed onto the front.

-}
rowsHelp : Separators -> List Row -> Parser (Parser.Step (List Row) (List Row))
rowsHelp separators revRows =
    let
        finished : Parser (Parser.Step (List Row) (List Row))
        finished =
            Parser.map
                (\_ -> Parser.Done (List.reverse revRows))
                Parser.end

        oneMoreRow : Parser (Parser.Step (List Row) (List Row))
        oneMoreRow =
            Parser.map
                (\parsedRow -> Parser.Loop (parsedRow :: revRows))
                (row separators)
    in
    -- Order matters: the end of input is checked before trying to read a row.
    Parser.oneOf [ finished, oneMoreRow ]
{-| Parser for a single row, starting with no values and looping over
`rowHelp` until the row is finished.
-}
row : Separators -> Parser Row
row separators =
    rowHelp separators
        |> Parser.loop []
{-| One step of the loop that parses a single row.

`revVals` holds the values parsed so far for this row, most recent first. Each
step reads one complete field and then looks at what terminates it:

  - the separator: the field is pushed and the loop continues
  - `"\n"` or the end of input: the field is pushed and the row is finished,
    with the values put back into input order

Empty fields are kept as `""` so that values stay aligned with their columns:
`a,,b` produces `[ "a", "", "b" ]`, and a leading or trailing separator
produces a leading or trailing `""`. A line with nothing on it still produces
an empty row (`[]`).

Any text between a closing quote and the next separator or line end is
appended to the quoted field, so every field ends at a separator, a `"\n"`, or
the end of input.

-}
rowHelp : Separators -> Row -> Parser (Parser.Step Row Row)
rowHelp separators revVals =
    let
        done : Row -> Parser.Step Row Row
        done reversed =
            Parser.Done (List.reverse reversed)

        -- Unquoted text up to (not including) the next separator or newline.
        -- This may be the empty string.
        plainValue : Parser String
        plainValue =
            Parser.chompWhile (\c -> c /= '\n' && c /= separators.value)
                |> Parser.getChompedString

        -- A quoted value plus any stray text following the closing quote.
        quotedField : Parser String
        quotedField =
            Parser.succeed (++)
                |= quotedValue
                |= plainValue

        -- Decide whether the row continues, based on what follows the field
        -- that was just read.
        endOfField : String -> Parser (Parser.Step Row Row)
        endOfField newVal =
            let
                withNewVal =
                    newVal :: revVals
            in
            Parser.oneOf
                [ Parser.token (String.fromChar separators.value)
                    |> Parser.map (\_ -> Parser.Loop withNewVal)

                -- TODO: token for \r\n after updating elm-format. It automatically
                -- formats to the wrong/old syntax for specifying codepoints in the
                -- version I have installed ATM
                , Parser.token "\n"
                    |> Parser.map (\_ -> done withNewVal)
                , Parser.end
                    |> Parser.map (\_ -> done withNewVal)
                ]

        field : Parser (Parser.Step Row Row)
        field =
            Parser.oneOf [ quotedField, plainValue ]
                |> Parser.andThen endOfField
    in
    if List.isEmpty revVals then
        -- At the very start of a row, an immediate line end means the line is
        -- blank: finish with no values rather than a single empty one.
        Parser.oneOf
            [ Parser.end |> Parser.map (\_ -> done [])
            , Parser.token "\n" |> Parser.map (\_ -> done [])
            , field
            ]

    else
        field
{-| Parser for a value wrapped in double quotes.

Consumes the opening `"`, then loops over `quotedValueHelp` to read the
contents up to the closing quote. If the helper reports an `Err`, the message
becomes a parser problem.

-}
quotedValue : Parser String
quotedValue =
    let
        unwrap : Result String String -> Parser String
        unwrap outcome =
            case outcome of
                Ok text ->
                    Parser.succeed text

                Err message ->
                    Parser.problem message
    in
    Parser.token "\""
        |> Parser.andThen (\_ -> Parser.loop "" quotedValueHelp)
        |> Parser.andThen unwrap
{-| One step of the loop that reads the inside of a quoted value.

`soFar` is the text accumulated since the opening quote. The alternatives are
tried in order, so the two-character escapes are recognised before their
one-character prefixes:

  - end of input: finish with an `Err`, since the quote was never closed
  - `""` or `\"`: append a single `"` and continue
  - a backslash on its own: append it unchanged and continue, so text such as
    `C:\dir` is not altered
  - `"`: finish with the accumulated text
  - anything else: append everything up to the next backslash or quote

-}
quotedValueHelp : String -> Parser (Parser.Step String (Result String String))
quotedValueHelp soFar =
    let
        -- Run `parser`, ignore its result, and continue with `alt` appended.
        subAndLoop : String -> Parser a -> Parser (Parser.Step String b)
        subAndLoop alt parser =
            parser
                |> Parser.map (\_ -> Parser.Loop (soFar ++ alt))
    in
    Parser.oneOf
        [ Parser.end |> Parser.map (\_ -> Parser.Done (Err "I reached the end of the input while trying to parse a quoted string."))
        , Parser.token "\"\"" |> subAndLoop "\""
        , Parser.token "\\\"" |> subAndLoop "\""

        -- A backslash that does not escape a quote is ordinary data.
        , Parser.token "\\" |> subAndLoop "\\"
        , Parser.token "\""
            |> Parser.map (\_ -> Parser.Done (Ok soFar))

        -- The branches above guarantee the next character is neither a
        -- backslash nor a quote, so this always consumes at least one
        -- character and the loop makes progress.
        , Parser.chompWhile (\c -> c /= '\\' && c /= '"')
            |> Parser.getChompedString
            |> Parser.map (\newPortion -> Parser.Loop (soFar ++ newPortion))
        ]
{-| Run a parser, discard whatever it produced, and continue the enclosing
loop with the state `soFar` unchanged.
-}
skipTo : b -> Parser a -> Parser (Parser.Step b c)
skipTo soFar parser =
    parser
        |> Parser.map (always (Parser.Loop soFar))
-- HEADERS
{-| Treat the first row of a plain CSV as the field names.

The first row is moved into the `WithNamedFields` tag and the remaining rows
become the body. A CSV with no rows at all is tagged `EmptyHeaders` and keeps
its empty body.

-}
firstRowAreNames : CSV Plain -> CSV WithNamedFields
firstRowAreNames csv =
    case csv of
        CSV _ [] ->
            CSV EmptyHeaders []

        CSV _ (names :: records) ->
            CSV (WithNamedFields names) records
@jwoLondon
Copy link

Apologies for the dumb question (I am new to elm/parser).

If a CSV row contains ,"Some quoted text", (i.e. quotation mark immediately following a separator), it will extract the text inside quotes as we would expect. But if it contains , "Some quoted text", (i.e. some whitespace between separator and first quote), then it will extract with escaped quotation marks. How do I adapt the parser to trim whitespace between the separator and the first non-whitespace character?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment