Skip to content

Instantly share code, notes, and snippets.

@erikvanoosten
Forked from maciej/Parboiled2CsvParser.scala
Created September 23, 2015 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikvanoosten/f5b9d500871aea565445 to your computer and use it in GitHub Desktop.
Save erikvanoosten/f5b9d500871aea565445 to your computer and use it in GitHub Desktop.
Parboiled2 CSV parser
/* based on comments in https://github.com/sirthias/parboiled2/issues/61 */
/**
 * A CSV grammar written with the parboiled2 rule DSL.
 *
 * Entry points are [[file]] (all rows), [[row]] (one line) and [[field]] (one cell).
 *
 * @param input     the text to parse
 * @param delimeter the field separator; the spelling is kept because it is part of the public
 *                  constructor signature. NOTE(review): an empty string is not meaningful here,
 *                  since `DELIMITER_TOKEN` and the separator in `row` would then match without
 *                  consuming any input.
 */
case class Parboiled2CsvParser(input: ParserInput, delimeter: String) extends Parser {

  /** The double-quote character that opens and closes an escaped field. */
  def DQUOTE: Char = '"'

  /** Matches the delimiter and pushes it; inside a quoted field the delimiter is ordinary text. */
  def DELIMITER_TOKEN: Rule1[String] = rule(capture(delimeter))

  /** Matches a doubled quote (`""`) and pushes a single `"` in its place. */
  def DQUOTE2: Rule1[String] = rule("\"\"" ~ push("\""))

  /**
   * Matches one line break without pushing anything.
   *
   * The original grammar spelled the two-character break as "\n\r", so Windows-style "\r\n"
   * input stalled at the first '\r' (which `TXT` also refuses) and `file` stopped after the
   * first row. "\r\n" is now tried first; "\n\r" is retained so input accepted by the original
   * grammar still parses the same way.
   */
  def NON_CAPTURING_CRLF: Rule0 = rule("\r\n" | "\n\r" | "\n")

  /** Matches one line break and pushes the matched text, so breaks inside quoted fields are kept. */
  def CRLF: Rule1[String] = rule(capture(NON_CAPTURING_CRLF))

  /**
   * Characters that terminate plain text: the characters of the delimiter, CR, LF and the quote.
   *
   * NOTE(review): this string is used as a character set by `anyOf`, so with a multi-character
   * delimiter each of its characters individually ends plain text -- confirm that is intended.
   */
  val delims: String = s"$delimeter\r\n" + DQUOTE

  /** Matches and pushes exactly one character that is not listed in `delims`. */
  def TXT: Rule1[String] = rule(capture(!anyOf(delims) ~ ANY))

  /** Blank characters tolerated around a quoted field: space and tab. */
  val WHITESPACE = CharPredicate(" \t")

  /** One or more `WHITESPACE` characters; pushes nothing. */
  def SPACES: Rule0 = rule(oneOrMore(WHITESPACE))

  /**
   * A quoted field: optional blanks, an opening quote, the content, a closing quote and optional
   * blanks. The content may contain delimiters, line breaks and doubled quotes; the pushed pieces
   * are concatenated into one string. The surrounding blanks and quotes are not part of the value.
   */
  def escaped: Rule1[String] = rule {
    optional(SPACES) ~ DQUOTE ~
      (zeroOrMore(DELIMITER_TOKEN | TXT | CRLF | DQUOTE2) ~ DQUOTE ~ optional(SPACES)) ~>
      (_.mkString(""))
  }

  /**
   * An unquoted field: any run of plain-text characters and literal quote characters, joined into
   * one string. It also matches the empty string, which is how empty cells are recognised.
   */
  def nonEscaped: Rule1[String] = rule {
    zeroOrMore(TXT | capture(DQUOTE)) ~> (_.mkString(""))
  }

  /** One cell: the quoted form is tried first, the unquoted form is the fallback. */
  def field: Rule1[String] = rule(escaped | nonEscaped)

  /** One line: one or more fields separated by the delimiter. */
  def row: Rule1[Seq[String]] = rule(oneOrMore(field).separatedBy(delimeter))

  /**
   * Zero or more rows separated by line breaks.
   *
   * The rule does not demand end of input, so it succeeds with the rows recognised so far when it
   * meets text it cannot parse. Because a field may be empty, a trailing line break produces a
   * final row holding a single empty string.
   */
  def file = rule(zeroOrMore(row).separatedBy(NON_CAPTURING_CRLF))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment