Skip to content

Instantly share code, notes, and snippets.

@varmas
Created November 15, 2019 15:00
Show Gist options
  • Save varmas/6bcad0dd14e3898128fcba2c914c757e to your computer and use it in GitHub Desktop.
Save varmas/6bcad0dd14e3898128fcba2c914c757e to your computer and use it in GitHub Desktop.
csv spec test
// sbt build definition for the CSV parsing spec project.
name := "csv-reader"

version := "0.1"

scalaVersion := "2.13.1"

// OpenCSV is a regular dependency; ScalaTest is restricted to the "test" configuration.
libraryDependencies ++= Seq(
  "com.opencsv" % "opencsv" % "4.6",
  "org.scalatest" %% "scalatest" % "3.0.8" % "test"
)
import java.io.FileReader
import com.opencsv.enums.CSVReaderNullFieldIndicator
import com.opencsv.{CSVReader, CSVReaderBuilder}
import org.scalatest.{FlatSpec, Matchers}
import scala.collection.JavaConverters._
/**
 * Specification of how OpenCSV, configured with an RFC 4180 parser, reads the
 * fixture file at `src/test/scala/test.csv`.
 *
 * The whole file is read once while the spec is constructed; each test then
 * inspects one data row (the first record is treated as the header row):
 *
 *  - row 0: unquoted empty cells
 *  - row 1: quoted empty cells
 *  - row 2: cells ending in a backslash
 *  - row 3: cells ending in a carriage return
 *  - row 4: cells ending in a double quote
 */
class CSVParserSpec extends FlatSpec with Matchers {
  import com.opencsv.{RFC4180Parser, RFC4180ParserBuilder}

  val path = "src/test/scala/test.csv"

  behavior of "OpenCSV"

  // EMPTY_SEPARATORS: cells that are empty between two separators are expected
  // to come back as null, while quoted empty cells stay "" (see the first two tests).
  val rfc4180Parser: RFC4180Parser = new RFC4180ParserBuilder()
    .withFieldAsNull(CSVReaderNullFieldIndicator.EMPTY_SEPARATORS)
    .build()

  val csvReaderBuilder: CSVReaderBuilder = new CSVReaderBuilder(new FileReader(path))

  // Carriage returns are kept so that the '\r' characters embedded in the
  // fixture's cells survive into the parsed values.
  val csvReader: CSVReader = csvReaderBuilder
    .withCSVParser(rfc4180Parser)
    .withKeepCarriageReturn(true)
    .build()

  // Read everything eagerly, then always close the reader (which also closes
  // the underlying FileReader) so the file handle is not leaked, even when
  // reading fails.
  val allOpenCsvRecords: List[Array[String]] =
    try csvReader.readAll().asScala.toList
    finally csvReader.close()

  // Splits the header record from the data records; throws a MatchError at
  // construction time if the file yielded no records at all.
  val headers :: items = allOpenCsvRecords

  it should "parse empty null cells as null" in {
    items.head should have length 5
    items.head should be (Array[String](null, null, null, null, null))
  }

  it should "parse empty string cells as empty strings" in {
    items(1) should have length 5
    items(1) should be (Array("", "", "", "", ""))
  }

  it should "parse backslash string cells as backslash strings" in {
    items(2) should have length 5
    // Triple-quoted literals keep the trailing backslash without escaping.
    items(2) should be (Array("""A:\""", """B:\""", """C:\""", """D:\""", """E:\"""))
  }

  it should "parse carriage return string cells as carriage return string cells" in {
    items(3) should have length 5
    items(3) should be (Array("a\r", "b\r", "c\r", "d\r", "e\r"))
  }

  it should "parse quote containing string cells with quotes" in {
    items(4) should have length 5
    items(4) should be (Array("a\"", "b\"", "c\"", "d\"", "e\""))
  }
}
We can make this file beautiful and searchable if this error is corrected: It looks like row 5 should actually have 5 columns, instead of 1. in line 4.
a,b,c,d,e
,,,,
"","","","",""
A:\,B:\,C:\,D:\,E:\
a
,b
,c
,d
,e
"a""","b""","c""","d""","e"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment