Skip to content

Instantly share code, notes, and snippets.

@guersam
Created March 14, 2016 04:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guersam/a2b058ca36dc62247716 to your computer and use it in GitHub Desktop.
Save guersam/a2b058ca36dc62247716 to your computer and use it in GitHub Desktop.
Naive, generic CSV reader
import cats.data.Xor
import cats.syntax.xor._
import elsresearch.common.util.Read
import scala.annotation.implicitNotFound
/**
 * Type class describing how to decode the text of one CSV cell into an `A`.
 *
 * @tparam A the type produced from a cell
 */
@implicitNotFound("Cannot find implicit ColumnReader for ${A}")
trait ColumnReader[A] {

  /**
   * Decodes a single cell.
   *
   * @param s raw cell text
   * @return the decoded value on the right, or the failure on the left
   */
  def read(s: String): Xor[Throwable, A]
}
/**
 * Constructors and implicit instances for [[ColumnReader]].
 *
 * Instances inherited from [[LowPriorityColumnReader]] have lower implicit
 * priority than the ones declared directly in this object.
 */
object ColumnReader extends LowPriorityColumnReader {

  /** Summons the implicit reader for `A`. */
  def apply[A: ColumnReader]: ColumnReader[A] = implicitly[ColumnReader[A]]

  /**
   * Lifts a plain function into a reader; non-fatal exceptions thrown by `f`
   * are captured on the left side of the result.
   *
   * @param f conversion from the raw cell text to `A`
   */
  def instance[A](f: String => A): ColumnReader[A] =
    new ColumnReader[A] {
      def read(s: String): Throwable Xor A = Xor.catchNonFatal(f(s))
    }

  /**
   * Reader for optional cells: a cell that is empty after trimming becomes
   * `None`; anything else is trimmed and handed to the reader for `A`.
   */
  implicit def optionReader[A: ColumnReader]: ColumnReader[Option[A]] = {
    val inner = implicitly[ColumnReader[A]]
    new ColumnReader[Option[A]] {
      def read(str: String): Throwable Xor Option[A] = {
        val trimmed = str.trim
        if (trimmed.isEmpty) Xor.right(None)
        else inner.read(trimmed).map(value => Some(value))
      }
    }
  }
}
/**
 * Fallback [[ColumnReader]] instances, mixed into the `ColumnReader` companion
 * so that instances declared there are preferred over these.
 */
trait LowPriorityColumnReader {

  /**
   * Builds a reader for any `A` that has a `Read` instance by delegating to
   * `Read[A].fromString`.
   *
   * NOTE(review): `Read` is defined outside this file; this presumably relies on
   * `fromString` already yielding `Throwable Xor A` -- confirm against its definition.
   */
  implicit def readReader[A: Read]: ColumnReader[A] = {
    val underlying = implicitly[Read[A]]
    new ColumnReader[A] {
      def read(str: String): Throwable Xor A = underlying.fromString(str)
    }
  }
}
import cats.data.Xor
import cats.syntax.xor._
import shapeless.labelled.{field, FieldType}
import shapeless.ops.hlist.Length
import shapeless.ops.nat.ToInt
import shapeless.{:: => #::, _}
import scala.annotation.implicitNotFound
/**
 * Base class of the errors produced while checking a CSV header or reading a row.
 *
 * @param message human-readable description, exposed through `getMessage`
 */
sealed class CsvReadError(message: String) extends RuntimeException(message)

/**
 * The row (or header) does not contain the number of columns the target type needs.
 *
 * @param expected number of columns required
 * @param actual   number of columns found
 */
case class NumberOfColumnsMismatch(expected: Int, actual: Int) extends CsvReadError(
  s"There is a mismatch between the numbers of columns, expected: $expected, actual: $actual"
)

/**
 * A header cell does not match the field name expected at that position.
 *
 * @param expected the expected column name
 * @param actual   the header cell that was found
 */
case class ColumnNameMismatch(expected: String, actual: String) extends CsvReadError(
  s"There is a mismatch between the column names, expected: $expected, actual: $actual"
)

/**
 * A cell could not be processed.
 *
 * @param idx   zero-based index of the offending column
 * @param cause the underlying failure; also available through `getCause`
 */
case class LineParseFailure(idx: Int, cause: Throwable) extends CsvReadError(
  s"Failed to parse line at column $idx: $cause"
) {
  // Chain the underlying failure so that getCause and printed stack traces expose
  // it; previously it only appeared inside the message string.
  initCause(cause)
}
/**
 * Type class for validating a CSV header against, and decoding a CSV row into, an `A`.
 *
 * Implementations supply the two index-aware methods; the public entry points
 * start them at column index 0.
 *
 * @tparam A the type a row is decoded into
 */
@implicitNotFound("Cannot find implicit CsvReader for ${A}")
trait CsvReader[A] {

  /** Validates the header cells `s`, where `idx` is the column index of the first cell in `s`. */
  protected def checkHeaderWithIdx(s: List[String], idx: Int): Xor[Throwable, Unit]

  /** Decodes the cells `s`, where `idx` is the column index of the first cell in `s`. */
  protected def readRowWithIdx(s: List[String], idx: Int): Xor[Throwable, A]

  /**
   * Validates a complete header row.
   *
   * @param s header cells, in column order
   */
  def checkHeader(s: List[String]): Xor[Throwable, Unit] = checkHeaderWithIdx(s, idx = 0)

  /**
   * Decodes a complete data row.
   *
   * @param s row cells, in column order
   */
  def readCsv(s: List[String]): Xor[Throwable, A] = readRowWithIdx(s, idx = 0)
}
/**
 * Implicit derivation of [[CsvReader]] instances for labelled `HList`s and, through
 * `LabelledGeneric`, for any type that has such a representation.
 */
object CsvReader {

  // TODO cachedImplicit
  /** Summons the implicit reader for `A`. */
  def apply[A: CsvReader]: CsvReader[A] = implicitly

  /**
   * Error used when cells are left over after every expected column was consumed.
   *
   * @param idx     number of columns consumed so far (the expected column count)
   * @param remains the unconsumed cells
   */
  private def surplusColumns(idx: Int, remains: List[String]): CsvReadError =
    LineParseFailure(idx, NumberOfColumnsMismatch(idx, idx + remains.length))

  /** Reader for the end of a record: succeeds only when no cells remain. */
  implicit def hnilInstance: CsvReader[HNil] =
    new CsvReader[HNil] {
      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        s match {
          case Nil     => ().right
          case remains => surplusColumns(idx, remains).left
        }

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor HNil =
        s match {
          case Nil     => HNil.right
          case remains => surplusColumns(idx, remains).left
        }
    }

  /**
   * Reader for a record whose first field is named by `K` and typed `V`, followed
   * by the fields of `T`.
   *
   * A header cell matches when, after removing underscores and lower-casing, it
   * equals the lower-cased field name.
   */
  implicit def hconsInstance[K <: Symbol, V, L <: Nat, T <: HList]
    (implicit
      keyWitness: Witness.Aux[K],
      hdReader: ColumnReader[V],
      tlReader: CsvReader[T],
      len: Length.Aux[T, L],
      lenToInt: ToInt[L]
    ): CsvReader[FieldType[K, V] #:: T] =
    new CsvReader[FieldType[K, V] #:: T] {

      // Locale.ROOT keeps the normalisation independent of the JVM default locale
      // (for example, the Turkish locale lower-cases "I" to a dotless i).
      private val expectedKey = keyWitness.value.name.toLowerCase(java.util.Locale.ROOT)

      // Columns still required from this position onwards: this field plus the tail.
      // `len` measures only the tail `T`, so the head has to be counted explicitly;
      // omitting it made the reported expected count one too small.
      private val remainingColumns = 1 + lenToInt()

      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        s match {
          case Nil => NumberOfColumnsMismatch(idx + remainingColumns, idx).left
          case hd :: tl =>
            if (hd.replace("_", "").toLowerCase(java.util.Locale.ROOT) == expectedKey)
              tlReader.checkHeaderWithIdx(tl, idx + 1)
            else
              ColumnNameMismatch(expectedKey, hd).left
        }

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor (FieldType[K, V] #:: T) =
        s match {
          case Nil => NumberOfColumnsMismatch(idx + remainingColumns, idx).left
          case hd :: tl =>
            for {
              // A cell-level failure is tagged with the index of the offending column.
              h <- hdReader.read(hd).leftMap(LineParseFailure(idx, _))
              t <- tlReader.readRowWithIdx(tl, idx + 1)
            } yield field[K](h) :: t
        }
    }

  /**
   * Reader for any `A` with a `LabelledGeneric` representation `R`: delegates to the
   * reader for `R` and converts the result back with `lgen.from`.
   *
   * The type parameters `K` and `V` are unused; they are kept so that the signature
   * stays source-compatible.
   */
  implicit def genericInstance[A, R <: HList, K <: HList, V <: HList]
    (implicit
      lgen: LabelledGeneric.Aux[A, R],
      reprReader: CsvReader[R]
    ): CsvReader[A] =
    new CsvReader[A] {
      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        reprReader.checkHeaderWithIdx(s, idx)

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor A =
        reprReader.readRowWithIdx(s, idx) map lgen.from
    }
}
import fastparse.all._
import cats.data.Xor
import cats.syntax.xor._
/**
 * Error describing a line that could not be parsed.
 *
 * @param msg failure description, also used as the exception message
 */
case class CsvParseFailure(msg: String) extends RuntimeException(msg)
/**
 * Minimal parser that splits one line into cells on a single separator character.
 * There is no handling of quoting or escaping.
 *
 * @param separator the character that delimits cells
 */
class NaiveCsvParser(separator: Char) {

  /** Matches a single space character. */
  val White = CharIn(" ")

  /** Matches the separator character. */
  val Sep = P(CharIn(List(separator)))

  /**
   * One cell: leading spaces are skipped, the (possibly empty) run of
   * non-separator characters is captured, and the capture is trimmed.
   */
  val column = P(White.rep ~ CharsWhile(ch => ch != separator).?.!).map(cell => cell.trim)

  /** A whole line: cells delimited by the separator, collected into a `List`. */
  val line: P[List[String]] = P(column.rep(sep = Sep).map(cells => cells.toList))

  /**
   * Parses one line into its cells.
   *
   * @param str the raw line
   * @return the cells on the right, or a [[CsvParseFailure]] carrying the parser's
   *         message on the left
   */
  def parseLine(str: String): Throwable Xor List[String] =
    line.parse(str) match {
      case Parsed.Success(cells, _) => Xor.right(cells)
      case failure: Parsed.Failure  => Xor.left(CsvParseFailure(failure.msg))
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment