Created
March 14, 2016 04:06
-
-
Save guersam/a2b058ca36dc62247716 to your computer and use it in GitHub Desktop.
Naive, generic CSV reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cats.data.Xor | |
import cats.syntax.xor._ | |
import elsresearch.common.util.Read | |
import scala.annotation.implicitNotFound | |
/**
 * Type class that decodes the text of one CSV cell into a value of type `A`.
 *
 * @tparam A the type produced from the cell text
 */
@implicitNotFound("Cannot find implicit ColumnReader for ${A}")
trait ColumnReader[A] {

  /**
   * Decodes a single cell.
   *
   * @param s raw cell text
   * @return the decoded value on the right, or the failure on the left
   */
  def read(s: String): Xor[Throwable, A]
}
/**
 * Summoner, constructor and the `Option` instance for [[ColumnReader]].
 *
 * Instances declared directly in this object take precedence over the ones
 * inherited from `LowPriorityColumnReader`.
 */
object ColumnReader extends LowPriorityColumnReader {

  /** Summons the implicit reader for `A`. */
  def apply[A: ColumnReader]: ColumnReader[A] = implicitly[ColumnReader[A]]

  /**
   * Builds a reader from a plain function; any non-fatal exception thrown by
   * `f` is captured and returned on the left side.
   *
   * @param f function turning the cell text into an `A`
   */
  def instance[A](f: String => A): ColumnReader[A] =
    new ColumnReader[A] {
      def read(s: String): Throwable Xor A =
        Xor.catchNonFatal(f(s))
    }

  /**
   * Reader for optional cells: a cell that is empty after trimming yields
   * `None`; otherwise the trimmed text is handed to the reader for `A` and
   * its result is wrapped in `Some`.
   */
  implicit def optionReader[A: ColumnReader]: ColumnReader[Option[A]] = {
    val underlying = implicitly[ColumnReader[A]]
    new ColumnReader[Option[A]] {
      def read(str: String): Throwable Xor Option[A] = {
        val trimmed = str.trim
        if (trimmed.isEmpty) Xor.right[Throwable, Option[A]](None)
        else underlying.read(trimmed).map(Some(_))
      }
    }
  }
}
/**
 * Fallback [[ColumnReader]] instances. Because they are inherited by the
 * `ColumnReader` companion rather than declared in it, they are chosen only
 * when no instance declared directly in the companion applies.
 */
trait LowPriorityColumnReader {

  /** Derives a column reader for any `A` with a `Read` instance by delegating to `Read[A].fromString`. */
  implicit def readReader[A: Read]: ColumnReader[A] = {
    val reader = implicitly[Read[A]]
    new ColumnReader[A] {
      def read(str: String): Throwable Xor A = reader.fromString(str)
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cats.data.Xor | |
import cats.syntax.xor._ | |
import shapeless.labelled.{field, FieldType} | |
import shapeless.ops.hlist.Length | |
import shapeless.ops.nat.ToInt | |
import shapeless.{:: => #::, _} | |
import scala.annotation.implicitNotFound | |
/**
 * Base class of the errors reported while checking a CSV header or reading a CSV row.
 *
 * @param message human-readable description, exposed through `getMessage`
 */
sealed class CsvReadError(message: String) extends RuntimeException(message)

/**
 * The row (or header) does not contain the number of columns the target type needs.
 *
 * @param expected number of columns required
 * @param actual   number of columns found
 */
case class NumberOfColumnsMismatch(expected: Int, actual: Int) extends CsvReadError(
  s"There is a mismatch between the numbers of columns, expected: $expected, actual: $actual"
)

/**
 * A header cell does not match the field name expected at that position.
 *
 * @param expected field name derived from the target type
 * @param actual   header text found in the file
 */
case class ColumnNameMismatch(expected: String, actual: String) extends CsvReadError(
  s"There is a mismatch between the column names, expected: $expected, actual: $actual"
)

/**
 * A cell could not be converted to its target type.
 *
 * @param idx   zero-based index of the offending column
 * @param cause the underlying failure
 */
case class LineParseFailure(idx: Int, cause: Throwable) extends CsvReadError(
  s"Failed to parse line at column $idx: $cause"
) {
  // Expose the wrapped failure through the standard exception chain so that
  // stack traces and logging frameworks show the root cause.
  override def getCause: Throwable = cause
}
/**
 * Type class that validates a CSV header against, and decodes a CSV row into,
 * a value of type `A`.
 *
 * The public entry points start at column 0 and delegate to the indexed
 * variants, which carry the number of columns already consumed.
 *
 * @tparam A the type a row is decoded into
 */
@implicitNotFound("Cannot find implicit CsvReader for ${A}")
trait CsvReader[A] {

  /** Validates the header cells `s`, starting at column 0. */
  def checkHeader(s: List[String]): Xor[Throwable, Unit] =
    checkHeaderWithIdx(s, 0)

  /** Decodes the row cells `s` into an `A`, starting at column 0. */
  def readCsv(s: List[String]): Xor[Throwable, A] =
    readRowWithIdx(s, 0)

  /**
   * Decodes the remaining cells of a row.
   *
   * @param s   cells not yet consumed
   * @param idx number of columns consumed before `s`
   */
  protected def readRowWithIdx(s: List[String], idx: Int): Xor[Throwable, A]

  /**
   * Validates the remaining cells of a header.
   *
   * @param s   header cells not yet consumed
   * @param idx number of columns consumed before `s`
   */
  protected def checkHeaderWithIdx(s: List[String], idx: Int): Xor[Throwable, Unit]
}
/**
 * Summoner and shapeless-derived instances for [[CsvReader]].
 *
 * The instances call the `protected` indexed methods of other readers; this is
 * permitted because the code lives inside the companion object of the trait.
 */
object CsvReader {

  // TODO cachedImplicit
  /** Summons the implicit reader for `A`. */
  def apply[A: CsvReader]: CsvReader[A] = implicitly

  /**
   * Error for input that still has cells left after every expected column
   * has been consumed.
   *
   * NOTE(review): this wraps the mismatch in a `LineParseFailure`, whereas the
   * "too few columns" case in `hconsInstance` returns a bare
   * `NumberOfColumnsMismatch`. Kept as is so callers matching on the error
   * type are unaffected.
   *
   * @param remains the unconsumed cells
   * @param idx     number of columns consumed so far (which is also the expected total)
   */
  private def surplusColumns(remains: List[String], idx: Int): Throwable Xor Nothing =
    LineParseFailure(
      idx,
      NumberOfColumnsMismatch(idx, idx + remains.length)
    ).left

  /** Terminal instance: succeeds only when no cells remain. */
  implicit def hnilInstance: CsvReader[HNil] =
    new CsvReader[HNil] {

      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        s match {
          case Nil     => ().right
          case remains => surplusColumns(remains, idx)
        }

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor HNil =
        s match {
          case Nil     => HNil.right
          case remains => surplusColumns(remains, idx)
        }
    }

  /**
   * Inductive instance: handles the field `K -> V` at the head of the record
   * and delegates the remaining cells to the reader of the tail `T`.
   *
   * @param keyWitness provides the field name used for the header check
   * @param hdReader   decodes the head cell
   * @param tlReader   handles the remaining cells
   * @param len        type-level length of the tail
   * @param lenToInt   value-level length of the tail
   */
  implicit def hconsInstance[K <: Symbol, V, L <: Nat, T <: HList]
    (implicit
      keyWitness: Witness.Aux[K],
      hdReader: ColumnReader[V],
      tlReader: CsvReader[T],
      len: Length.Aux[T, L],
      lenToInt: ToInt[L]
    ): CsvReader[FieldType[K, V] #:: T] =
    new CsvReader[FieldType[K, V] #:: T] {

      private val expectedKey = keyWitness.value.name.toLowerCase

      /**
       * Error for input that ran out of cells at column `idx`.
       *
       * The expected total is the columns already consumed (`idx`), plus the
       * head column this instance reads (1), plus the length of the tail.
       * Leaving out the head under-reports the expected count by one.
       */
      private def missingColumns(idx: Int): Throwable Xor Nothing =
        NumberOfColumnsMismatch(idx + 1 + lenToInt(), idx).left

      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        s match {
          case Nil => missingColumns(idx)
          case hd :: tl =>
            // Header cells are compared case-insensitively with underscores removed.
            if (hd.replaceAll("_", "").toLowerCase == expectedKey)
              tlReader.checkHeaderWithIdx(tl, idx + 1)
            else
              ColumnNameMismatch(expectedKey, hd).left
        }

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor (FieldType[K, V] #:: T) =
        s match {
          case Nil => missingColumns(idx)
          case hd :: tl =>
            for {
              h <- hdReader.read(hd).leftMap(LineParseFailure(idx, _))
              t <- tlReader.readRowWithIdx(tl, idx + 1)
            } yield field[K](h) :: t
        }
    }

  /**
   * Instance for any type `A` with a labelled generic representation `R`:
   * header checks and row decoding are delegated to the reader of `R`, and a
   * decoded record is converted back with `lgen.from`.
   *
   * The type parameters `K` and `V` are not used in the body; they are kept
   * so that existing explicit type applications continue to compile.
   */
  implicit def genericInstance[A, R <: HList, K <: HList, V <: HList]
    (implicit
      lgen: LabelledGeneric.Aux[A, R],
      reprReader: CsvReader[R]
    ): CsvReader[A] =
    new CsvReader[A] {

      def checkHeaderWithIdx(s: List[String], idx: Int): Throwable Xor Unit =
        reprReader.checkHeaderWithIdx(s, idx)

      def readRowWithIdx(s: List[String], idx: Int): Throwable Xor A =
        reprReader.readRowWithIdx(s, idx) map lgen.from
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fastparse.all._ | |
import cats.data.Xor | |
import cats.syntax.xor._ | |
/**
 * Raised when a raw line cannot be split into columns.
 *
 * @param msg failure description, also used as the exception message
 */
case class CsvParseFailure(msg: String)
  extends RuntimeException(msg)
/**
 * Splits one line of text into trimmed columns on a single separator
 * character. The grammar has no notion of quoting or escaping.
 *
 * @param separator character that delimits columns
 */
class NaiveCsvParser(separator: Char) {

  /** A single space character. */
  val White = CharIn(" ")

  /** The column separator. */
  val Sep = P(CharIn(List(separator)))

  /**
   * One column: leading spaces are skipped, then the (possibly empty) run of
   * non-separator characters is captured and trimmed.
   */
  val column = P(White.rep ~ CharsWhile(_ != separator).?.!).map(_.trim)

  /** A whole line: columns delimited by the separator. */
  val line: P[List[String]] = P(column.rep(sep = Sep).map(_.toList))

  /**
   * Parses `str` into its columns.
   *
   * @param str the raw line
   * @return the columns on the right, or a `CsvParseFailure` carrying the
   *         parser's message on the left
   */
  def parseLine(str: String): Throwable Xor List[String] =
    line.parse(str) match {
      case failure: Parsed.Failure =>
        Xor.left(CsvParseFailure(failure.msg))
      case Parsed.Success(columns, _) =>
        Xor.right(columns)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment