Skip to content

Instantly share code, notes, and snippets.

@madidier
Last active October 22, 2016 11:55
Show Gist options
  • Save madidier/aae3b276e6044ce5eeef713dd101d0c7 to your computer and use it in GitHub Desktop.
Save madidier/aae3b276e6044ce5eeef713dd101d0c7 to your computer and use it in GitHub Desktop.
A general DSL for consuming structured tabular data (i.e. wide CSVs, XLSXs...)
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Tabulata
( -- * Types
Fields
, Range
-- * DSL primitives
, rawField
, validate
, validatePure
-- * DSL evaluation
, extractHeader
, parseRow
) where
import Control.Applicative.Free.Final (Ap, liftAp, runAp, runAp_)
import Control.Monad.Reader (ReaderT, ask, runReaderT)
import Control.Monad.State (StateT, evalStateT, get, put)
import Control.Monad.Trans (lift)
import Data.Function ((&))
import Data.Functor.Compose (Compose(..))
import Data.Semigroup (Semigroup)
import Data.Text (Text)
import Data.Validation (AccValidation(..))
-- See also: https://gist.github.com/madidier/05e780651ec38dd3c5632fcfc0a92990
-- | Represents a structured set of fields
--
-- The type arguments represent the following :
-- - m is the evaluation context in which the validation functions will run
-- (i.e., you can use that to give validation functions access to a database)
-- - e is the type of validation error summaries (they must be concatenable)
-- - n is the type of column names
-- - p is the type of row indexes
-- - c is the type of the cells' contents
-- - a is the type of the value obtained from a row or a substructure in a row
--
-- From these types, the interpreter will only know :
-- - How to stitch together computations in the m evaluation context
-- (it "asks" for a Monad instance).
-- - How to combine/concat error messages (it "asks" for a Semigroup instance)
type Fields m e n p c = Ap (Field m e n p c)
-- | Represents a range of cells
data Range p
= Range
{ rangeRow :: p
, rangeStart :: Int
, rangeEnd :: Int
} deriving (Show, Eq)
data Field m e n p c a where
GetRawField :: n -> Field m e n p c c
Validate :: Fields m e n p c (Range p -> m (AccValidation e a)) -> Field m e n p c a
-- | A simple, stringly type field with the given name
rawField :: n -> Fields m e n p c c
rawField name = liftAp (GetRawField name)
-- | Performs validation on a substructure
validate :: Fields m e n p c (Range p -> m (AccValidation e a)) -> Fields m e n p c a
validate fields = liftAp (Validate fields)
-- | Uses a pure validation function on a substructure
validatePure :: Monad m => Fields m e n p c (Range p -> AccValidation e a) -> Fields m e n p c a
validatePure fields =
validate $ (\res -> (\range -> return (res range))) <$> fields
-- | Retrieves the fields names from a definition
extractHeader :: Fields m e n p c a -> [n]
extractHeader = runAp_
(\case
GetRawField name -> [name]
Validate f -> extractHeader f
)
-- | Extracts data from a single row
parseRow :: (Monad m, Semigroup e)
=> Fields m e n p c a -- The DSL expression to run
-> p -- The current row's position
-> [c] -- The row's contents
-> AccValidation e c -- The value to use in case a row ended prematurely
-> m (AccValidation e a)
parseRow definition rowNo contents overflowValue
= parseRow' definition
& getCompose
& flip runReaderT (rowNo, overflowValue)
& flip evalStateT (contents, 0)
type FIELDS m e p c a
= Compose (ReaderT (p, AccValidation e c)
(StateT ([c], Int) m))
(AccValidation e) a
parseRow' :: (Monad m, Semigroup e) => Fields m e n p c a -> FIELDS m e p c a
parseRow' = runAp
(\case
GetRawField name -> Compose $ do
(xs, !n) <- get
(_, overflowValue) <- ask
case xs of
x : xs -> do { put (xs, n + 1); return (AccSuccess x) }
[] -> do { put ([], n + 1); return overflowValue }
Validate fields -> Compose $ do
(_, rangeStart) <- get
res <- getCompose (parseRow' fields)
(_, rangeEnd) <- get
(currentRow, _) <- ask
case res of
AccSuccess validate -> lift . lift . validate $ Range currentRow rangeStart rangeEnd
AccFailure err -> return (AccFailure err)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment