derrickturk/Tafra.idr

## Tafra.idr
-- dependently-typed dataframes from scratch

module Tafra

%access public export

%hide Nat -- we'll roll our own, this is Carl Sagan's apple pie

-- natural numbers a la Peano
data Nat : Type where
  Z : Nat
  S : Nat -> Nat

-- addition, by recursion on the left
plus : Nat -> Nat -> Nat
plus Z m = m
plus (S n) m = S (plus n m)

-- "finite sets" - naturals less than a given number
data Fin : Nat -> Type where
  FZ : Fin (S n) -- zero is less than any successor
  FS : Fin n -> Fin (S n) -- by induction

-- "vectors" - known-length homogeneous sequences
namespace Vect
  data Vect : Nat -> Type -> Type where
    Nil : Vect Z a
    (::) : a -> Vect n a -> Vect (S n) a

  -- bounds-checked element access
  index : Fin n -> Vect n a -> a
  index FZ (x :: _) = x
  index (FS i) (_ :: xs) = index i xs

  -- length-tracking concatenation
  (++) : Vect n a -> Vect m a -> Vect (plus n m) a
  [] ++ ys = ys
  (x :: xs) ++ ys = x :: xs ++ ys

  -- we can map over vectors, preserving type
  Functor (Vect n) where
    map f [] = []
    map f (x :: xs) = f x :: map f xs

  -- we'll want this later to implement "slicing"
  vslice : Vect k (Fin n) -> Vect n a -> Vect k a
  vslice [] _ = []
  vslice (i :: is) xs = index i xs :: vslice is xs

  -- we'll want this to walk two parallel vectors in "lock-step"
  zip : Vect n a -> Vect n b -> Vect n (a, b)
  zip [] [] = []
  zip (x::xs) (y::ys) = (x, y)::zip xs ys

-- the types of "row transformer" functions for a given Tafra
RowTransformer : Vect n Type -> Vect n Type -> Vect n Type
RowTransformer [] [] = []
RowTransformer (a :: as) (b :: bs) = (a -> b) :: RowTransformer as bs

-- heterogeneous vectors - we'll need these to represent "rows"
namespace HVect
  data HVect : Vect n Type -> Type where
    Nil : HVect []
    (::) : a -> HVect as -> HVect (a::as)

  -- bounds-checked element access, with appropriate return type
  index : {n : Nat} ->
          {as : Vect n Type} ->
          (i : Fin n) ->
          HVect as ->
          index i as
  index FZ [] impossible
  index (FS _) [] impossible
  index FZ (x :: _) = x
  index (FS i) (_ :: xs) = index i xs

  -- HVects can be compared for equality, when their components
  --   are comparable - we'll need this for "group by"

  Eq (HVect []) where
    [] == [] = True

  (Eq a, Eq (HVect as)) => Eq (HVect (a::as)) where
    (x :: xs) == (y :: ys) = x == y && xs == ys

  -- length- and type-tracking concatenation
  (++) : HVect as -> HVect bs -> HVect (as ++ bs)
  [] ++ ys = ys
  (x :: xs) ++ ys = x :: xs ++ ys

  -- first element only
  head : HVect (a::as) -> a
  head (x :: _) = x

  -- all but first element
  tail : HVect (a::as) -> HVect as
  tail (_ :: xs) = xs

  -- "ap"ply an HVect of functions
  ap : {as : Vect n Type} ->
       {bs : Vect n Type} ->
       HVect (RowTransformer as bs) ->
       HVect as ->
       HVect bs
  ap {as = []} {bs = []} [] [] = []
  ap {as = _ :: _} {bs = _ :: _} (f :: fs) (x :: xs) = f x :: ap fs xs

namespace Tafra
  -- a "tafra" is a list of same-length vectors of various types
  -- the type is indexed by the row count and a vector of column types
  data Tafra : (rows : Nat) -> Vect cols Type -> Type where
    Nil : Tafra rows []
    (::) : Vect rows a -> Tafra rows as -> Tafra rows (a::as)

  -- we can concatenate tafras with the same column types and different
  --   row counts "vertically"
  (++) : Tafra n as -> Tafra m as -> Tafra (plus n m) as
  [] ++ [] = []
  (topFirst :: topRest) ++ (botFirst :: botRest) =
    Vect.(++) topFirst botFirst :: topRest ++ botRest

  -- we can concatenate tafras with different column types but the same row
  --   counts "horizontally"
  hcat : Tafra n as -> Tafra n bs -> Tafra n (as ++ bs)
  hcat [] y = y
  hcat (x :: xs) y = x :: hcat xs y

  -- indexing by columns is simple, and returns a column vector
  index : {cols: Nat} ->
          {as : Vect cols Type} ->
          (i : Fin cols) ->
          Tafra rows as ->
          Vect rows (index i as)
  index FZ [] impossible
  index (FS _) [] impossible
  index FZ (x :: _) = x
  index (FS i) (_ :: xs) = index i xs

  -- indexing by rows is trickier - we have to return a heterogeneous "tuple"
  rindex : Fin rows -> Tafra rows as -> HVect as
  rindex _ [] = []
  rindex i (x :: xs) = index i x :: rindex i xs

  -- we can also "slice" by multiple row or column indices
  -- we'll do this a little inefficiently

  -- "column" slices
  slice : {cols : Nat} ->
          {as : Vect cols Type} ->
          (ixs : Vect k (Fin cols)) ->
          Tafra rows as ->
          Tafra rows (vslice ixs as)
  slice [] _ = []
  slice (i :: is) xs = index i xs :: slice is xs

  -- "row" slices
  rslice : {rows : Nat} ->
           (ixs : Vect k (Fin rows)) ->
           Tafra rows as ->
           Tafra k as
  rslice _ [] = []
  rslice [] (_ :: xs) = [] :: rslice [] xs
  rslice is (x :: xs) = vslice is x :: rslice is xs

  -- just the first row
  rhead : Tafra (S n) as -> HVect as
  rhead [] = []
  rhead ((x :: _) :: ys) = x :: rhead ys

  -- all rows but the first
  rtail : Tafra (S n) as -> Tafra n as
  rtail [] = []
  rtail ((_ :: xs) :: ys) = xs :: rtail ys

  -- combine the "head" and the "tail" (we'll need this to
  --   correctly type "group by"
  rcons : HVect as -> Tafra n as -> Tafra (S n) as
  rcons [] [] = []
  rcons (x :: xs) (r :: rs) = (x :: r) :: rcons xs rs

  -- transpose to a row-oriented representation
  transpose : {rows : Nat} -> Tafra rows as -> Vect rows (HVect as)
  transpose {rows = Z} _ = []
  transpose {rows = (S m)} xs = rhead xs :: transpose (rtail xs)

  -- rebuild a tafra from a transposed representation
  untranspose : {rows : Nat} ->
                {cols : Nat} ->
                {as : Vect cols Type} ->
                Vect rows (HVect as) ->
                Tafra rows as
  untranspose {rows = Z} {as = []} [] = []
  untranspose {rows = Z} {as = (t :: ts)} [] = [] :: untranspose []
  untranspose {rows = (S m)} {as = []} _ = []
  untranspose {rows = (S m)} {as = (t :: ts)} (xs :: yss) =
    (head xs :: map head yss) :: untranspose (tail xs :: map tail yss)

  -- a zero-row tafra of any type
  empty : Tafra Z as
  empty {as = []} = []
  empty {as = (_ :: _)} = [] :: empty

  -- "map" a set of row-transformer functions over a tafra
  rmap : HVect (RowTransformer as bs) -> Tafra n as -> Tafra n bs
  rmap fs = untranspose . map (ap fs) . transpose

-- let's create some example tafrae
someData : Tafra (S (S (S Z))) [Double, Bool, List String]
someData =
  [ [23.4, 45.6, 78.9]
  , [True, False, True]
  , [ ["yo", "dawg"]
    , ["i heard you", "like structures"]
    , ["so i put structures in your structures"]
    ]
  ]

otherData : Tafra (S (S Z)) [Int, String]
otherData =
  [ [99, 45]
  , ["some", "text"]
  ]

-- instead of maps, we'll use "association lists" for associative lookup
-- the prelude helpfully provides
-- lookup : Eq a => a -> List (a, b) -> Maybe b
-- we'll define a helpful type synonym

Map : Type -> Type -> Type
Map k v = List (k, v)

-- and some additional helpers
insert : Eq k => k -> v -> Map k v -> Map k v
insert k v [] = [(k, v)]
insert k v (x@(k', _) :: xs) =
  if k == k'
     then (k, v) :: xs
     else x :: insert k v xs

-- we're almost ready to write "group by"
-- first, let's prove we can collect rows by unique combination of group-by
--   column values

-- an "accumulating" function we'll use to collect rows by unique combination
accumRows : Eq k =>
            Map k (List v) ->
            Vect n (k, v) ->
            Map k (List v)
accumRows map [] = map
accumRows map ((b, g) :: rest) = case lookup b map of
  Nothing => accumRows (insert b [g] map) rest
  -- for performance, we're going to build these "backward"
  (Just xs) => accumRows (insert b (g::xs) map) rest

-- this is also the only example I have ever seen of explicitly passing
--   a "locally named" interface implementation
rowsByUnique : (is : Vect groups (Fin cols)) -> -- indices of group-by columns
               (js : Vect aggs (Fin cols)) -> -- indices of aggregated-over columns
               {auto prf: Eq (HVect (vslice is as))} ->
               Tafra rows as ->
               Map (HVect (vslice is as)) (List (HVect (vslice js as)))
rowsByUnique {prf} is js xs =
  accumRows @{prf} [] (zip (transpose (slice is xs)) (transpose (slice js xs)))

-- ok, let's type group by
groupBy : {cols : Nat} ->
          {rows : Nat} ->
          {as : Vect cols Type} ->
          {groups : Nat} ->
          {aggs : Nat} ->
          -- indices of group-by columns
          (is : Vect groups (Fin cols)) ->
          -- indices of aggregated-over columns
          (js : Vect aggs (Fin cols)) ->
          -- aggregation function
          (aggFn : List (HVect (vslice js as)) -> HVect bs) ->
          -- a proof that group-by columns can be compared for equality
          --   (automatically found by compiler)
          {auto prf: Eq (HVect (vslice is as))} ->
          -- data frame to group-by
          Tafra rows as ->
          -- result type: an existentially quantified tafra of some
          --   row count, with columns from the group-by columns and
          --   the result of the aggregation function
          (k : Nat ** Tafra k (vslice is as ++ bs))
groupBy is js fn xs = applyAggregation fn (rowsByUnique is js xs) where
  applyAggregation : (List (HVect gs) -> HVect rs) ->
                     Map (HVect as) (List (HVect gs)) ->
                     (k : Nat ** Tafra k (as ++ rs))
  applyAggregation f [] = (Z ** empty)
  applyAggregation f ((k, rs) :: rest) with (applyAggregation f rest)
    applyAggregation f ((k, rs) :: _) | (n ** restAgg) =
      (S n ** rcons (k ++ f rs) restAgg)

-- select
--   col2,
--   join('\n', str(col1) & join(' ', col3))
-- from someData
-- group by col2
exampleGroupBy : (k : Nat ** Tafra k [Bool, String])
exampleGroupBy = groupBy
  [FS FZ]
  [FZ, FS (FS FZ)]
  ((::[]) . unlines . map (\[num, strs] => unwords (show num :: strs)))
  someData

exampleMapped : Tafra (S (S (S Z))) [Double, Bool, String]
exampleMapped = rmap [(/ 3.0), not, unwords] someData

## TafraParser.idr
module TafraParser

import Tafra

%access public export

charDigit : Char -> Maybe Int
charDigit x = if isDigit x
                 then Just (ord x - ord '0')
                 else Nothing

interface ParseMaybe a where
  parseMaybe : String -> Maybe a

ParseMaybe String where
  parseMaybe = Just

ParseMaybe Bool where
  parseMaybe "True" = Just True
  parseMaybe "False" = Just False
  parseMaybe _ = Nothing

ParseMaybe Int where
  parseMaybe str with (strM str)
    parseMaybe "" | StrNil = Nothing
    parseMaybe (strCons x xs) | (StrCons x xs) = do
      val <- charDigit x
      go val xs
    where
      go val str with (strM str)
        go val "" | StrNil = Just val
        go val (strCons y ys) | (StrCons y ys) = do
          next <- charDigit y
          go (val * 10 + next) ys

ParseMaybe Integer where
  parseMaybe str with (strM str)
    parseMaybe "" | StrNil = Nothing
    parseMaybe (strCons x xs) | (StrCons x xs) = do
      val <- cast <$> charDigit x
      go val xs
    where
      go : Integer -> String -> Maybe Integer
      go val str with (strM str)
        go val "" | StrNil = Just val
        go val (strCons y ys) | (StrCons y ys) = do
          next <- cast <$> charDigit y
          go (val * 10 + next) ys

ParseMaybe Double where
  parseMaybe str with (strM str)
    parseMaybe "" | StrNil = Nothing
    parseMaybe (strCons x xs) | (StrCons x xs) = do
      val <- cast <$> charDigit x
      go False val 0 xs
    where
      go : Bool -> Integer -> Integer -> String -> Maybe Double
      go afterDecimal whole frac str with (strM str)
        -- this is a little lazy...
        go afterDecimal whole frac "" | StrNil =
          Just (cast (show whole ++ "." ++ show frac))
        go afterDecimal whole frac (strCons y ys) | (StrCons y ys) =
          if y == '.'
             then go True whole frac ys
             else do
               next <- cast <$> charDigit y
               if afterDecimal
                  then go afterDecimal whole (frac * 10 + next) ys
                  else go afterDecimal (whole * 10 + next) frac ys

ParseMaybe (HVect []) where
  parseMaybe "" = Just []
  parseMaybe _ = Nothing

(ParseMaybe a, ParseMaybe (HVect as)) => ParseMaybe (HVect (a::as)) where
  parseMaybe xs with (split (== ',') xs)
    parseMaybe xs | [] = Nothing
    parseMaybe xs | (y :: ys) = do
      first <- parseMaybe (trim y)
      rest <- parseMaybe (concat (intersperse "," ys))
      pure (first::rest)

listToVect : List a -> (n : Nat ** Vect n a)
listToVect [] = (Z ** [])
listToVect (x :: xs) with (listToVect xs)
  listToVect (x :: xs) | (n ** v) = (S n ** x :: v)

-- skip header and parse each row
parseCSV' : ParseMaybe (HVect as) => List String -> Maybe (n : Nat ** Tafra n as)
parseCSV' @{c} csvLines = case nonEmpty csvLines of
  (Yes prf) => do
    (n ** rows) <- listToVect <$> traverse (parseMaybe @{c}) (tail csvLines)
    pure (n ** untranspose rows)
  (No _) => Nothing

parseCSV : ParseMaybe (HVect as) => String -> Maybe (n : Nat ** Tafra n as)
parseCSV = parseCSV' . lines

exampleCSV : String
exampleCSV = "age,name\n123,bobby\n456,sally\n"

exampleFrame : Maybe (n : Nat ** Tafra n [Int, String])
exampleFrame = parseCSV exampleCSV
	-- dependently-typed dataframes from scratch

	module Tafra

	%access public export

	%hide Nat -- we'll roll our own, this is Carl Sagan's apple pie

	-- natural numbers a la Peano
	data Nat : Type where
	Z : Nat
	S : Nat -> Nat

	-- addition, by recursion on the left
	plus : Nat -> Nat -> Nat
	plus Z m = m
	plus (S n) m = S (plus n m)

	-- "finite sets" - naturals less than a given number
	data Fin : Nat -> Type where
	FZ : Fin (S n) -- zero is less than any successor
	FS : Fin n -> Fin (S n) -- by induction

	-- "vectors" - known-length homogeneous sequences
	namespace Vect
	data Vect : Nat -> Type -> Type where
	Nil : Vect Z a
	(::) : a -> Vect n a -> Vect (S n) a

	-- bounds-checked element access
	index : Fin n -> Vect n a -> a
	index FZ (x :: _) = x
	index (FS i) (_ :: xs) = index i xs

	-- length-tracking concatenation
	(++) : Vect n a -> Vect m a -> Vect (plus n m) a
	[] ++ ys = ys
	(x :: xs) ++ ys = x :: xs ++ ys

	-- we can map over vectors, preserving type
	Functor (Vect n) where
	map f [] = []
	map f (x :: xs) = f x :: map f xs

	-- we'll want this later to implement "slicing"
	vslice : Vect k (Fin n) -> Vect n a -> Vect k a
	vslice [] _ = []
	vslice (i :: is) xs = index i xs :: vslice is xs

	-- we'll want this to walk two parallel vectors in "lock-step"
	zip : Vect n a -> Vect n b -> Vect n (a, b)
	zip [] [] = []
	zip (x::xs) (y::ys) = (x, y)::zip xs ys

	-- the types of "row transformer" functions for a given Tafra
	RowTransformer : Vect n Type -> Vect n Type -> Vect n Type
	RowTransformer [] [] = []
	RowTransformer (a :: as) (b :: bs) = (a -> b) :: RowTransformer as bs

	-- heterogeneous vectors - we'll need these to represent "rows"
	namespace HVect
	data HVect : Vect n Type -> Type where
	Nil : HVect []
	(::) : a -> HVect as -> HVect (a::as)

	-- bounds-checked element access, with appropriate return type
	index : {n : Nat} ->
	{as : Vect n Type} ->
	(i : Fin n) ->
	HVect as ->
	index i as
	index FZ [] impossible
	index (FS _) [] impossible
	index FZ (x :: _) = x
	index (FS i) (_ :: xs) = index i xs

	-- HVects can be compared for equality, when their components
	-- are comparable - we'll need this for "group by"

	Eq (HVect []) where
	[] == [] = True

	(Eq a, Eq (HVect as)) => Eq (HVect (a::as)) where
	(x :: xs) == (y :: ys) = x == y && xs == ys

	-- length- and type-tracking concatenation
	(++) : HVect as -> HVect bs -> HVect (as ++ bs)
	[] ++ ys = ys
	(x :: xs) ++ ys = x :: xs ++ ys

	-- first element only
	head : HVect (a::as) -> a
	head (x :: _) = x

	-- all but first element
	tail : HVect (a::as) -> HVect as
	tail (_ :: xs) = xs

	-- "ap"ply an HVect of functions
	ap : {as : Vect n Type} ->
	{bs : Vect n Type} ->
	HVect (RowTransformer as bs) ->
	HVect as ->
	HVect bs
	ap {as = []} {bs = []} [] [] = []
	ap {as = _ :: _} {bs = _ :: _} (f :: fs) (x :: xs) = f x :: ap fs xs

	namespace Tafra
	-- a "tafra" is a list of same-length vectors of various types
	-- the type is indexed by the row count and a vector of column types
	data Tafra : (rows : Nat) -> Vect cols Type -> Type where
	Nil : Tafra rows []
	(::) : Vect rows a -> Tafra rows as -> Tafra rows (a::as)

	-- we can concatenate tafras with the same column types and different
	-- row counts "vertically"
	(++) : Tafra n as -> Tafra m as -> Tafra (plus n m) as
	[] ++ [] = []
	(topFirst :: topRest) ++ (botFirst :: botRest) =
	Vect.(++) topFirst botFirst :: topRest ++ botRest

	-- we can concatenate tafras with different column types but the same row
	-- counts "horizontally"
	hcat : Tafra n as -> Tafra n bs -> Tafra n (as ++ bs)
	hcat [] y = y
	hcat (x :: xs) y = x :: hcat xs y

	-- indexing by columns is simple, and returns a column vector
	index : {cols: Nat} ->
	{as : Vect cols Type} ->
	(i : Fin cols) ->
	Tafra rows as ->
	Vect rows (index i as)
	index FZ [] impossible
	index (FS _) [] impossible
	index FZ (x :: _) = x
	index (FS i) (_ :: xs) = index i xs

	-- indexing by rows is trickier - we have to return a heterogeneous "tuple"
	rindex : Fin rows -> Tafra rows as -> HVect as
	rindex _ [] = []
	rindex i (x :: xs) = index i x :: rindex i xs

	-- we can also "slice" by multiple row or column indices
	-- we'll do this a little inefficiently

	-- "column" slices
	slice : {cols : Nat} ->
	{as : Vect cols Type} ->
	(ixs : Vect k (Fin cols)) ->
	Tafra rows as ->
	Tafra rows (vslice ixs as)
	slice [] _ = []
	slice (i :: is) xs = index i xs :: slice is xs

	-- "row" slices
	rslice : {rows : Nat} ->
	(ixs : Vect k (Fin rows)) ->
	Tafra rows as ->
	Tafra k as
	rslice _ [] = []
	rslice [] (_ :: xs) = [] :: rslice [] xs
	rslice is (x :: xs) = vslice is x :: rslice is xs

	-- just the first row
	rhead : Tafra (S n) as -> HVect as
	rhead [] = []
	rhead ((x :: _) :: ys) = x :: rhead ys

	-- all rows but the first
	rtail : Tafra (S n) as -> Tafra n as
	rtail [] = []
	rtail ((_ :: xs) :: ys) = xs :: rtail ys

	-- combine the "head" and the "tail" (we'll need this to
	-- correctly type "group by"
	rcons : HVect as -> Tafra n as -> Tafra (S n) as
	rcons [] [] = []
	rcons (x :: xs) (r :: rs) = (x :: r) :: rcons xs rs

	-- transpose to a row-oriented representation
	transpose : {rows : Nat} -> Tafra rows as -> Vect rows (HVect as)
	transpose {rows = Z} _ = []
	transpose {rows = (S m)} xs = rhead xs :: transpose (rtail xs)

	-- rebuild a tafra from a transposed representation
	untranspose : {rows : Nat} ->
	{cols : Nat} ->
	{as : Vect cols Type} ->
	Vect rows (HVect as) ->
	Tafra rows as
	untranspose {rows = Z} {as = []} [] = []
	untranspose {rows = Z} {as = (t :: ts)} [] = [] :: untranspose []
	untranspose {rows = (S m)} {as = []} _ = []
	untranspose {rows = (S m)} {as = (t :: ts)} (xs :: yss) =
	(head xs :: map head yss) :: untranspose (tail xs :: map tail yss)

	-- a zero-row tafra of any type
	empty : Tafra Z as
	empty {as = []} = []
	empty {as = (_ :: _)} = [] :: empty

	-- "map" a set of row-transformer functions over a tafra
	rmap : HVect (RowTransformer as bs) -> Tafra n as -> Tafra n bs
	rmap fs = untranspose . map (ap fs) . transpose

	-- let's create some example tafrae
	someData : Tafra (S (S (S Z))) [Double, Bool, List String]
	someData =
	[ [23.4, 45.6, 78.9]
	, [True, False, True]
	, [ ["yo", "dawg"]
	, ["i heard you", "like structures"]
	, ["so i put structures in your structures"]
	]
	]

	otherData : Tafra (S (S Z)) [Int, String]
	otherData =
	[ [99, 45]
	, ["some", "text"]
	]

	-- instead of maps, we'll use "association lists" for associative lookup
	-- the prelude helpfully provides
	-- lookup : Eq a => a -> List (a, b) -> Maybe b
	-- we'll define a helpful type synonym

	Map : Type -> Type -> Type
	Map k v = List (k, v)

	-- and some additional helpers
	insert : Eq k => k -> v -> Map k v -> Map k v
	insert k v [] = [(k, v)]
	insert k v (x@(k', _) :: xs) =
	if k == k'
	then (k, v) :: xs
	else x :: insert k v xs

	-- we're almost ready to write "group by"
	-- first, let's prove we can collect rows by unique combination of group-by
	-- column values

	-- an "accumulating" function we'll use to collect rows by unique combination
	accumRows : Eq k =>
	Map k (List v) ->
	Vect n (k, v) ->
	Map k (List v)
	accumRows map [] = map
	accumRows map ((b, g) :: rest) = case lookup b map of
	Nothing => accumRows (insert b [g] map) rest
	-- for performance, we're going to build these "backward"
	(Just xs) => accumRows (insert b (g::xs) map) rest

	-- this is also the only example I have ever seen of explicitly passing
	-- a "locally named" interface implementation
	rowsByUnique : (is : Vect groups (Fin cols)) -> -- indices of group-by columns
	(js : Vect aggs (Fin cols)) -> -- indices of aggregated-over columns
	{auto prf: Eq (HVect (vslice is as))} ->
	Tafra rows as ->
	Map (HVect (vslice is as)) (List (HVect (vslice js as)))
	rowsByUnique {prf} is js xs =
	accumRows @{prf} [] (zip (transpose (slice is xs)) (transpose (slice js xs)))

	-- ok, let's type group by
	groupBy : {cols : Nat} ->
	{rows : Nat} ->
	{as : Vect cols Type} ->
	{groups : Nat} ->
	{aggs : Nat} ->
	-- indices of group-by columns
	(is : Vect groups (Fin cols)) ->
	-- indices of aggregated-over columns
	(js : Vect aggs (Fin cols)) ->
	-- aggregation function
	(aggFn : List (HVect (vslice js as)) -> HVect bs) ->
	-- a proof that group-by columns can be compared for equality
	-- (automatically found by compiler)
	{auto prf: Eq (HVect (vslice is as))} ->
	-- data frame to group-by
	Tafra rows as ->
	-- result type: an existentially quantified tafra of some
	-- row count, with columns from the group-by columns and
	-- the result of the aggregation function
	(k : Nat ** Tafra k (vslice is as ++ bs))
	groupBy is js fn xs = applyAggregation fn (rowsByUnique is js xs) where
	applyAggregation : (List (HVect gs) -> HVect rs) ->
	Map (HVect as) (List (HVect gs)) ->
	(k : Nat ** Tafra k (as ++ rs))
	applyAggregation f [] = (Z ** empty)
	applyAggregation f ((k, rs) :: rest) with (applyAggregation f rest)
	applyAggregation f ((k, rs) :: _) \| (n ** restAgg) =
	(S n ** rcons (k ++ f rs) restAgg)

	-- select
	-- col2,
	-- join('\n', str(col1) & join(' ', col3))
	-- from someData
	-- group by col2
	exampleGroupBy : (k : Nat ** Tafra k [Bool, String])
	exampleGroupBy = groupBy
	[FS FZ]
	[FZ, FS (FS FZ)]
	((::[]) . unlines . map (\[num, strs] => unwords (show num :: strs)))
	someData

	exampleMapped : Tafra (S (S (S Z))) [Double, Bool, String]
	exampleMapped = rmap [(/ 3.0), not, unwords] someData
	module TafraParser

	import Tafra

	%access public export

	charDigit : Char -> Maybe Int
	charDigit x = if isDigit x
	then Just (ord x - ord '0')
	else Nothing

	interface ParseMaybe a where
	parseMaybe : String -> Maybe a

	ParseMaybe String where
	parseMaybe = Just

	ParseMaybe Bool where
	parseMaybe "True" = Just True
	parseMaybe "False" = Just False
	parseMaybe _ = Nothing

	ParseMaybe Int where
	parseMaybe str with (strM str)
	parseMaybe "" \| StrNil = Nothing
	parseMaybe (strCons x xs) \| (StrCons x xs) = do
	val <- charDigit x
	go val xs
	where
	go val str with (strM str)
	go val "" \| StrNil = Just val
	go val (strCons y ys) \| (StrCons y ys) = do
	next <- charDigit y
	go (val * 10 + next) ys

	ParseMaybe Integer where
	parseMaybe str with (strM str)
	parseMaybe "" \| StrNil = Nothing
	parseMaybe (strCons x xs) \| (StrCons x xs) = do
	val <- cast <$> charDigit x
	go val xs
	where
	go : Integer -> String -> Maybe Integer
	go val str with (strM str)
	go val "" \| StrNil = Just val
	go val (strCons y ys) \| (StrCons y ys) = do
	next <- cast <$> charDigit y
	go (val * 10 + next) ys

	ParseMaybe Double where
	parseMaybe str with (strM str)
	parseMaybe "" \| StrNil = Nothing
	parseMaybe (strCons x xs) \| (StrCons x xs) = do
	val <- cast <$> charDigit x
	go False val 0 xs
	where
	go : Bool -> Integer -> Integer -> String -> Maybe Double
	go afterDecimal whole frac str with (strM str)
	-- this is a little lazy...
	go afterDecimal whole frac "" \| StrNil =
	Just (cast (show whole ++ "." ++ show frac))
	go afterDecimal whole frac (strCons y ys) \| (StrCons y ys) =
	if y == '.'
	then go True whole frac ys
	else do
	next <- cast <$> charDigit y
	if afterDecimal
	then go afterDecimal whole (frac * 10 + next) ys
	else go afterDecimal (whole * 10 + next) frac ys

	ParseMaybe (HVect []) where
	parseMaybe "" = Just []
	parseMaybe _ = Nothing

	(ParseMaybe a, ParseMaybe (HVect as)) => ParseMaybe (HVect (a::as)) where
	parseMaybe xs with (split (== ',') xs)
	parseMaybe xs \| [] = Nothing
	parseMaybe xs \| (y :: ys) = do
	first <- parseMaybe (trim y)
	rest <- parseMaybe (concat (intersperse "," ys))
	pure (first::rest)

	listToVect : List a -> (n : Nat ** Vect n a)
	listToVect [] = (Z ** [])
	listToVect (x :: xs) with (listToVect xs)
	listToVect (x :: xs) \| (n v) = (S n x :: v)

	-- skip header and parse each row
	parseCSV' : ParseMaybe (HVect as) => List String -> Maybe (n : Nat ** Tafra n as)
	parseCSV' @{c} csvLines = case nonEmpty csvLines of
	(Yes prf) => do
	(n ** rows) <- listToVect <$> traverse (parseMaybe @{c}) (tail csvLines)
	pure (n ** untranspose rows)
	(No _) => Nothing

	parseCSV : ParseMaybe (HVect as) => String -> Maybe (n : Nat ** Tafra n as)
	parseCSV = parseCSV' . lines

	exampleCSV : String
	exampleCSV = "age,name\n123,bobby\n456,sally\n"

	exampleFrame : Maybe (n : Nat ** Tafra n [Int, String])
	exampleFrame = parseCSV exampleCSV