Skip to content

Instantly share code, notes, and snippets.

@eulerfx
Last active August 29, 2015 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eulerfx/10424088 to your computer and use it in GitHub Desktop.
Save eulerfx/10424088 to your computer and use it in GitHub Desktop.
F# CSV parsing and validation
type MerchantSkuImported with
static member Zero = { MerchantSkuImported.id = null ; merchant_id = null ; merchant_sku = null ; upc = null ; manufacturer = null ; brand = null ; part_no = null ; title = null ; description = null ; bullets = List.empty ; category = 0 ; category_path = List.empty ; multi_pack_quantity = None ; shipping_weight = None ; package_length = None ; package_width = None ; package_height = None ; main_image_url = None ; other_image_url = None ; price = None ; quantity = None ; start_selling_date = None }
static member MerchantSku = (fun (x:MerchantSkuImported) -> x.merchant_sku) |> Lens.create <| fun v x -> { x with merchant_sku = v }
static member UPC = (fun (x:MerchantSkuImported) -> x.upc) |> Lens.create <| fun v x -> { x with upc = v }
static member Title = (fun (x:MerchantSkuImported) -> x.title) |> Lens.create <| fun v x -> { x with title = v }
static member Brand = (fun (x:MerchantSkuImported) -> x.brand) |> Lens.create <| fun v x -> { x with brand = v }
static member Manufacturer = (fun (x:MerchantSkuImported) -> x.manufacturer) |> Lens.create <| fun v x -> { x with manufacturer = v }
static member PartNo = (fun (x:MerchantSkuImported) -> x.part_no) |> Lens.create <| fun v x -> { x with part_no = v }
static member Description = (fun (x:MerchantSkuImported) -> x.description) |> Lens.create <| fun v x -> { x with description = v }
static member MultiPackQuantity = (fun (x:MerchantSkuImported) -> x.multi_pack_quantity) |> Lens.create <| fun v x -> { x with multi_pack_quantity = v }
static member Bullets = (fun (x:MerchantSkuImported) -> x.bullets) |> Lens.create <| fun v x -> { x with bullets = v }
static member Category = (fun (x:MerchantSkuImported) -> x.category_path) |> Lens.create <| fun v x -> { x with category_path = v }
static member ShippingWeight = (fun (x:MerchantSkuImported) -> x.shipping_weight) |> Lens.create <| fun v x -> { x with shipping_weight = v }
static member PackageLength = (fun (x:MerchantSkuImported) -> x.package_length) |> Lens.create <| fun v x -> { x with package_length = v }
static member PackageWidth = (fun (x:MerchantSkuImported) -> x.package_width) |> Lens.create <| fun v x -> { x with package_width = v }
static member PackageHeight = (fun (x:MerchantSkuImported) -> x.package_height) |> Lens.create <| fun v x -> { x with package_height = v }
static member MainImageUrl = (fun (x:MerchantSkuImported) -> x.main_image_url) |> Lens.create <| fun v x -> { x with main_image_url = v }
static member OtherImageUrl = (fun (x:MerchantSkuImported) -> x.other_image_url) |> Lens.create <| fun v x -> { x with other_image_url = v }
static member Price = (fun (x:MerchantSkuImported) -> x.price) |> Lens.create <| fun v x -> { x with price = v }
static member Quantity = (fun (x:MerchantSkuImported) -> x.quantity) |> Lens.create <| fun v x -> { x with quantity = v }
static member StartSellingDate = (fun (x:MerchantSkuImported) -> x.start_selling_date) |> Lens.create <| fun v x -> { x with start_selling_date = v }
// A set of mappings between column names and properties on a value. Properties are exposed as
// lenses allowing for composition, such as converting between int and string, etc.
let mappings = [
"UPC", MerchantSkuImported.UPC
"Merchant SKU", MerchantSkuImported.MerchantSku
"Manufacturer", MerchantSkuImported.Manufacturer
"Mfr Part Number", MerchantSkuImported.PartNo
"Brand", MerchantSkuImported.Brand
"Product Title", MerchantSkuImported.Title
"Product Description", MerchantSkuImported.Description
"Multi-Pack Quantity", MerchantSkuImported.MultiPackQuantity |> Lens.intToString
"Bullet 1", MerchantSkuImported.Bullets |> Lens.appendList
"Bullet 2", MerchantSkuImported.Bullets |> Lens.appendList
"Bullet 3", MerchantSkuImported.Bullets |> Lens.appendList
"Bullet 4", MerchantSkuImported.Bullets |> Lens.appendList
"Bullet 5", MerchantSkuImported.Bullets |> Lens.appendList
"Category", MerchantSkuImported.Category |> Lens.appendList
"Sub-Category Level 1", MerchantSkuImported.Category |> Lens.appendList
"Sub-Category Level 2", MerchantSkuImported.Category |> Lens.appendList
]
// Defines a validation schema which when applied to a value, either returns the value or a list of validation errors.
let validate (e:MerchantSkuImported) =
puree e
<* stringLengthMax 15 ["UPC"] e.upc
<* stringLengthMax 100 ["Brand"] e.brand
<* stringLengthMax 100 ["Manufacturer"] e.brand
<* stringLengthMax 100 ["Mfr Part Number"] e.part_no
<* stringLengthMax 500 ["Product Title"] e.title
<* stringLengthMax 4000 ["Description"] e.description
<* GE' 0 ["Multi-Pack Quantity"] e.multi_pack_quantity
<* validator (List.length >> (<=) 6) ["Bullets.Length"] e.bullets
try
let file = FSharp.Data.CsvFile.Load(__SOURCE_DIRECTORY__ + "../../scripts/sample_merchant_product_file.csv", ",", '"', true, true)
let parseRow = Csv.parseRow (file.Headers) mappings MerchantSkuImported.Zero
// A list of either a parsed value or a list of validation errors.
let parsed = file.Rows |> Seq.mapi (fun i row -> parseRow i row |> validate)
parsed |> Seq.iter (function Choice1Of2 x -> printfn "Parsed=%A" x | Choice2Of2 errs -> printfn "Errors=%A" errs)
printfn "Done!"
with ex ->
printfn "Error=%O" ex
// A lens is an abstraction of a property with get/set allowings for composition.
type Lens<'a, 'b> = {
get: 'a -> 'b
set: 'b -> 'a -> 'a
}
with member l.update f a = let b = l.get a |> f in l.set b a
module Lens =
let inline get a (l:Lens<'a, 'b>) = l.get a
let inline set b a (l:Lens<'a, 'b>) = l.set b a
//let inline update f a (l:Lens<'a, 'b>) = let b = l.get a |> f in l.set b a
let inline create get set = { Lens.get = get ; set = set }
let inline iso f g (l:Lens<'a, 'b>) : Lens<'a, 'c> = { get = l.get >> f ; set = g >> l.set }
let inline (>>|) (l1:Lens<'a, 'b>) (l2:Lens<'b, 'c>) : Lens<'a, 'c> =
{ get = l1.get >> l2.get
set = l2.set >> l1.update }
let inline intToString (l:Lens<'a, int option>) : Lens<'a, string> = l |> iso string Int32.parse
let liftOpt (l:Lens<'a, 'b>) : Lens<'a, 'b option> =
{ get = l.get >> Some
set = fun v x -> match v with Some v -> l.set v x | None -> x }
let appendList (l:Lens<'a, 'b list>) : Lens<'a, 'b> =
{ get = l.get >> List.head
set = fun v -> l.update (fun xs -> v::xs) }
let appendListOpt (l:Lens<'a, 'b list>) : Lens<'a, 'b option> =
{ get = fun x -> let xs = l.get x in match xs with h::rest -> Some (h) | _ -> None
set = fun v x -> match v with Some v -> l.update (fun xs -> v::xs) x | None -> x }
#r "../packages/FSharp.Data.2.0.4/lib/net40/FSharp.Data.dll"
open FSharp.Data
/// A mapping between a column name and a lens.
type CsvMapping<'a> = string * Lens<'a, string>
module Csv =
// Given a set of headers, mappings and an initial value, will parse a CSV row into a value of the specified type.
let parseRow (headers:string[] option) (mappings:CsvMapping<'a> seq) (zero:'a) =
let matchedMaps =
let headers =
match headers with
| Some hs -> hs |> Seq.mapi (fun i h -> h.ToUpperInvariant(),i) |> dict
| None -> [] |> dict
mappings
|> Seq.map (fun (col,lens) -> Dict.tryGetValue (col.ToUpperInvariant()) headers,col,lens)
let parseRow i (row:CsvRow) =
printfn "parsing row=%i" i
matchedMaps
|> Seq.map (fun (idx,col,lens) ->
match idx with
| Some idx ->
printfn "parsing column='%s' at %i -> " col idx
let value = row.GetColumn(idx)
Some (lens.set value)
| None ->
printfn "missing column=%s" col
None
)
|> Seq.choose id
|> Seq.concatM Monoid.endo<'a>
<| zero
parseRow
/// A validation error.
type Error = string list
[<AutoOpen>]
module Validate =
/// Given a predicate and an error message, creates a validator.
let validator (predicate:'a -> bool) (err:'err) x =
if predicate x then Choice1Of2 x
else Choice2Of2 err
/// Given a value, creates a choice 1. (Applicative functor)
let puree = Choice1Of2
/// Given a function in a choice and a choice value x, applies the function to the value if available,
/// otherwise propagates the second choice. (Applicative functor)
let ap (f:Choice<'a -> 'b, Error>) (a:Choice<'a, Error>) : Choice<'b, Error> =
match f,a with
| Choice1Of2 f, Choice1Of2 a -> Choice1Of2 (f a)
| Choice2Of2 e, Choice1Of2 _ -> Choice2Of2 e
| Choice1Of2 _, Choice2Of2 e -> Choice2Of2 e
| Choice2Of2 e1, Choice2Of2 e2 -> Choice2Of2 (e1 @ e2)
let (<*>) = ap
/// Applies the function to the choice 1 value and returns the result as a choice 1, if matched,
/// otherwise returns the original choice 2 value. (Functor)
let map (f:'a -> 'b) (a:Choice<'a, 'err>) : Choice<'b, 'err> =
match a with
| Choice1Of2 a -> f a |> puree
| Choice2Of2 e -> Choice2Of2 e
let inline (<!>) f x = map f x
/// Lifts a two argument function to operate over the choice type.
let inline lift2 f a b = f <!> a <*> b
/// Composes two choice types, passing the case-1 type of the right value.
let inline ( *>) a b = lift2 (fun _ z -> z) a b
/// Composes two choice types, passing the case-2 type of the left value.
let inline ( <*) a b = lift2 (fun z _ -> z) a b
/// Kleisli composition.
let (>=>) f g a = f a <* g a
let inline sequence s =
let inline cons a b = lift2 (fun x xs -> x::xs) a b
List.foldBack cons s (puree [])
let seqValidator f =
let inline cons a b = lift2 (fun xs x -> x::xs) a b
Seq.map f >> Seq.fold cons (puree [])
let inline mapM f x = sequence (List.map f x)
///////////////////////////////////////////////////////////////////////////////////////
let private (==) = LanguagePrimitives.PhysicalEquality
let inline private (!=) a b = not (a == b)
let notNull e = validator ((!=) null) e
let notEmptyString e = validator (fun (s:string) -> s.Length > 0) e
let stringLengthMax max e = validator (fun (s:string) -> s = null || s.Length <= max) e
let liftV (p:'a -> bool) = function Some v -> p v | None -> true
let inline GE c = validator ((>=) c)
let inline GE' c = validator (((>=) c) |> liftV)
let inline GT c = validator ((>) c)
let inline GT' c = validator (((>) c) |> liftV)
let inline LT c = validator ((<) c)
let inline LT' c = validator (((<) c) |> liftV)
let inline LE c = validator ((<=) c)
let inline LE' c = validator (((<=) c) |> liftV)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment