Last active
August 29, 2015 13:58
-
-
Save eulerfx/10424088 to your computer and use it in GitHub Desktop.
F# CSV parsing and validation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type MerchantSkuImported with | |
static member Zero = { MerchantSkuImported.id = null ; merchant_id = null ; merchant_sku = null ; upc = null ; manufacturer = null ; brand = null ; part_no = null ; title = null ; description = null ; bullets = List.empty ; category = 0 ; category_path = List.empty ; multi_pack_quantity = None ; shipping_weight = None ; package_length = None ; package_width = None ; package_height = None ; main_image_url = None ; other_image_url = None ; price = None ; quantity = None ; start_selling_date = None } | |
static member MerchantSku = (fun (x:MerchantSkuImported) -> x.merchant_sku) |> Lens.create <| fun v x -> { x with merchant_sku = v } | |
static member UPC = (fun (x:MerchantSkuImported) -> x.upc) |> Lens.create <| fun v x -> { x with upc = v } | |
static member Title = (fun (x:MerchantSkuImported) -> x.title) |> Lens.create <| fun v x -> { x with title = v } | |
static member Brand = (fun (x:MerchantSkuImported) -> x.brand) |> Lens.create <| fun v x -> { x with brand = v } | |
static member Manufacturer = (fun (x:MerchantSkuImported) -> x.manufacturer) |> Lens.create <| fun v x -> { x with manufacturer = v } | |
static member PartNo = (fun (x:MerchantSkuImported) -> x.part_no) |> Lens.create <| fun v x -> { x with part_no = v } | |
static member Description = (fun (x:MerchantSkuImported) -> x.description) |> Lens.create <| fun v x -> { x with description = v } | |
static member MultiPackQuantity = (fun (x:MerchantSkuImported) -> x.multi_pack_quantity) |> Lens.create <| fun v x -> { x with multi_pack_quantity = v } | |
static member Bullets = (fun (x:MerchantSkuImported) -> x.bullets) |> Lens.create <| fun v x -> { x with bullets = v } | |
static member Category = (fun (x:MerchantSkuImported) -> x.category_path) |> Lens.create <| fun v x -> { x with category_path = v } | |
static member ShippingWeight = (fun (x:MerchantSkuImported) -> x.shipping_weight) |> Lens.create <| fun v x -> { x with shipping_weight = v } | |
static member PackageLength = (fun (x:MerchantSkuImported) -> x.package_length) |> Lens.create <| fun v x -> { x with package_length = v } | |
static member PackageWidth = (fun (x:MerchantSkuImported) -> x.package_width) |> Lens.create <| fun v x -> { x with package_width = v } | |
static member PackageHeight = (fun (x:MerchantSkuImported) -> x.package_height) |> Lens.create <| fun v x -> { x with package_height = v } | |
static member MainImageUrl = (fun (x:MerchantSkuImported) -> x.main_image_url) |> Lens.create <| fun v x -> { x with main_image_url = v } | |
static member OtherImageUrl = (fun (x:MerchantSkuImported) -> x.other_image_url) |> Lens.create <| fun v x -> { x with other_image_url = v } | |
static member Price = (fun (x:MerchantSkuImported) -> x.price) |> Lens.create <| fun v x -> { x with price = v } | |
static member Quantity = (fun (x:MerchantSkuImported) -> x.quantity) |> Lens.create <| fun v x -> { x with quantity = v } | |
static member StartSellingDate = (fun (x:MerchantSkuImported) -> x.start_selling_date) |> Lens.create <| fun v x -> { x with start_selling_date = v } | |
// A set of mappings between column names and properties on a value. Properties are exposed as | |
// lenses allowing for composition, such as converting between int and string, etc. | |
let mappings = [ | |
"UPC", MerchantSkuImported.UPC | |
"Merchant SKU", MerchantSkuImported.MerchantSku | |
"Manufacturer", MerchantSkuImported.Manufacturer | |
"Mfr Part Number", MerchantSkuImported.PartNo | |
"Brand", MerchantSkuImported.Brand | |
"Product Title", MerchantSkuImported.Title | |
"Product Description", MerchantSkuImported.Description | |
"Multi-Pack Quantity", MerchantSkuImported.MultiPackQuantity |> Lens.intToString | |
"Bullet 1", MerchantSkuImported.Bullets |> Lens.appendList | |
"Bullet 2", MerchantSkuImported.Bullets |> Lens.appendList | |
"Bullet 3", MerchantSkuImported.Bullets |> Lens.appendList | |
"Bullet 4", MerchantSkuImported.Bullets |> Lens.appendList | |
"Bullet 5", MerchantSkuImported.Bullets |> Lens.appendList | |
"Category", MerchantSkuImported.Category |> Lens.appendList | |
"Sub-Category Level 1", MerchantSkuImported.Category |> Lens.appendList | |
"Sub-Category Level 2", MerchantSkuImported.Category |> Lens.appendList | |
] | |
// Defines a validation schema which when applied to a value, either returns the value or a list of validation errors. | |
let validate (e:MerchantSkuImported) = | |
puree e | |
<* stringLengthMax 15 ["UPC"] e.upc | |
<* stringLengthMax 100 ["Brand"] e.brand | |
<* stringLengthMax 100 ["Manufacturer"] e.brand | |
<* stringLengthMax 100 ["Mfr Part Number"] e.part_no | |
<* stringLengthMax 500 ["Product Title"] e.title | |
<* stringLengthMax 4000 ["Description"] e.description | |
<* GE' 0 ["Multi-Pack Quantity"] e.multi_pack_quantity | |
<* validator (List.length >> (<=) 6) ["Bullets.Length"] e.bullets | |
try | |
let file = FSharp.Data.CsvFile.Load(__SOURCE_DIRECTORY__ + "../../scripts/sample_merchant_product_file.csv", ",", '"', true, true) | |
let parseRow = Csv.parseRow (file.Headers) mappings MerchantSkuImported.Zero | |
// A list of either a parsed value or a list of validation errors. | |
let parsed = file.Rows |> Seq.mapi (fun i row -> parseRow i row |> validate) | |
parsed |> Seq.iter (function Choice1Of2 x -> printfn "Parsed=%A" x | Choice2Of2 errs -> printfn "Errors=%A" errs) | |
printfn "Done!" | |
with ex -> | |
printfn "Error=%O" ex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A lens is an abstraction of a property with get/set allowings for composition. | |
type Lens<'a, 'b> = { | |
get: 'a -> 'b | |
set: 'b -> 'a -> 'a | |
} | |
with member l.update f a = let b = l.get a |> f in l.set b a | |
module Lens = | |
let inline get a (l:Lens<'a, 'b>) = l.get a | |
let inline set b a (l:Lens<'a, 'b>) = l.set b a | |
//let inline update f a (l:Lens<'a, 'b>) = let b = l.get a |> f in l.set b a | |
let inline create get set = { Lens.get = get ; set = set } | |
let inline iso f g (l:Lens<'a, 'b>) : Lens<'a, 'c> = { get = l.get >> f ; set = g >> l.set } | |
let inline (>>|) (l1:Lens<'a, 'b>) (l2:Lens<'b, 'c>) : Lens<'a, 'c> = | |
{ get = l1.get >> l2.get | |
set = l2.set >> l1.update } | |
let inline intToString (l:Lens<'a, int option>) : Lens<'a, string> = l |> iso string Int32.parse | |
let liftOpt (l:Lens<'a, 'b>) : Lens<'a, 'b option> = | |
{ get = l.get >> Some | |
set = fun v x -> match v with Some v -> l.set v x | None -> x } | |
let appendList (l:Lens<'a, 'b list>) : Lens<'a, 'b> = | |
{ get = l.get >> List.head | |
set = fun v -> l.update (fun xs -> v::xs) } | |
let appendListOpt (l:Lens<'a, 'b list>) : Lens<'a, 'b option> = | |
{ get = fun x -> let xs = l.get x in match xs with h::rest -> Some (h) | _ -> None | |
set = fun v x -> match v with Some v -> l.update (fun xs -> v::xs) x | None -> x } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "../packages/FSharp.Data.2.0.4/lib/net40/FSharp.Data.dll" | |
open FSharp.Data | |
/// A mapping between a column name and a lens. | |
type CsvMapping<'a> = string * Lens<'a, string> | |
module Csv = | |
// Given a set of headers, mappings and an initial value, will parse a CSV row into a value of the specified type. | |
let parseRow (headers:string[] option) (mappings:CsvMapping<'a> seq) (zero:'a) = | |
let matchedMaps = | |
let headers = | |
match headers with | |
| Some hs -> hs |> Seq.mapi (fun i h -> h.ToUpperInvariant(),i) |> dict | |
| None -> [] |> dict | |
mappings | |
|> Seq.map (fun (col,lens) -> Dict.tryGetValue (col.ToUpperInvariant()) headers,col,lens) | |
let parseRow i (row:CsvRow) = | |
printfn "parsing row=%i" i | |
matchedMaps | |
|> Seq.map (fun (idx,col,lens) -> | |
match idx with | |
| Some idx -> | |
printfn "parsing column='%s' at %i -> " col idx | |
let value = row.GetColumn(idx) | |
Some (lens.set value) | |
| None -> | |
printfn "missing column=%s" col | |
None | |
) | |
|> Seq.choose id | |
|> Seq.concatM Monoid.endo<'a> | |
<| zero | |
parseRow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// A validation error. | |
type Error = string list | |
[<AutoOpen>] | |
module Validate = | |
/// Given a predicate and an error message, creates a validator. | |
let validator (predicate:'a -> bool) (err:'err) x = | |
if predicate x then Choice1Of2 x | |
else Choice2Of2 err | |
/// Given a value, creates a choice 1. (Applicative functor) | |
let puree = Choice1Of2 | |
/// Given a function in a choice and a choice value x, applies the function to the value if available, | |
/// otherwise propagates the second choice. (Applicative functor) | |
let ap (f:Choice<'a -> 'b, Error>) (a:Choice<'a, Error>) : Choice<'b, Error> = | |
match f,a with | |
| Choice1Of2 f, Choice1Of2 a -> Choice1Of2 (f a) | |
| Choice2Of2 e, Choice1Of2 _ -> Choice2Of2 e | |
| Choice1Of2 _, Choice2Of2 e -> Choice2Of2 e | |
| Choice2Of2 e1, Choice2Of2 e2 -> Choice2Of2 (e1 @ e2) | |
let (<*>) = ap | |
/// Applies the function to the choice 1 value and returns the result as a choice 1, if matched, | |
/// otherwise returns the original choice 2 value. (Functor) | |
let map (f:'a -> 'b) (a:Choice<'a, 'err>) : Choice<'b, 'err> = | |
match a with | |
| Choice1Of2 a -> f a |> puree | |
| Choice2Of2 e -> Choice2Of2 e | |
let inline (<!>) f x = map f x | |
/// Lifts a two argument function to operate over the choice type. | |
let inline lift2 f a b = f <!> a <*> b | |
/// Composes two choice types, passing the case-1 type of the right value. | |
let inline ( *>) a b = lift2 (fun _ z -> z) a b | |
/// Composes two choice types, passing the case-2 type of the left value. | |
let inline ( <*) a b = lift2 (fun z _ -> z) a b | |
/// Kleisli composition. | |
let (>=>) f g a = f a <* g a | |
let inline sequence s = | |
let inline cons a b = lift2 (fun x xs -> x::xs) a b | |
List.foldBack cons s (puree []) | |
let seqValidator f = | |
let inline cons a b = lift2 (fun xs x -> x::xs) a b | |
Seq.map f >> Seq.fold cons (puree []) | |
let inline mapM f x = sequence (List.map f x) | |
/////////////////////////////////////////////////////////////////////////////////////// | |
let private (==) = LanguagePrimitives.PhysicalEquality | |
let inline private (!=) a b = not (a == b) | |
let notNull e = validator ((!=) null) e | |
let notEmptyString e = validator (fun (s:string) -> s.Length > 0) e | |
let stringLengthMax max e = validator (fun (s:string) -> s = null || s.Length <= max) e | |
let liftV (p:'a -> bool) = function Some v -> p v | None -> true | |
let inline GE c = validator ((>=) c) | |
let inline GE' c = validator (((>=) c) |> liftV) | |
let inline GT c = validator ((>) c) | |
let inline GT' c = validator (((>) c) |> liftV) | |
let inline LT c = validator ((<) c) | |
let inline LT' c = validator (((<) c) |> liftV) | |
let inline LE c = validator ((<=) c) | |
let inline LE' c = validator (((<=) c) |> liftV) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment