Skip to content

Instantly share code, notes, and snippets.

@halcwb
Created March 16, 2019 16:19
Show Gist options
  • Save halcwb/72353b756e193f21fc9646c703723133 to your computer and use it in GitHub Desktop.
Save halcwb/72353b756e193f21fc9646c703723133 to your computer and use it in GitHub Desktop.
Parse a data set of strings into the required types, then restore it to cleaned-up string data
/// Converts rows of raw string cells into typed results and back into
/// cleaned-up strings.
module StringParser =

    open System

    /// The type a cell is expected to hold.
    type StringParserType =
        | IntType
        | FloatType
        | BoolType
        | DateTimeType
        | StringType

    /// The outcome of parsing one cell: a typed value, or `NoResult` when the
    /// cell could not be parsed as the requested type.
    type StringParserResult =
        | IntRes of int
        | FloatRes of float
        | BoolRes of bool
        | DateTimeRes of DateTime
        | StringRes of string
        | NoResult

    /// Classifies `s` as the FIRST type able to parse it, trying int, float,
    /// bool and DateTime in that order; anything else is `IsString`.
    /// Because of that ordering an integer string such as "2" is always
    /// `IsInt`, never `IsFloat`, so this pattern answers "what is the
    /// narrowest type of s", not "can s be read as type X".
    let (|IsInt|IsFloat|IsBool|IsDateTime|IsString|) (s: string) =
        // `s` is annotated so the single-string TryParse overload is selected
        // unambiguously.
        if Int32.TryParse(s) |> fst then IsInt
        elif Double.TryParse(s) |> fst then IsFloat
        elif Boolean.TryParse(s) |> fst then IsBool
        elif DateTime.TryParse(s) |> fst then IsDateTime
        else IsString

    /// Turns the `(success, value)` pair returned by a .NET `TryParse` call
    /// into a result, wrapping the value with `wrap` on success.
    let private ofTryParse wrap (succeeded, value) =
        if succeeded then wrap value else NoResult

    /// Parses `s` as the type requested by `pt`.
    ///
    /// Returns the matching `*Res` case on success and `NoResult` when `s`
    /// cannot be parsed as that type. `StringType` always succeeds and
    /// returns `s` unchanged.
    ///
    /// Only the parser for the requested type is consulted, so an integer
    /// string such as "2" is a valid `FloatType` value. (Routing through the
    /// priority-ordered active pattern above would reject it, since "2" is
    /// classified as `IsInt` first.)
    ///
    /// NOTE(review): the TryParse overloads used take no format provider, so
    /// numbers and dates are presumably read with the current culture;
    /// confirm that matches the data source.
    let parse pt (s: string) =
        match pt with
        | IntType -> Int32.TryParse(s) |> ofTryParse IntRes
        | FloatType -> Double.TryParse(s) |> ofTryParse FloatRes
        | BoolType -> Boolean.TryParse(s) |> ofTryParse BoolRes
        | DateTimeType -> DateTime.TryParse(s) |> ofTryParse DateTimeRes
        | StringType -> StringRes s

    /// Parses every row of `data` column by column against the type list `tl`.
    ///
    /// A row whose cell count differs from the number of types in `tl` is
    /// replaced by an empty row rather than being partially parsed; the
    /// number of rows is therefore preserved.
    let dataToResult tl data =
        let expectedCount = List.length tl
        data
        |> List.map (fun row ->
            // The length guard makes List.map2 safe: it is only called on
            // lists of equal length.
            if List.length row = expectedCount then List.map2 parse tl row
            else [])

    /// Renders parsed rows back to strings.
    ///
    /// Typed values are formatted with `%A`, `StringRes` yields its string
    /// as is, and `NoResult` becomes the empty string.
    let resultToData rl =
        let cellToString =
            function
            | IntRes x -> sprintf "%A" x
            | FloatRes x -> sprintf "%A" x
            | BoolRes x -> sprintf "%A" x
            | DateTimeRes x -> sprintf "%A" x
            | StringRes x -> x
            | NoResult -> ""
        rl |> List.map (List.map cellToString)
/// Manual smoke test for the string parser: round-trips a small sample data
/// set and prints the cleaned-up cells.
module StringParserTests =

    // dataToResult, resultToData and the *Type cases are defined in
    // StringParser; open it so the unqualified names below resolve.
    open StringParser

    /// Parses the sample rows as int / float / bool / DateTime columns,
    /// converts the results back to strings and prints one line per row,
    /// each cell followed by a tab and a semicolon.
    let test () =
        let columnTypes = [ IntType; FloatType; BoolType; DateTimeType ]

        // Sample rows: a fully valid row, an all-empty row, and rows with an
        // unparsable value in different columns.
        let sample =
            [ [ "1";"1.5";"True";"2018-1-23 12:30" ]
              [ "";"";"";"" ]
              [ "b";"False";"foo";"no date" ]
              [ "2";"3.5";"False";"2016-6-28 15:40" ]
              [ "blah";"3.5";"False";"2016-6-28 15:40" ]
              [ "2";"blah";"False";"2016-6-28 15:40" ]
              [ "2";"3.5";"blah";"2016-6-28 15:40" ]
              [ "2";"3.5";"False";"blah" ]
            ]

        sample
        |> dataToResult columnTypes
        |> resultToData
        |> List.iter (fun row ->
            row |> List.iter (printf "%s\t;")
            printfn "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment