Created
March 16, 2019 16:19
-
-
Save halcwb/72353b756e193f21fc9646c703723133 to your computer and use it in GitHub Desktop.
Parse a data set of strings into the required types, then convert the results back to cleaned-up string data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses rows of raw string cells into typed results and renders those
/// results back to cleaned-up strings.
module StringParser =

    open System

    /// The type a raw string cell is required to have.
    type StringParserType =
        | IntType
        | FloatType
        | BoolType
        | DateTimeType
        | StringType

    /// The outcome of parsing a single cell. `NoResult` marks a cell that
    /// could not be parsed as the required type.
    type StringParserResult =
        | IntRes of int
        | FloatRes of float
        | BoolRes of bool
        | DateTimeRes of DateTime
        | StringRes of string
        | NoResult

    /// Classifies a string by the FIRST type it parses as, tried in the order
    /// int, float, bool, date-time; anything else is `IsString`.
    /// Only one case is ever reported, so "1" is `IsInt` and never `IsFloat`.
    /// Use `parse` to test a string against one specific required type.
    let (|IsInt|IsFloat|IsBool|IsDateTime|IsString|) (s: string) =
        if Int32.TryParse(s) |> fst then IsInt
        elif Double.TryParse(s) |> fst then IsFloat
        elif Boolean.TryParse(s) |> fst then IsBool
        elif DateTime.TryParse(s) |> fst then IsDateTime
        else IsString

    /// Parses `s` as the required type `pt`.
    /// Returns the matching `*Res` case on success and `NoResult` when `s`
    /// cannot be parsed as that type. `StringType` always succeeds.
    let parse pt (s: string) =
        // Turns a (success, value) pair from a TryParse call into a result.
        let toResult wrap (ok, value) = if ok then wrap value else NoResult

        // Each type is tried directly instead of going through the
        // first-match classifier above, so an integer-looking string such as
        // "1" is still accepted when a float is required. The parsed value
        // from TryParse is reused rather than parsing a second time.
        match pt with
        | IntType -> Int32.TryParse(s) |> toResult IntRes
        | FloatType -> Double.TryParse(s) |> toResult FloatRes
        | BoolType -> Boolean.TryParse(s) |> toResult BoolRes
        | DateTimeType -> DateTime.TryParse(s) |> toResult DateTimeRes
        | StringType -> StringRes s

    /// Parses every row of `data` cell by cell against the column types `tl`.
    /// A row whose length differs from the number of column types yields an
    /// empty result row; the number of rows is preserved.
    let dataToResult tl data =
        let tc = tl |> List.length

        data
        |> List.map (fun row ->
            // List.map2 requires equal lengths, hence the guard.
            if row |> List.length <> tc then []
            else List.map2 parse tl row)

    /// Renders parsed rows back to strings. Typed values are formatted with
    /// `%A`, string values are passed through unchanged and `NoResult`
    /// becomes the empty string.
    let resultToData rl =
        let cellToString =
            function
            | IntRes x -> sprintf "%A" x
            | FloatRes x -> sprintf "%A" x
            | BoolRes x -> sprintf "%A" x
            | DateTimeRes x -> sprintf "%A" x
            | StringRes x -> x
            | NoResult -> ""

        rl |> List.map (List.map cellToString)
/// Manual smoke test for the `StringParser` module.
module StringParserTests =

    // The parser functions and the `*Type` cases are defined in
    // `StringParser`; open it so the unqualified names below resolve.
    open StringParser

    /// Parses a fixed sample table as int, float, bool and date-time columns,
    /// renders the results back to strings and prints one line per row, with
    /// each cell followed by a tab and a semicolon.
    let test () =
        [ [ "1"; "1.5"; "True"; "2018-1-23 12:30" ]
          [ ""; ""; ""; "" ]
          [ "b"; "False"; "foo"; "no date" ]
          [ "2"; "3.5"; "False"; "2016-6-28 15:40" ]
          [ "blah"; "3.5"; "False"; "2016-6-28 15:40" ]
          [ "2"; "blah"; "False"; "2016-6-28 15:40" ]
          [ "2"; "3.5"; "blah"; "2016-6-28 15:40" ]
          [ "2"; "3.5"; "False"; "blah" ] ]
        |> dataToResult [ IntType; FloatType; BoolType; DateTimeType ]
        |> resultToData
        |> List.iter (fun row ->
            row |> List.iter (printf "%s\t;")
            printfn "")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment