Created
July 25, 2018 03:17
-
-
Save dburriss/4fd75fb874efb3ee41d0c31b14387fdf to your computer and use it in GitHub Desktop.
Helper F# script file for working with CSV files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.IO | |
open Microsoft.FSharp.Reflection | |
open System | |
/// Extra array helper exposed under the `Array` name.
type Array =
    /// Joins the textual form of every element of `xs`, placing `delimiter`
    /// between consecutive elements.
    ///
    /// Each element is rendered with its own `ToString()`. A null element
    /// contributes an empty string instead of raising NullReferenceException.
    /// An empty array produces the empty string.
    static member join delimiter xs =
        xs
        |> Array.map (fun x ->
            // Box first so that null can be detected for any element type.
            match box x with
            | null -> ""
            | value -> value.ToString())
        |> String.concat delimiter
/// CSV-oriented sequence helpers exposed under the `Seq` name.
type Seq =
    /// Writes every element of `data` to the file at `path`, one element per
    /// line. The writer is disposed when the function returns.
    static member write (path:string) (data:seq<'a>): unit =
        use writer = new StreamWriter(path)
        data
        |> Seq.iter writer.WriteLine

    /// Lazily renders `data` as CSV lines (header line first).
    ///
    /// * `separator`     - text placed between cells.
    /// * `useEnclosure`  - when true every cell is wrapped in double quotes.
    ///                     Quotes already present inside a cell are NOT escaped.
    /// * `headerMapping` - maps a field/property name (or tuple index) to the
    ///                     header text.
    ///
    /// The shape of `'a` selects how rows are produced:
    /// * F# record       - header from the record field names, cells from the field values.
    /// * F# tuple        - header from the element indices ("0", "1", ...), cells from the elements.
    /// * sequence of strings (`seq<string>`, `string list`, `string[]`, ...)
    ///                   - every row is emitted as-is; the first row is the header
    ///                     and `headerMapping` is not applied to it. Empty input
    ///                     yields no lines.
    /// * anything else   - header from the public property names, cells from the property values.
    static member toCsv (separator:string) (useEnclosure:bool) (headerMapping:string -> string) (data:seq<'a>) =
        let dataType = typeof<'a>
        // `string` turns null into "" so a missing value becomes an empty cell.
        let enclose (value:obj) =
            let text = string value
            if useEnclosure then "\"" + text + "\"" else text
        let toLine (cells:obj[]) =
            cells |> Array.map enclose |> String.concat separator
        // A runtime test (rather than a static type comparison) guarantees the
        // rows really can be viewed as sequences of strings, whatever concrete
        // collection type the caller used.
        let stringRows =
            match box data with
            | :? seq<seq<string>> as rows -> Some rows
            | _ -> None
        seq {
            if FSharpType.IsRecord dataType then
                yield
                    FSharpType.GetRecordFields dataType
                    |> Array.map (fun field -> box (headerMapping field.Name))
                    |> toLine
                yield!
                    data
                    |> Seq.map (fun record -> FSharpValue.GetRecordFields(box record) |> toLine)
            elif FSharpType.IsTuple dataType then
                yield
                    FSharpType.GetTupleElements dataType
                    |> Array.mapi (fun idx _ -> box (headerMapping (string idx)))
                    |> toLine
                yield!
                    data
                    |> Seq.map (fun tuple -> FSharpValue.GetTupleFields(box tuple) |> toLine)
            else
                match stringRows with
                | Some rows ->
                    // Header row and data rows are formatted identically, so a
                    // single pass over all rows is enough.
                    yield!
                        rows
                        |> Seq.map (fun row -> row |> Seq.map box |> Seq.toArray |> toLine)
                | None ->
                    let props = dataType.GetProperties()
                    yield
                        props
                        |> Array.map (fun prop -> box (headerMapping prop.Name))
                        |> toLine
                    yield!
                        data
                        |> Seq.map (fun item ->
                            props
                            |> Array.map (fun prop -> prop.GetValue(box item, null))
                            |> toLine)
        }

    /// Lazily reads the text file at `path` line by line. The file is opened
    /// when enumeration starts and the reader is disposed when it ends.
    static member read (path:string) =
        seq {
            use reader = new StreamReader(path)
            let mutable line = reader.ReadLine()
            // ReadLine returns null once the end of the stream is reached.
            while not (isNull line) do
                yield line
                line <- reader.ReadLine()
        }

    /// Parses CSV `lines` (header line first) into records of type `'a`.
    ///
    /// * `separator`       - text that separates cells. Splitting is a plain
    ///                       string split: separators inside quoted cells are
    ///                       not treated specially.
    /// * `removeEnclosure` - when true a leading and/or trailing double quote
    ///                       is stripped from every header cell and data cell.
    /// * `headerMapping`   - maps a header cell to the record field name it fills.
    /// * `valueMapping`    - receives the field name and the cell text and
    ///                       returns the boxed field value.
    ///
    /// Returns an empty sequence when `lines` is empty. The header is read
    /// eagerly; data rows are converted lazily.
    ///
    /// Raises ArgumentException if `'a` is not an F# record or a row's cell
    /// count differs from the header's, and KeyNotFoundException if a record
    /// field has no matching column.
    static member fromCsv<'a> (separator:string) (removeEnclosure:bool) (headerMapping:string -> string) (valueMapping:string -> string -> obj) (lines:seq<string>) : seq<'a> =
        let dataType = typeof<'a>
        // Reflect over the record once instead of once per row.
        let fields = FSharpType.GetRecordFields(dataType)
        let split (line:string) = line.Split([|separator|], StringSplitOptions.None)
        let unenclose (s:string) =
            if not removeEnclosure then s
            else
                let start = if s.StartsWith("\"", StringComparison.Ordinal) then 1 else 0
                // `s.Length > start` keeps a lone quote from being counted as
                // both the opening and the closing quote.
                let stop =
                    if s.Length > start && s.EndsWith("\"", StringComparison.Ordinal)
                    then s.Length - 1
                    else s.Length
                s.Substring(start, stop - start)
        match Seq.tryHead lines with
        | None -> Seq.empty
        | Some headerLine ->
            let propNames = headerLine |> split |> Array.map (unenclose >> headerMapping)
            let toRecord (cells:string[]) =
                let byName =
                    cells
                    |> Array.map unenclose
                    |> Array.zip propNames
                    |> Map.ofArray
                let values = [| for f in fields -> valueMapping f.Name byName.[f.Name] |]
                FSharpValue.MakeRecord(dataType, values) :?> 'a
            lines
            |> Seq.tail
            |> Seq.map (split >> toRecord)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment