Skip to content

Instantly share code, notes, and snippets.

@pirrmann
Last active Oct 12, 2016
Embed
What would you like to do?
Titanic CSV loading
// Load the training set as an array of lines, one element per CSV record.
// `ReadAllLines` is used instead of `ReadAllText` because the result is
// consumed below with `Array.map parseLine`, which needs a `string[]`;
// `ReadAllText` yields a single `string` and that pipeline does not type-check.
// NOTE(review): records whose quoted fields contain embedded line breaks would
// be split across elements — confirm the data set has none.
let file = System.IO.File.ReadAllLines(__SOURCE_DIRECTORY__ + "/training.csv")
/// Splits one CSV record into its fields.
///
/// A field that starts with a double quote is read in "quoted" mode:
///   * `""`  inside the field stands for one literal quote,
///   * `",`  closes the field and starts the next one,
///   * `"`   as the very last character closes the final field,
///   * any other character (including a lone quote followed by something
///     else) is copied verbatim.
/// Reaching the end of the input while still in quoted mode raises
/// `Failure "The file is malformed!"`.
///
/// Any other field is read in "plain" mode up to the next comma or the end of
/// the input. A trailing comma (or an empty input) therefore yields a final
/// empty field.
let parseLine line =
    let chars : char[] = Seq.toArray line
    let total = chars.Length
    let fields = ResizeArray<string>()
    let buffer = System.Text.StringBuilder()

    // Push the characters gathered so far as one field and reset the buffer.
    let emit () =
        fields.Add(buffer.ToString())
        buffer.Clear() |> ignore

    let mutable pos = 0
    let mutable finished = false

    // Each iteration of the outer loop consumes exactly one field.
    while not finished do
        if pos < total && chars.[pos] = '"' then
            // Quoted field: skip the opening quote.
            pos <- pos + 1
            let mutable inQuotes = true
            while inQuotes do
                if pos >= total then failwith "The file is malformed!"
                let next = if pos + 1 < total then Some chars.[pos + 1] else None
                // Case order matters: doubled quote, then closing quote + comma,
                // then closing quote at end of input, then any other character.
                match chars.[pos], next with
                | '"', Some '"' ->
                    buffer.Append('"') |> ignore
                    pos <- pos + 2
                | '"', Some ',' ->
                    emit ()
                    pos <- pos + 2
                    inQuotes <- false
                | '"', None ->
                    emit ()
                    inQuotes <- false
                    finished <- true
                | other, _ ->
                    buffer.Append(other) |> ignore
                    pos <- pos + 1
        else
            // Plain field: runs until the next comma or the end of the input.
            let mutable inField = true
            while inField do
                if pos >= total then
                    emit ()
                    inField <- false
                    finished <- true
                elif chars.[pos] = ',' then
                    emit ()
                    pos <- pos + 1
                    inField <- false
                else
                    buffer.Append(chars.[pos]) |> ignore
                    pos <- pos + 1

    fields.ToArray()
// Run `parseLine` over every element of `file`, giving one field array per element.
// NOTE(review): `Array.map` requires `file` to be an array (one CSV line per
// element) — confirm how `file` is loaded.
Array.map parseLine file
/// Regex-based variant of the CSV record splitter.
///
/// The pattern matches one field at a time: an optional leading comma (or the
/// start of the input), then either a double-quoted body captured as `escaped`
/// (where `""` is allowed inside) or a run of non-quote, non-comma characters
/// captured as `unescaped`, followed by a look-ahead for a comma or the end of
/// the input. The regex is built once and reused by the returned function.
let parseLine2 =
    let regex = System.Text.RegularExpressions.Regex("(^|,)((\"(?<escaped>((\"\"|[^\"])*))\")|(?<unescaped>([^\",]*)))(?=($|,))")

    // Extract the text of a single matched field, collapsing doubled quotes
    // when the field was written in quoted form.
    let fieldValue (m: System.Text.RegularExpressions.Match) =
        let plain = m.Groups.["unescaped"]
        if plain.Success then
            plain.Value
        else
            m.Groups.["escaped"].Value.Replace("\"\"", "\"")

    fun line ->
        regex.Matches(line)
        |> Seq.cast<System.Text.RegularExpressions.Match>
        |> Seq.map fieldValue
        |> Seq.toArray
#r "../packages/FSharp.Data/lib/net40/FSharp.Data.dll"
open FSharp.Data
// Read the whole training set, located next to this script, into one string;
// it is handed as-is to `Titanic.Parse` below.
let file =
    __SOURCE_DIRECTORY__ + "/training.csv"
    |> System.IO.File.ReadAllText
// Typed view of the CSV, generated by the FSharp.Data CSV type provider using
// "training.csv" as the sample file. `HasHeaders = true` tells the provider the
// sample has a header row; the `Name` property used below presumably comes from
// a column of that name — verify against the file.
type Titanic = CsvProvider<"training.csv", HasHeaders = true>
// Parse the loaded text with the `Titanic` provider and collect the `Name`
// value of every row into an array.
let lines =
    [| for row in Titanic.Parse(file).Rows -> row.Name |]
// Dump the collected names using F#'s structural "%A" formatting.
lines |> printf "%A"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment