Last active
August 29, 2015 14:20
-
-
Save DominicFinn/2a5f9f57b41eb715dc74 to your computer and use it in GitHub Desktop.
Processing CSV files. At the moment, all this does is match each row to find the rows that have values in them. We still need to store them somewhere, and then perhaps have another script to process them (not sure yet). Once we have the rows in this form, though, we can do what we want with them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open FSharp.Data | |
open System.IO | |
open System | |
/// Reads every line of the given file into memory and returns them as an array,
/// one element per line of text.
let load(file: FileInfo) =
    file.FullName |> File.ReadAllLines
/// Breaks each line into its comma-separated cells and trims the whitespace
/// around every cell. The result is a lazy sequence of rows, each row itself
/// being a lazy sequence of cell strings.
let split(rows: string[]) =
    rows
    |> Seq.map (fun line ->
        // Plain split on ','; no special handling of quoted fields.
        line.Split(',')
        |> Seq.map (fun cell -> cell.Trim()))
/// Payload for rows that carry no usable data.
type EmptyRow =
    { value: string }

/// Header information taken from a six-column row.
type TitleRow =
    { SiteName: string
      PmacId: string
      ChannelNumber: string }

/// A single timestamped reading taken from a two-column row.
type DataRow =
    { Time: DateTime
      Pressure: Decimal }

/// The classification assigned to one line of a CSV file.
type Row =
    /// A row with nothing usable in it.
    | Empty of EmptyRow
    /// A row that could not be recognised.
    | Unknown of EmptyRow
    /// A header row describing the site and channel.
    | Title of TitleRow
    /// A row holding a time and a pressure value.
    | Data of DataRow
/// Classifies one already-split CSV row by its number of columns.
///
/// - 2 columns: treated as a reading. Column 0 is parsed as a DateTime and
///   column 1 as a Decimal; if either fails to parse the row is reported as Empty.
/// - 6 columns: treated as a title row, taking SiteName, PmacId and
///   ChannelNumber from columns 1, 3 and 5 respectively.
/// - any other column count (including none): Empty.
///
/// Parsing uses the default DateTime.TryParse / Decimal.TryParse overloads.
let calculateRowType(row: seq<string>) =
    // Materialise the row once so that counting and indexing do not
    // re-enumerate the (possibly lazy) sequence on every access.
    let cells = Seq.toArray row
    let empty = Empty { value = String.Empty }

    // Tries to read a reading out of a two-column row. A row whose time or
    // pressure cannot be parsed yields Empty instead of raising.
    let tryGetData () =
        match DateTime.TryParse(cells.[0]), Decimal.TryParse(cells.[1]) with
        | (true, time), (true, pressure) -> Data { Time = time; Pressure = pressure }
        | _ -> empty

    match cells.Length with
    | 2 -> tryGetData ()
    | 6 -> Title { SiteName = cells.[1]; PmacId = cells.[3]; ChannelNumber = cells.[5] }
    | _ -> empty
/// Classifies every row of a split table, lazily producing one Row per input row.
let normalize(table: seq<seq<string>>) =
    Seq.map calculateRowType table
/// Keeps only the Title and Data rows, dropping every Empty and Unknown row.
let filterOutEmptyAndUnknowns(rows: seq<Row>) =
    // All four cases are listed so that adding a new Row case
    // triggers an incomplete-match warning here.
    let hasContent =
        function
        | Title _
        | Data _ -> true
        | Unknown _
        | Empty _ -> false
    Seq.filter hasContent rows
/// Program entry point. Loads every "*.csv" file in the input directory,
/// splits and classifies its rows, discards Empty/Unknown rows and prints the
/// remaining rows to standard output, then waits for a key press.
///
/// The input directory is the first command-line argument when one is given;
/// with no arguments the original hard-coded directory is used.
///
/// Returns 0 on success, or 1 if the input directory does not exist.
[<EntryPoint>]
let main (argv: string[]) =
    // Original hard-coded location, kept as the fallback so that running with
    // no arguments behaves exactly as before.
    let defaultDirectory = "C:\\git\\unversioned\\csvCruncj\\Cruncher\\Cruncher\\bin\\Debug\\files"
    let directoryPath =
        match argv with
        | [||] -> defaultDirectory
        | args -> args.[0]
    let dir = new DirectoryInfo(directoryPath)

    let print(rows: seq<Row>) =
        for r in rows do
            printfn "%A" r

    if not dir.Exists then
        // Report the problem instead of letting GetFiles raise.
        eprintfn "Directory not found: %s" dir.FullName
        1
    else
        // For each file, run the following pipeline over it.
        dir.GetFiles("*.csv")
        |> Seq.map load                        // get the lines of text out of the file
        |> Seq.map split                       // split it up on the comma
        |> Seq.map normalize                   // decide what's in the lines
        |> Seq.map filterOutEmptyAndUnknowns   // get rid of the empty lines
        |> Seq.iter print                      // print them out
        Console.ReadKey() |> ignore
        0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment