Skip to content

Instantly share code, notes, and snippets.

@DominicFinn
Last active August 29, 2015 14:20
Show Gist options
  • Save DominicFinn/2a5f9f57b41eb715dc74 to your computer and use it in GitHub Desktop.
Save DominicFinn/2a5f9f57b41eb715dc74 to your computer and use it in GitHub Desktop.
Processing CSV files. At the moment all this does is pattern-match each row to find the ones that have values in them. We still need to store those rows somewhere (or something like that), and then perhaps another script could process them? Not sure... Once we have the rows in this form, though, we can do what we want with them.
open FSharp.Data
open System.IO
open System
/// Reads every line of `file` into a string array.
/// The file is located through `file.FullName` and read eagerly by
/// `File.ReadAllLines`; any exception that call raises propagates to the caller.
let load(file: FileInfo) =
    file.FullName |> File.ReadAllLines
/// Splits each raw line on commas and trims the whitespace around every cell.
/// Returns a lazy sequence of rows, each of which is a lazy sequence of trimmed cells.
/// This is a plain `String.Split(',')`: quoted fields that contain commas are
/// not treated specially.
let split(rows: string[]) =
    // Trim surrounding whitespace from every cell of one already-split line.
    let trimCells (cells: string[]) =
        seq { for cell in cells -> cell.Trim() }
    seq { for line in rows -> trimCells (line.Split(',')) }
/// Payload carried by rows that hold no usable data.
type EmptyRow =
    { value: string }

/// Header fields; `calculateRowType` fills them from cells 1, 3 and 5 of a six-cell row.
type TitleRow =
    { SiteName: string
      PmacId: string
      ChannelNumber: string }

/// A single reading; `calculateRowType` parses it from cells 0 and 1 of a two-cell row.
type DataRow =
    { Time: DateTime
      Pressure: Decimal }

/// Classification of one CSV row.
type Row =
    /// A row with no recognised content.
    | Empty of EmptyRow
    /// Not constructed by any code visible in this file; discarded alongside `Empty`
    /// by `filterOutEmptyAndUnknowns`.
    | Unknown of EmptyRow
    /// A header row.
    | Title of TitleRow
    /// A reading row.
    | Data of DataRow
/// Classifies one split CSV row by its number of cells.
///
/// * 2 cells -> `Data` when cell 0 parses as a `DateTime` and cell 1 parses as a
///   `Decimal` (both using the current culture); otherwise `Empty`.
/// * 6 cells -> `Title`, taking cells 1, 3 and 5 as SiteName, PmacId and ChannelNumber.
/// * any other count (including 0) -> `Empty`.
///
/// A non-numeric pressure cell yields `Empty` rather than raising `FormatException`,
/// matching how an unparseable timestamp is treated.
let calculateRowType(row: seq<string>) =
    // Materialise the (possibly lazy) row once, so counting and indexing do not
    // re-enumerate the sequence for every cell access.
    let cells = Seq.toArray row
    let empty = Empty { value = String.Empty }

    // Only called for two-cell rows, so indices 0 and 1 are always in range.
    let tryGetData () =
        match DateTime.TryParse(cells.[0]), Decimal.TryParse(cells.[1]) with
        | (true, time), (true, pressure) -> Data { Time = time; Pressure = pressure }
        | _ -> empty

    match cells.Length with
    | 2 -> tryGetData ()
    | 6 -> Title { SiteName = cells.[1]; PmacId = cells.[3]; ChannelNumber = cells.[5] }
    | _ -> empty
/// Classifies every row of a split table by applying `calculateRowType` to it.
/// The result is a lazy sequence; rows are classified as it is enumerated.
let normalize(table: seq<seq<string>>) =
    Seq.map calculateRowType table
/// Keeps only the `Title` and `Data` rows, dropping every `Empty` and `Unknown` row.
/// The result is a lazy sequence in the same order as the input.
let filterOutEmptyAndUnknowns(rows: seq<Row>) =
    // True for the row kinds that carry content worth keeping.
    let carriesContent row =
        match row with
        | Empty _
        | Unknown _ -> false
        | Title _
        | Data _ -> true
    Seq.filter carriesContent rows
/// Program entry point: loads every `*.csv` file in a directory, classifies its
/// rows, and prints the `Title` and `Data` rows to standard output.
///
/// The directory is taken from the first command-line argument when one is given;
/// with no arguments it falls back to the original hard-coded location.
/// Returns 0 on success, or 1 if the directory does not exist.
[<EntryPoint>]
let main argv =
    // Original hard-coded location, kept as the default so existing runs behave the same.
    let defaultDirectory = "C:\\git\\unversioned\\csvCruncj\\Cruncher\\Cruncher\\bin\\Debug\\files"
    let directoryPath =
        match argv with
        | [||] -> defaultDirectory
        | _ -> argv.[0]
    let dir = new DirectoryInfo(directoryPath)
    if not dir.Exists then
        // Report the problem instead of crashing with an unhandled DirectoryNotFoundException.
        eprintfn "Directory not found: %s" dir.FullName
        1
    else
        let files = dir.GetFiles("*.csv")
        let print(rows: seq<Row>) =
            for r in rows do
                printfn "%A" r
        // For each file, do the following to it:
        files
        |> Seq.map load // get the lines of text out of the file
        |> Seq.map split // split it up on the comma
        |> Seq.map normalize // decide what's in the lines
        |> Seq.map filterOutEmptyAndUnknowns // get rid of the empty lines
        |> Seq.iter print // print them out
        // Wait for a key press before exiting (keeps a console window open).
        Console.ReadKey() |> ignore
        0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment