Skip to content

Instantly share code, notes, and snippets.

@nfaggian
Last active August 29, 2015 14:11
Show Gist options
  • Save nfaggian/2d19e6756535c3adb777 to your computer and use it in GitHub Desktop.
Save nfaggian/2d19e6756535c3adb777 to your computer and use it in GitHub Desktop.
Experimentation with "Deedle"
#I "/home/accounts/nfaggian/Desktop/fsharp/analysis/packages/Deedle.1.0.6/"
#load "Deedle.fsx"
#time;;
open System
open System.IO
open Deedle
/// Lazily lists every file beneath `baseFolder`: first the files directly
/// inside it, then the files of each sub-directory, visited recursively.
/// Nothing touches the file system until the sequence is enumerated.
let rec allFilesUnder baseFolder =
    seq {
        // Files that live directly in this folder.
        for file in Directory.GetFiles(baseFolder) do
            yield file
        // Then descend into each sub-directory in turn.
        yield! Directory.GetDirectories(baseFolder) |> Seq.collect allFilesUnder
    }
/// Reads the CSV file at `filename` and returns the mean of its
/// "forecast_error" column for each value of its "forecast_day" column.
let frame_mean (filename: string) =
    // Re-key the rows so that the first key level is the forecast day.
    let byForecastDay =
        Frame.ReadCsv(filename)
        |> Frame.groupRowsByInt "forecast_day"
    let errors = byForecastDay?forecast_error
    // Average over the first level of the row key (the forecast day).
    errors |> Stats.levelMean Pair.get1Of2
/// One row of the verification table built by this script.
type frame_spec =
    { /// Date of the source file, stored as text.
      date: string
      /// Forecast day the error belongs to.
      forecast_day: int
      /// Forecast error value for that day.
      forecast_error: float }
/// Turns a sequence of forecast errors into `frame_spec` records.
///
/// `errors` - the error values; the i-th value (counting from 0) becomes
///            the record with `forecast_day = i`.
/// `date`   - the date shared by every record, stored as `date.ToString()`.
///
/// Returns a lazy sequence holding one record per element of `errors`.
let frame_records (errors: seq<float>, date: System.DateTime) =
    // Format the date once; it is identical for every record.
    let date_string = date.ToString()
    // A single pass over `errors`: the position in the sequence is the
    // forecast day, so no separate index/date lists (and no repeated
    // Seq.length enumerations of the input) are needed.
    errors
    |> Seq.mapi (fun day error ->
        { date = date_string; forecast_day = day; forecast_error = error })
/// Extracts the year/month stamp from a result file name.
///
/// The stamp is the text after the last '_' in `filename`, up to the next
/// '.', and must be in "yyyyMM" form (e.g. ".../NSW_201401.csv").
/// Returns the first day of that month as a `System.DateTime`.
/// Raises `System.FormatException` when the stamp is not a valid "yyyyMM".
let date_parser (filename: string) =
    let lastSegment = filename.Split('_') |> Seq.last
    let stamp = lastSegment.Split('.').[0]
    // Parse with the invariant culture so the result does not depend on the
    // calendar or settings of whatever culture the current thread runs under.
    System.DateTime.ParseExact(
        stamp,
        "yyyyMM",
        System.Globalization.CultureInfo.InvariantCulture)
/// Lazy sequence of (file date, per-forecast-day mean error) pairs, one for
/// every result file whose path contains "NSW", in sorted path order.
let data =
    let isNswResult (path: string) = path.Contains "NSW"
    let withMeans path = (date_parser path, frame_mean path)
    "/work/nfaggian/verification_notebooks/results/"
    |> allFilesUnder
    |> Seq.filter isNswResult
    |> Seq.sort
    |> Seq.map withMeans
/// Frame with one row per (file date, forecast day) mean error: the rows are
/// indexed by the "date" column and then grouped by the "forecast_day" column.
let verification_frame =
    data
    |> List.ofSeq // force the lazy `data` pipeline into a list first
    |> Seq.collect (fun (date, means) -> frame_records (means.Values, date))
    |> Frame.ofRecords
    |> Frame.indexRowsDate "date"
    |> Frame.groupRowsByInt "forecast_day"
/// Descriptive statistics of one series, as produced by `describe`.
type describe_spec =
    { /// Result of `Stats.count` on the series.
      count: int
      /// Result of `Stats.min` on the series.
      min: float option
      /// Result of `Stats.max` on the series.
      max: float option
      /// Result of `Stats.median` on the series.
      median: float }
/// Summarises a series keyed by (forecast day, date) with its count,
/// minimum, maximum and median.
let describe (x: Series<(int * DateTime),float> ) =
    let observations = x |> Stats.count
    let smallest = x |> Stats.min
    let largest = x |> Stats.max
    let middle = x |> Stats.median
    { count = observations
      min = smallest
      max = largest
      median = middle }
// Descriptive statistics of the forecast error for forecast days 1 to 7
// (a very broad-brush summary), collected into one frame.
// NOTE(review): frame_records numbers forecast days from 0, while this
// queries days 1..7 - confirm the intended range.
seq { 1 .. 7 }
|> Seq.map (fun day -> describe (verification_frame?forecast_error.[day,*]))
|> Frame.ofRecords
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment