Skip to content

Instantly share code, notes, and snippets.

@archer884
Forked from davidkellis/vms_analysis.fsx
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save archer884/dd517b70955252604772 to your computer and use it in GitHub Desktop.
Save archer884/dd517b70955252604772 to your computer and use it in GitHub Desktop.
open System
module Decimal =
let Zero = 0M
let One = 1M
let ceil (d: decimal): decimal = System.Math.Ceiling(d)
let floor (d: decimal): decimal = System.Math.Floor(d)
let round (d: decimal): decimal = System.Math.Round(d)
let roundTo (fractionalDigits: int) (d: decimal): decimal = System.Math.Round(d, fractionalDigits)
// This implementation is based on http://en.wikipedia.org/wiki/Quantiles#Estimating_the_quantiles_of_a_population
// For additional information, see:
// http://www.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf
// http://en.wikipedia.org/wiki/Percentile
// http://www.mathworks.com/help/stats/quantiles-and-percentiles.html
//
// hFn is the function:
// (n: decimal) -> (p: decimal) -> decimal
// such that hFn returns a 1-based real-valued index (which may or may not be a whole-number) into the array of sorted values in xs
// qSubPFn is the function:
// (getIthX: (int -> decimal)) -> (h: decimal) -> decimal
// such that getIthX returns the zero-based ith element from the array of sorted values in xs
// percentages is a sequence of percentages expressed as real numbers in the range [0.0, 100.0]
let quantiles
(hFn: decimal -> decimal -> decimal)
(qSubPFn: (int -> decimal) -> decimal -> decimal)
(interpolate: bool)
(isSorted: bool)
(percentages: seq<decimal>)
(xs: seq<decimal>)
: seq<decimal> =
let sortedXs = (if isSorted then xs else Seq.sort xs) |> Seq.toArray
let n = decimal sortedXs.Length // n is the sample size
let q = 100m
let p k = k / q
let subtract1 = (fun x -> x - 1m)
let hs = Seq.map (p >> hFn n >> subtract1) percentages // NOTE: these indices are 0-based indices into sortedXs
let getIthX = Array.get sortedXs
if interpolate then // interpolate
Seq.map
(fun h ->
let i = Decimal.floor h // i is a 0-based index into sortedXs (smaller index to interpolate between)
let j = Decimal.ceil h // j is a 0-based index into sortedXs (larger index to interpolate between)
let f = h - i // f is the fractional part of real-valued index h
let intI = int i
let intJ = int j
(1m - f) * getIthX intI + f * getIthX intJ // [1] - (1-f) * x_k + f * x_k+1 === x_k + f*(x_k+1 - x_k)
// [1]:
// see: http://web.stanford.edu/class/archive/anthsci/anthsci192/anthsci192.1064/handouts/calculating%20percentiles.pdf
// also: (1-f) * x_k + f * x_k+1 === x_k - f*x_k + f*x_k+1 === x_k + f*(x_k+1 - x_k) which is what I'm after
)
hs
else // floor the index instead of interpolating
Seq.map
(fun h ->
let i = int (Decimal.floor h) // i is a 0-based index into sortedXs
getIthX i
)
hs
// implementation based on description of R-1 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
let quantilesR1 (interpolate: bool) (isSorted: bool) (percentages: seq<decimal>) (xs: seq<decimal>): seq<decimal> =
quantiles
(fun (n: decimal) (p: decimal) -> if p = 0m then 1m else n * p + 0.5m)
(fun (getIthX: (int -> decimal)) (h: decimal) -> getIthX (int (Decimal.ceil (h - 0.5m))))
interpolate
isSorted
percentages
xs
// The R manual claims that "Hyndman and Fan (1996) ... recommended type 8"
// see: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/quantile.html
// implementation based on description of R-8 at http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
let OneThird = 1m / 3m
let TwoThirds = 2m / 3m
let quantilesR8 (interpolate: bool) (isSorted: bool) (percentages: seq<decimal>) (xs: seq<decimal>): seq<decimal> =
quantiles
(fun (n: decimal) (p: decimal) ->
if p < TwoThirds / (n + OneThird) then 1m
elif p >= (n - OneThird) / (n + OneThird) then n
else (n + OneThird) * p + OneThird
)
(fun (getIthX: (int -> decimal)) (h: decimal) ->
let floorHDec = Decimal.floor h
let floorH = int floorHDec
getIthX floorH + (h - floorHDec) * (getIthX (floorH + 1) - getIthX floorH)
)
interpolate
isSorted
percentages
xs
// we use the type 8 quantile method because the R manual claims that "Hyndman and Fan (1996) ... recommended type 8"
let percentiles percentages xs = quantilesR8 true false percentages xs
let percentilesSorted percentages xs = quantilesR8 true true percentages xs
let sampleWithReplacement xs n =
let length = Array.length xs
let rnd = new Random()
seq { for i in 1 .. n do yield xs.[rnd.Next(length)] } |> Seq.toArray
(* let randSeq n a b =
let rnd = new Random(int DateTime.Now.Ticks)
seq { for i in 1 .. n do yield rnd.Next(a, b) } |> Seq.toArray *)
(* let buildSamplingDistribution nSamples nObservationsPerSample buildSampleFn computeSampleStatisticFn =
seq {
for i in 1 .. nSamples do
yield (buildSampleFn nObservationsPerSample |> computeSampleStatisticFn)
} *)
// returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i]
(* let buildSamplingDistributions nSamples nObservationsPerSample buildSampleFn computeSampleStatisticFns =
let samplingDistributions = Array.init (Array.length computeSampleStatisticFns) (fun _ -> Array.create nSamples 0m)
for sampleIndex = 0 to (nSamples - 1) do
let sample = buildSampleFn nObservationsPerSample
computeSampleStatisticFns |> Seq.iteri (fun i computeSampleStatisticFn -> samplingDistributions.[i].[sampleIndex] <- computeSampleStatisticFn sample)
samplingDistributions *)
// returns array of sampling distributions s.t. the ith sampling distribution is the sampling distribution of the statistic computed by compute_sample_statistic_fns[i]
let buildSamplingDistributionsFromOneMultiStatisticFn nSamples nObservationsPerSample (buildSampleFn: int -> array<decimal>) (multiStatisticFn: array<decimal> -> array<decimal>): array<array<decimal>> =
let diagnosticSample = [|1m; 2m; 3m|]
let numberOfSamplingDistributions = multiStatisticFn diagnosticSample |> Seq.length
let samplingDistributions = Array.init numberOfSamplingDistributions (fun _ -> Array.create nSamples 0m)
for sampleIndex = 0 to (nSamples - 1) do
let sample = buildSampleFn nObservationsPerSample
let sampleStatistics = multiStatisticFn sample
sampleStatistics |> Seq.iteri (fun i sampleStatistic -> samplingDistributions.[i].[sampleIndex] <- sampleStatistic)
samplingDistributions
let calculateCumulativeReturn sequenceOfReturns: decimal = sequenceOfReturns |> Seq.reduce (fun acc d -> acc * d)
let buildSample nObservations buildObservationFn = seq { for i in 1 .. nObservations do yield buildObservationFn () } |> Seq.toArray
// let monthlyReturnsF = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
// let monthlyReturnsF = [23.2, 15.1, 2.4, -3.9, 25.1, 7.6, -2.8, -12.6, -3.5, 5.6, -2.2, 11.8, 16.5, 30.9, 5.0, 35.9, -2.8, -25.5, 21.3, 9.4, 15.0, 22.5, -5.5, 18.1, -13.7, 20.3, -3.9, 10.5, -0.3, 0.2, -11.5, 31.6, -13.3, 14.4, 1.1, 12.9, 5.0, -16.8, -0.7, 1.3, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8, -25.6]
// let monthlyReturnsF = [0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
// let monthlyReturnsF = [-16.8, 0.2, 18.9, 15.5, -11.7, 9.2, -11.8]
let monthlyReturnsF = [| 0.2; 18.9; 15.5; -11.7; 9.2; -11.8 |]
let monthlyReturns = monthlyReturnsF |> Array.map (fun r -> (decimal r + 100m) / 100m)
let build1YearReturnObservationFn = fun () -> sampleWithReplacement monthlyReturns 12 |> calculateCumulativeReturn
let build5YearReturnObservationFn = fun () -> sampleWithReplacement monthlyReturns 60 |> calculateCumulativeReturn
// returns an array of returns
let build1YearReturnSample nObservations: array<decimal> = buildSample nObservations build1YearReturnObservationFn
let build5YearReturnSample nObservations: array<decimal> = buildSample nObservations build5YearReturnObservationFn
let computeSampleMean sample =
let n = Seq.length sample |> decimal
Seq.sum sample / n
(* let compute1stPercentile sample = percentiles([1], sample) |> Seq.head
let compute5thPercentile sample = percentiles([5], sample) |> Seq.head
let compute10thPercentile sample = percentiles([10], sample) |> Seq.head
let compute20thPercentile sample = percentiles([20], sample) |> Seq.head
let compute30thPercentile sample = percentiles([30], sample) |> Seq.head
let compute40thPercentile sample = percentiles([40], sample) |> Seq.head
let compute50thPercentile sample = percentiles([50], sample) |> Seq.head
let compute60thPercentile sample = percentiles([60], sample) |> Seq.head
let compute70thPercentile sample = percentiles([70], sample) |> Seq.head
let compute80thPercentile sample = percentiles([80], sample) |> Seq.head
let compute90thPercentile sample = percentiles([90], sample) |> Seq.head
let compute95thPercentile sample = percentiles([95], sample) |> Seq.head
let compute99thPercentile sample = percentiles([99], sample) |> Seq.head
let sampleStatisticFns = [
compute1stPercentile
compute5thPercentile
compute10thPercentile
compute20thPercentile
compute30thPercentile
compute40thPercentile
compute50thPercentile
compute60thPercentile
compute70thPercentile
compute80thPercentile
compute90thPercentile
compute95thPercentile
compute99thPercentile
compute1stPercentile
] *)
let computeAllStats (sample: seq<decimal>): array<decimal> = Array.append [| computeSampleMean sample |] <| (percentiles [|1m; 5m; 10m; 20m; 30m; 40m; 50m; 60m; 70m; 80m; 90m; 95m; 99m|] sample |> Seq.toArray)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_sample_mean_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_1st_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_5th_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_10th_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_30th_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_50th_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_75th_percentile_fn)
// sampling_distribution = build_sampling_distribution(10000, 10000, build_1_year_return_sample_fn, compute_95th_percentile_fn)
// percentiles = percentiles([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], sampling_distribution)
// print out the sampling distribution of the mean 1-year return
// pp [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
// pp percentiles.map(&:to_f)
let t1 = DateTime.Now
let numberOfSamples = 10000
let numberOfObservationsPerSample = 10000
// samplingDistributions = build_sampling_distributions(number_of_samples, number_of_observations_per_sample, build_1_year_return_sample_fn, sample_statistic_fns)
let samplingDistributions = buildSamplingDistributionsFromOneMultiStatisticFn numberOfSamples numberOfObservationsPerSample build1YearReturnSample computeAllStats
let t2 = DateTime.Now
printfn "Building sampling distributions from %i samples, each containing %i observations, took %A seconds." numberOfSamples numberOfObservationsPerSample (t2 - t1)
let returnPercentiles = samplingDistributions |> Seq.map (percentiles [0m; 10m; 20m; 30m; 40m; 50m; 60m; 70m; 80m; 90m; 100m])
printfn "%A" returnPercentiles
let samplingDistributionMeans = samplingDistributions |> Seq.map computeSampleMean
printfn "%A" samplingDistributionMeans
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment