Skip to content

Instantly share code, notes, and snippets.

@HarryMcCarney
Last active March 3, 2023 16:45
Show Gist options
  • Save HarryMcCarney/5348ff466ff938ff1e04b7288936cf02 to your computer and use it in GitHub Desktop.
Save HarryMcCarney/5348ff466ff938ff1e04b7288936cf02 to your computer and use it in GitHub Desktop.
// Think Bayes - Allen B. Downey
//Chapter 1 - Probability
#r "nuget: FSharp.Stats, 0.4.12-preview.1"
#r "nuget: fsharp.data"
#r "nuget: Plotly.NET"
open FSharp.Data
open FSharp.Stats
open FSharp.Stats.Distributions
open Plotly.NET
[<Literal>]
let file =
"https://raw.githubusercontent.com/AllenDowney/ThinkBayes2/master/data/gss_bayes.csv"
type Data = CsvProvider<file, HasHeaders=true>
let data = Data.Load(file)
let bankers = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Indus10 = 6870))
let females = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Sex = 2))
let liberals = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Polviews <= 3))
let democrats = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Partyid <= 1))
let prob s =
s |> Seq.averageBy (fun (c, b) -> if b = true then 1. else 0)
let conj p1 p2 =
p1 |> Seq.zip p2 |> Seq.map (fun (b, d) -> fst b, (snd b) && (snd d))
let cond p given =
given
|> Seq.zip p
|> Seq.filter (fun (p, g) -> snd g)
|> Seq.map (fun (p, g) -> p)
bankers |> prob
females |> prob
liberals |> prob
democrats |> prob
conj democrats bankers |> prob
cond females bankers |> prob
cond liberals females |> prob
conj liberals democrats |> cond females |> prob
conj liberals females |> cond bankers |> prob
// Excercises
//1.1
conj females bankers |> conj liberals |> conj democrats |> prob
//1.2
cond liberals democrats |> prob
cond democrats liberals |> prob
//1.3
let young = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Age < 30))
let old = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Age >= 65))
let conservative = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Polviews >= 5))
young |> prob
old |> prob
conservative |> prob
conj young liberals |> prob
cond liberals young |> prob
cond old conservative |> prob
cond conservative old |> prob
cond old conservative |> prob
//Chapter 2 - Bayes Theorem
type Prior =
{ Hypothesis: string
Prior: float
Likelihood: float }
type Posterior =
{ Hypothesis: string
Prior: float
Likelihood: float
Posterior: float }
let calcPosteriors (priors: Prior list) : Posterior list =
let totalProbability = priors |> List.sumBy (fun r -> r.Prior * r.Likelihood)
priors
|> List.map (fun h ->
{ Hypothesis = h.Hypothesis
Prior = h.Prior
Likelihood = h.Likelihood
Posterior = ((h.Prior * h.Likelihood) / totalProbability) })
// Probability of Vanilla
let priors =
[ { Hypothesis = "Bowl 1"
Prior = 0.5
Likelihood = 0.75 }
{ Hypothesis = "Bowl 2"
Prior = 0.5
Likelihood = 0.50 } ]
priors |> calcPosteriors
// Dice Problem
let diceTable =
[ { Hypothesis = "6 Sided"
Prior = 0.3333333333
Likelihood = 0.1666666667 }
{ Hypothesis = "8 Sided"
Prior = 0.3333333333
Likelihood = 0.125 }
{ Hypothesis = "12 Sided"
Prior = 0.3333333333
Likelihood = 0.08333333333 } ]
diceTable |> calcPosteriors
// Monty Hall Problem
let montyTable =
[ // you first choose door one and monty has opened door three to reveal a goat
{ Hypothesis = "Door 1"
Prior = 0.3333333333
Likelihood = 0.5 // if it were behind door 1 there a 50/50 that month would open door 3
}
{ Hypothesis = "Door 2"
Prior = 0.3333333333
Likelihood = 1 //if it were behind door 2 monty has to open door 3 a we have chosen door 1
} // likelihood that monthy opens door 3
{ Hypothesis = "Door 3"
Prior = 0.3333333333
Likelihood = 0 // cant be here as month has opened door 3
} ]
montyTable
|> calcPosteriors
//Exercises
// 2.1
[ { Hypothesis = "! Head"
Prior = 0.5
Likelihood = 0.5 }
{ Hypothesis = "2 Head"
Prior = 0.5
Likelihood = 1. } ]
|> calcPosteriors
//2.2
[ { Hypothesis = "both girls"
Prior = 0.25
Likelihood = 1. }
{ Hypothesis = "both boys"
Prior = 0.25
Likelihood = 0. }
{ Hypothesis = "1st is girl 2nd is boy"
Prior = 0.25
Likelihood = 0.5 }
{ Hypothesis = "1st is boy 2nd is girl"
Prior = 0.25
Likelihood = 0.5 } ]
|> calcPosteriors
//2.3
let montyDoor2 =
[ { Hypothesis = "Door 1"
Prior = 0.3333333333
Likelihood = 0.5 }
{ Hypothesis = "Door 2"
Prior = 0.3333333333
Likelihood = 1 }
{ Hypothesis = "Door 3"
Prior = 0.3333333333
Likelihood = 0 } ]
let montyDoor2 =
[ { Hypothesis = "Door 1"
Prior = 0.3333333333
Likelihood = 0.5 }
{ Hypothesis = "Door 2"
Prior = 0.3333333333
Likelihood = 0 }
{ Hypothesis = "Door 3"
Prior = 0.3333333333
Likelihood = 0.5 } ]
montyDoor2 |> calcPosteriors
let montyDoor3 =
[ { Hypothesis = "Door 1"
Prior = 0.3333333333
Likelihood = 0 }
{ Hypothesis = "Door 2"
Prior = 0.3333333333
Likelihood = 1 }
{ Hypothesis = "Door 3"
Prior = 0.3333333333
Likelihood = 0 } ]
montyDoor3 |> calcPosteriors
//2.4
let MnMs =
[ { Hypothesis = "Bag 1"
Prior = 0.50
Likelihood = 0.2 }
{ Hypothesis = "Bag 2"
Prior = 0.50
Likelihood = 0.13 } ]
|> calcPosteriors
//Chapter 3 - Distributions
let getProbabilities (probs: 'a array) (map: Map<'a, float>) : float array = probs |> Array.map (fun p -> map[p])
let categories = [ 1; 2; 3; 4; 5; 6 ] |> Set.ofSeq
let die =
EmpiricalDistribution.createNominal (Template = categories) [ 1; 2; 3; 3; 6; 6 ]
die[4]
let lettersOld =
"mississippi".ToCharArray()
|> Array.map string
|> Array.toList
|> Frequency.createGeneric
|> Empirical.ofHistogram
lettersOld["i"]
let myAlphabetNum =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" |> Set.ofSeq
let letters =
EmpiricalDistribution.createNominal (Template = myAlphabetNum) "mississippi"
getProbabilities [| 1; 4; 6 |] die
letters.['i']
letters.['z']
//Cookie problem revisited
let priorDist = EmpiricalDistribution.createNominal () [ "Bowl 1"; "Bowl 2" ]
let likelihoodVanilla = [ "Bowl 1", 0.75; "Bowl 2", 0.5 ] |> Map.ofSeq
let likelihoodChocolate = [ "Bowl 1", 0.25; "Bowl 2", 0.5 ] |> Map.ofSeq
let normalise (dist: seq<'a * float>) =
let totalProbability = dist |> Seq.sumBy snd
dist |> Seq.map (fun (k, v) -> k, v / totalProbability) |> Map.ofSeq
let updatePosteriorDist (likelihoods: Map<'a, float>) (priorDist: Map<'a, float>) =
priorDist
|> Map.toSeq
|> Seq.map (fun (k, v) ->
match (likelihoods.TryFind k) with
| Some l -> (k, v * l)
| None -> (k, v))
|> normalise
updatePosteriorDist likelihoodVanilla priorDist
|> updatePosteriorDist likelihoodVanilla
|> updatePosteriorDist likelihoodChocolate
//101 Bowls
let drawChart priorDist posteriorDist vanillas chocolates =
let posteriorAfterOneVanillaLine =
Chart.Line((posteriorDist |> Map.toSeq), Name = "Posterior")
let prior101DistLine = Chart.Line((priorDist |> Map.toSeq), Name = "Prior")
let desc =
ChartDescription.create
(sprintf "Posterior after %i vanilla cookies and %i chocolate cookies" vanillas chocolates)
""
[ posteriorAfterOneVanillaLine; prior101DistLine ]
|> Chart.combine
|> Chart.withXAxisStyle ("Bowl")
|> Chart.withYAxisStyle ("PMF")
|> Chart.withDescription (desc)
|> Chart.show
let prior101Dist = EmpiricalDistribution.createNominal () ([ 0..100 ] |> Seq.ofList)
let likelihoodVanilla =
[ 0..100 ] |> List.map (fun i -> i, (float i / 100.)) |> Map.ofList
let likelihoodChocolate =
[ 0..100 ] |> List.map (fun i -> i, 1. - (float i / 100.)) |> Map.ofList
let posterior2 =
updatePosteriorDist likelihoodVanilla prior101Dist
|> updatePosteriorDist likelihoodVanilla
drawChart prior101Dist posterior2 2 0
let posterior3 =
updatePosteriorDist likelihoodVanilla prior101Dist
|> updatePosteriorDist likelihoodVanilla
|> updatePosteriorDist likelihoodChocolate
drawChart prior101Dist posterior3 2 1
let posterior3 =
updatePosteriorDist likelihoodVanilla prior101Dist
|> updatePosteriorDist likelihoodVanilla
|> updatePosteriorDist likelihoodChocolate
|> Empirical.maxLike
//The dice problem
let priorDiceDist = EmpiricalDistribution.createNominal () ([ 6; 8; 12 ])
let diceLikelihoods1 = [ 6, (1. / 6.); 8, (1. / 8.); 12, (1. / 12.) ] |> Map.ofList
let diceLikelihoods7 = [ 6, 0.; 8, (1. / 8.); 12, (1. / 12.) ] |> Map.ofList
let postDice =
updatePosteriorDist diceLikelihoods1 priorDiceDist
|> updatePosteriorDist diceLikelihoods7
let updateDice data priorDist =
let likelihood =
priorDist
|> Map.toSeq
|> Seq.map (fun (k, v) -> k, (if data > k then 0. else 1. / (float k)))
|> Map.ofSeq
updatePosteriorDist likelihood priorDist
updateDice 7 priorDiceDist
// Excercises 3.1
let priorDice31 = EmpiricalDistribution.createNominal () ([ 6; 8; 12 ])
updateDice 1 priorDice31 |> updateDice 3 |> updateDice 5 |> updateDice 7
//3.2
let dice32 =
EmpiricalDistribution.createNominal () ([ 4; 6; 6; 8; 8; 8; 12; 12; 12; 12; 20; 20; 20; 20; 20 ])
updateDice 7 dice32
//3.3
let priorDist = EmpiricalDistribution.createNominal () [ "Drawer 1"; "Drawer 2" ]
let likelihoodMatching =
[ "Drawer 1", (0.5 * 0.5); "Drawer 2", (0.333 * 0.333) ] |> Map.ofSeq
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 1"] * 0.5
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 1"]
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 2"]
// Chapter 4 - Estimating Proportions
let binomial = DiscreteDistribution.binomial 0.5 250
let getBinomialProbs (binomialDist: DiscreteDistribution<float, int>) (data: int seq) =
data |> Seq.map (fun i -> i, binomialDist.PMF i) |> Map.ofSeq
let getBinomialProbsCDF (binomialDist: DiscreteDistribution<float, int>) (data: int seq) =
data |> Seq.map (fun i -> i, binomialDist.CDF i) |> Map.ofSeq
let prior101DistLine =
Chart.Line((getBinomialProbs binomial [ 0..250 ] |> Map.toSeq), Name = "n = 25, p = 0.5")
prior101DistLine
|> Chart.withXAxisStyle ("Number of Heads(k)")
|> Chart.withYAxisStyle ("PMF")
|> Chart.show
let highestp = (getBinomialProbs binomial [ 0..250 ]) |> Empirical.maxLike // this should really return the key as well
(getBinomialProbs binomial [ 0..250 ])
|> Map.toSeq
|> Seq.filter (fun (k, v) -> v = highestp)
1. - (getBinomialProbsCDF binomial [ 139 ] |> Map.toSeq |> Seq.head |> snd) // p of 140 k or more -
//this should really be a function with greater than and less than variants.
//Bayesian estimation
let prior = [ 0.0..0.01..1 ] |> List.map (fun i -> i, 1.) |> Map.ofSeq // uniform prior meaning we have no idea how likely each proportion of heads is.
let likelihoodHeads = prior
let likelihoodTails =
prior
|> Map.map (fun k v ->
k
1. - v)
type FlipResult =
| Head
| Tail
let data = [ 1..250 ] |> List.map (fun i -> if i <= 5 then Head else Tail)
let updatePriorWithData prior data : Map<float, float> =
data
|> List.fold
(fun pmf d ->
match d with
| Head -> updatePosteriorDist likelihoodHeads pmf
| Tail -> updatePosteriorDist likelihoodTails pmf)
prior
let result = updatePriorWithData prior data
let posteriorLine = Chart.Line((result |> Map.toSeq), Name = "140 Heads out of 250")
posteriorLine
|> Chart.withXAxisStyle ("Proportion of Heads")
|> Chart.withYAxisStyle ("robability")
|> Chart.show
//Triangle prior
let triangleRes = updatePriorWithData tprior tdata // swapping priors with enough data we get same posterior result
let uniRes = updatePriorWithData prior tdata
let tprior =
[ 0.69 .. - 0.024 .. 0.0 ]
|> List.append [ 0.0..0.01..0.7 ]
|> List.zip [ 0.0..0.01..0.99 ]
|> normalise
open System
let rnd = Random()
let tdata = [ 1..10 ] |> List.map (fun i -> if i % 2 = 0 then Head else Tail)
let unfairPriorRes = updatePriorWithData tprior tdata // swapping priors with enough data we get same posterior result
let posteriorLine = Chart.Line((unfairPriorRes |> Map.toSeq), Name = "Posterior")
let priorLine = Chart.Line((tprior |> Map.toSeq), Name = "Prior")
[ posteriorLine; priorLine ]
|> Chart.combine
|> Chart.withXAxisStyle ("Proportion of Heads")
|> Chart.withYAxisStyle ("robability")
|> Chart.show
//Binomial likelihood function
let uniformPrior = [ 0.0..0.01..1 ] |> List.map (fun i -> i, 1.) |> Map.ofSeq
let likelihoods =
uniformPrior
|> Map.toSeq
|> Seq.map (fun (p, v) -> p, (DiscreteDistribution.binomial p 25000).PMF 14000) // creates a binomial for 100 values of p which should be more effcient than playing
//throuhall the data.
////It mean we can call updatePosteriorDist directly just once rather than via updatePriorWithData for every data point
|> Map.ofSeq
let post = updatePosteriorDist likelihoods uniformPrior |> Map.toSeq //
Chart.Line(post, Name = "Using binomial to update dist")
|> Chart.withXAxisStyle ("Proportion of Heads")
|> Chart.withYAxisStyle ("robability")
|> Chart.show
// Excercises
//4.1
let linspace start stop count =
[| start .. ((stop - start) / (count - 1.)) .. stop |]
let uniformPrior = (linspace 0.1 0.4 101) |> Array.map (fun i -> i, 1.) |> normalise
let genLike prior n k =
prior
|> Map.toSeq
|> Seq.map (fun (p, v) -> p, (DiscreteDistribution.binomial p n).PMF k)
|> Map.ofSeq
let priorAfter25 = updatePosteriorDist uniformPrior (genLike uniformPrior 100 25)
let priorAfter28 = updatePosteriorDist uniformPrior (genLike uniformPrior 103 28)
let priorAfter25and33 =
updatePosteriorDist (priorAfter25) (genLike uniformPrior 3 3)
[ Chart.Line(priorAfter25 |> Map.toSeq, Name = "Probability of hit after 25/100")
Chart.Line(priorAfter28 |> Map.toSeq, Name = "Probability of hit after 28/103")
Chart.Line(priorAfter25and33 |> Map.toSeq, Name = "Probability of hit after 25/100 and then 3/3")
Chart.Line((uniformPrior |> Map.toSeq), Name = "Uniform Probability") ]
|> Chart.combine
|> Chart.withXAxisStyle ("Proportion of Hits")
|> Chart.withYAxisStyle ("Probability")
|> Chart.show
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment