Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus
Created March 24, 2013 23:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sir-deenicus/5234043 to your computer and use it in GitHub Desktop.
Save sir-deenicus/5234043 to your computer and use it in GitHub Desktop.
/// Statistic stored for one feature key within a class.
type Variable =
    /// Tally for a non-numeric feature value (starts at 1. and is bumped by 1. in mapAddFeature).
    | Strings of float
    /// Three floats for a numeric column, destructured elsewhere in this file as (var, mean, n).
    | Number of float * float * float
/// Model state: keyed by class label; each entry pairs that class's feature map
/// (feature key -> Variable) with a float tally (defaulted to 0. for unseen classes
/// by tryGetClassProb and incremented by 1. per training row via mapAddClass).
type Probability = Map<string , Map<string,Variable> * float>
/// Adds two pairs component by component.
let inline pairAdd first second =
    let (a1, b1) = first
    let (a2, b2) = second
    a1 + a2, b1 + b2
/// Divides the first element of a pair by the second.
/// When the divisor is zero the first element is returned unchanged.
let inline div (numerator, denominator) =
    if denominator <> 0. then numerator / denominator
    else numerator
/// Extracts the float carried by a Strings case; a Number case yields 0.
let strOfVar variable =
    match variable with
    | Strings tally -> tally
    | Number _ -> 0.
/// Extracts the (var, mean, n) triple carried by a Number case; a Strings case yields (0., 0., 0.).
let numOfVar variable =
    match variable with
    | Number (var, mean, n) -> var, mean, n
    | Strings _ -> 0., 0., 0.
/// Fold step that records one raw feature value in a class's feature map.
///
/// The accumulator is (feature map, current column index); `item` is the raw text of the
/// feature. Returns the updated map together with the next column index.
///
/// Values that `isDouble` parses are stored under the column index (as a string) in a
/// Number case; all other values are stored under their own text in a Strings case whose
/// payload starts at 1. and grows by 1. on every repeat.
/// NOTE(review): `isDouble` and `online_variance_Mean` are defined outside this file view;
/// the latter is presumed to return the updated triple for Number - confirm.
let mapAddFeature (m, col) item =
    let key, n, isnum =
        match isDouble item with
        | Some n -> string col, n, true
        | None -> item, 0., false
    // Single lookup decides between updating an existing entry and seeding a new one.
    let updated =
        match Map.tryFind key m with
        | Some (Strings v) -> Strings(v + 1.)
        | Some (Number (var, mean, count)) -> Number(online_variance_Mean var mean count n)
        | None when isnum -> Number(online_variance_Mean 0. n 1. n)
        | None -> Strings(1.)
    Map.add key updated m, col + 1
/// Stores `x` under key `a` in `m` through mapAddGeneric, supplying (x, 1.) as the
/// initial entry and a function that replaces an existing entry with x and its float + 1.
/// NOTE(review): mapAddGeneric is defined elsewhere; which argument is used when is assumed from its call shape.
let mapAddClass m a x =
    let bump (_, tally) = x, tally + 1.
    mapAddGeneric m a bump (x, 1.)
/// Looks up the entry for `dclass` via mapGet, passing (Map.empty, 0.) as the fallback value.
let tryGetClassProb (probabilities : Probability) dclass =
    (Map.empty, 0.) |> mapGet probabilities dclass
/// Fold step that trains the model on one comma-separated row.
///
/// `splitDataClass` (defined elsewhere) separates the row into its feature values and its
/// class label. The class's current feature map is fetched, every feature value is folded
/// into it with mapAddFeature starting at column 0, and the result is written back with
/// mapAddClass, which also bumps the class tally.
let trainRow (probabilities : Probability) (datum : string) =
    let data, dclass = splitDataClass datum [|","|]
    // The stored tally is not needed here: mapAddClass increments it itself.
    let conditionalFeatures, _ = tryGetClassProb probabilities dclass
    let updatedFeatures, _ = Array.fold mapAddFeature (conditionalFeatures, 0) data
    mapAddClass probabilities dclass updatedFeatures
/// Trains on every row of `data` in order, starting from the model `p`.
let trainData p data = Array.fold trainRow p data
/// Shorthand for the framework's value of pi.
let pi = System.Math.PI

/// Normal density at `x` for the given mean and standard deviation.
let inline gaussian x mean stddev =
    let z = (x - mean) / stddev
    let scale = 1. / (stddev * sqrt (2. * pi))
    scale * exp (-0.5 * z ** 2.0)
/// Produces a (numerator, denominator) pair for one feature value under one class,
/// intended to be collapsed with `div`.
///
/// `cp` is a class entry (feature map, class tally), `featName` is the raw feature text and
/// `col` its column index.
///
/// Values that `isDouble` parses are looked up by column index; the stored (var, mean, n)
/// triple is turned into a density and returned as (density, 0.), which `div` passes through
/// unchanged. Other values are looked up by their own text and returned as
/// (0.5 + stored tally, 1.0 + class tally). A missing key contributes 0. in either case.
/// NOTE(review): `StudentT` is not defined in this file view; presumably a distribution type
/// taking (location, scale, degrees of freedom) - confirm.
let conditionalProbCounts cp featName col =
    let condprob = fst cp
    let key, x, isnum =
        match isDouble featName with
        | Some x -> string col, x, true
        | _ -> featName, 0., false
    let p =
        // Single lookup instead of containsKey followed by the indexer.
        match Map.tryFind key condprob with
        | Some (Strings v) -> v
        | Some (Number (var, mean, n)) ->
            let dof = (log n) ** 2.
            // A zero accumulator falls back to unit variance; otherwise divide by n - 1.
            let variance = if var = 0. then 1. else var / (n - 1.)
            if dof <= 30. then
                let student = StudentT (mean, sqrt variance, dof)
                student.Density(x)
            else
                gaussian x mean (sqrt variance)
        | None -> 0.
    if isnum then (p, 0.)
    else (0.5 + p, 1.0 + snd cp)
/// Collapses the pair from conditionalProbCounts into a single value with `div`.
let conditionalProb cp featName col =
    div (conditionalProbCounts cp featName col)
/// Share of the summed class tallies that belongs to `className`
/// (via `div`, so an all-zero total returns the numerator unchanged).
let classProbability className (p : Probability) =
    let tally (matching, total) label (_, count) =
        let matching' = if label = className then count + matching else matching
        matching', count + total
    Map.fold tally (0.0, 0.0) p |> div
/// Sums the conditionalProbCounts pairs for `feature` across every class and collapses
/// the total with `div`.
/// NOTE(review): the value passed as the column argument is the class's position in the
/// fold (0, 1, 2, ...), not a feature column; that only matters for numeric features,
/// which conditionalProbCounts looks up by column - confirm this is intended.
let featureProb feature (p : Probability) =
    let step (totals, position) _ classEntry =
        pairAdd (conditionalProbCounts classEntry feature position) totals, position + 1
    let totals, _ = Map.fold step ((0., 0.), 0) p
    div totals
/// Unnormalised score of `dclass` for a feature row: the class probability multiplied by
/// the product of each feature's conditional value, with features indexed by position.
let naiveBayes dclass (features : string []) (p : Probability) =
    let cp = tryGetClassProb p dclass
    let likelihood =
        features
        |> Array.mapi (fun col feat -> conditionalProb cp feat col)
        |> Array.fold ( * ) 1.
    classProbability dclass p * likelihood
/// Score of `dclass` divided by the sum of the scores of all classes in the model
/// (via `div`, so a zero sum returns the numerator unchanged).
let naiveBayesProbability dclass (features : string []) (p : Probability) =
    let accumulate (target, total) curClass _ =
        let score = naiveBayes curClass features p
        if curClass = dclass then (score, score + total)
        else (target, total + score)
    Map.fold accumulate (0., 0.) p |> div
/// Scores every class key of `p` with `nb className features p` and returns the
/// (className, score) pairs, listed from the last key to the first.
let classMap p features nb =
    p
    |> Map.toList
    |> List.map (fun (className, _) -> className, nb className features p)
    |> List.rev
/// Lists every class with its normalised score (naiveBayesProbability) for the given features.
let classifyProbabilities p features =
    naiveBayesProbability |> classMap p features
/// Returns the (class, score) pair with the highest naiveBayes score.
/// List.maxBy raises on an empty list, so the model must contain at least one class.
let classify p features =
    let scored = classMap p features naiveBayes
    List.maxBy snd scored
//////////////////
/// Classifier wrapper around the naive Bayes functions above.
/// Holds the model in a mutable field that starts empty.
type NaiveBayesClass () =
    inherit ClassifierAbstract ()

    let mutable probability = Map.empty

    /// Folds `data` (converted with dataAsStr) into the current model, so repeated calls accumulate.
    override this.Train data =
        probability <- trainData probability (dataAsStr data)

    /// Converts the point with strArrFromDPoint and returns the best-scoring (class, score) pair.
    override this.Classify feature =
        feature |> strArrFromDPoint |> classify probability
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment