Skip to content

Instantly share code, notes, and snippets.

@redwrasse
Created July 8, 2017 04:59
Show Gist options
  • Save redwrasse/8b4fc708948330e0bd8b4420ade64548 to your computer and use it in GitHub Desktop.
Save redwrasse/8b4fc708948330e0bd8b4420ade64548 to your computer and use it in GitHub Desktop.
Naive Bayes v1
package main
import (
"fmt"
"strings"
"crypto/sha1"
"encoding/base64"
)
// Document is a single labelled training example: a piece of raw text
// together with the label of the class it belongs to.
type Document struct {
	// Text is the raw document text; GetClassWordCounts tokenizes it on
	// whitespace.
	Text string

	// ClassLabel names the class this document is an example of.
	ClassLabel string
}

// GetClassWordCounts tokenizes d.Text and returns how many times each
// distinct word occurs in it.
//
// Words are separated by any run of whitespace (strings.Fields), so leading,
// trailing, or repeated spaces never produce an empty-string "word". An empty
// or all-whitespace Text yields an empty, non-nil map. Matching is
// case-sensitive and punctuation stays attached to the word it touches.
func (d *Document) GetClassWordCounts() map[string]int {
	words := strings.Fields(d.Text)
	counts := make(map[string]int, len(words))
	for _, w := range words {
		counts[w]++
	}
	return counts
}
// ClassWordHash returns the map key used to store statistics for the pair
// (classLabel, word): the URL-safe base64 encoding of a SHA-1 digest, which is
// always 28 characters long.
//
// The class label is prefixed with its byte length before hashing so the
// boundary between label and word is unambiguous. Hashing the bare
// concatenation would give ("ab", "c") and ("a", "bc") the same key and
// silently merge their counts.
func ClassWordHash(classLabel string,
	word string) string {
	h := sha1.New()
	// hash.Hash.Write is documented never to return an error, so the result
	// is intentionally not checked.
	h.Write([]byte(fmt.Sprintf("%d:%s%s", len(classLabel), classLabel, word)))
	return base64.URLEncoding.EncodeToString(h.Sum(nil))
}
// NaiveBayes holds the running counts and the probabilities derived from them
// for a word-count based text classifier. Documents are fed in with Add and
// classes are scored with GetClassProbs.
type NaiveBayes struct {
	// ClassLabels is the set of class labels seen so far.
	ClassLabels map[string]bool

	// ClassWordCounts holds accumulated word occurrence counts. Add stores
	// them under ClassWordHash(classLabel, word) keys.
	ClassWordCounts map[string]int

	// ClassWordProbs has the same keys as ClassWordCounts; UpdateProbs sets
	// each value to that count divided by the sum of all counts.
	ClassWordProbs map[string]float64

	// ClassCounts is the number of documents added per class label.
	ClassCounts map[string]int

	// ClassProbs is each ClassCounts value divided by the total number of
	// documents, as computed by UpdateProbs.
	ClassProbs map[string]float64
}

// UpdateProbs recomputes ClassWordProbs from ClassWordCounts and ClassProbs
// from ClassCounts. Call it after the counts have changed.
//
// Nil probability maps are allocated on demand, so it is safe to call on a
// zero-value NaiveBayes. When a count table sums to zero its probabilities are
// set to 0 instead of NaN.
//
// NOTE(review): word probabilities are normalized by the total over every
// class/word pair, not per class, so they are joint relative frequencies
// rather than conditionals P(word | class) -- confirm this is intended.
func (nv *NaiveBayes) UpdateProbs() {
	if nv.ClassWordProbs == nil {
		nv.ClassWordProbs = make(map[string]float64, len(nv.ClassWordCounts))
	}
	if nv.ClassProbs == nil {
		nv.ClassProbs = make(map[string]float64, len(nv.ClassCounts))
	}
	normalizeCounts(nv.ClassWordCounts, nv.ClassWordProbs)
	normalizeCounts(nv.ClassCounts, nv.ClassProbs)
}

// normalizeCounts stores counts[k] / sum(counts) in probs for every key k of
// counts. probs must be non-nil; keys of probs that are absent from counts are
// left untouched.
func normalizeCounts(counts map[string]int, probs map[string]float64) {
	total := 0
	for _, c := range counts {
		total += c
	}
	for k, c := range counts {
		if total == 0 {
			// Avoid 0/0 = NaN, which would poison every later product.
			probs[k] = 0
			continue
		}
		probs[k] = float64(c) / float64(total)
	}
}
// Add folds one training document into the model: it bumps the document count
// of d.ClassLabel, records the label, adds the document's word counts to
// ClassWordCounts under ClassWordHash(d.ClassLabel, word) keys, and then
// refreshes every probability via UpdateProbs.
//
// Map fields that are still nil are allocated first, so Add works on a
// zero-value NaiveBayes instead of panicking on a nil-map write.
func (nv *NaiveBayes) Add(d Document) {
	nv.ensureMaps()

	nv.ClassCounts[d.ClassLabel]++
	nv.ClassLabels[d.ClassLabel] = true
	for word, n := range d.GetClassWordCounts() {
		nv.ClassWordCounts[ClassWordHash(d.ClassLabel, word)] += n
	}

	// Probabilities are recomputed after every document so the model is
	// always ready to be queried.
	nv.UpdateProbs()
}

// ensureMaps allocates any map field of nv that is still nil, leaving maps
// that already exist (and their contents) untouched.
func (nv *NaiveBayes) ensureMaps() {
	if nv.ClassLabels == nil {
		nv.ClassLabels = make(map[string]bool)
	}
	if nv.ClassWordCounts == nil {
		nv.ClassWordCounts = make(map[string]int)
	}
	if nv.ClassWordProbs == nil {
		nv.ClassWordProbs = make(map[string]float64)
	}
	if nv.ClassCounts == nil {
		nv.ClassCounts = make(map[string]int)
	}
	if nv.ClassProbs == nil {
		nv.ClassProbs = make(map[string]float64)
	}
}
// GetClassProbs scores every label in ClassLabels against words and returns
// the scores normalized to sum to 1.
//
// A class's unnormalized score is ClassProbs[class] multiplied by
// ClassWordProbs[ClassWordHash(class, word)] for each word. A class/word pair
// with no stored probability reads as 0 and therefore zeroes that class. If
// every class ends up at zero, the returned map holds 0 for each label.
//
// The scores are renormalized after each word instead of once at the end.
// The result is mathematically the same, but the running product can no
// longer underflow to 0 for all classes on long word lists.
func (nv *NaiveBayes) GetClassProbs(words []string) map[string]float64 {
	scores := make(map[string]float64, len(nv.ClassLabels))
	for cl := range nv.ClassLabels {
		scores[cl] = nv.ClassProbs[cl]
	}

	// normalize rescales scores to sum to 1 and reports whether that was
	// possible; it is not when every score is zero.
	normalize := func() bool {
		total := 0.0
		for _, s := range scores {
			total += s
		}
		if total == 0 {
			return false
		}
		for cl := range scores {
			scores[cl] /= total
		}
		return true
	}

	if !normalize() {
		return scores
	}
	for _, word := range words {
		for cl := range scores {
			scores[cl] *= nv.ClassWordProbs[ClassWordHash(cl, word)]
		}
		if !normalize() {
			// No class has seen this combination of words: all scores are 0.
			return scores
		}
	}
	return scores
}
// main is a small demonstration: it trains a classifier on 100 copies of a
// "foo" document and 10 copies of a "law" document, then prints the class
// probabilities for the words "Bar" and "I".
func main() {
	model := &NaiveBayes{
		ClassLabels:     make(map[string]bool),
		ClassWordCounts: make(map[string]int),
		ClassCounts:     make(map[string]int),
		ClassProbs:      make(map[string]float64),
		ClassWordProbs:  make(map[string]float64),
	}

	// Each sample document is added `copies` times, in the order listed.
	training := []struct {
		doc    Document
		copies int
	}{
		{Document{Text: "Foo Bar I", ClassLabel: "foo"}, 100},
		{Document{Text: "I Took The Bar Exam", ClassLabel: "law"}, 10},
	}
	for _, sample := range training {
		for remaining := sample.copies; remaining > 0; remaining-- {
			model.Add(sample.doc)
		}
	}

	query := []string{"Bar", "I"}
	for label, p := range model.GetClassProbs(query) {
		fmt.Printf("(Words: %v) Class label: %s Prob: %v\n", query, label, p)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment