Created
December 14, 2018 12:03
-
-
Save malisetti/9a173e4b87a0341deac227ca6ce57a19 to your computer and use it in GitHub Desktop.
document distance using vector space model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"math" | |
"strings" | |
) | |
// document distance using vector dot product | |
func main() { | |
// make map of words with occurances | |
// use . product on these maps | |
// use the angle as the basis of comparision | |
str1 := "hello world cat hello hello hello world cat" | |
str2 := "hello world cat hello hello world cat hello" | |
wc1 := countWords(str1) | |
wc2 := countWords(str2) | |
var val1 float64 | |
var div1 float64 | |
var div2 float64 | |
for w, p1 := range wc1 { | |
div1 += p1 * p1 | |
if p2, ok := wc2[w]; ok { | |
val1 += float64(p1 * p2) | |
div2 += p2 * p2 | |
} | |
} | |
val1 = val1 / (math.Sqrt(div1) * math.Sqrt(div2)) | |
fmt.Println(val1) | |
fmt.Println(57.296, math.Acos(val1), "degrees") | |
} | |
// countWords splits str on Unicode whitespace and returns a map from each
// distinct word to its accumulated weight. Occurrences are not counted
// uniformly: the word at zero-based index i in the word sequence contributes
// i+1, so later occurrences weigh more and word order affects the result.
// An input with no words yields an empty, non-nil map.
func countWords(str string) (wordCount map[string]float64) {
	wordCount = make(map[string]float64)
	scanner := bufio.NewScanner(strings.NewReader(str))
	// By default a Scanner gives up with ErrTooLong on tokens larger than
	// bufio.MaxScanTokenSize, which would silently drop that word and every
	// word after it. No token can be longer than the input itself, so raising
	// the limit to len(str)+1 makes the scan of an in-memory string infallible.
	scanner.Buffer(nil, len(str)+1)
	scanner.Split(bufio.ScanWords)
	for position := 0; scanner.Scan(); position++ {
		wordCount[scanner.Text()] += float64(position + 1)
	}
	return wordCount
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment