Created
February 26, 2010 22:47
-
-
Save dakrone/316271 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn score-words
  "Score each word in `words` by its positional distance from `term`.

  Returns a lazy sequence of [word score] vectors, one per element of
  `words`, in the original order. A word at distance d (in positions)
  from the first occurrence of `term` scores basescore / (1 + d); words
  more than 2 positions away score 0. If `term` does not occur in
  `words`, every word scores 0.

  `basescore` is optional and defaults to 1. Matching is case sensitive.
  Distances are computed from each word's own position, so repeated
  words are scored independently."
  ([term words]
   (score-words term words 1))
  ([term words basescore]
   ;; Position of the first occurrence of the term, or nil when absent.
   (let [index (first (keep-indexed (fn [i w] (when (= w term) i)) words))]
     (if (nil? index)
       (map #(vector % 0) words) ; no matches
       (map-indexed
        (fn [idx word]
          ;; Math/abs rather than clojure.core/abs, which only exists in
          ;; Clojure 1.11 and later.
          (let [dist (Math/abs (long (- index idx)))]
            (if (> dist 2)
              (vector word 0)
              (vector word (/ basescore (+ 1 dist))))))
        words)))))
;opennlp.tools.filters=> words
;["bobby" "fire" "truck" "city" "department" "state" "colorado"]
;opennlp.tools.filters=> (score-words "truck" words)
;(["bobby" 1/3] ["fire" 1/2] ["truck" 1] ["city" 1/2] ["department" 1/3] ["state" 0] ["colorado" 0])
;opennlp.tools.filters=> (score-words "truck" words 10)
;(["bobby" 10/3] ["fire" 5] ["truck" 10] ["city" 5] ["department" 10/3] ["state" 0] ["colorado" 0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment