Created
May 19, 2010 03:22
-
-
Save dakrone/405912 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns clomoios.seededcontextsearcher | |
(:use [clomoios.core :as core]) | |
(:require [opennlp.nlp :as nlp])) | |
(defprotocol SeededSearcher | |
"An interface for searching using seeded text" | |
(add-seed [this seedtext] "Add seed text to this searcher") | |
(add-score-words [this words] "Add score words to this searcher") | |
(score-words [this term] "Get the computer score words for a given term" | |
[this term text] "Get the computed score words for a given term and text") | |
(score [this term text] "Score this text in similarity") | |
(rank [this term text] "Rank sentences in this text")) | |
(defrecord SeededContextSearcher [seeded-score-words seeded-text get-sentences tokenize pos-tag]) | |
(extend-protocol SeededSearcher SeededContextSearcher | |
(add-seed | |
[this seedtext] | |
(let [get-sentences (:get-sentences this) | |
tokenizer (:tokenize this) | |
pos-tagger (:pos-tag this) | |
seeded-text (:seeded-text this)] | |
(swap! seeded-text concat [seedtext]))) | |
(add-score-words | |
[this words] | |
(let [seeded-score-words (:seeded-score-words this)] | |
(swap! seeded-score-words merge words))) | |
(score-words | |
[this term] | |
(get-terms this term)) | |
(score-words | |
[this term text] | |
(get-terms this term text)) | |
(score | |
[this term text] | |
(let [get-sentences (:get-sentences this) | |
tokenizer (:tokenize this)] | |
(core/score-text text (get-terms this term text) get-sentences tokenizer))) | |
(rank | |
[this term text] | |
(let [get-sentences (:get-sentences this) | |
tokenizer (:tokenize this)] | |
(reverse (sort-by second (core/score-sentences text (get-terms this term text) get-sentences tokenizer)))))) | |
;user=> (use 'clomoios.seededcontextsearcher) | |
;nil | |
;user=> (def scs (make-seeded-context-searcher "models/EnglishSD.bin.gz" "models/EnglishTok.bin.gz" "models/tag.bin.gz")) | |
;#'user/scs | |
;user=> scs | |
;#:clomoios.seededcontextsearcher.SeededContextSearcher{:seeded-score-words #<Atom@7b99f8e6: {}>, :seeded-text #<Atom@52fc9d2b: []>, :get-sentences #<nlp$make_sentence_detector__480$sentenizer__481 opennlp.nlp$make_sentence_detector__480$sentenizer__481@685f1ba8>, :tokenize #<nlp$make_tokenizer__484$tokenizer__485 opennlp.nlp$make_tokenizer__484$tokenizer__485@79f7abae>, :pos-tag #<nlp$make_pos_tagger__490$pos_tagger__491 opennlp.nlp$make_pos_tagger__490$pos_tagger__491@628d2280>} | |
;user=> (score-words scs "test") | |
;java.lang.IllegalArgumentException: Wrong number of args passed to: seededcontextsearcher$eval--699$fn (NO_SOURCE_FILE:0) | |
;user=> (score-words scs "test" "test") | |
;{"test" 1} | |
;user=> (score-words scs "test") | |
;java.lang.IllegalArgumentException: Wrong number of args passed to: seededcontextsearcher$eval--699$fn (NO_SOURCE_FILE:0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment