Skip to content

Instantly share code, notes, and snippets.

@dakrone
Created May 19, 2010 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dakrone/405912 to your computer and use it in GitHub Desktop.
Save dakrone/405912 to your computer and use it in GitHub Desktop.
(ns clomoios.seededcontextsearcher
(:use [clomoios.core :as core])
(:require [opennlp.nlp :as nlp]))
(defprotocol SeededSearcher
"An interface for searching using seeded text"
(add-seed [this seedtext] "Add seed text to this searcher")
(add-score-words [this words] "Add score words to this searcher")
(score-words [this term] "Get the computer score words for a given term"
[this term text] "Get the computed score words for a given term and text")
(score [this term text] "Score this text in similarity")
(rank [this term text] "Rank sentences in this text"))
(defrecord SeededContextSearcher [seeded-score-words seeded-text get-sentences tokenize pos-tag])
(extend-protocol SeededSearcher SeededContextSearcher
(add-seed
[this seedtext]
(let [get-sentences (:get-sentences this)
tokenizer (:tokenize this)
pos-tagger (:pos-tag this)
seeded-text (:seeded-text this)]
(swap! seeded-text concat [seedtext])))
(add-score-words
[this words]
(let [seeded-score-words (:seeded-score-words this)]
(swap! seeded-score-words merge words)))
(score-words
[this term]
(get-terms this term))
(score-words
[this term text]
(get-terms this term text))
(score
[this term text]
(let [get-sentences (:get-sentences this)
tokenizer (:tokenize this)]
(core/score-text text (get-terms this term text) get-sentences tokenizer)))
(rank
[this term text]
(let [get-sentences (:get-sentences this)
tokenizer (:tokenize this)]
(reverse (sort-by second (core/score-sentences text (get-terms this term text) get-sentences tokenizer))))))
;user=> (use 'clomoios.seededcontextsearcher)
;nil
;user=> (def scs (make-seeded-context-searcher "models/EnglishSD.bin.gz" "models/EnglishTok.bin.gz" "models/tag.bin.gz"))
;#'user/scs
;user=> scs
;#:clomoios.seededcontextsearcher.SeededContextSearcher{:seeded-score-words #<Atom@7b99f8e6: {}>, :seeded-text #<Atom@52fc9d2b: []>, :get-sentences #<nlp$make_sentence_detector__480$sentenizer__481 opennlp.nlp$make_sentence_detector__480$sentenizer__481@685f1ba8>, :tokenize #<nlp$make_tokenizer__484$tokenizer__485 opennlp.nlp$make_tokenizer__484$tokenizer__485@79f7abae>, :pos-tag #<nlp$make_pos_tagger__490$pos_tagger__491 opennlp.nlp$make_pos_tagger__490$pos_tagger__491@628d2280>}
;user=> (score-words scs "test")
;java.lang.IllegalArgumentException: Wrong number of args passed to: seededcontextsearcher$eval--699$fn (NO_SOURCE_FILE:0)
;user=> (score-words scs "test" "test")
;{"test" 1}
;user=> (score-words scs "test")
;java.lang.IllegalArgumentException: Wrong number of args passed to: seededcontextsearcher$eval--699$fn (NO_SOURCE_FILE:0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment