dakrone/foo.clj

## foo.clj

;; Searcher is the protocol for term-based text search. Both methods take the
;; implementing value (`this`), a search `term` and the `text` to search in.
;; The only implementation visible in this file is the one for ContextSearcher.
(defprotocol Searcher
  "An interface for searching"
  (score  [this term text] "Score this text in similarity")
  (rank   [this term text] "Rank sentences in this text"))

;; Record bundling the three NLP components a context search uses. The fields
;; are filled positionally by make-context-searcher with the results of
;; nlp/make-sentence-detector, nlp/make-tokenizer and nlp/make-pos-tagger.
(defrecord ContextSearcher [get-sentences tokenize pos-tag])

; ... lots of implementation details here ...

;; Searcher implementation for ContextSearcher records.
;;
;; The record is destructured by its declared field names (get-sentences,
;; tokenize, pos-tag). Destructuring :tokenizer / :pos-tagger would silently
;; bind nil, because looking up a key that is not a field of the record
;; returns nil instead of failing.
(extend-protocol Searcher ContextSearcher
  ;; Scores `text` against `term`: computes the scored terms for the text and
  ;; passes them on to score-text.
  (score
    [{:keys [get-sentences tokenize pos-tag]} term text]
    (let [words (get-scored-terms text term get-sentences tokenize pos-tag)]
      (score-text text words get-sentences tokenize)))

  ;; Ranks the sentences of `text`: orders the entries returned by
  ;; score-sentences by their second element (presumably the score), highest
  ;; first.
  (rank
    [{:keys [get-sentences tokenize pos-tag]} term text]
    (let [words (get-scored-terms text term get-sentences tokenize pos-tag)]
      ;; Ascending sort-by followed by reverse yields descending order.
      (reverse (sort-by second (score-sentences text words get-sentences tokenize))))))


(defn make-context-searcher
  "Build a ContextSearcher from three model locations: `smodel` for the
  sentence detector, `tmodel` for the tokenizer and `pmodel` for the
  pos-tagger. Each model is turned into its component by the matching
  nlp/make-* constructor."
  [smodel tmodel pmodel]
  ;; Constructor arguments are evaluated left to right, so the components are
  ;; created in the order: sentence detector, tokenizer, pos-tagger.
  (ContextSearcher. (nlp/make-sentence-detector smodel)
                    (nlp/make-tokenizer tmodel)
                    (nlp/make-pos-tagger pmodel)))


;user=> (def cs (make-context-searcher "models/EnglishSD.bin.gz" "models/EnglishTok.bin.gz" "models/tag.bin.gz"))
;#'user/cs
;user=> (score cs "foo" "This is some foo text which has some foo things in it.")
;java.lang.Exception: Unable to resolve symbol: score in this context (NO_SOURCE_FILE:3)

	;; Searcher is the protocol for term-based text search. Both methods take the
	;; implementing value (`this`), a search `term` and the `text` to search in.
	;; The only implementation visible in this file is the one for ContextSearcher.
	(defprotocol Searcher
	"An interface for searching"
	(score [this term text] "Score this text in similarity")
	(rank [this term text] "Rank sentences in this text"))

	;; Record bundling the three NLP components a context search uses. The fields
	;; are filled positionally by make-context-searcher with the results of
	;; nlp/make-sentence-detector, nlp/make-tokenizer and nlp/make-pos-tagger.
	(defrecord ContextSearcher [get-sentences tokenize pos-tag])

	; ... lots of implementation details here ...

	;; Searcher implementation for ContextSearcher records.
	;;
	;; The record is destructured by its declared field names (get-sentences,
	;; tokenize, pos-tag). Destructuring :tokenizer / :pos-tagger would silently
	;; bind nil, because looking up a key that is not a field of the record
	;; returns nil instead of failing.
	(extend-protocol Searcher ContextSearcher
	  ;; Scores `text` against `term`: computes the scored terms for the text and
	  ;; passes them on to score-text.
	  (score
	    [{:keys [get-sentences tokenize pos-tag]} term text]
	    (let [words (get-scored-terms text term get-sentences tokenize pos-tag)]
	      (score-text text words get-sentences tokenize)))

	  ;; Ranks the sentences of `text`: orders the entries returned by
	  ;; score-sentences by their second element (presumably the score), highest
	  ;; first.
	  (rank
	    [{:keys [get-sentences tokenize pos-tag]} term text]
	    (let [words (get-scored-terms text term get-sentences tokenize pos-tag)]
	      ;; Ascending sort-by followed by reverse yields descending order.
	      (reverse (sort-by second (score-sentences text words get-sentences tokenize))))))


	(defn make-context-searcher
	  "Build a ContextSearcher from three model locations: `smodel` for the
	  sentence detector, `tmodel` for the tokenizer and `pmodel` for the
	  pos-tagger. Each model is turned into its component by the matching
	  nlp/make-* constructor."
	  [smodel tmodel pmodel]
	  ;; Constructor arguments are evaluated left to right, so the components are
	  ;; created in the order: sentence detector, tokenizer, pos-tagger.
	  (ContextSearcher. (nlp/make-sentence-detector smodel)
	                    (nlp/make-tokenizer tmodel)
	                    (nlp/make-pos-tagger pmodel)))


	;user=> (def cs (make-context-searcher "models/EnglishSD.bin.gz" "models/EnglishTok.bin.gz" "models/tag.bin.gz"))
	;#'user/cs
	;user=> (score cs "foo" "This is some foo text which has some foo things in it.")
	;java.lang.Exception: Unable to resolve symbol: score in this context (NO_SOURCE_FILE:3)