Skip to content

Instantly share code, notes, and snippets.

@thejefflarson
Created October 17, 2012 19:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thejefflarson/3907552 to your computer and use it in GitHub Desktop.
Save thejefflarson/3907552 to your computer and use it in GitHub Desktop.
# A search query that selects mailings through the Message corpus index and
# ranks them with a TF-IDF style score.
#
# +tokenize+ and +keys+ are not defined in this class; presumably they come
# from the Frequency mixin -- confirm against that module.
class Query
  include Frequency

  attr_accessor :freq

  # @param query [String] the raw search text
  def initialize(query)
    @query  = query
    @tokens = tokenize
  end

  # Returns the query text unchanged.
  # NOTE(review): looks like a hook read by Frequency#tokenize -- confirm.
  def clean_text
    @query
  end

  # Intersection of the corpus index entries of all query tokens, flattened
  # and de-duplicated. Memoized in @ids; used by #docs as the list of Mailing
  # ids to load.
  def ids
    @ids ||= @tokens.reduce(Message.corpus.index(@tokens.first)) do |memo, token|
      memo & Message.corpus.index(token)
    end.flatten.uniq
  end

  # Loads one page (10 per page) of the mailings matching #ids and returns
  # them ordered by descending normalised score.
  #
  # The page is selected by the database (ordered by first_seen DESC) before
  # scoring, so the ranking only reorders mailings within that page.
  #
  # @param page [Integer, String, nil] page number passed through to paginate
  # @return [Array] the mailings of the page, best score first
  def docs(page)
    mailings = Mailing.limited.
      paginate(:page => page, :per_page => 10).
      includes(:campaign).
      order('first_seen DESC').
      where('id in (?)', ids)

    doc_count = Message.doc_length
    # Depends only on the query, so it is computed once -- lazily, on the first
    # mailing, so an empty page never evaluates it.
    # NOTE(review): this is the highest frequency among the *query's* keys, not
    # the mailing's; max-TF normalisation usually uses the document -- confirm.
    max_word = nil

    scored = mailings.map do |mailing|
      max_word ||= keys.map { |k| freq[k] }.max

      score = @tokens.reduce(0.0) do |memo, tok|
        count = mailing.freq[tok]
        next memo if count.nil? # token absent from this mailing

        idf = Math.log(doc_count / (1.0 + Message.corpus.docs(tok)))
        # to_f: with Integer counts, count / max_word would truncate and wipe
        # out the term-frequency factor.
        memo + (count.to_f / max_word) * idf
      end

      { :score => score, :mailing => mailing }
    end

    # Normalise by (number of keys * sum of all frequencies) of the mailing and
    # negate so that sort_by's ascending order puts the best score first.
    ranked = scored.sort_by do |entry|
      mailing = entry[:mailing]
      -(entry[:score] / (mailing.keys.length * mailing.freq.values.reduce(&:+)))
    end

    ranked.map { |entry| entry[:mailing] }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment