edbond (owner)

Revisions

  • a10241 edbond Thu Apr 09 02:45:16 -0700 2009
  • 2c86ee edbond Thu Apr 09 02:26:41 -0700 2009
  • 12d2e0 edbond Tue Apr 07 07:06:07 -0700 2009
  • 501a8b edbond Tue Apr 07 06:30:09 -0700 2009
gist: 91234 Download_button fork
public
Public Clone URL: git://gist.github.com/91234.git
Embed All Files: show embed
query.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/ruby
require 'rubygems'
require 'activesupport'
require 'couchrest'
 
$KCODE='u'
require 'jcode'
 
# Width of the n-grams used for lookup; also selects the database name below.
NGRAM_SIZE=3
@db = CouchRest.database("http://localhost:5984/geo_#{NGRAM_SIZE}")

# Maps each candidate (the "value" of a view row) to its score.
# The Float default lets scores be accumulated and later divided in place.
terms=Hash.new(0.0)

# Query word comes from the first command-line argument, with a built-in
# default. mb_chars makes size/slice operate on characters rather than bytes.
word = (ARGV[0] || 'кришатик').mb_chars
puts word.to_s.inspect

# Slide a window of NGRAM_SIZE characters over the word and query the
# 'z/trgm' view for each window. Every returned row adds one point to the
# candidate held in its "value". A word shorter than NGRAM_SIZE produces an
# empty range, so no queries are made.
(0..word.size-NGRAM_SIZE).each do |i|
  ngram = word.slice(i, NGRAM_SIZE)
  rows = @db.view('z/trgm', :key => ngram)['rows']
  rows.each { |row| terms[row["value"]] += 1 }
end

# normalize by length: divide each score by the absolute difference in
# character count between the candidate and the query word.
# NOTE(review): candidates of identical length are left unscaled (to avoid
# dividing by zero), so they score the same as a length difference of 1.
terms.keys.each do |candidate|
  diff = (candidate.mb_chars.size - word.size).abs
  next if diff.zero?

  terms[candidate] /= diff.to_f
end

# Sort once, ascending by score, so the best candidates are at the end.
ranked = terms.sort_by { |pair| pair[1] }

# Up to four best candidates (fewer when fewer matched), then the single best.
puts ranked.last(4).inspect
puts ranked.last.inspect
 
trigrams #
1
2
3
4
5
6
7
8
9
10
11
12
13
// output
// "abc" -> ["cabc", "abck", "jabcr"]
 
// map
function(doc) {
  var l=doc.title.length;
  for(var i=0; i<(l-2); i++) {
    var s=doc.title.substr(i,3);
    emit(s, doc.title);
  };
}
 
// reduce?