infused (owner)

Revisions

gist: 117022 Download_button fork
public
Public Clone URL: git://gist.github.com/117022.git
Embed All Files: show embed
soundex2.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Soundex.rb
#
# Implementation of the soundex algorithm as described by Knuth
# in volume 3 of The Art of Computer Programming
#
# author: Michael Neumann (neumann@s-direktnet.de)
# version: 1.0
# date: 26.07.2000
# license: GNU GPL
#
 
 
module Text
module Soundex

  #
  # Computes the soundex code of a String, or of every element of an Array.
  #
  # str_or_arr:: a String, or an Array (elements are encoded recursively,
  #              so non-String elements yield nil instead of raising)
  #
  # Returns a 4-character code such as "K530" for a String, an Array of
  # codes (or nils) for an Array, and nil for any other kind of argument.
  #
  def soundex(str_or_arr)
    case str_or_arr
    when String
      soundex_str(str_or_arr)
    when Array
      str_or_arr.map { |ele| soundex(ele) }
    end
  end
  module_function :soundex

  private

  #
  # Computes the soundex code of a single String.
  #
  # The argument itself is never modified; all work happens on an upcased copy.
  #
  # Returns nil if the value cannot be calculated: the string is empty, its
  # first character is not a letter known to get_code, or an unknown character
  # is met before the code is complete. Characters that follow the point where
  # the code has reached four positions are not examined.
  #
  # Letters with code "0" (vowels plus Y, W and H) separate equal codes, so
  # e.g. "Ashcraft" encodes as "A226".
  #
  def soundex_str(str)
    return nil if str.empty?

    str = str.upcase
    initial = str[0, 1]
    last_code = get_code(initial)
    # The leading character must be a known letter, like every other one.
    return nil unless last_code

    soundex_code = initial
    str[1..-1].each_char do |char|
      break if soundex_code.size == 4

      code = get_code(char)
      return nil unless code

      if code == "0"
        # A separator: the next consonant is recorded even if it repeats.
        last_code = nil
      elsif code != last_code
        soundex_code << code
        last_code = code
      end
    end

    # Pad short codes with zeros up to the fixed length of four.
    soundex_code.ljust(4, "0")
  end
  module_function :soundex_str

  #
  # Translates uppercase letters to their soundex digits:
  # AEIOUYWH => 0, BPFV => 1, CSKGJQXZ => 2, DT => 3, L => 4, MN => 5, R => 6.
  #
  # Returns the translated string, or nil when nothing could be translated
  # (e.g. a single character that is not an uppercase A-Z letter).
  # The argument is left untouched, so frozen strings are accepted.
  #
  def get_code(char)
    translated = char.tr("AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556")
    translated unless translated == char
  end
  module_function :get_code

end # module Soundex
end # module Text
 
 
 
 
 
 
#
# test-program
#
# Self-test: runs only when this file is executed directly. Encodes the
# sample names below, compares each result with the expected code, prints a
# per-name verdict and a final summary.
if __FILE__ == $0
  testvec = %w[Euler Ellery Gauss Ghosh Hilbert Heilbronn Knuth Kant Lloyd Ladd Lukasiewicz Lissajous]
  resvec = %w[E460 E460 G200 G200 H416 H416 K530 K530 L300 L300 L222 L222]

  wrong = 0

  testvec.zip(resvec).each do |str, expected|
    res = Text::Soundex.soundex(str)
    print "#{str.ljust(11)} => #{res} ... "
    if res == expected
      puts "ok"
    else
      wrong += 1
      puts "failed, had to be #{expected}"
    end
  end
  puts "summary: #{wrong} of #{testvec.size} tests failed!"

  # Wait for Enter so a console window stays open. Unlike Kernel#readline,
  # $stdin.gets returns nil at end of input instead of raising EOFError and
  # never tries to read files named on the command line.
  $stdin.gets
end