Skip to content

Instantly share code, notes, and snippets.

@nebuta
Created November 8, 2011 07:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nebuta/1347248 to your computer and use it in GitHub Desktop.
Save nebuta/1347248 to your computer and use it in GitHub Desktop.
Make dictionary data after subtraction of ASCII
#subtract.rb
# Rescales every vector stored in the global $vector hash, in place, so that
# its Euclidean (L2) length becomes 65536.
#
# For each key the sum of squares is printed with `p` (kept as diagnostic
# output), then every element is converted to a Float and multiplied by
# 65536 / sqrt(sum of squares).
#
# A vector whose sum of squares is zero cannot be scaled: dividing by a zero
# length would turn every entry into NaN. Such a vector is only converted to
# floats and otherwise left as all zeros.
#
# Returns the $vector hash (the result of each_key).
def normalize
  norm = 65536
  $vector.each_key do |key|
    # Plain left-to-right accumulation, so results match the historical output.
    sqsum = $vector[key].inject(0) { |sum, e| sum + e * e }
    p sqsum
    factor = Math.sqrt(sqsum)
    if factor.zero?
      # Nothing to scale; avoid 0.0 / 0.0 => NaN.
      $vector[key].map! { |e| e.to_f }
    else
      $vector[key].map! { |e| e.to_f * norm / factor }
    end
  end
end
# Converts the lines of a tab-separated 256x256 table into one flat array of
# 65536 floats, laid out row after row (cell [row][col] is at row * 256 + col).
#
# lines - an enumerable of strings, one table row per line; each cell is
#         converted with String#to_f.
#
# The result always has exactly 65536 entries:
# * a row with fewer than 256 cells (including trailing empty cells, which
#   String#split discards) is padded with 0.0 instead of shrinking the array
#   and shifting every following row;
# * cells beyond the 256th column and lines beyond the 256th row are ignored;
# * rows that are missing entirely are left as 0.0.
#
# Returns the flat Array of Floats.
def parse(lines)
  dim = 256
  arr = Array.new(dim * dim, 0.0)
  lines.first(dim).each_with_index do |line, row|
    cells = line.chomp.split("\t").first(dim).map { |e| e.to_f }
    # Overwrite exactly as many slots as there are cells so the array never
    # changes size and later rows stay aligned.
    arr[row * dim, cells.length] = cells
  end
  arr
end
# Reports and saves every vector held in the global $vector hash.
#
# For each key it prints the sum of all elements to standard output, then
# writes the first 65536 elements to "sub_<key>.txt" in the current directory
# as 256 lines of 256 tab-separated values formatted with one decimal place.
#
# Returns the $vector hash.
def print_vector
  $vector.each do |key, values|
    total = values.inject(0) { |acc, e| acc + e }
    puts "Normalized sum:#{key}: " + total.to_s
    File.open("sub_" + key.to_s + '.txt', 'w') do |out|
      256.times do |row_index|
        cells = values[row_index * 256, 256]
        formatted = cells.map { |e| "%.1f" % e }
        out.puts(formatted.join("\t"))
      end
    end
  end
end
# Runs the whole pipeline:
# 1. parses the ASCII table from "vector_ascii.txt" into $vectorascii and the
#    four encoding tables into the global $vector hash;
# 2. in every encoding table, replaces with 0 each entry whose corresponding
#    ASCII entry is not 0, and keeps the rest unchanged;
# 3. normalizes the tables and writes them out via normalize / print_vector.
def main
  $vector = {}
  $vectorascii = parse(IO.readlines("vector_ascii.txt"))

  sources = [
    [:utf8, "vector_utf8.txt"],
    [:eucjp, "vector_eucjp.txt"],
    [:iso, "vector_iso.txt"],
    [:shiftjis, "vector_shiftjis.txt"]
  ]
  sources.each do |encoding, path|
    $vector[encoding] = parse(IO.readlines(path))
  end

  $vector.each_key do |encoding|
    paired = $vector[encoding].zip($vectorascii)
    # Mask out every position that the ASCII table already occupies.
    $vector[encoding] = paired.map { |value, ascii| ascii != 0 ? 0 : value }
  end

  normalize
  print_vector
end
# Script entry point: run the full pipeline as soon as this file is loaded.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment