Skip to content

Instantly share code, notes, and snippets.

@nebuta
Created November 8, 2011 07:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nebuta/1347254 to your computer and use it in GitHub Desktop.
Save nebuta/1347254 to your computer and use it in GitHub Desktop.
Make dictionary data with lower resolution (=smaller size)
# Rescales every vector in +v+ so that its Euclidean (L2) length equals +norm+.
#
# @param v [Hash{Object => Array<Numeric>}] vectors keyed by name
# @param norm [Numeric] target length of every output vector
# @return [Hash{Object => Array<Float>}] a new hash with the same keys; +v+ is
#   not modified
def normalize(v, norm)
  v.each_with_object({}) do |(key, values), scaled|
    length = Math.sqrt(values.inject(0) { |sum, e| sum + e * e })
    scaled[key] =
      if length.zero?
        # An all-zero vector has no direction; dividing by its zero length
        # would fill the output with NaN, so keep it as zeros instead.
        values.map { 0.0 }
      else
        values.map { |e| e.to_f * norm / length }
      end
  end
end
# Parses tab-separated text rows into one flat, row-major array of floats.
#
# Row k of +lines+ is stored starting at index k * 256.
#
# @param lines [Array<String>] text lines, each holding tab-separated numbers
# @return [Array<Float, nil>] array pre-sized to 65536 slots; slots that no
#   row fills stay nil
def parse(lines)
  arr = Array.new(65_536)
  lines.each_with_index do |line, row_index|
    # The separator must be a real tab escape. The original literal used a yen
    # sign (U+00A5) in place of the backslash, so rows were never split.
    row = line.chomp.split("\t").first(256).map { |e| e.to_f }
    # Replace exactly row.size slots so a short row cannot shrink the array
    # and shift every later row out of position.
    arr[row_index * 256, row.size] = row
  end
  arr
end
# Writes one table of +v+ to +out+ as +dim+ lines of +dim+ tab-separated
# values, each formatted with one decimal place.
#
# @param v [Hash{Object => Array<Numeric>}] tables stored as flat row-major arrays
# @param out [#puts] destination stream
# @param key [Object] which table of +v+ to write
# @param dim [Integer] number of rows, and of values per row
def print_vector(v, out, key, dim)
  # puts "Normalized sum:#{key}: " + $vector[key].inject(0){|sum,e| sum += e}.to_s
  table = v[key]
  (0..(dim - 1)).each do |i|
    row = table[i * dim, dim]
    # Join with a real tab; the original literal contained a yen sign (U+00A5)
    # instead of a backslash and emitted those two characters verbatim.
    out.puts(row.map { |e| '%.1f' % e }.join("\t"))
  end
end
# Builds a lower-resolution copy of every table in the global $vector.
#
# Each table is indexed as a 256x256 row-major grid (element i * 256 + j).
# The grid is divided into size x size squares and every element of a square
# is replaced by the root mean square of the original values in that square.
#
# @param size [Integer] edge length of one square; must be a divisor of 256
# @return [Hash{Object => Array<Float>}, nil] coarsened tables keyed like
#   $vector, or nil when +size+ is not one of the accepted divisors
def coarsevectors(size)
  # "size" should be a divisor of 256
  return nil unless [1, 2, 4, 8, 16, 32, 64, 128, 256].include?(size)

  squares_per_side = 256 / size
  # Float area so the mean is never computed with truncating integer division
  # when a table happens to hold Integers.
  area = (size * size).to_f

  $vector.each_with_object({}) do |(key, source), coarse|
    target = Array.new(256 * 256)
    squares_per_side.times do |m|
      rows = (m * size)..(m * size + size - 1)
      squares_per_side.times do |n|
        cols = (n * size)..(n * size + size - 1)

        sum_sq = 0.0
        rows.each do |i|
          cols.each do |j|
            e = source[i * 256 + j]
            sum_sq += e * e
          end
        end

        rms = Math.sqrt(sum_sq / area)
        rows.each do |i|
          cols.each { |j| target[i * 256 + j] = rms }
        end
      end
    end
    coarse[key] = target
  end
end
# Loads the five source tables into the global $vector, then, for each
# coarsening factor, writes a coarsened and renormalized copy of every table
# to "sub_<key>_c<factor>.txt" in the current directory.
def main
  $vector = {}
  {
    ascii: 'vector_ascii.txt',
    utf8: 'sub_utf8.txt',
    eucjp: 'sub_eucjp.txt',
    iso: 'sub_iso.txt',
    shiftjis: 'sub_shiftjis.txt'
  }.each do |key, path|
    $vector[key] = parse(IO.readlines(path))
  end

  [1, 2, 4, 8, 16, 32, 64, 128].each do |c|
    puts "coarse: #{c}"
    v = normalize(coarsevectors(c), 65536)
    v.each_key do |key|
      # Block form guarantees the file is closed even if print_vector raises.
      File.open("sub_#{key}_c#{c}.txt", 'w') do |out|
        print_vector(v, out, key, 256)
      end
    end
  end
end
# Script entry point: invoke main as soon as this file is executed.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment