Created
April 16, 2011 06:25
MySQL 5.5.11 unicode_ci で同一視される文字
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/ruby
# -*- coding: utf-8 -*-
#
# Reads MySQL's `ctype-uca.c' and prints an HTML page listing the groups of
# code points whose weight sequences in that file are identical, i.e. the
# characters the page title describes as treated as the same by unicode_ci.
#
# Usage: ruby <this script> path/to/ctype-uca.c > result.html

require 'cgi'

# Matches the `uca_length' table; capture 1 is the comma-separated body.
UCA_LENGTH_PATTERN = /^uchar uca_length\[256\]=\{\n(.*?)\};$/m

# Matches one `pageXXdata' table; capture 1 is the hexadecimal page number and
# capture 2 is the comma-separated body (the rest of the opening line is
# skipped).
UCA_PAGE_PATTERN = /^uint16 page([0-9A-F]+)data\[\]= \{.*?\n(.*?)\};$/m

# Table entry that is dropped before weight sequences are compared.
EMPTY_WEIGHT = '0x0000'

PAGE_TITLE = 'MySQL 5.5.11 unicode_ci で同一視される文字'

HTML_HEADER = <<EOS
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<style type="text/css">
body {font-family: monospace}
table {border-collapse: collapse; empty-cells: show}
th,td {border: 1px solid}
</style>
<title>#{PAGE_TITLE}</title>
</head>
<body>
<h1>#{PAGE_TITLE}</h1>
EOS

HTML_FOOTER = <<EOS
</body>
</html>
EOS

# Splits the body of a C array initializer into its individual entries,
# ignoring all whitespace.
def split_entries(body)
  body.gsub(/\s/, '').split(/,/)
end

# Extracts the `uca_length' table from the C source as an array of integers.
# The value at index `page' is used by the caller as the number of table
# entries each character occupies in that page's data.
#
# Raises RuntimeError ('uca_length not found') when the table is absent.
def parse_uca_length(source)
  match = UCA_LENGTH_PATTERN.match(source)
  raise 'uca_length not found' unless match

  split_entries(match[1]).map(&:to_i)
end

# Builds a hash that maps each distinct weight sequence (joined with ',') to
# the list of code points carrying it, in the order they appear in the source.
# Characters whose entries are all EMPTY_WEIGHT are left out.
def group_codes_by_weight(source)
  uca_length = parse_uca_length(source)
  groups = Hash.new { |hash, key| hash[key] = [] }

  source.scan(UCA_PAGE_PATTERN) do |page_hex, body|
    page = page_hex.hex
    entries_per_char = uca_length[page]

    split_entries(body).each_slice(entries_per_char).each_with_index do |entries, offset|
      weights = entries.reject { |entry| entry == EMPTY_WEIGHT }
      next if weights.empty?

      # Each page covers 256 consecutive code points.
      groups[weights.join(',')].push(page * 256 + offset)
    end
  end

  groups
end

# Returns the UTF-8 string for a code point. Array#pack is used instead of
# eval-ing a generated "\uXXXX" literal: it runs no generated code and is not
# limited to four hex digits.
def codepoint_to_utf8(code)
  [code].pack('U')
end

# Renders one two-row HTML table (code points above, characters below) for a
# list of code points, followed by a line break. Every line is
# newline-terminated.
def equivalence_table(code_list)
  code_cells = code_list.map { |code| format('<td>%04X</td>', code) }.join
  char_cells = code_list.map { |code| "<td>#{CGI.escapeHTML(codepoint_to_utf8(code))}</td>" }.join

  [
    '<table>',
    '<tr><th>Unicode</th>', code_cells, '</tr>',
    '<tr><th>Character</th>', char_cells, '</tr>',
    '</table><br>'
  ].join("\n") + "\n"
end

# Renders the whole HTML document: one table per weight sequence shared by
# more than one code point, ordered by the code point lists themselves.
def render_report(groups)
  tables = groups.sort_by { |_weights, code_list| code_list }
                 .reject { |_weights, code_list| code_list.size == 1 }
                 .map { |_weights, code_list| equivalence_table(code_list) }

  HTML_HEADER + tables.join + HTML_FOOTER
end

if $PROGRAM_NAME == __FILE__
  filename, = ARGV # path of file `ctype-uca.c'
  if filename
    puts render_report(group_codes_by_weight(File.read(filename)))
  else
    # Without a path there is nothing to read; report usage rather than
    # letting File.read(nil) fail with a TypeError.
    warn "usage: #{File.basename($PROGRAM_NAME)} path/to/ctype-uca.c"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment