Last active
March 17, 2023 17:40
-
-
Save rhysd/f61494366357fd1c07cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# encoding: utf-8
#
# Script that extracts each emoji's code point and the corresponding emoji meaning in Japanese.
require 'open-uri'
require 'pp'
# Wiki pages to scrape, one per emoji category.
# NOTE(review): the percent-encoded path segments look like EUC-JP bytes
# (Japanese page titles) -- confirm before re-encoding or editing them.
# Frozen so the list cannot be modified by accident while it is iterated.
URLs = %w[
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bc%ab%c1%b3%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c0%b1%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bf%a2%ca%aa%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bf%a9%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b9%d4%bb%f6%a1%a6%ca%b8%b2%bd%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b8%e4%b3%da%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%a5%b9%a5%dd%a1%bc%a5%c4%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b7%fa%c3%db%ca%aa%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c6%b0%ca%aa%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c6%b0%ca%aa%ca%d42%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bf%cd%c2%ce%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bf%cd%b4%d6%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c9%fe%be%fe%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b4%ef%ca%aa%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b4%ef%ca%aa%ca%d42%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%a5%aa%a5%d5%a5%a3%a5%b9%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%a5%cf%a1%bc%a5%c8%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%c6%c3%bc%ec%b5%ad%b9%e6
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%bb%fe%b7%d7%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c6%c3%c4%ea%ca%b8%b2%bd%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b4%e9%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b8%f2%c4%cc%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%c9%b8%bc%b1%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%a5%c8%a5%e9%a5%f3%a5%d7%ca%d4%29
  http://seesaawiki.jp/w/qvarie/d/%a5%e6%a5%cb%a5%b3%a1%bc%a5%c96.0%b0%ca%b9%df%a4%c7%bb%c8%cd%d1%a4%c7%a4%ad%a4%eb%b3%a8%ca%b8%bb%fa%28%b4%e9%ca%d42%29
].freeze
# Table row holding the code point; capture 1 is the hex digits after "U+".
RE = %r{<td>ユニコード</td><td>U\+([[:xdigit:]]+)</td>}
# Line that opens a new entry: an upper-case name, " - ", arbitrary text, then
# a numeric character reference wrapped in 【 】. Used as the slice boundary.
SEPARATOR = %r{[[:upper:] -]+ - .*【&#\d+;】}
# Patterns whose capture 1 is the description: an <li> starting with 意味, or
# the text between the last " - " and 【 in an <h4 id="..."> heading.
# Frozen so the pattern list cannot be modified by accident.
IMIs = [%r{^<li>意味[:;]?\s*(.+)</li>}, %r{<h4 id="[^"]+".* - (.*)【.+】}].freeze
# Returns the MatchData for the first line in +lines+ that matches +pattern+,
# or nil when no line matches.
def first_match(lines, pattern)
  lines.each do |line|
    found = pattern.match(line)
    return found if found
  end
  nil
end

# Builds the [code point, description] pair for one emoji entry.
#
# @param entry [Array<String>] the HTML lines of one entry, as grouped by SEPARATOR
# @return [Array(String, String), nil] capture 1 of RE and capture 1 of an IMIs
#   pattern, or nil when the entry lacks either one
def emoji_pair(entry)
  code = first_match(entry, RE)
  # Try the patterns back to front: when several of them match, the one listed
  # last in IMIs supplies the description.
  meaning = IMIs.reverse.map { |pattern| first_match(entry, pattern) }.compact.first
  [code[1], meaning[1]] if code && meaning
end

# Print a Ruby hash literal named JPMap that maps each scraped code point to
# its description. Pairs are emitted page by page, sorted by the code-point
# string within a page, one line per distinct code point.
puts 'JPMap = {'
URLs.each do |url|
  # URI.open is the open-uri entry point; plain Kernel#open no longer fetches
  # URLs on Ruby 3.0+. The block form closes the handle once the body is read.
  # NOTE(review): encode relies on open-uri tagging the body with the charset
  # the server reported -- confirm the pages declare one.
  html = URI.open(url, &:read).encode('utf-8')
  html.each_line.slice_before(SEPARATOR)
      .map { |entry| emoji_pair(entry) }
      .compact
      .sort_by(&:first)
      .uniq(&:first)
      .each { |code, desc| puts " 0x#{code} => '#{desc}'," }
end
puts '}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment