Skip to content

Instantly share code, notes, and snippets.

@kddnewton
Created November 28, 2023 17:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kddnewton/089d23d49adb5551792293fdb5bf64a0 to your computer and use it in GitHub Desktop.
Save kddnewton/089d23d49adb5551792293fdb5bf64a0 to your computer and use it in GitHub Desktop.
Maybe encoding bug?
# frozen_string_literal: true
# Sandbox object in which a bare token is evaluated to see how Ruby
# classifies it.
#
# The class derives from BasicObject, so a bare lowercase word evaluated
# against an instance is dispatched to #method_missing, and an unresolved
# constant reference is dispatched to .const_missing.
class Context < BasicObject
  # Answers every otherwise-undefined method call.
  #
  # @param name [Symbol] the method that was called (ignored)
  # @return [Symbol] always :identifier
  def method_missing(name, *)
    :identifier
  end

  class << self
    # Answers every constant lookup that could not be resolved.
    #
    # @param name [Symbol] the constant that was referenced (ignored)
    # @return [Symbol] always :constant
    def const_missing(name)
      :constant
    end
  end
end

# Single shared sandbox instance, read by the code that evaluates tokens.
$context = Context.new
# Checks that Ruby parses a single character the way its regexp character
# class predicts, and prints the encoding name plus the hexadecimal
# codepoint whenever the two disagree.
#
# A character matching [[:lower:]] is expected to evaluate to :identifier;
# otherwise a character matching [[:upper:]] is expected to evaluate to
# :constant. Characters matching neither class are ignored.
#
# @param encoding [Encoding] encoding used to build and evaluate the character
# @param codepoint [Integer] codepoint to convert with Integer#chr
# @return [nil]
# @note Errors raised by Integer#chr are not handled here; callers in this
#   file rescue RangeError where they expect it.
def evaluate(encoding, codepoint)
  char = codepoint.chr(encoding)

  expected =
    if /[[:lower:]]/.match?(char)
      :identifier
    elsif /[[:upper:]]/.match?(char)
      :constant
    end
  return if expected.nil?

  # The magic comment makes the evaluated snippet use the encoding under test.
  snippet = "# encoding: #{encoding.name}\n#{char}"
  actual = $context.instance_eval(snippet)
  puts "#{encoding.name} 0x#{codepoint.to_s(16)}" if actual != expected
end
# Encodings probed over the 7-bit range only.
encodings_7bits = [Encoding::US_ASCII]

# Check every codepoint from 0x00 through 0x7F in each 7-bit encoding.
encodings_7bits.each do |encoding|
  0.upto(0x7F) { |codepoint| evaluate(encoding, codepoint) }
end
# Encodings probed over the full single-byte range. The order of the list
# determines the order of any reported mismatches.
encodings_1byte = [
  Encoding::ASCII_8BIT,
  Encoding::ISO8859_1, Encoding::ISO8859_2, Encoding::ISO8859_3, Encoding::ISO8859_4,
  Encoding::ISO8859_5, Encoding::ISO8859_6, Encoding::ISO8859_7, Encoding::ISO8859_8,
  Encoding::ISO8859_9, Encoding::ISO8859_10, Encoding::ISO8859_11, Encoding::ISO8859_13,
  Encoding::ISO8859_14, Encoding::ISO8859_15, Encoding::ISO8859_16,
  Encoding::CP878, Encoding::KOI8_U,
  Encoding::WINDOWS_1250, Encoding::WINDOWS_1251, Encoding::WINDOWS_1252,
  Encoding::WINDOWS_1253, Encoding::WINDOWS_1254, Encoding::WINDOWS_1257,
  Encoding::CP437, Encoding::CP720, Encoding::CP737, Encoding::CP775,
  Encoding::IBM850, Encoding::IBM852, Encoding::CP852, Encoding::IBM855, Encoding::CP855,
  Encoding::CP857, Encoding::CP860, Encoding::CP861, Encoding::CP862, Encoding::CP863,
  Encoding::CP864, Encoding::CP865, Encoding::CP866, Encoding::CP869,
  Encoding::CP1258, Encoding::GB1988,
  Encoding::MACCENTEURO, Encoding::MACCROATIAN, Encoding::MACCYRILLIC, Encoding::MACGREEK,
  Encoding::MACICELAND, Encoding::MACROMAN, Encoding::MACROMANIA, Encoding::MACTHAI,
  Encoding::MACTURKISH, Encoding::MACUKRAINE,
  Encoding::Windows_1256, Encoding::WINDOWS_1255,
  Encoding::TIS_620, Encoding::Windows_874
]

# Check every codepoint from 0x00 through 0xFF in each single-byte encoding.
encodings_1byte.each do |encoding|
  0.upto(0xFF) { |codepoint| evaluate(encoding, codepoint) }
end
# Encodings probed over the range 0x0000..0xFFFF. The order of the list
# determines the order of any reported mismatches.
encodings_2byte = [
  Encoding::BIG5, Encoding::BIG5_HKSCS_2008, Encoding::BIG5_UAO,
  Encoding::EUCKR, Encoding::EUCTW,
  Encoding::SHIFT_JIS, Encoding::SJIS_DOCOMO, Encoding::SJIS_KDDI, Encoding::SJIS_SOFTBANK,
  Encoding::WINDOWS_31J,
  Encoding::CP949, Encoding::GB18030, Encoding::CP936, Encoding::CP950, Encoding::CP951,
  Encoding::EUCCN, Encoding::GB12345, Encoding::MACJAPANESE
]

# Check every value in the range against each encoding.
encodings_2byte.each do |encoding|
  0.upto(0xFFFF) do |codepoint|
    begin
      evaluate(encoding, codepoint)
    rescue RangeError
      # Skip this value and move on; presumably it is not a valid
      # character in the encoding under test.
    end
  end
end
# Encodings probed over the range 0x000000..0xFFFFFF.
encodings_3byte = [
  Encoding::EUCJP, Encoding::EUC_JP_MS, Encoding::EUC_JISX0213, Encoding::CP51932
]

# Check every value in the range against each encoding.
encodings_3byte.each do |encoding|
  0.upto(0xFFFFFF) do |codepoint|
    begin
      evaluate(encoding, codepoint)
    rescue RangeError
      # Skip this value and move on; presumably it is not a valid
      # character in the encoding under test.
    end
  end
end
# Unicode-based encodings to probe. Every name is kept in the list for
# completeness, even where two names refer to the same encoding.
encodings_unicode = [
  Encoding::UTF_8,
  Encoding::UTF8_MAC,
  Encoding::UTF8_DoCoMo,
  Encoding::UTF8_KDDI,
  Encoding::UTF8_SoftBank,
  Encoding::CP65001,
  Encoding::CESU_8
]

# Encoding::CP65001 is an alias of Encoding::UTF_8, i.e. the very same
# Encoding object. Iterating over the de-duplicated list avoids scanning the
# whole UTF-8 code space twice and printing each mismatch twice.
encodings_unicode.uniq.each do |encoding|
  # Two ranges are walked so that 0xD800..0xDFFF (the surrogate block) is
  # never passed to evaluate.
  [0...0xD800, 0xE000..0x10FFFF].each do |range|
    range.each { |codepoint| evaluate(encoding, codepoint) }
  end
end
# Encodings probed over the range 0x00000000..0xFFFFFFFF.
encodings_4byte = [
  Encoding::EMACS_MULE,
  Encoding::STATELESS_ISO_2022_JP,
  Encoding::STATELESS_ISO_2022_JP_KDDI
]

# Check every value in the range against each encoding.
encodings_4byte.each do |encoding|
  0.upto(0xFFFFFFFF) do |codepoint|
    begin
      evaluate(encoding, codepoint)
    rescue RangeError
      # Skip this value and move on; presumably it is not a valid
      # character in the encoding under test.
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment