Created
September 6, 2018 02:11
-
-
Save ufcpp/1573a1a453bce1827b6b5025f79ed18a to your computer and use it in GitHub Desktop.
Latin-1 文字の Unicode Category
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://github.com/dotnet/coreclr/issues/19706 - just tried this out after seeing that issue.
// It really was only Latin-1 characters that differed.
//
// Output (character, code point in hex, char's category, CharUnicodeInfo's category):
//
// (§, A7, OtherSymbol, OtherPunctuation)
// (ª, AA, LowercaseLetter, OtherLetter)
// (U+00AD SOFT HYPHEN, AD, DashPunctuation, Format)
// (¶, B6, OtherSymbol, OtherPunctuation)
// (º, BA, LowercaseLetter, OtherLetter)
//
// It seems char follows the Unicode 4.0 definitions while CharUnicodeInfo follows Unicode 5.0?
//
// The Japanese ・ (U+30FB, KATAKANA MIDDLE DOT) also appears to have been changed around the
// same time (in Unicode 5.0), yet for ・ char and CharUnicodeInfo return the same result...
using System.Globalization; | |
using static System.Console; | |
/// <summary>
/// Console program that lists every Unicode code point for which
/// <see cref="char.GetUnicodeCategory(char)"/> and
/// <see cref="CharUnicodeInfo.GetUnicodeCategory(char)"/> disagree.
/// </summary>
static class Program
{
    /// <summary>
    /// Compares the Unicode category reported by <see cref="char"/> with the one
    /// reported by <see cref="CharUnicodeInfo"/> for every code point and writes one
    /// tuple per mismatch: (character, code point in hex, char's category,
    /// CharUnicodeInfo's category).
    /// </summary>
    static void Main()
    {
        // Basic Multilingual Plane: every UTF-16 code unit, U+0000..U+FFFF.
        // The upper bound is inclusive so that U+FFFF itself is checked.
        for (int i = 0; i <= 0xffff; i++)
        {
            var c = (char)i;
            var c1 = char.GetUnicodeCategory(c);
            var c2 = CharUnicodeInfo.GetUnicodeCategory(c);
            if (c1 != c2)
            {
                WriteLine((c, i.ToString("X"), c1, c2));
            }
        }

        // Supplementary planes: U+10000..U+10FFFF. These code points need a surrogate
        // pair, so they are converted to a string and queried at index 0.
        // The upper bound is inclusive so that U+10FFFF itself is checked.
        for (int i = 0x10000; i <= 0x10ffff; i++)
        {
            var c = char.ConvertFromUtf32(i);
            var c1 = char.GetUnicodeCategory(c, 0);
            var c2 = CharUnicodeInfo.GetUnicodeCategory(c, 0);
            if (c1 != c2)
            {
                WriteLine((c, i.ToString("X"), c1, c2));
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment