Last active
February 3, 2018 05:11
-
-
Save ufcpp/5b2cf9a9bf7d0b8743714a0b88f7edc5 to your computer and use it in GitHub Desktop.
「Unicode」
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.Globalization; | |
using System.IO; | |
using System.Text; | |
/// <summary>
/// Scans the Basic Multilingual Plane for opening/closing punctuation characters
/// that sit on consecutive code points and writes every such pair to "puncpair.txt".
/// </summary>
class Program
{
    // Encoding used for the output file.
    static readonly Encoding utf8 = Encoding.UTF8;

    /// <summary>
    /// Writes each detected pair (opening character followed by closing character)
    /// to "puncpair.txt" with no separator, overwriting any existing file.
    /// </summary>
    static void Main()
    {
        using (var writer = new StreamWriter("puncpair.txt", false, utf8))
        {
            foreach (var pair in OpenClosePunctuationPair())
            {
                writer.Write(pair.open);
                writer.Write(pair.close);
            }
        }
    }

    /// <summary>
    /// Enumerates (open, close) pairs found among the UTF-16 code units U+0000..U+FFFF.
    /// </summary>
    /// <returns>
    /// A lazily evaluated sequence of pairs, in ascending code point order, where the
    /// closing character immediately follows the most recently seen opening character.
    /// </returns>
    private static IEnumerable<(char open, char close)> OpenClosePunctuationPair()
    {
        // Most recent character categorized as an opening bracket or initial quote.
        var lastOpener = '\0';

        for (int codePoint = char.MinValue; codePoint <= char.MaxValue; codePoint++)
        {
            var current = (char)codePoint;
            var category = char.GetUnicodeCategory(current);

            if (category == UnicodeCategory.InitialQuotePunctuation
                || category == UnicodeCategory.OpenPunctuation)
            {
                lastOpener = current;
                continue;
            }

            if (category != UnicodeCategory.FinalQuotePunctuation
                && category != UnicodeCategory.ClosePunctuation)
            {
                continue;
            }

            // Skip characters that are changed by compatibility decomposition (NFKD);
            // this is intended to drop full-width alphanumeric and half-width katakana variants.
            var decomposed = current.ToString().Normalize(NormalizationForm.FormKD);
            if (decomposed[0] != current)
            {
                continue;
            }

            // Heuristic: matching brackets and quotation marks usually occupy adjacent
            // code points, so only accept a closer that directly follows the last opener.
            if (lastOpener + 1 != codePoint)
            {
                continue;
            }

            yield return (lastOpener, current);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment