Skip to content

Instantly share code, notes, and snippets.

@ufcpp
Last active February 3, 2018 05:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ufcpp/5b2cf9a9bf7d0b8743714a0b88f7edc5 to your computer and use it in GitHub Desktop.
Save ufcpp/5b2cf9a9bf7d0b8743714a0b88f7edc5 to your computer and use it in GitHub Desktop.
「Unicode」
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
/// <summary>
/// Scans every UTF-16 code unit value (U+0000..U+FFFF) for opening/closing punctuation
/// characters that sit on adjacent code points and writes the pairs to <c>puncpair.txt</c>.
/// </summary>
class Program
{
    /// <summary>Encoding used when writing the output file.</summary>
    private static readonly Encoding Utf8 = Encoding.UTF8;

    /// <summary>Exclusive upper bound of the scan (one past <c>char.MaxValue</c>).</summary>
    private const int CodeUnitLimit = 0x10000;

    /// <summary>
    /// Writes each detected pair to <c>puncpair.txt</c> as the opening character immediately
    /// followed by the closing character, with no separators. An existing file is overwritten
    /// (the <c>append</c> argument is <c>false</c>).
    /// </summary>
    static void Main()
    {
        using (var w = new StreamWriter("puncpair.txt", false, Utf8))
        {
            foreach (var (o, c) in OpenClosePunctuationPair())
            {
                w.Write(o);
                w.Write(c);
            }
        }
    }

    /// <summary>
    /// Enumerates (open, close) punctuation pairs in ascending code point order.
    /// </summary>
    /// <remarks>
    /// A pair is reported when a character categorized as closing punctuation or a final quote
    /// immediately follows (code point difference of exactly 1) the most recently seen character
    /// categorized as opening punctuation or an initial quote. Pairs whose two halves are not
    /// adjacent, such as <c>[</c> and <c>]</c>, are therefore not reported.
    /// </remarks>
    /// <returns>A lazily evaluated sequence of adjacent open/close punctuation pairs.</returns>
    private static IEnumerable<(char open, char close)> OpenClosePunctuationPair()
    {
        // Most recently seen opening character; '\0' means none has been seen yet.
        var open = '\0';

        for (int i = 0; i < CodeUnitLimit; i++)
        {
            var c = (char)i;
            var cat = char.GetUnicodeCategory(c);

            switch (cat)
            {
                case UnicodeCategory.InitialQuotePunctuation:
                case UnicodeCategory.OpenPunctuation:
                    open = c;
                    break;

                case UnicodeCategory.FinalQuotePunctuation:
                case UnicodeCategory.ClosePunctuation:
                    // Exclude characters whose NFKD form starts with a different character
                    // (removes full-width alphanumerics and half-width katakana forms).
                    var normalized = c.ToString().Normalize(NormalizationForm.FormKD)[0];
                    if (c != normalized)
                    {
                        break;
                    }

                    // Matching brackets and quotation marks usually occupy adjacent
                    // Unicode code points, so only accept a close character that
                    // directly follows the last open character.
                    if (i - open != 1)
                    {
                        break;
                    }

                    yield return (open, c);
                    break;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment