Last active
November 9, 2023 00:25
-
-
Save panda728/4301d5f82bec7e9619439c07587f4381 to your computer and use it in GitHub Desktop.
はしごだかの変換ツール髙->高
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Buffers; | |
using System.Text; | |
namespace Panda728.Extensions; | |
/// <summary>
/// Helpers for text that has to be printed or stored as Shift-JIS (code page 932):
/// variant-kanji substitution, half-width/full-width conversion and byte-length-aware slicing.
/// </summary>
/// <remarks>
/// Characters whose meaning depends on their exact code point (half-width kana, full-width ASCII,
/// CJK compatibility ideographs) are written as <c>\u</c> escapes so that Unicode normalization of
/// this source file cannot silently turn a mapping into a no-op.
/// </remarks>
public static class ShiftJisExtension
{
    // Ask the code-pages provider directly so callers do not have to run
    // Encoding.RegisterProvider(CodePagesEncodingProvider.Instance) beforehand; fall back to the
    // registered-provider lookup if the provider does not return code page 932.
    private static readonly Encoding _sjis =
        CodePagesEncodingProvider.Instance.GetEncoding(932) ?? Encoding.GetEncoding("shift-jis");

    /// <summary>Inputs longer than this use a plain heap buffer instead of a pooled one.</summary>
    private const int ArrayPoolLimit = 2048;

    /// <summary>Variant-kanji table: character to replace (ng) and its printable substitute (ok).</summary>
    /// <remarks>
    /// NOTE(review): the escaped entries are CJK compatibility ideographs; they are assumed to be the
    /// CP932 IBM-extension forms (U+FA10..U+FA2B, U+F9DC) - confirm against the intended list.
    /// </remarks>
    private static readonly (char ng, char ok)[] _canNotPrintChars =
    {
        ('髙','高'),('﨑','崎'),('濵','浜'),('賴','頼'),('瀨','瀬'),('德','徳'),('蓜','配'),('昻','昂'),('桒','桑'),('栁','柳'),('犾','犹'),('琪','棋'),
        ('裵','裴'),('魲','鱸'),('\uFA1E','羽'),('焏','丞'),('\uFA1A','祥'),('曻','昇'),('敎','教'),('澈','徹'),('曺','曹'),('黑','黒'),('\uFA10','塚'),('閒','間'),
        ('彅','薙'),('匤','匡'),('冝','宜'),('埇','甬'),('鮏','鮭'),('伹','但'),('杦','杉'),('罇','樽'),('柀','披'),('﨤','返'),('寬','寛'),('\uFA19','神'),
        ('\uFA1B','福'),('\uFA18','礼'),('贒','賢'),('\uFA25','逸'),('\uF9DC','隆'),('靑','青'),('\uFA2A','飯'),('\uFA2B','飼'),('緖','緒'),('埈','峻')
    };

    // References:
    // http://www13.plala.or.jp/bigdata/jis_3.html  (list of the 1259 JIS level-3 kanji, JIS X 0213:2004)
    // https://qiita.com/mindwood/items/3cc4fbf76caa38aa743c  (replacing environment-dependent characters with JIS X 0208 variants)
    // https://qiita.com/kasei-san/items/cfb993786153231e5413  (an introduction to CP932)
    // https://qiita.com/matarillo/items/91b9656428bed7a1a797  (what is a "character"?)
    // https://zenn.dev/niccari/articles/9394aa721e6bb2  (Unicode character groups to consider in forms and databases)
    private static readonly Dictionary<char, char> _canNotPrintCharsDic =
        _canNotPrintChars.ToDictionary(pair => pair.ng, pair => pair.ok);

    /// <summary>Half-width katakana and sound marks paired with their full-width forms.</summary>
    private static readonly (char narrow, char wide)[] _kanaChars =
    {
        ('\uFF71','ア'),('\uFF72','イ'),('\uFF73','ウ'),('\uFF74','エ'),('\uFF75','オ'),
        ('\uFF76','カ'),('\uFF77','キ'),('\uFF78','ク'),('\uFF79','ケ'),('\uFF7A','コ'),
        ('\uFF7B','サ'),('\uFF7C','シ'),('\uFF7D','ス'),('\uFF7E','セ'),('\uFF7F','ソ'),
        ('\uFF80','タ'),('\uFF81','チ'),('\uFF82','ツ'),('\uFF83','テ'),('\uFF84','ト'),
        ('\uFF85','ナ'),('\uFF86','ニ'),('\uFF87','ヌ'),('\uFF88','ネ'),('\uFF89','ノ'),
        ('\uFF8A','ハ'),('\uFF8B','ヒ'),('\uFF8C','フ'),('\uFF8D','ヘ'),('\uFF8E','ホ'),
        ('\uFF8F','マ'),('\uFF90','ミ'),('\uFF91','ム'),('\uFF92','メ'),('\uFF93','モ'),
        ('\uFF94','ヤ'),('\uFF95','ユ'),('\uFF96','ヨ'),
        ('\uFF97','ラ'),('\uFF98','リ'),('\uFF99','ル'),('\uFF9A','レ'),('\uFF9B','ロ'),
        ('\uFF9C','ワ'),('\uFF66','ヲ'),('\uFF9D','ン'),
        ('\uFF67','ァ'),('\uFF68','ィ'),('\uFF69','ゥ'),('\uFF6A','ェ'),('\uFF6B','ォ'),
        ('\uFF6F','ッ'),('\uFF6C','ャ'),('\uFF6D','ュ'),('\uFF6E','ョ'),
        ('\uFF9E','\u309B'),('\uFF9F','\u309C'),
    };

    /// <summary>Two-way half-width/full-width table (ASCII, space and katakana).</summary>
    private static readonly (char narrow, char wide)[] _replaceChars = BuildReplaceChars();

    /// <summary>Full-width characters that only take part in the full-width to half-width direction.</summary>
    private static readonly (char narrow, char wide)[] _replaceCharToNarrowOnlys = { ('-', 'ー') };

    private static readonly Dictionary<char, char> _toWideCharDic =
        _replaceChars.ToDictionary(pair => pair.narrow, pair => pair.wide);

    private static readonly Dictionary<char, char> _toNarrowCharDic =
        _replaceChars.Concat(_replaceCharToNarrowOnlys).ToDictionary(pair => pair.wide, pair => pair.narrow);

    /// <summary>Voiced kana whose half-width form is two characters (base kana + sound mark).</summary>
    private static readonly (string narrow, char wide)[] _replaceWords =
    {
        ("\uFF76\uFF9E", 'ガ'),("\uFF77\uFF9E", 'ギ'),("\uFF78\uFF9E", 'グ'),("\uFF79\uFF9E", 'ゲ'),("\uFF7A\uFF9E", 'ゴ'),
        ("\uFF7B\uFF9E", 'ザ'),("\uFF7C\uFF9E", 'ジ'),("\uFF7D\uFF9E", 'ズ'),("\uFF7E\uFF9E", 'ゼ'),("\uFF7F\uFF9E", 'ゾ'),
        ("\uFF80\uFF9E", 'ダ'),("\uFF81\uFF9E", 'ヂ'),("\uFF82\uFF9E", 'ヅ'),("\uFF83\uFF9E", 'デ'),("\uFF84\uFF9E", 'ド'),
        ("\uFF8A\uFF9E", 'バ'),("\uFF8B\uFF9E", 'ビ'),("\uFF8C\uFF9E", 'ブ'),("\uFF8D\uFF9E", 'ベ'),("\uFF8E\uFF9E", 'ボ'),
        ("\uFF8A\uFF9F", 'パ'),("\uFF8B\uFF9F", 'ピ'),("\uFF8C\uFF9F", 'プ'),("\uFF8D\uFF9F", 'ペ'),("\uFF8E\uFF9F", 'ポ'),
        ("\uFF73\uFF9E", 'ヴ'),
    };

    // Keyed by the (base, mark) pair so the lookup needs no per-character string allocation.
    private static readonly Dictionary<(char, char), char> _toWideWordDic =
        _replaceWords.ToDictionary(word => (word.narrow[0], word.narrow[1]), word => word.wide);

    private static readonly Dictionary<char, string> _toNarrowWordDic =
        _replaceWords.ToDictionary(word => word.wide, word => word.narrow);

    /// <summary>Writes converted characters into <paramref name="destination"/> and returns the count written.</summary>
    private delegate int SpanWriter(ReadOnlySpan<char> source, Span<char> destination);

    /// <summary>Replaces characters that the printing device cannot print with printable variants.</summary>
    /// <remarks>Example: U+9AD9 (髙) becomes U+9AD8 (高); U+FA11 (﨑) becomes 崎.</remarks>
    /// <param name="span">Text to convert.</param>
    /// <returns>A new string of the same length with every listed variant replaced.</returns>
    public static string ToSafeChars(this ReadOnlySpan<char> span)
        => Transform(span, span.Length, WriteSafeChars);

    /// <summary>Converts half-width characters (ASCII, space, katakana) to their full-width forms.</summary>
    /// <param name="span">Text to convert.</param>
    /// <returns>The converted string; a half-width kana followed by a sound mark becomes one voiced kana.</returns>
    public static string ToWide(this ReadOnlySpan<char> span)
        => Transform(span, span.Length, WriteWide);

    /// <summary>Converts full-width characters (ASCII, space, katakana) to their half-width forms.</summary>
    /// <param name="span">Text to convert.</param>
    /// <returns>The converted string; a voiced kana becomes a half-width kana plus a sound mark.</returns>
    public static string ToNarrow(this ReadOnlySpan<char> span)
        => Transform(span, checked(span.Length * 2), WriteNarrow); // a voiced kana expands to two chars

    /// <summary>Returns the number of bytes the text occupies once encoded as Shift-JIS.</summary>
    public static int GetSjisByteCount(this ReadOnlySpan<char> span)
        => _sjis.GetByteCount(span);

    /// <summary>Cuts a substring out of the text by Shift-JIS byte positions.</summary>
    /// <param name="span">Text to slice.</param>
    /// <param name="startPos">Byte offset to start after; characters ending at or before it are skipped.</param>
    /// <param name="finishPos">Maximum number of bytes to take, counted from <paramref name="startPos"/>.</param>
    /// <returns>
    /// The characters whose last byte lies in (startPos, startPos + finishPos]; an empty string when
    /// no character qualifies. A character is never split.
    /// </returns>
    public static string SubstringSjisByteLength(this ReadOnlySpan<char> span, int startPos, int finishPos)
    {
        long limit = (long)startPos + finishPos; // long: the sum of two ints must not wrap
        long consumed = 0;
        int first = -1;
        int index = 0;
        while (index < span.Length)
        {
            int width = TextElementLength(span, index);
            consumed += _sjis.GetByteCount(span.Slice(index, width));
            if (consumed > limit)
            {
                break;
            }
            // The selected characters are contiguous, so remembering the first one is enough.
            if (first < 0 && consumed > startPos)
            {
                first = index;
            }
            index += width;
        }
        return first < 0 ? string.Empty : new string(span[first..index]);
    }

    /// <summary>Splits the text into consecutive pieces of at most <paramref name="byteLength"/> Shift-JIS bytes.</summary>
    /// <param name="span">Text to split.</param>
    /// <param name="byteLength">Maximum Shift-JIS byte count of each piece.</param>
    /// <returns>The pieces in order; an empty array for empty input. A character is never split.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="byteLength"/> is too small to hold one of the characters (previously this looped forever).
    /// </exception>
    public static string[] SubstringSjisByteLength(this ReadOnlySpan<char> span, int byteLength)
    {
        var pieces = new List<string>();
        int pieceStart = 0;
        int pieceBytes = 0;
        int index = 0;
        while (index < span.Length)
        {
            int width = TextElementLength(span, index);
            int bytes = _sjis.GetByteCount(span.Slice(index, width));
            if (bytes > byteLength)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(byteLength), byteLength, "The byte length is too small to hold a single character.");
            }
            if (pieceBytes + bytes > byteLength)
            {
                pieces.Add(new string(span[pieceStart..index]));
                pieceStart = index;
                pieceBytes = 0;
            }
            pieceBytes += bytes;
            index += width;
        }
        if (pieceStart < span.Length)
        {
            pieces.Add(new string(span[pieceStart..]));
        }
        return pieces.ToArray();
    }

    /// <summary>Decodes Shift-JIS bytes into a string.</summary>
    public static string EncodeSjis(this ReadOnlySpan<byte> span)
        => _sjis.GetString(span);

    /// <summary>Decodes Shift-JIS bytes into a string and trims leading and trailing white space.</summary>
    public static string EncodeSjisTrimed(this ReadOnlySpan<byte> span)
        => _sjis.GetString(span).Trim();

    /// <summary>Builds the two-way table: printable ASCII, the space and the katakana pairs.</summary>
    private static (char narrow, char wide)[] BuildReplaceChars()
    {
        var pairs = new List<(char narrow, char wide)>();
        for (char c = '!'; c <= '~'; c++)
        {
            char wide = c switch
            {
                '"' => '\u201D',          // right double quotation mark
                '\'' => '\u2019',         // right single quotation mark
                '`' => '\u2018',          // left single quotation mark
                '\\' => '\uFFE5',         // full-width yen sign (0x5C is the yen sign in Shift-JIS)
                _ => (char)(c + 0xFEE0),  // U+FF01..U+FF5E mirror ASCII at a fixed offset
            };
            pairs.Add((c, wide));
        }
        pairs.Add((' ', '\u3000')); // ideographic space
        pairs.AddRange(_kanaChars);
        return pairs.ToArray();
    }

    /// <summary>Runs <paramref name="writer"/> against a scratch buffer and returns what it wrote as a string.</summary>
    private static string Transform(ReadOnlySpan<char> source, int capacity, SpanWriter writer)
    {
        if (source.Length > ArrayPoolLimit)
        {
            // Large inputs: avoid parking an oversized array in the shared pool.
            var scratch = new char[capacity];
            return new string(scratch, 0, writer(source, scratch));
        }

        var rented = ArrayPool<char>.Shared.Rent(capacity);
        try
        {
            return new string(rented, 0, writer(source, rented));
        }
        finally
        {
            ArrayPool<char>.Shared.Return(rented);
        }
    }

    /// <summary>Copies the text, replacing every character listed in the variant-kanji table.</summary>
    private static int WriteSafeChars(ReadOnlySpan<char> source, Span<char> destination)
    {
        for (int i = 0; i < source.Length; i++)
        {
            char c = source[i];
            destination[i] = _canNotPrintCharsDic.TryGetValue(c, out char safe) ? safe : c;
        }
        return source.Length;
    }

    /// <summary>Copies the text, widening half-width characters.</summary>
    private static int WriteWide(ReadOnlySpan<char> source, Span<char> destination)
    {
        int written = 0;
        for (int i = 0; i < source.Length; i++)
        {
            char c = source[i];
            if (i + 1 < source.Length && _toWideWordDic.TryGetValue((c, source[i + 1]), out char voiced))
            {
                destination[written++] = voiced;
                i++; // the sound mark was consumed together with its base kana
                continue;
            }
            destination[written++] = _toWideCharDic.TryGetValue(c, out char wide) ? wide : c;
        }
        return written;
    }

    /// <summary>Copies the text, narrowing full-width characters.</summary>
    private static int WriteNarrow(ReadOnlySpan<char> source, Span<char> destination)
    {
        int written = 0;
        foreach (char c in source)
        {
            if (_toNarrowWordDic.TryGetValue(c, out string? pair))
            {
                pair.AsSpan().CopyTo(destination.Slice(written));
                written += pair.Length;
                continue;
            }
            destination[written++] = _toNarrowCharDic.TryGetValue(c, out char narrow) ? narrow : c;
        }
        return written;
    }

    /// <summary>Returns 2 when a surrogate pair starts at <paramref name="index"/>, otherwise 1.</summary>
    private static int TextElementLength(ReadOnlySpan<char> span, int index)
        => char.IsHighSurrogate(span[index]) && index + 1 < span.Length && char.IsLowSurrogate(span[index + 1])
            ? 2
            : 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment