Skip to content

Instantly share code, notes, and snippets.

@panda728
Last active November 9, 2023 00:25
Show Gist options
  • Save panda728/4301d5f82bec7e9619439c07587f4381 to your computer and use it in GitHub Desktop.
Save panda728/4301d5f82bec7e9619439c07587f4381 to your computer and use it in GitHub Desktop.
はしごだかの変換ツール髙->高
using System.Buffers;
using System.Text;
namespace Panda728.Extensions;
public static class ShiftJisExtension
{
// Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
// NOTE(review): the commented-out call above suggests the code-pages provider must be registered
// before this type is first used, otherwise the lookup below presumably fails — confirm at the call site.
/// <summary>Shift-JIS encoding shared by every helper in this class.</summary>
private static readonly Encoding _sjis = Encoding.GetEncoding("shift-jis");
/// <summary>Inputs longer than this many chars are not processed through an ArrayPool-rented buffer.</summary>
private const int ARRAY_POOL_LIMIT = 2048;
/// <summary>Variant-character table: each pair is (ng: character to replace, ok: replacement character).</summary>
/// <remarks>
/// Several pairs look identical in many fonts; the first element is presumably a distinct
/// variant/compatibility code point. Do not "deduplicate" or Unicode-normalize this table.
/// </remarks>
readonly static (char ng, char ok)[] _canNotPrintChars = new (char, char)[] {
('髙','高'),('﨑','崎'),('濵','浜'),('賴','頼'),('瀨','瀬'),('德','徳'),('蓜','配'),('昻','昂'),('桒','桑'),('栁','柳'),('犾','犹'),('琪','棋'),
('裵','裴'),('魲','鱸'),('羽','羽'),('焏','丞'),('祥','祥'),('曻','昇'),('敎','教'),('澈','徹'),('曺','曹'),('黑','黒'),('塚','塚'),('閒','間'),
('彅','薙'),('匤','匡'),('冝','宜'),('埇','甬'),('鮏','鮭'),('伹','但'),('杦','杉'),('罇','樽'),('柀','披'),('﨤','返'),('寬','寛'),('神','神'),
('福','福'),('礼','礼'),('贒','賢'),('逸','逸'),('隆','隆'),('靑','青'),('飯','飯'),('飼','飼'),('緖','緒'),('埈','峻')
};
// References:
// http://www13.plala.or.jp/bigdata/jis_3.html - List of JIS level-3 kanji (1,259 characters, JIS X 0213:2004)
// https://qiita.com/mindwood/items/3cc4fbf76caa38aa743c - A tool that replaces environment-dependent characters with JIS X 0208 variants
// https://qiita.com/kasei-san/items/cfb993786153231e5413 - CP932 is not really that scary
// https://qiita.com/matarillo/items/91b9656428bed7a1a797 - What is a character, anyway?
// https://zenn.dev/niccari/articles/9394aa721e6bb2 - (Notes) Unicode character groups to keep in mind for input forms and database string handling
/// <summary>Lookup from a character to replace (ng) to its replacement (ok), built from <c>_canNotPrintChars</c>.</summary>
private static readonly Dictionary<char, char> _canNotPrintCharsDic = _canNotPrintChars.ToDictionary(pair => pair.ng, pair => pair.ok);
/// <summary>Replaces characters the printing device cannot print with printable equivalents.</summary>
/// <remarks>Example: 髙 -> 高, 﨑 -> 崎. Characters without an entry in the variant table are copied unchanged.</remarks>
/// <param name="span">Text to sanitize.</param>
/// <returns>A new string with the same number of chars as <paramref name="span"/>.</returns>
public static string ToSafeChars(this ReadOnlySpan<char> span)
{
    if (span.IsEmpty)
    {
        return string.Empty;
    }

    // Modest inputs borrow a pooled buffer; longer ones use a throw-away array instead.
    bool usePool = span.Length <= ARRAY_POOL_LIMIT;
    char[] buffer = usePool ? ArrayPool<char>.Shared.Rent(span.Length) : new char[span.Length];
    try
    {
        for (int i = 0; i < span.Length; i++)
        {
            char c = span[i];
            // Single lookup instead of ContainsKey followed by the indexer.
            buffer[i] = _canNotPrintCharsDic.TryGetValue(c, out char safe) ? safe : c;
        }

        // Build the string straight from the buffer: a range expression on an array
        // (buffer[..n]) would allocate a second array and defeat the purpose of pooling.
        return new string(buffer, 0, span.Length);
    }
    finally
    {
        if (usePool)
        {
            ArrayPool<char>.Shared.Return(buffer);
        }
    }
}
/// <summary>Bidirectional conversion table between half-width (narrow) and full-width (wide) characters.</summary>
/// <remarks>Covers digits, Latin letters, ASCII punctuation, the space, katakana and the two voicing marks.</remarks>
readonly static (char narrow, char wide)[] _replaceChars = new (char, char)[] {
('0','0'),('1','1'),('2','2'),('3','3'),('4','4'),('5','5'),('6','6'),('7','7'),('8','8'),('9','9'),
('A','A'),('B','B'),('C','C'),('D','D'),('E','E'),('F','F'),('G','G'),('H','H'),('I','I'),('J','J'),
('K','K'),('L','L'),('M','M'),('N','N'),('O','O'),('P','P'),('Q','Q'),('R','R'),('S','S'),('T','T'),
('U','U'),('V','V'),('W','W'),('X','X'),('Y','Y'),('Z','Z'),
('a','a'),('b','b'),('c','c'),('d','d'),('e','e'),('f','f'),('g','g'),('h','h'),('i','i'),('j','j'),
('k','k'),('l','l'),('m','m'),('n','n'),('o','o'),('p','p'),('q','q'),('r','r'),('s','s'),('t','t'),
('u','u'),('v','v'),('w','w'),('x','x'),('y','y'),('z','z'),('!','!'),
('\"','”'),('#','#'),('$','$'),('%','%'),('&','&'),('\'','’'),('(','('),(')',')'),('=','='),('-','-'),
('^','^'),('~','~'),('\\','¥'),('|','|'),('@','@'),('`','‘'),('[','['),('{','{'),(';',';'),('+','+'),
(':',':'),('*','*'),(']',']'),('}','}'),(',',','),('<','<'),('.','.'),('>','>'),('?','?'),
('/','/'),('_','_'),(' ',' '),
('ア','ア'),('イ','イ'),('ウ','ウ'),('エ','エ'),('オ','オ'),('カ','カ'),('キ','キ'),('ク','ク'),('ケ','ケ'),('コ','コ'),
('サ','サ'),('シ','シ'),('ス','ス'),('セ','セ'),('ソ','ソ'),('タ','タ'),('チ','チ'),('ツ','ツ'),('テ','テ'),('ト','ト'),
('ナ','ナ'),('ニ','ニ'),('ヌ','ヌ'),('ネ','ネ'),('ノ','ノ'),('ハ','ハ'),('ヒ','ヒ'),('フ','フ'),('ヘ','ヘ'),('ホ','ホ'),
('マ','マ'),('ミ','ミ'),('ム','ム'),('メ','メ'),('モ','モ'),('ヤ','ヤ'),('ユ','ユ'),('ヨ','ヨ'),
('ラ','ラ'),('リ','リ'),('ル','ル'),('レ','レ'),('ロ','ロ'),('ワ','ワ'),('ヲ','ヲ'),('ン','ン'),
('ァ','ァ'),('ィ','ィ'),('ゥ','ゥ'),('ェ','ェ'),('ォ','ォ'),('ッ','ッ'),('ャ','ャ'),('ュ','ュ'),('ョ','ョ'),('゙','゛'),('゚','゜'),
};
/// <summary>Extra pairs that apply only when converting full-width to half-width.</summary>
private static readonly (char narrow, char wide)[] _replaceCharToNarrowOnlys = { ('-', 'ー') };
/// <summary>Single-character lookup from half-width to full-width.</summary>
private static readonly Dictionary<char, char> _toWideCharDic = _replaceChars.ToDictionary(pair => pair.narrow, pair => pair.wide);
/// <summary>Single-character lookup from full-width to half-width, including the narrow-only extras.</summary>
private static readonly Dictionary<char, char> _toNarrowCharDic = _replaceChars
    .Concat(_replaceCharToNarrowOnlys)
    .ToDictionary(pair => pair.wide, pair => pair.narrow);
/// <summary>Conversion table for katakana whose half-width form takes two characters (base kana plus a voicing mark).</summary>
/// <remarks>Each pair is (narrow: two-char half-width form, wide: one-char full-width form).</remarks>
readonly static (string narrow, string wide)[] _replaceWords = new (string, string)[] {
("ガ", "ガ"),("ギ", "ギ"),("グ", "グ"),("ゲ", "ゲ"),("ゴ", "ゴ"),("ザ", "ザ"),("ジ", "ジ"),("ズ", "ズ"),("ゼ", "ゼ"),
("ゾ", "ゾ"),("ダ", "ダ"),("ヂ", "ヂ"),("ヅ", "ヅ"),("デ", "デ"),("ド", "ド"),("バ", "バ"),("ビ", "ビ"),("ブ", "ブ"),("ベ", "ベ"),("ボ", "ボ"),
("パ", "パ"),("ピ", "ピ"),("プ", "プ"),("ペ", "ペ"),("ポ", "ポ"),("ヴ", "ヴ")
};
/// <summary>Lookup from a two-char half-width kana sequence to its single full-width char.</summary>
private static readonly Dictionary<string, char> _toWideWordDic = _replaceWords.ToDictionary(pair => pair.narrow, pair => pair.wide[0]);
/// <summary>Lookup from a full-width kana char to the chars of its half-width sequence.</summary>
private static readonly Dictionary<char, char[]> _toNarrowWordDic = _replaceWords.ToDictionary(pair => pair.wide[0], pair => pair.narrow.ToCharArray());
/// <summary>Converts half-width characters to their full-width equivalents.</summary>
/// <remarks>
/// A half-width kana followed by a half-width voicing mark that has an entry in the two-char table
/// (for example ガ) collapses into one full-width char (ガ), so the result can be shorter than the input.
/// Characters without a mapping are copied unchanged.
/// </remarks>
/// <param name="span">Text to convert.</param>
/// <returns>The converted text.</returns>
public static string ToWide(this ReadOnlySpan<char> span)
{
    if (span.IsEmpty)
    {
        return string.Empty;
    }

    // Output never exceeds the input length. Modest inputs borrow a pooled buffer;
    // longer ones use a throw-away array. Both sizes share one conversion loop so
    // they can no longer disagree.
    bool usePool = span.Length <= ARRAY_POOL_LIMIT;
    char[] buffer = usePool ? ArrayPool<char>.Shared.Rent(span.Length) : new char[span.Length];
    try
    {
        int written = 0;
        for (int i = 0; i < span.Length; i++)
        {
            char c = span[i];
            if (i + 1 < span.Length)
            {
                char next = span[i + 1];
                // Every key of the two-char table ends in a half-width voiced (U+FF9E) or
                // semi-voiced (U+FF9F) mark, so the key string is only built when one follows.
                if ((next == '\uFF9E' || next == '\uFF9F')
                    && _toWideWordDic.TryGetValue(new string(span.Slice(i, 2)), out char composed))
                {
                    buffer[written++] = composed;
                    // The mark was consumed together with its base kana. Without this skip it
                    // would be emitted a second time as a stand-alone full-width mark.
                    i++;
                    continue;
                }
            }

            buffer[written++] = _toWideCharDic.TryGetValue(c, out char wide) ? wide : c;
        }

        // Construct directly from the buffer; buffer[..written] would allocate a copy.
        return new string(buffer, 0, written);
    }
    finally
    {
        if (usePool)
        {
            ArrayPool<char>.Shared.Return(buffer);
        }
    }
}
/// <summary>Converts full-width characters to their half-width equivalents.</summary>
/// <remarks>
/// Full-width kana listed in the two-char table (for example ガ) expand to a base kana plus a
/// voicing mark (ガ), so the result can be up to twice as long as the input.
/// Characters without a mapping are copied unchanged.
/// </remarks>
/// <param name="span">Text to convert.</param>
/// <returns>The converted text.</returns>
public static string ToNarrow(this ReadOnlySpan<char> span)
{
    if (span.IsEmpty)
    {
        return string.Empty;
    }

    // Worst case: every input char expands to two output chars.
    int capacity = span.Length * 2;
    // Modest inputs borrow a pooled buffer; longer ones use a throw-away array. Both sizes share
    // one conversion loop: the former large-input branch swapped key and value when walking the
    // dictionaries and therefore converted to full-width instead.
    bool usePool = span.Length <= ARRAY_POOL_LIMIT;
    char[] buffer = usePool ? ArrayPool<char>.Shared.Rent(capacity) : new char[capacity];
    try
    {
        int written = 0;
        foreach (char c in span)
        {
            if (_toNarrowWordDic.TryGetValue(c, out char[] expanded))
            {
                foreach (char part in expanded)
                {
                    buffer[written++] = part;
                }

                continue;
            }

            buffer[written++] = _toNarrowCharDic.TryGetValue(c, out char narrow) ? narrow : c;
        }

        // Construct directly from the buffer; buffer[..written] would allocate a copy.
        return new string(buffer, 0, written);
    }
    finally
    {
        if (usePool)
        {
            ArrayPool<char>.Shared.Return(buffer);
        }
    }
}
/// <summary>Returns the number of bytes the text occupies once converted to Shift-JIS.</summary>
/// <param name="span">Text to measure.</param>
/// <returns>The byte count reported by the Shift-JIS encoding for <paramref name="span"/>.</returns>
public static int GetSjisByteCount(this ReadOnlySpan<char> span)
{
    int byteCount = _sjis.GetByteCount(span);
    return byteCount;
}
/// <summary>Extracts a substring addressed by Shift-JIS byte positions rather than char positions.</summary>
/// <remarks>
/// A character is included when its last byte falls after <paramref name="startPos"/> and no later than
/// <paramref name="startPos"/> + <paramref name="finishPos"/>. A multi-byte character that straddles
/// <paramref name="startPos"/> is therefore included, and one that would cross the end is dropped.
/// </remarks>
/// <param name="span">Source text.</param>
/// <param name="startPos">Byte offset at which extraction starts.</param>
/// <param name="finishPos">Despite its name, the maximum number of bytes to take counted from <paramref name="startPos"/>.</param>
/// <returns>The extracted text, or an empty string when nothing falls inside the window.</returns>
public static string SubstringSjisByteLength(this ReadOnlySpan<char> span, int startPos, int finishPos)
{
    if (startPos > _sjis.GetByteCount(span))
    {
        return "";
    }

    // Widen before adding: in int arithmetic startPos + finishPos wraps negative when a caller
    // passes a huge length (e.g. int.MaxValue for "to the end"), which produced an empty result.
    long endPos = (long)startPos + finishPos;
    long consumed = 0;
    int first = -1;
    int end = 0;
    for (int i = 0; i < span.Length; i++)
    {
        // Measure one char at a time by slicing the span; no per-char array allocation.
        consumed += _sjis.GetByteCount(span.Slice(i, 1));
        if (consumed > endPos)
        {
            break;
        }

        if (consumed > startPos)
        {
            if (first < 0)
            {
                first = i;
            }

            end = i + 1;
        }
    }

    // The running byte total never decreases, so the selected chars form one contiguous slice.
    return first < 0 ? "" : new string(span[first..end]);
}
/// <summary>Splits the text into consecutive pieces that each occupy at most <paramref name="byteLength"/> Shift-JIS bytes.</summary>
/// <remarks>
/// Pieces are filled greedily from the start and never split a character. The text is scanned once
/// instead of being re-measured from the beginning for every piece.
/// </remarks>
/// <param name="span">Source text.</param>
/// <param name="byteLength">Maximum Shift-JIS byte size of each piece; must be positive.</param>
/// <returns>The pieces in order; an empty array for empty input.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="byteLength"/> is zero or negative.</exception>
/// <exception cref="ArgumentException">A single character needs more than <paramref name="byteLength"/> bytes.</exception>
public static string[] SubstringSjisByteLength(this ReadOnlySpan<char> span, int byteLength)
{
    // A non-positive budget can never make progress; it previously looped forever.
    if (byteLength <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteLength), byteLength, "Byte length must be positive.");
    }

    var chunks = new List<string>();
    int chunkStart = 0;
    int chunkBytes = 0;
    int i = 0;
    while (i < span.Length)
    {
        // Keep a surrogate pair together so a piece never ends on half a code point.
        // NOTE(review): Shift-JIS presumably cannot encode such characters and substitutes a
        // replacement; they are only measured here as a unit — confirm if this matters to callers.
        int step = char.IsHighSurrogate(span[i]) && i + 1 < span.Length && char.IsLowSurrogate(span[i + 1]) ? 2 : 1;
        int size = _sjis.GetByteCount(span.Slice(i, step));
        if (size > byteLength)
        {
            // Fail loudly instead of emitting empty pieces without ever advancing.
            throw new ArgumentException("Byte length is too small to hold a character of the input.", nameof(byteLength));
        }

        // Written as a subtraction so the comparison cannot overflow for very large budgets.
        if (chunkBytes > byteLength - size)
        {
            chunks.Add(new string(span[chunkStart..i]));
            chunkStart = i;
            chunkBytes = 0;
        }

        chunkBytes += size;
        i += step;
    }

    if (chunkStart < span.Length)
    {
        chunks.Add(new string(span[chunkStart..]));
    }

    return chunks.ToArray();
}
/// <summary>Decodes Shift-JIS bytes into a string.</summary>
/// <remarks>The method name says "Encode", but the operation performed is a decode (bytes to text).</remarks>
/// <param name="span">Shift-JIS encoded bytes.</param>
/// <returns>The decoded text.</returns>
public static string EncodeSjis(this ReadOnlySpan<byte> span)
    // Decode straight from the span; copying it into a temporary array first is unnecessary.
    => _sjis.GetString(span);
/// <summary>Decodes Shift-JIS bytes into a string and trims leading and trailing white space.</summary>
/// <remarks>The method name says "Encode", but the operation performed is a decode (bytes to text).</remarks>
/// <param name="span">Shift-JIS encoded bytes.</param>
/// <returns>The decoded text with surrounding white space removed.</returns>
public static string EncodeSjisTrimed(this ReadOnlySpan<byte> span)
    // Decode straight from the span; copying it into a temporary array first is unnecessary.
    => _sjis.GetString(span).Trim();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment