Skip to content

Instantly share code, notes, and snippets.

@furyutei
Last active May 4, 2024 05:14
Show Gist options
  • Save furyutei/a5b63e25fb904facb5d140c2f8ce2e53 to your computer and use it in GitHub Desktop.
Save furyutei/a5b63e25fb904facb5d140c2f8ce2e53 to your computer and use it in GitHub Desktop.
[VBA] 文字列の正規化

[VBA] 文字列の正規化

文字列中の、2文字にわかれている濁音/半濁音(例「た゛か゛は゜」)を、1文字(例「だがぱ」)に変換するような処理の例。
※元ネタ:橋ビロ子@hassy_birowさんのツイート
自分の📝メモ用ツリー

処理の例

参考

Option Explicit
Function 濁音正規化(ByVal SourceText As String) As String
SourceText = Replace(Replace(SourceText, ChrW(&H309B), ChrW(&H3099)), ChrW(&H309C), ChrW(&H309A))
' U+309B:KATAKANA-HIRAGANA VOICED SOUND MARK→U+3099:COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
' U+309C:KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK→U+309A:COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
Dim Delimiter
Delimiter = ChrW(&H3099): GoSub NORMALIZATION
Delimiter = ChrW(&H309A): GoSub NORMALIZATION
濁音正規化 = SourceText
Exit Function
NORMALIZATION:
Dim CodeOffset As Long: CodeOffset = IIf(Delimiter = ChrW(&H3099), 1, 2)
Dim Parts: Parts = Split(SourceText, Delimiter)
Dim Index As Long
Dim Pos As Long
Dim Part
Dim WorkChar
On Error Resume Next
For Index = 0 To UBound(Parts) - 1
Part = Parts(Index)
WorkChar = Right(Part, 1)
Select Case True
Case Delimiter = ChrW(&H3099) And WorkChar = "ウ"
Mid(Parts(Index), Len(Part), 1) = "ヴ"
Case Delimiter = ChrW(&H3099) And InStr("かきくけこさしすせそたちつてとはひふへほカキクケコサシスセソタチツテトハヒフヘホ", WorkChar) > 0, _
Delimiter = ChrW(&H309A) And InStr("はひふへほハヒフヘホ", WorkChar) > 0
Mid(Parts(Index), Len(Part), 1) = ChrW(AscW(WorkChar) + CodeOffset)
Case Else
Parts(Index) = Part & Delimiter
End Select
Next
On Error GoTo 0
SourceText = Join(Parts, "")
Return
End Function
Option Explicit
Private Declare PtrSafe Function NormalizeString Lib "normaliz" ( _
ByVal NormForm As Long, _
ByVal lpSrcString As LongPtr, _
ByVal cwSrcLength As Long, _
ByVal lpDstString As LongPtr, _
ByVal cwDstLength As Long) As Long ' NormForm: NFC(1) NFD(2) NFKC(5) NFKD(6)
Public Enum NORM_FORM
NormalizationOther = 0
NormalizationC = &H1
NormalizationD = &H2
NormalizationKC = &H5
NormalizationKD = &H6
End Enum
Private Const ERROR_INSUFFICIENT_BUFFER = 122
Private Const NORMALIZE_ERROR_NUMBER_OFFSET = 5000
Public Function 文字列正規化(ByVal SourceText As String, Optional NormalizationForm As NORM_FORM = NORM_FORM.NormalizationC) As String
Const MaxTryCount = 10
SourceText = Replace(Replace(SourceText, ChrW(&H309B), ChrW(&H3099)), ChrW(&H309C), ChrW(&H309A)) & vbNullChar
' U+309B:KATAKANA-HIRAGANA VOICED SOUND MARK→U+3099:COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
' U+309C:KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK→U+309A:COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
Dim BufferSize As Long
Dim Buffer As String
BufferSize = NormalizeString(NormalizationForm, StrPtr(SourceText), -1, 0, 0) ' 必要なバッファ長の見積もり(不十分な場合もあることに注意)
Dim TryCount As Long
For TryCount = 1 To MaxTryCount
Buffer = String(BufferSize, vbNullChar)
BufferSize = NormalizeString(NormalizationForm, StrPtr(SourceText), -1, StrPtr(Buffer), BufferSize)
If 0 < BufferSize Then
' 文字列正規化 = Left(Buffer, InStr(Buffer, vbNullChar) - 1)
文字列正規化 = Left(Buffer, BufferSize - 1)
Exit Function
End If
If Err.LastDllError <> ERROR_INSUFFICIENT_BUFFER Then
Exit For
End If
BufferSize = -BufferSize ' 新たなバッファ長の見積もりは負の整数で返される
Next
Err.Raise NORMALIZE_ERROR_NUMBER_OFFSET + Err.LastDllError, Description:="Failure to normalize unicode string"
End Function
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment