Skip to content

Instantly share code, notes, and snippets.

@brucemcpherson
Last active December 22, 2015 04:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brucemcpherson/6415587 to your computer and use it in GitHub Desktop.
Save brucemcpherson/6415587 to your computer and use it in GitHub Desktop.
pinyin converter custom vba functions
'gistThat@mcpher.com :do not modify this line - see ramblings.mcpher.com for details: updated on 02/09/2013 19:06:03 : from manifest:6415598 gist https://gist.github.com/brucemcpherson/6415587/raw/pinyin.vba
Option Explicit
Private Const NOTONE = 5
Private Const COLORTYPENONE = 0
Private Const COLORTYPEHTML = 1
' v1.0
'
' accents a pinyin string with optional html colorizing
' @param {string} inputString the string to be converted
' @param {number=} optColoringType - 0 = no coloring(default), 1 = return html with coloring options given
' @param {string=} optColors - a comma separated string of colors to use for tones 1-5 (5=no tone)- a default will be used if omitted
' return {string} the converted string (including colorization if required)
'
Public Function pinyinToToneMarks(inputString As String, _
Optional optColoringType As Long = COLORTYPENONE, Optional optColors As String = vbNullString) As String
pinyinToToneMarks = pinyin(inputString, optColoringType, optColors, False)
End Function
'
' replaces accents in pinyin string with tone numbers and adds optional html colorizing
' @param {string} inputString the string to be converted
' @param {number} optColoringType - 0 = no coloring(default), 1 = return html with coloring options given
' @param {string} optColors - a comma separated string of colors to use for tones 1-5 (5=no tone)- a default will be used if omitted
' return {string} the converted string (including colorization if required)
'
Public Function pinyinToToneNumbers(inputString As String, _
Optional optColoringType As Long = COLORTYPENONE, Optional optColors As String = vbNullString) As String
pinyinToToneNumbers = pinyin(inputString, optColoringType, optColors, True)
End Function
Private Function pinyin(inputString As String, _
Optional optColoringType As Long = COLORTYPENONE, Optional optColors As String = vbNullString, _
Optional optReversePinyin As Boolean = False)
Dim colors As Variant
If (optColors = vbNullString) Then
colors = Array("red", "orange", "green", "blue", "gray")
Else
colors = Split(optColors, ",")
End If
If (length(colors) <> 5) Then throw ("you need 5 colors")
If (optReversePinyin) Then
pinyin = pinyinReverse(inputString, colors, optColoringType)
Else
pinyin = pinyinRegular(inputString, colors, optColoringType)
End If
End Function
' convert from tone numbers to marked up pinyin
Private Function pinyinRegular(originalText As String, colors As Variant, coloringType As Long) As String
Dim matches As MatchCollection, match As match, consumedPos As Long, allWords As String, _
r As RegExp, text As String, catchup As String, t As String, tone As Long, _
word As String, dips As Variant, d As Long, k As String, _
vowels As Variant, transform As Variant, i As Long
consumedPos = 0
allWords = vbNullString
Set r = setUpSyllableRegex()
dips = getDips()
vowels = getVowels()
transform = getTransform()
' v hack - v equvalent to ü (I think)
text = replaceAll(originalText, "v", "ü")
' this regex is a table of all valid syllables
Set matches = r.Execute(text)
For Each match In matches
' this is a catch up separators and other stuff
catchup = Mid(text, consumedPos + 1, match.FirstIndex - consumedPos)
consumedPos = match.FirstIndex + match.length
' lookahead for a tone
t = Mid(text, consumedPos + 1, 1)
tone = NOTONE
' if there's a tone, then we want to remove it from the addbehind buffer (consume it)
If (t > "0" And t < CStr(NOTONE)) Then
consumedPos = consumedPos + 1
tone = Int(t)
End If
' if anything other than separators then its an unknown syllable
If (Len(catchup) > 0 And replaceAll(catchup, "\W", "XX") = catchup) Then
throw ("unknown syllable " & catchup & " before " & match.Value)
End If
' identify where character subs need to happen in vowel sets
word = match.Value
For d = LBound(dips) To UBound(dips)
k = dips(d)(LBound(dips(d)))
If (InStr(1, word, k, vbTextCompare) > 0) Then
word = replace(word, k, CStr(dips(d)(UBound(dips(d)))))
Exit For
End If
Next d
' now replace them
For i = LBound(vowels) To UBound(vowels)
word = replace(word, CStr(vowels(i)), CStr(transform(tone - 1)(i)))
Next i
' colorize
If (coloringType = COLORTYPEHTML And tone <> NOTONE And Len(word) > 0) Then
word = "<font color=" & colors(tone - 1) & ">" + word + "</font>"
End If
' add the finished product
allWords = allWords & catchup & word
Next match
' finish it up
If (consumedPos < Len(text)) Then allWords = allWords + Mid(text, consumedPos + 1)
pinyinRegular = allWords
End Function
' convert from marked up pinyin to tone numbers
Private Function pinyinReverse(text As String, colors As Variant, coloringType As Long) As String
Dim r As String, tones As Collection, vowelPos As Collection, flatten As String, _
i As Long, transform As Variant, newText As String, _
matches As MatchCollection, match As match, pat As RegExp, pos As Long, _
v As String, vowels As Variant, re As RegExp, consumedPos As Long, allWords As String, _
catchup As String, t As String, tone As Long, _
word As String, dips As Variant, d As Long, k As String
r = vbNullString
flatten = vbNullString
transform = getTransform()
vowels = getVowels()
newText = text
Set tones = New Collection
Set vowelPos = New Collection
Set re = setUpSyllableRegex()
' create a regex from the transform vowel table
For i = LBound(transform) To UBound(transform)
If (i <> NOTONE - 1) Then
If (r <> vbNullString) Then r = r & "|"
r = r & join(transform(i), "|")
flatten = flatten & join(transform(i), "")
End If
Next i
' we'll transform the markers to regular vowels and remember their tone numbers
Set pat = New RegExp
With pat
.Global = True
.Pattern = r
Set matches = .Execute(text)
End With
' look at each syllable
For Each match In matches
' should find vowel cluster
pos = InStr(1, flatten, match.Value)
If (pos < 1) Then throw ("could not find " & match.Value & " equivalent")
' this is the substitute vowel
v = Mid(vowels((pos - 1) Mod length(transform(0))), 1, 1)
newText = Mid(newText, 1, match.FirstIndex) & v & Mid(newText, 1 + match.FirstIndex + Len(v))
''newText = newText.substr(0, match.index) + v + newText.substr(match.index+v.length);
' remember where this happened and what the tone was
k = "pos" & match.FirstIndex
tones.Add 1 + Int((pos - 1) / length(transform(0))), k
vowelPos.Add v, k
Next match
' now we can use the logic for parsing the other direction
Set matches = re.Execute(newText)
For Each match In matches
' this is a catch up separators and other stuff
catchup = Mid(text, consumedPos + 1, match.FirstIndex - consumedPos)
consumedPos = match.FirstIndex + match.length
' figure out the tone
tone = NOTONE
For i = match.FirstIndex To match.FirstIndex + Len(match.Value) - 1
k = "pos" & i
If (exists(tones, k)) Then
tone = tones(k)
If (vowelPos(k) <> Mid(newText, i + 1, 1)) Then
throw ("vowel mismatch at " & match.Value & ":" & i & ":" & vowelPos(k))
End If
End If
Next i
' if there is a tone we need to insert it
allWords = allWords & catchup & match.Value
If (tone <> NOTONE) Then allWords = allWords & tone
Next match
' finish it up
If (consumedPos < Len(text)) Then allWords = allWords + Mid(text, consumedPos + 1)
pinyinReverse = allWords
Set tones = Nothing
Set vowelPos = Nothing
End Function
Private Function exists(co As Collection, k As String) As Boolean
Dim A As Variant
On Error GoTo crapped
A = co(k)
exists = True
Exit Function
crapped:
exists = False
Exit Function
End Function
Private Sub throw(message)
Err.Raise vbObjectError + 22000, "pinyin convertor", message
End Sub
Private Function cond(test As Boolean, ifTrue As Variant, ifFalse As Variant) As Variant
If (test) Then
cond = ifTrue
Else
cond = ifFalse
End If
End Function
Private Function getSylReg() As String
getSylReg = "huan(?=gu|ge)|yuan(?=gu|ge)|jian(?=gu|ge)|" & _
"zhuang|chuang|shuang|niang|liang|guang|kuang|huang|zhang|zheng|zhong|zhuai|zhuan|" & _
"chang|cheng|chong|chuai|chuan|shang|sheng|shuai|shuan|jiang|jiong|qiang|qiong|xiang|xiong|" & _
"fan(?=gu|ge)|gan(?=gu|ge)|wan(?=gu|ge)|kan(?=gu|ge)|jin(?=gu|ge)|" & _
"uang|ueng|iang|iong|bang|beng|biao|bian|bing|pang|peng|piao|pian|ping|mang|meng|miao|mian|" & _
"ming|fang|feng|dang|deng|dong|duan|diao|dian|ding|tang|teng|tong|tuan|tiao|tian|ting|nang|" & _
"neng|nong|nuan|niao|nian|ning|lang|leng|long|luan|liao|lian|ling|gang|geng|gong|guai|guan|" & _
"kang|keng|kong|kuai|kuan|hang|heng|hong|huai|huan|zang|zeng|zong|zuan|cang|ceng|cong|cuan|" & _
"sang|seng|song|suan|zhai|zhei|zhao|zhou|zhan|zhen|zhua|zhuo|zhui|zhun|chai|chao|chou|chan|" & _
"chen|chua|chuo|chui|chun|shai|shei|shao|shou|shan|shen|shua|shuo|shui|shun|rang|reng|rong|" & _
"ruan|jiao|jian|jing|juan|qiao|qian|qing|quan|xiao|xian|xing|xuan|wang|weng|yang|ying|yong|" & _
"yuan|" & _
"qu(?=n)|ke(?=n)|" & _
"ang|eng|ong|uai|uan|iao|ian|ing|üan|bai|bei|bao|ban|ben|bie|bin|pai|pei|pao|pou|pan|" & _
"pen|pie|pin|mai|mei|mao|mou|man|men|mie|miu|min|fan|fei|fou|fen|gan|dai|dei|dao|dou|dan|den|" & _
"duo|dui|dun|dia|die|diu|tai|tei|tao|tou|tan|tuo|tui|tun|tie|nai|nei|nao|nou|nan|nen|nuo|nun|" & _
"nie|niu|nin|nüe|lai|lei|lao|lou|lan|luo|lun|lia|lie|liu|lin|lüe|gai|gei|gao|gou|gen|gua|" & _
"guo|gui|gun|kai|kei|kao|kou|kan|ken|kua|kuo|kui|kun|hai|hei|hao|hou|han|hen|hua|huo|hui|hun|" & _
"zai|zei|zao|zou|zan|zen|zuo|zui|zun|cai|cao|cou|can|cen|cuo|cui|cun|sai|sao|sou|san|sen|suo|" & _
"sui|sun|zha|zhe|zhu|zhi|cha|che|chu|chi|sha|she|shu|shi|rao|rou|ran|ren|rua|ruo|rui|run|jia|" & _
"jie|jiu|jin|jue|jun|qia|qie|qiu|qin|que|qun|xia|xie|xiu|xin|xue|xun|ang|eng|wai|wei|wan|wen|" & _
"yao|you|yan|yin|yue|yun|ai|ei|ao|ou|an|en|ua|uo|ui|un|ia|ie|iu|in|üe|ün|ba|bo|bu|bi|pa|po|pu|" & _
"pi|ma|mo|me|mu|mi|fa|fo|fu|da|de|du|di|ta|te|tu|ti|na|ne|nu|ni|nü|la|le|lu|li|lü|ga|ge|gu|ka|" & _
"ke|ku|ha|he|hu|za|ze|zu|zi|ca|ce|cu|ci|sa|se|su|si|re|ru|ri|ji|ju|qi|qu|xi|xu|ai|ei|ao|ou|an|" & _
"en|wu|wa|wo|yi|ya|ye|yu|er|a|o|e|u|i|ü|a|o|e"
End Function
Private Function getDips() As Variant
getDips = Array( _
Array("iao", "ia*o"), Array("uai", "ua*i"), Array("ai", "a*i"), Array("ao", "a*o"), _
Array("ei", "e*i"), Array("ia", "ia*"), Array(" ie", "ie*"), Array("io", "io*"), _
Array("iu", "iu*"), Array("Ai", "A*i"), Array("Ao", "A*o"), Array("Ei", "E*i"), _
Array("ou", "o*u"), Array("ua", "ua*"), Array("ue", "ue*"), Array("ui", "ui*"), _
Array("uo", "uo*"), Array("ve", "üe*"), Array("üe", "üe*"), Array("Ou", "O*u"), _
Array("a", "a*"), Array("e", "e*"), Array("i", "i*"), Array("o", "o*"), _
Array("u", "u*"), Array("ü", "ü*"), Array("v", "v*"), Array("A", "A*"), _
Array("E", "E*"), Array("O", "O*"))
End Function
Private Function getVowels() As Variant
getVowels = Array("a*", "e*", "i*", "o*", "u*", "ü*", "v*", "A*", "E*", "O*")
End Function
Private Function getTransform() As Variant
getTransform = Array( _
Array(ChrW(&H101), ChrW(&H113), ChrW(&H12B), ChrW(&H14D), ChrW(&H16B), ChrW(&H1D6), ChrW(&H1D6), ChrW(&H100), ChrW(&H112), ChrW(&H12A), ChrW(&H14C)), _
Array(ChrW(&HE1), ChrW(&HE9), ChrW(&HED), ChrW(&HF3), ChrW(&HFA), ChrW(&H1D8), ChrW(&H1D8), ChrW(&HC1), ChrW(&HC9), ChrW(&HCD), ChrW(&HD3)), _
Array(ChrW(&H1CE), ChrW(&H11B), ChrW(&H1D0), ChrW(&H1D2), ChrW(&H1D4), ChrW(&H1DA), ChrW(&H1DA), ChrW(&H1CD), ChrW(&H11A), ChrW(&H1CF), ChrW(&H1D1)), _
Array(ChrW(&HE0), ChrW(&HE8), ChrW(&HEC), ChrW(&HF2), ChrW(&HF9), ChrW(&H1DC), ChrW(&H1DC), ChrW(&HC0), ChrW(&HC8), ChrW(&HCC), ChrW(&HD2)), _
Array(ChrW(&H61), ChrW(&H65), ChrW(&H69), ChrW(&H6F), ChrW(&H75), ChrW(&HFC), ChrW(&H76), ChrW(&H41), ChrW(&H45), ChrW(&H49), ChrW(&H4F)))
End Function
Private Function replaceAll(str As String, find As String, replace As String) As String
Dim r As RegExp
Set r = New RegExp
With r
.Global = True
.Pattern = find
End With
replaceAll = r.replace(str, replace)
End Function
Private Function setUpSyllableRegex() As RegExp
Dim r As RegExp
Set r = New RegExp
With r
.IgnoreCase = True
.Global = True
.Pattern = getSylReg()
End With
Set setUpSyllableRegex = r
End Function
Private Function join(A As Variant, d As String) As String
Dim s As String, i As Long
s = vbNullString
For i = LBound(A) To UBound(A)
If s <> vbNullString Then s = s & d
s = s & A(i)
Next i
join = s
End Function
Private Function length(A As Variant) As Long
length = UBound(A) - LBound(A) + 1
End Function
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment