Skip to content

Instantly share code, notes, and snippets.

@Yoxem
Last active September 23, 2017 10:59
Show Gist options
  • Save Yoxem/e5f5908d1ea443941b9e to your computer and use it in GitHub Desktop.
Save Yoxem/e5f5908d1ea443941b9e to your computer and use it in GitHub Desktop.
PhakFaSutoTHRS.js
// A converter from Hakka Pha̍k-fa-sṳ (PFS) to the Taiwanese Hakka Romanization System (THRS) for the Miaoli Sixian dialect
// demo: http://yoxem.github.io/2015/01/03/Hakka_Phak-fa-su_to_Taiwanese_Hakka_Romanization_System/
// ver 0.12 on 2016-01-03 under MIT/X11 license
// XRegExp 2.0.0 <xregexp.com> MIT License from here
// Vendored, minified third-party library; do not edit by hand.
// Defines the global `XRegExp` (an already-defined XRegExp is kept) and also
// assigns it to `exports.XRegExp` when an `exports` object exists.
// NOTE: this code must not be hard-wrapped. A line break inside the string
// literal "... before using addToken" is a syntax error, and a line break
// directly after a `return` makes automatic semicolon insertion turn it into
// `return;` (the split implementation would then return undefined). The only
// line breaks kept below sit right after a `var` keyword, where they are harmless.
var XRegExp;XRegExp=XRegExp||function(n){"use strict";function v(n,i,r){var u;for(u in t.prototype)t.prototype.hasOwnProperty(u)&&(n[u]=t.prototype[u]);return n.xregexp={captureNames:i,isNative:!!r},n}function g(n){return(n.global?"g":"")+(n.ignoreCase?"i":"")+(n.multiline?"m":"")+(n.extended?"x":"")+(n.sticky?"y":"")}function o(n,r,u){if(!t.isRegExp(n))throw new TypeError("type RegExp expected");var f=i.replace.call(g(n)+(r||""),h,"");return u&&(f=i.replace.call(f,new RegExp("["+u+"]+","g"),"")),n=n.xregexp&&!n.xregexp.isNative?v(t(n.source,f),n.xregexp.captureNames?n.xregexp.captureNames.slice(0):null):v(new RegExp(n.source,f),null,!0)}function a(n,t){var i=n.length;if(Array.prototype.lastIndexOf)return n.lastIndexOf(t);while(i--)if(n[i]===t)return i;return-1}function s(n,t){return Object.prototype.toString.call(n).toLowerCase()==="[object "+t+"]"}function d(n){return n=n||{},n==="all"||n.all?n={natives:!0,extensibility:!0}:s(n,"string")&&(n=t.forEach(n,/[^\s,]+/,function(n){this[n]=!0},{})),n}function ut(n,t,i,u){var o=p.length,s=null,e,f;y=!0;try{while(o--)if(f=p[o],(f.scope==="all"||f.scope===i)&&(!f.trigger||f.trigger.call(u))&&(f.pattern.lastIndex=t,e=r.exec.call(f.pattern,n),e&&e.index===t)){s={output:f.handler.call(u,e,i),match:e};break}}catch(h){throw h;}finally{y=!1}return s}function b(n){t.addToken=c[n?"on":"off"],f.extensibility=n}function tt(n){RegExp.prototype.exec=(n?r:i).exec,RegExp.prototype.test=(n?r:i).test,String.prototype.match=(n?r:i).match,String.prototype.replace=(n?r:i).replace,String.prototype.split=(n?r:i).split,f.natives=n}var
t,c,u,f={natives:!1,extensibility:!1},i={exec:RegExp.prototype.exec,test:RegExp.prototype.test,match:String.prototype.match,replace:String.prototype.replace,split:String.prototype.split},r={},k={},p=[],e="default",rt="class",it={"default":/^(?:\\(?:0(?:[0-3][0-7]{0,2}|[4-7][0-7]?)?|[1-9]\d*|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|\(\?[:=!]|[?*+]\?|{\d+(?:,\d*)?}\??)/,"class":/^(?:\\(?:[0-3][0-7]{0,2}|[4-7][0-7]?|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S]))/},et=/\$(?:{([\w$]+)}|(\d\d?|[\s\S]))/g,h=/([\s\S])(?=[\s\S]*\1)/g,nt=/^(?:[?*+]|{\d+(?:,\d*)?})\??/,ft=i.exec.call(/()??/,"")[1]===n,l=RegExp.prototype.sticky!==n,y=!1,w="gim"+(l?"y":"");return t=function(r,u){if(t.isRegExp(r)){if(u!==n)throw new TypeError("can't supply flags when constructing one RegExp from another");return o(r)}if(y)throw new Error("can't call the XRegExp constructor within token definition functions");var l=[],a=e,b={hasNamedCapture:!1,captureNames:[],hasFlag:function(n){return u.indexOf(n)>-1}},f=0,c,s,p;if(r=r===n?"":String(r),u=u===n?"":String(u),i.match.call(u,h))throw new SyntaxError("invalid duplicate regular expression flag");for(r=i.replace.call(r,/^\(\?([\w$]+)\)/,function(n,t){if(i.test.call(/[gy]/,t))throw new SyntaxError("can't use flag g or y in mode modifier");return u=i.replace.call(u+t,h,""),""}),t.forEach(u,/[\s\S]/,function(n){if(w.indexOf(n[0])<0)throw new SyntaxError("invalid regular expression flag "+n[0]);});f<r.length;)c=ut(r,f,a,b),c?(l.push(c.output),f+=c.match[0].length||1):(s=i.exec.call(it[a],r.slice(f)),s?(l.push(s[0]),f+=s[0].length):(p=r.charAt(f),p==="["?a=rt:p==="]"&&(a=e),l.push(p),++f));return v(new RegExp(l.join(""),i.replace.call(u,/[^gimy]+/g,"")),b.hasNamedCapture?b.captureNames:null)},c={on:function(n,t,r){r=r||{},n&&p.push({pattern:o(n,"g"+(l?"y":"")),handler:t,scope:r.scope||e,trigger:r.trigger||null}),r.customFlags&&(w=i.replace.call(w+r.customFlags,h,""))},off:function(){throw new Error("extensibility must be installed before using addToken");}},t.addToken=c.off,t.cache=function(n,i){var r=n+"/"+(i||"");return k[r]||(k[r]=t(n,i))},t.escape=function(n){return i.replace.call(n,/[-[\]{}()*+?.,\\^$|#\s]/g,"\\$&")},t.exec=function(n,t,i,u){var e=o(t,"g"+(u&&l?"y":""),u===!1?"y":""),f;return e.lastIndex=i=i||0,f=r.exec.call(e,n),u&&f&&f.index!==i&&(f=null),t.global&&(t.lastIndex=f?e.lastIndex:0),f},t.forEach=function(n,i,r,u){for(var e=0,o=-1,f;f=t.exec(n,i,e);)r.call(u,f,++o,n,i),e=f.index+(f[0].length||1);return u},t.globalize=function(n){return o(n,"g")},t.install=function(n){n=d(n),!f.natives&&n.natives&&tt(!0),!f.extensibility&&n.extensibility&&b(!0)},t.isInstalled=function(n){return!!f[n]},t.isRegExp=function(n){return s(n,"regexp")},t.matchChain=function(n,i){return function r(n,u){for(var o=i[u].regex?i[u]:{regex:i[u]},f=[],s=function(n){f.push(o.backref?n[o.backref]||"":n[0])},e=0;e<n.length;++e)t.forEach(n[e],o.regex,s);return u===i.length-1||!f.length?f:r(f,u+1)}([n],0)},t.replace=function(i,u,f,e){var c=t.isRegExp(u),s=u,h;return c?(e===n&&u.global&&(e="all"),s=o(u,e==="all"?"g":"",e==="all"?"":"g")):e==="all"&&(s=new RegExp(t.escape(String(u)),"g")),h=r.replace.call(String(i),s,f),c&&u.global&&(u.lastIndex=0),h},t.split=function(n,t,i){return r.split.call(n,t,i)},t.test=function(n,i,r,u){return!!t.exec(n,i,r,u)},t.uninstall=function(n){n=d(n),f.natives&&n.natives&&tt(!1),f.extensibility&&n.extensibility&&b(!1)},t.union=function(n,i){var l=/(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*]/g,o=0,f,h,c=function(n,t,i){var r=h[o-f];if(t){if(++o,r)return"(?<"+r+">"}else if(i)return"\\"+(+i+f);return n},e=[],r,u;if(!(s(n,"array")&&n.length))throw new TypeError("patterns must be a nonempty array");for(u=0;u<n.length;++u)r=n[u],t.isRegExp(r)?(f=o,h=r.xregexp&&r.xregexp.captureNames||[],e.push(t(r.source).source.replace(l,c))):e.push(t.escape(r));return t(e.join("|"),i)},t.version="2.0.0",r.exec=function(t){var
r,f,e,o,u;if(this.global||(o=this.lastIndex),r=i.exec.apply(this,arguments),r){if(!ft&&r.length>1&&a(r,"")>-1&&(e=new RegExp(this.source,i.replace.call(g(this),"g","")),i.replace.call(String(t).slice(r.index),e,function(){for(var t=1;t<arguments.length-2;++t)arguments[t]===n&&(r[t]=n)})),this.xregexp&&this.xregexp.captureNames)for(u=1;u<r.length;++u)f=this.xregexp.captureNames[u-1],f&&(r[f]=r[u]);this.global&&!r[0].length&&this.lastIndex>r.index&&(this.lastIndex=r.index)}return this.global||(this.lastIndex=o),r},r.test=function(n){return!!r.exec.call(this,n)},r.match=function(n){if(t.isRegExp(n)){if(n.global){var u=i.match.apply(this,arguments);return n.lastIndex=0,u}}else n=new RegExp(n);return r.exec.call(n,this)},r.replace=function(n,r){var e=t.isRegExp(n),u,f,h,o;return e?(n.xregexp&&(u=n.xregexp.captureNames),n.global||(o=n.lastIndex)):n+="",s(r,"function")?f=i.replace.call(String(this),n,function(){var t=arguments,i;if(u)for(t[0]=new String(t[0]),i=0;i<u.length;++i)u[i]&&(t[0][u[i]]=t[i+1]);return e&&n.global&&(n.lastIndex=t[t.length-2]+t[0].length),r.apply(null,t)}):(h=String(this),f=i.replace.call(h,n,function(){var n=arguments;return i.replace.call(String(r),et,function(t,i,r){var f;if(i){if(f=+i,f<=n.length-3)return n[f]||"";if(f=u?a(u,i):-1,f<0)throw new SyntaxError("backreference to undefined group "+t);return n[f+1]||""}if(r==="$")return"$";if(r==="&"||+r==0)return n[0];if(r==="`")return n[n.length-1].slice(0,n[n.length-2]);if(r==="'")return n[n.length-1].slice(n[n.length-2]+n[0].length);if(r=+r,!isNaN(r)){if(r>n.length-3)throw new SyntaxError("backreference to undefined group "+t);return n[r]||""}throw new SyntaxError("invalid token "+t);})})),e&&(n.lastIndex=n.global?0:o),f},r.split=function(r,u){if(!t.isRegExp(r))return i.split.apply(this,arguments);var e=String(this),h=r.lastIndex,f=[],o=0,s;return u=(u===n?-1:u)>>>0,t.forEach(e,r,function(n){n.index+n[0].length>o&&(f.push(e.slice(o,n.index)),n.length>1&&n.index<e.length&&Array.prototype.push.apply(f,n.slice(1)),s=n[0].length,o=n.index+s)}),o===e.length?(!i.test.call(r,"")||s)&&f.push(""):f.push(e.slice(o)),r.lastIndex=h,f.length>u?f.slice(0,u):f},u=c.on,u(/\\([ABCE-RTUVXYZaeg-mopqyz]|c(?![A-Za-z])|u(?![\dA-Fa-f]{4})|x(?![\dA-Fa-f]{2}))/,function(n,t){if(n[1]==="B"&&t===e)return n[0];throw new SyntaxError("invalid escape "+n[0]);},{scope:"all"}),u(/\[(\^?)]/,function(n){return n[1]?"[\\s\\S]":"\\b\\B"}),u(/(?:\(\?#[^)]*\))+/,function(n){return i.test.call(nt,n.input.slice(n.index+n[0].length))?"":"(?:)"}),u(/\\k<([\w$]+)>/,function(n){var t=isNaN(n[1])?a(this.captureNames,n[1])+1:+n[1],i=n.index+n[0].length;if(!t||t>this.captureNames.length)throw new SyntaxError("backreference to undefined group "+n[0]);return"\\"+t+(i===n.input.length||isNaN(n.input.charAt(i))?"":"(?:)")}),u(/(?:\s+|#.*)+/,function(n){return i.test.call(nt,n.input.slice(n.index+n[0].length))?"":"(?:)"},{trigger:function(){return this.hasFlag("x")},customFlags:"x"}),u(/\./,function(){return"[\\s\\S]"},{trigger:function(){return this.hasFlag("s")},customFlags:"s"}),u(/\(\?P?<([\w$]+)>/,function(n){if(!isNaN(n[1]))throw new SyntaxError("can't use integer as capture name "+n[0]);return this.captureNames.push(n[1]),this.hasNamedCapture=!0,"("}),u(/\\(\d+)/,function(n,t){if(!(t===e&&/^[1-9]/.test(n[1])&&+n[1]<=this.captureNames.length)&&n[1]!=="0")throw new SyntaxError("can't use octal escape or backreference to undefined group "+n[0]);return n[0]},{scope:"all"}),u(/\((?!\?)/,function(){return this.hasFlag("n")?"(?:":(this.captureNames.push(null),"(")},{customFlags:"n"}),typeof exports!="undefined"&&(exports.XRegExp=t),t}();
// end of XRegExp 2.0.0
// convert pfs to THRS
/*
 * Convert ONE Pha̍k-fa-sṳ (PFS) syllable to the Taiwanese Hakka Romanization
 * System (THRS).
 *
 * input   - a single syllable (no hyphen or whitespace); letter case is kept.
 *           Tone diacritics may be precomposed (â) or combining (a + U+0302).
 * returns - the THRS spelling as an HTML fragment: the PFS tone diacritic is
 *           removed and, for the tones THRS marks, "<sup>ˊ</sup>",
 *           "<sup>ˇ</sup>" or "<sup>ˋ</sup>" is appended.
 *
 * Every rewrite rule is applied once, to its first match, in the order listed,
 * which is enough for a single syllable.  Only native RegExp is used.
 */
function pfs_to_THRS(input){
    // "i" in every spelling PFS gives it: plain i/I (a combining tone mark may
    // follow) or precomposed î ì í / Î Ì Í.
    var I_ANY = "[iI\u00EE\u00EC\u00ED\u00CE\u00CC\u00CD]";
    // replacer for "Y" + i-vowel: drop the glide, keep the syllable capitalised
    var upperVowel = function (whole, vowel) { return vowel.toUpperCase(); };

    var rules = [
        /* onset consonant (shengmu) */
        [/p([^Hh]+)/, "b$1"], // p -> b
        [/P([^Hh]+)/, "B$1"], // P -> B
        [/p[Hh](.+)/, "p$1"], // ph -> p
        [/P[Hh](.+)/, "P$1"], // PH -> P
        [/t([^Hh]+)/, "d$1"], // t -> d
        [/T([^Hh]+)/, "D$1"], // T -> D
        [/t[Hh](.+)/, "t$1"], // th -> t
        [/T[Hh](.+)/, "T$1"], // TH -> T
        [/k([^Hh]+)/, "g$1"], // k -> g
        [/K([^Hh]+)/, "G$1"], // K -> G
        [/k[Hh](.+)/, "k$1"], // kh -> k
        [/K[Hh](.+)/, "K$1"], // KH -> K
        /* palatalized ch, chh, s (before any i) -> j, q, x */
        [new RegExp("c[Hh](?=" + I_ANY + ")"), "j"],    // chi -> ji
        [new RegExp("C[Hh](?=" + I_ANY + ")"), "J"],    // CHI -> JI
        [new RegExp("c[Hh]{2}(?=" + I_ANY + ")"), "q"], // chhi -> qi
        [new RegExp("C[Hh]{2}(?=" + I_ANY + ")"), "Q"], // CHHI -> QI
        [new RegExp("s(?=" + I_ANY + ")"), "x"],        // si -> xi
        [new RegExp("S(?=" + I_ANY + ")"), "X"],        // SI -> XI
        /* not palatalized ch, chh -> z, c (s stays s) */
        [/c[Hh]([^Hh]+)/, "z$1"], // ch -> z
        [/C[Hh]([^Hh]+)/, "Z$1"], // CH -> Z
        [/c[Hh]{2}(.+)/, "c$1"],  // chh -> c
        [/C[Hh]{2}(.+)/, "C$1"],  // CHH -> C
        /* y to i: before an i (toned or not) the glide is simply dropped, so a
           precomposed toned i can no longer produce a doubled "ii" */
        [new RegExp("y(?=" + I_ANY + ")"), ""],          // yi- -> i-
        [new RegExp("Y(" + I_ANY + ")"), upperVowel],    // Yi- -> I-
        [/y(.+)/, "i$1"], // y- -> i-
        [/Y(.+)/, "I$1"], // Y- -> I-
        /* removing duplicated i */
        [/i[iI]+(.*)/, "i$1"], // ii -> i
        [/I[iI]+(.*)/, "I$1"], // II -> I
        /* coda consonant (yunwei); (.+) keeps a syllable-initial letter untouched */
        [/(.+)p/, "$1b"], // -p -> -b
        [/(.+)P/, "$1B"], // -P -> -B
        [/(.+)t/, "$1d"], // -t -> -d
        [/(.+)T/, "$1D"], // -T -> -D
        [/(.+)k/, "$1g"], // -k -> -g
        [/(.+)K/, "$1G"]  // -K -> -G
    ];

    var result = input;
    for (var r = 0; r < rules.length; r++){
        result = result.replace(rules[r][0], rules[r][1]);
    }

    /* add THRS tone mark */
    // The "g" of an -ng coda is part of a nasal, not a stop, so hide "ng"
    // before looking for a stop coda; U+030D (yangru) between the vowel and
    // the stop keeps the syllable unmarked.
    var hasPlainStopCoda = /[aeiou\u1E73][^\u030D]*[bdg]/i.test(result.replace(/ng/gi, ""));
    if (/[\u00E2\u00EA\u00EE\u00F4\u00FB\u0302]/i.test(result)){result += "<sup>ˊ</sup>";} // yinping tone (circumflex)
    else if (/[\u00E0\u00E8\u00EC\u00F2\u00F9\u01F9\u0300]/i.test(result)){result += "<sup>ˇ</sup>";} // yangping tone (grave)
    else if (/[\u00E1\u00E9\u00ED\u00F3\u00FA\u0144\u1E3F\u0301]/i.test(result)){result += "<sup>ˋ</sup>";} // shang tone (acute)
    else if (hasPlainStopCoda){result += "<sup>ˋ</sup>";} // yinru tone

    /* convert ṳ to ii, then remove the PFS tone marks */
    var plain = [
        [/\u1E73|u\u0324/g, "ii"],         // ṳ (precomposed or u + U+0324)
        [/\u1E72|U\u0324/g, "II"],         // Ṳ
        [/[\u00E2\u00E0\u00E1]/g, "a"],    // â à á
        [/[\u00C2\u00C0\u00C1]/g, "A"],    // Â À Á
        [/[\u00EA\u00E8\u00E9]/g, "e"],    // ê è é
        [/[\u00CA\u00C8\u00C9]/g, "E"],    // Ê È É
        [/[\u00EE\u00EC\u00ED]/g, "i"],    // î ì í
        [/[\u00CE\u00CC\u00CD]/g, "I"],    // Î Ì Í
        [/\u1E3F/g, "m"],                  // ḿ
        [/\u1E3E/g, "M"],                  // Ḿ
        [/[\u01F9\u0144]/g, "n"],          // ǹ ń
        [/[\u01F8\u0143]/g, "N"],          // Ǹ Ń
        [/[\u00F4\u00F2\u00F3]/g, "o"],    // ô ò ó
        [/[\u00D4\u00D2\u00D3]/g, "O"],    // Ô Ò Ó
        [/[\u00FB\u00F9\u00FA]/g, "u"],    // û ù ú
        [/[\u00DB\u00D9\u00DA]/g, "U"],    // Û Ù Ú
        // combining grave, acute, circumflex and vertical line above
        [/[\u0300\u0301\u0302\u030D]/g, ""]
    ];
    for (var s = 0; s < plain.length; s++){
        result = result.replace(plain[s][0], plain[s][1]);
    }
    /* oe -> ue */
    result = result.replace(/(.*)oe(.*)/, "$1ue$2");
    /* return the THRS */
    return result;
}
/*
 * Convert Pha̍k-fa-sṳ (PFS) text to an IPA transcription with Chao tone letters.
 *
 * input_string - PFS text; syllables are separated by hyphens and/or whitespace.
 * returns      - the lower-cased text with every syllable transcribed; a hyphen
 *                becomes the syllable break ".", whitespace is kept as is.
 *
 * A yinping syllable takes its sandhi form "˩˩" unless it is the last syllable
 * of a word or the following syllable blocks the sandhi (see below).
 */
function PFStoIPA(input_string){
    // PFS tone diacritics, precomposed letter or combining mark (input is lower-cased)
    var CIRCUMFLEX = /[\u00E2\u00EA\u00EE\u00F4\u00FB\u0302]/;        // â ê î ô û
    var GRAVE = /[\u00E0\u00E8\u00EC\u00F2\u00F9\u01F9\u0300]/;       // à è ì ò ù ǹ
    var ACUTE = /[\u00E1\u00E9\u00ED\u00F3\u00FA\u0144\u1E3F\u0301]/; // á é í ó ú ń ḿ
    var YINRU = /^[^\u030D]+[ptk]$/;  // stop coda without the yangru mark U+030D

    // delete orig tone mark
    var toneStrip = [
        [/[\u00E2\u00E0\u00E1]/g, "a"],
        [/[\u00EA\u00E8\u00E9]/g, "e"],
        [/[\u00EE\u00EC\u00ED]/g, "i"],
        [/\u1E3F/g, "m"],
        [/[\u01F9\u0144]/g, "n"],
        [/[\u00F4\u00F2\u00F3]/g, "o"],
        [/[\u00FB\u00F9\u00FA]/g, "u"],
        // combining grave, acute, circumflex, vertical line above
        // (U+031A "no audible release", added below, must survive)
        [/[\u0300\u0301\u0302\u030D]/g, ""]
    ];
    // consonants and some (semi)-vowels to ipa; order matters
    var segments = [
        [/ch([i].*)/g, "t͡ɕ$1"],   // ch(i) -> j
        [/chh([i].*)/g, "t͡ɕʰ$1"], // chh(i) -> q
        [/chh/g, "t͡sʰ"],          // other chh
        [/ch/g, "t͡s"],            // other ch
        [/s([i].*)/g, "ɕ$1"],      // s(i) -> x
        [/(.+)h/g, "$1ʰ"],         // aspiration h (never syllable-initial h)
        [/ng([i].*)/g, "n̠ʲ$1"],    // ng(i) -> palatalized
        [/ng/g, "ŋ"],              // other ng
        [/\u1E73|u\u0324/g, "ɨ"],  // ṳ (precomposed or u + U+0324)
        [/y/g, "i"]                // y to i
    ];

    // the capture group keeps the separators as their own array items
    var tokens = input_string.toLowerCase().split(/([\s-]+)/);
    for (var i = 0; i < tokens.length; i++){
        var token = tokens[i];

        /* add ipa tone letters */
        if (CIRCUMFLEX.test(token)){ // yinping tone
            // last syllable of a word: end of input, or followed by whitespace
            var isLast = (i === tokens.length - 1) || /^\s+$/.test(tokens[i + 1]);
            // a following shang (acute), yangping (grave) or yinru syllable
            // doesn't trigger tone sandhi; tokens[i + 1] is the separator
            var next = tokens[i + 2];
            var blocksSandhi = next !== undefined &&
                (ACUTE.test(next) || GRAVE.test(next) || YINRU.test(next));
            token += (isLast || blocksSandhi) ? "˨˦" : "˩˩";
        }
        else if (GRAVE.test(token)){token += "˩˩";} // yangping tone
        else if (ACUTE.test(token)){token += "˧˩";} // shang tone
        else if (/.+[ptk]$/.test(token)){           // ru tone
            token += "\u031A";
            token += /\u030D/.test(token) ? "˥" : "˨"; // yangru : yinru
        }
        else if (/\w+/.test(token)){token += "˥˥";} // yinqu tone
        // separators: a hyphen becomes the syllable break ".", anything else
        // (whitespace, empty string) is left untouched
        else if (/-/.test(token)){token = token.replace(/-/g, ".");}
        else {continue;}

        var k;
        for (k = 0; k < toneStrip.length; k++){
            token = token.replace(toneStrip[k][0], toneStrip[k][1]);
        }
        for (k = 0; k < segments.length; k++){
            token = token.replace(segments[k][0], segments[k][1]);
        }

        /* add extra-short, unsyllabic & syllabic marks */
        var isChecked = /\u031A/.test(token);
        if (/^[^aeiouɨ]*[mnŋ][˥˦˧˨˩]+$/.test(token)){
            // syllabic nasal; the whole run of tone letters is captured so
            // none of them is dropped
            token = token.replace(/^([^aeiouɨ]*[mnŋ])([˥˦˧˨˩]+)$/, "$1\u0329$2");
        }
        else if (/^[^aeiouɨ]*[aeiouɨ][^aeiouɨ]*$/.test(token)){
            // monophthong: extra-short mark for ru tone
            if (isChecked){
                token = token.replace(/^(.*)([aeiouɨ])(.*)$/, "$1$2\u0306$3");
            }
        }
        else if (/[aeo]/.test(token)){
            // di(tri)phthong with a e o: i & u are unsyllabic
            token = token.replace(/([iu])/g, "$1\u032F");
            if (isChecked){
                token = token.replace(/^(.*)([aeo])(.*)$/, "$1$2\u0306$3");
            }
        }
        else {
            // ui or iu: the 1st vowel is unsyllabic
            token = token.replace(/([iu])([iu])/g, "$1\u032F$2");
            if (isChecked){
                token = token.replace(/([iu])([tpk\u031A])/g, "$1\u0306$2");
            }
        }
        token = token.replace(/i\u032F([i\u012D])/, "$1"); // yi -> i
        tokens[i] = token;
    }
    return tokens.join("");
}
/*
 * Form handler: reads the PFS text from the input "phak_fa_su" of the form
 * "form1", then writes
 *   - its IPA transcription (PFStoIPA) into the element with id "ipa", and
 *   - its THRS transliteration (pfs_to_THRS, syllable by syllable, all
 *     syllables separated by a single space) into the element with id "result".
 */
function mainFunction() {
    // get content in the form "form1"
    var form1 = document.forms["form1"];
    // get content in the inputbox "phak_fa_su"
    var phakfasu = form1.elements["phak_fa_su"].value;
    // splitting phak_fa_su into words
    var words = phakfasu.split(/[\s]+/);
    // the pattern cutting syllable with hyphens
    var cut_syllb_re = /[-]+/;
    // converted syllables of all words, in order
    var converted = [];

    for (var i = 0; i < words.length; i++){
        if (!words[i].match(cut_syllb_re)){
            // monosyllabic word
            converted.push(pfs_to_THRS(words[i]));
            continue;
        }
        // cutting a multisyllabic word into syllables
        var syllables = words[i].split(cut_syllb_re);
        /* if the first letter of a multisyllabic word is in upper case,
           capitalize the 1st letter of every syllable of the word. */
        var capitalize = /^[A-Z]/.test(syllables[0]);
        for (var j = 0; j < syllables.length; j++){
            // replace pfs to THRS
            var syllable = pfs_to_THRS(syllables[j]);
            if (capitalize){
                syllable = syllable.charAt(0).toUpperCase() + syllable.slice(1);
            }
            converted.push(syllable);
        }
    }

    // print the output
    var output = converted.join(" ");
    // the IPA string carries no markup, so it is inserted as plain text; this
    // keeps any "<" or "&" typed by the user from being parsed as HTML
    document.getElementById("ipa").textContent = PFStoIPA(phakfasu);
    // NOTE(review): the THRS string contains generated <sup> tone marks, so it
    // has to be inserted as HTML; the user's own text is NOT escaped here.
    document.getElementById("result").innerHTML = output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment