Created
May 30, 2018 10:26
-
-
Save daniyel/bc73687c45bf293c1cb7bd94ee2f0a1e to your computer and use it in GitHub Desktop.
Helper class for converting percent-encoded text from CP-1252 (Windows-1252; the method name says CP-1251) to percent-encoded UTF-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Helper that rewrites percent-encoded single-byte characters (`%XX`) as
 * their percent-encoded UTF-8 equivalents.
 *
 * @class TextCoding
 *
 * @constructor
 */
function TextCoding() {}

/**
 * Replaces every `%XX` sequence that has an entry in `CP_1252` with the
 * corresponding percent-encoded UTF-8 sequence. Everything else, including
 * `%XX` sequences that are not in the table, is copied through unchanged.
 *
 * NOTE: the method name says CP-1251, but the lookup table it uses
 * (`CP_1252`) holds Windows-1252 values. The name is kept for callers.
 *
 * @param {string} text Percent-encoded string to convert.
 * @returns {string} Converted string; a non-string argument is returned as is.
 */
TextCoding.prototype.convertFromCp1251 = function (text) {
  if (typeof text !== 'string') {
    return text;
  }

  let result = '';
  const textLength = text.length;

  for (let idx = 0; idx < textLength; idx += 1) {
    const char = text[idx];

    if (char === '%' && this.isHexChar(text[idx + 1]) && this.isHexChar(text[idx + 2])) {
      // Table keys use upper-case hex digits, so normalise the candidate
      // first; otherwise `%e9` would not match the `%E9` entry.
      const cpChar = text.slice(idx, idx + 3).toUpperCase();
      const replacement = this.CP_1252[cpChar];

      if (replacement) {
        result += replacement;
        // Skip the two hex digits that were just consumed.
        idx += 2;
        continue;
      }
    }

    // Not a mapped escape: emit the current character only, so the
    // following characters are examined on their own.
    result += char;
  }

  return result;
};

/**
 * Checks if character is a valid HEX digit (0-9, A-F, a-f).
 *
 * The test is done on the character itself rather than on its upper-cased
 * form, because upper-casing can expand a character (for example the
 * ligature U+FB00 becomes "FF") and make a non-hex character look valid.
 *
 * @param {string} char Character in string.
 * @returns {boolean} True if character is valid HEX, false otherwise
 *     (including non-strings and strings whose length is not 1).
 */
TextCoding.prototype.isHexChar = function (char) {
  return typeof char === 'string' && /^[0-9A-Fa-f]$/.test(char);
};

/**
 * Lookup table from a percent-encoded Windows-1252 byte (upper-case hex) to
 * the percent-encoded UTF-8 sequence of the same character. Only the bytes
 * listed here are converted.
 *
 * @type {Object<string, string>}
 */
TextCoding.prototype.CP_1252 = {
  '%80': '%E2%82%AC', // €
  '%82': '%E2%80%9A', // ‚
  '%8A': '%C5%A0', // Š
  '%8C': '%C5%92', // Œ
  '%8E': '%C5%BD', // Ž
  '%96': '%E2%80%93', // –
  '%97': '%E2%80%94', // —
  '%9A': '%C5%A1', // š
  '%9C': '%C5%93', // œ
  '%9E': '%C5%BE', // ž
  '%9F': '%C5%B8', // Ÿ
  '%B4': '%C2%B4', // ´
  '%C0': '%C3%80', // À
  '%C1': '%C3%81', // Á
  '%C2': '%C3%82', // Â
  '%C3': '%C3%83', // Ã
  '%C4': '%C3%84', // Ä
  '%C5': '%C3%85', // Å
  '%C6': '%C3%86', // Æ
  '%C7': '%C3%87', // Ç
  '%C8': '%C3%88', // È
  '%C9': '%C3%89', // É
  '%CA': '%C3%8A', // Ê
  '%CB': '%C3%8B', // Ë
  '%CC': '%C3%8C', // Ì
  '%CD': '%C3%8D', // Í
  '%CE': '%C3%8E', // Î
  '%CF': '%C3%8F', // Ï
  '%D0': '%C3%90', // Ð
  '%D1': '%C3%91', // Ñ
  '%D2': '%C3%92', // Ò
  '%D3': '%C3%93', // Ó
  '%D4': '%C3%94', // Ô
  '%D5': '%C3%95', // Õ
  '%D6': '%C3%96', // Ö
  '%D8': '%C3%98', // Ø
  '%D9': '%C3%99', // Ù
  '%DA': '%C3%9A', // Ú
  '%DB': '%C3%9B', // Û
  '%DC': '%C3%9C', // Ü
  '%DD': '%C3%9D', // Ý
  '%DF': '%C3%9F', // ß
  '%E0': '%C3%A0', // à
  '%E1': '%C3%A1', // á
  '%E2': '%C3%A2', // â
  '%E3': '%C3%A3', // ã
  '%E4': '%C3%A4', // ä
  '%E5': '%C3%A5', // å
  '%E6': '%C3%A6', // æ
  '%E7': '%C3%A7', // ç
  '%E8': '%C3%A8', // è
  '%E9': '%C3%A9', // é
  '%EA': '%C3%AA', // ê
  '%EB': '%C3%AB', // ë
  '%EC': '%C3%AC', // ì
  '%ED': '%C3%AD', // í
  '%EE': '%C3%AE', // î
  '%EF': '%C3%AF', // ï
  '%F0': '%C3%B0', // ð
  '%F1': '%C3%B1', // ñ
  '%F2': '%C3%B2', // ò
  '%F3': '%C3%B3', // ó
  '%F4': '%C3%B4', // ô
  '%F5': '%C3%B5', // õ
  '%F6': '%C3%B6', // ö
  '%F8': '%C3%B8', // ø
  '%F9': '%C3%B9', // ù
  '%FA': '%C3%BA', // ú
  '%FB': '%C3%BB', // û
  '%FC': '%C3%BC', // ü
  '%FD': '%C3%BD', // ý
  '%FE': '%C3%BE', // þ
  '%FF': '%C3%BF' // ÿ
};
// Export a single ready-made instance rather than the constructor.
module.exports = new TextCoding();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment