Last active
August 29, 2015 14:21
-
-
Save vkz/3454b2a0390fe4f55a07 to your computer and use it in GitHub Desktop.
String literal values in JS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parse and serialize JS string-literals
var esprima = require("esprima"),
    esgen = require("escodegen").generate;

// Esprima.parse differentiates between the string-literal (raw) and its
// content (value): `raw` keeps the delimiting quotes exactly as written in the
// source, `value` is the text between them.
esprima.parse('a = "привет"');
// =>
// { type: 'Program',
//   body:
//    [ { type: 'ExpressionStatement',
//        expression:
//         { type: 'AssignmentExpression',
//           operator: '=',
//           left: { type: 'Identifier', name: 'a' },
//           right: { type: 'Literal', value: 'привет', raw: '"привет"' } } } ] }
// NOTE Part of bsjs.ometajs grammar that parses JS string-literals
// ometa BSJSParser {
// escapeChar = <'\\' char>:s -> {
// switch (s) {
// case '\\"': return '"';
// case '\\\'': return '\'';
// case '\\n': return '\n';
// case '\\r': return '\r';
// case '\\t': return '\t';
// case '\\b': return '\b';
// case '\\f': return '\f';
// case '\\\\': return '\\';
// default: return s.charAt(1);
// }
// }
// | <'\\' ('u' hexDigit hexDigit hexDigit hexDigit
// | 'x' hexDigit hexDigit)>:s -> {
// JSON.parse('"' + s + '"')
// },
// str = /'([^'\\]|\\.)*'/:s -> {
// function swap(quote) {
// return quote === '"' ? '\'' : '"';
// }
// return [
// #string,
// JSON.parse(preparseString(s.replace(/["']/g, swap)))
// .replace(/["']/g, swap)
// ];
// }
// | /"([^"\\]|\\.)*"/:s -> [#string, JSON.parse(preparseString(s))]
// }
// NOTE Some string normalisation technique stolen from Indutny's bsjs.ometajs
// grammar
//
// Rewrites the escape sequences found in the text of a JS string literal, in
// four passes over `str` (a string; the result is a new string):
//   1. \xHH   -> \u00HH
//   2. \uHHHH -> \uHHHH   (only the case of the leading "u" is normalised)
//   3. \OOO   -> \uHHHH   (exactly three octal digits)
//   4. \c     -> c        for any c other than x u b f n o r t v ' " \
//                         (case-insensitive); every other backslash pair is
//                         passed through unchanged.
function preparseString(str) {
  return str.replace(/\\x([0-9a-f]{2})/ig, '\\u00$1')
    .replace(/\\u([0-9a-f]{4})/ig, '\\u$1')
    // Only 0-7 are octal digits; 8 and 9 fall through to the last pass.
    .replace(/\\([0-7]{3})/g, function (all, num) {
      // toString expects a numeric radix; a non-numeric one throws RangeError.
      var hex = parseInt(num, 8).toString(16);
      while (hex.length < 4) hex = '0' + hex;
      return '\\u' + hex;
    })
    .replace(/\\([^xubfnOrtv'"\\])|(\\.)/ig, function (all, m1, m2) {
      // m1: the character of an escape that gets unwrapped;
      // m2: a backslash pair that is kept as it is.
      return m1 || m2 || '';
    });
}
// NOTE Given s - content of some string literal i.e. stuff between delimiters
// (string value as opposed to raw string (in Esprima parlance)), how do you
// return its raw representation wrapped in ' or " quotes so that it can be
// concatenated to the output code-string? Note that the content should remain
// unaltered in the sense that evaluating the returned literal yields s again.
//
// s - the string value to serialize
// q - 'double' selects a double-quoted literal; any other value (including
//     undefined) selects a single-quoted one
// Returns the literal as a string, delimiters included.
function repQ(s, q) {
  // JSON.stringify leaves U+2028 / U+2029 unescaped, yet they end the line
  // inside a string literal for parsers older than ES2019, so escape them.
  var doubleQuoted = JSON.stringify(s)
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');

  if (q === 'double') {
    return doubleQuoted;
  }

  // Escape inner single quotes, then swap the outer delimiters. Inner \" pairs
  // are left as they are; they remain valid inside a single-quoted literal.
  return doubleQuoted.replace(/[']/g, "\\'").replace(/^"|"$/g, "'");
}
// Values parsed from string-literals (aka their content). The four entries
// spell the same two texts with different delimiters and escapes.
var stringContents = [
  "{'*.mtproxy1.yandex.net': '', '*.mtproxy2.yandex.net': '' }",
  "{\"*.mtproxy1.yandex.net\": \"\", \"*.mtproxy2.yandex.net\": \"\" }",
  '{"*.mtproxy1.yandex.net": "", "*.mtproxy2.yandex.net": "" }',
  '{\'*.mtproxy1.yandex.net\': \'\', \'*.mtproxy2.yandex.net\': \'\' }'
];

// Round-trip every string-content through JSON, leaving any quotes inside
// untouched
// JSON.stringify: 'str' -> '"str"' or "str" -> '"str"'
// JSON.parse: '"str"' -> 'str'
// Explicit one-argument callbacks keep map's index and array arguments from
// being passed on as replacer/space/reviver.
stringContents = stringContents
  .map(function (content) { return JSON.stringify(content); })
  .map(function (json) { return JSON.parse(json); });

// NOTE serialize string literals into code-strings, single- and double-quoted
// respectively
var resultSingleQuotes = stringContents.map(function (s) { return repQ(s); });
console.log("resultSingleQuotes = ", resultSingleQuotes);
var resultDoubleQuotes = stringContents.map(function (s) { return repQ(s, 'double'); });
console.log("resultDoubleQuotes = ", resultDoubleQuotes);
// NOTE also see how JSON.stringify deals with strings
// `http://docs.sencha.com/touch/1.1.1/source/JSON.html'
//
// escapable - global pattern for the characters quote() must escape:
//             backslash, double quote, control characters and a set of
//             Unicode ranges
// meta      - short escape sequences used by quote() for the characters that
//             have one
// cx, gap, indent, rep - declared here but not referenced in the visible part
//             of this file
var cx = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
    escapable = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
    gap,
    indent,
    meta = { // table of character substitutions
      '\b': '\\b',
      '\t': '\\t',
      '\n': '\\n',
      '\f': '\\f',
      '\r': '\\r',
      '"' : '\\"',
      '\\': '\\\\'
    },
    rep;
// Wraps `string` in double quotes, escaping every character matched by the
// file-level `escapable` pattern: characters listed in `meta` get their short
// escape, all others a four-digit \uXXXX escape.
// Returns the quoted string.
function quote(string) {
  // `escapable` is global, so test() resumes from lastIndex; rewind it first.
  escapable.lastIndex = 0;

  // If the string contains no control characters, no quote characters, and no
  // backslash characters, then we can safely slap some quotes around it.
  if (!escapable.test(string)) {
    return '"' + string + '"';
  }

  // Otherwise we must also replace the offending characters with safe escape
  // sequences.
  return '"' + string.replace(escapable, function (a) {
    var c = meta[a];
    return typeof c === 'string' ? c :
      '\\u' + ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
  }) + '"';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Matching Unicode blocks http://kourge.net/projects/regexp-unicode-block