Skip to content

Instantly share code, notes, and snippets.

@bitifet
Created September 17, 2019 08:44
Show Gist options
  • Save bitifet/6c8269c6c1fe697bcb8690873d20d7f3 to your computer and use it in GitHub Desktop.
Save bitifet/6c8269c6c1fe697bcb8690873d20d7f3 to your computer and use it in GitHub Desktop.
Regular expression translation
Translate well known filesystem-like wilcards ('?' for single character and '*' for 0 or more) to:
a) Suitable SQL pattern (with '_' and '%' instead) for 'like' and 'ilike' comparsion.
b) Suitable regular expression.
In both cases whithout breaking anything. That is, exitent meaningful ('%', and '_' for DB
and '.', '+', '[', etc... for regular expressions) should be properly escaped.
Also:
* Incoming '*' and '?' must be able to be escaped with backslash.
* In case of regular expression output, other backspaces should be escaped.
const regexp = {
allWilcards: /(.?[*?%_])/g,
recognizedWilcards: /[*?]/,
};
module.exports = {
sqlWilcards: (function(){//{{{
function transpile (w){//{{{
var s = "";
if (w.length >= 2) { // Wilcard not at string begining.
if (w[0] == "\\") { // Escaped.
if (w[1] == "%" || w[1] == "_") { // Would mean "\%" or "\_" literal.
return "\\\\\\"+w[1]; // Escape both.
} else { // Escaping '*' or '?'.
return w[1]; // Drop escaping charecters because they aren't actual wilcards.
};
// Never gets here.
} else { // Any other preceding character.
if (w[0] == "%" || w[0] == "_") s+="\\"; // Escape if needed.
s+=w[0]; // Output as is.
};
w=w[1]; // Drop processed preffix.
};
// Wilcard is first character or any possible escapping have been handled.
switch(w){
case "%":
case "_":
s+="\\"+w; // Escape it.
break;
case "*":
s+="%";
break;
case "?":
s+="_";
break;
};
return s;
};//}}}
return function(str){
var out = str
.replace(/\*+/g, '*') // Sanityze multiple consecutive '*'.
.replace(regexp.allWilcards, transpile)
;
return out;
};
})(),//}}}
hasSqlWilcards: function(str) {//{{{
return !! str.match(regexp.recognizedWilcards);
},//}}}
};
const wSpecs = {
sql: {
'\\*': '%',
'\\?': '_',
},
regex: {
'\\*': '\\.\\*',
'\\?': '\\.',
},
};
function transpiler(spc) {
const allBits = [
...Object.keys(spc).filter(x=>x),
...Object.values(spc),
// Other characters to be escaped:
...["\\+", "\\?", "\\+", "\\|", "\\\\"],
];
const re = new RegExp(
"((?:\\\\)*)"+"("+allBits.join("|")+")"
, "g"
)
console.log(re);
return function(txt) {
console.log(txt);
txt.replace(re, function (foo, slashes, patt) {
let output = "";
const sl = Math.floor(slashes.length / 2);
const esc = slashes.length % 2;
output += Array(sl+1).join("\\\\\\\\"); // Escape leading slashes.
console.log(slashes.length, sl, esc, patt, output)
});
};
};
transpiler(wSpecs.sql)("Hello * world. % percentage and \\escaped \\\\\\\\\\\\_ underscore\\.");
console.log("----------------");
transpiler(wSpecs.regex)("Hello * world. % percentage and \\escaped \\\\_ underscore\\.");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment