-
-
Save cshanejennings/2462a2c516be315400de04766b1ab7b0 to your computer and use it in GitHub Desktop.
/**
 * Builds a chainable string sanitizer.
 *
 * The returned function wraps a string in an object exposing the current
 * `string` plus three transforms. Each transform returns a new wrapper, so
 * calls can be chained: `sanitize(text).diacritics().alt().include().string`.
 *
 * - `diacritics()` replaces accented, enclosed, ligature and similar letters
 *   and digits with plain equivalents taken from `maps.diacritics`.
 * - `alt()` replaces typographic punctuation variants using `maps.alt`.
 * - `include()` removes every character that is not a word character
 *   (`\w`), whitespace (`\s`) or part of the allowed character set.
 *
 * `diacritics()` and `alt()` only examine characters outside U+0000-U+007E,
 * so ASCII characters that appear in a key group are never consulted.
 *
 * @param {Object<string, Object<string, string>>} [user_maps={}] Maps keyed
 *   by transform name (`diacritics`, `alt`). A supplied map replaces the
 *   built-in map of the same name as a whole; it is not merged per character.
 * @param {string} [user_allowed] Characters `include()` keeps in addition to
 *   word characters and whitespace. When empty or omitted, the built-in
 *   `allowed_strings` set is used.
 * @returns {function(string): {string: string, diacritics: function, alt: function, include: function}}
 *   Factory that wraps a string in the chainable API.
 */
const get_string_sanitizer = (user_maps = {}, user_allowed) => {
  // Every UTF-16 code unit outside U+0000-U+007E. The `\u` escapes are
  // required: without them the class degrades to "anything outside '0'..'u'".
  const non_ascii = /[^\u0000-\u007E]/g;
  // Candidates for removal by include(): anything that is neither a word
  // character nor whitespace.
  const not_word_or_space = /[^\w\s]/g;

  /**
   * Parses "keys|value,keys|value" into a { character: value } lookup.
   * Every character of `keys` maps to `value`; the literal text \u002C inside
   * a value stands for a comma, because ',' itself is the pair delimiter.
   * Later pairs win over earlier ones. Pairs with no keys or no value are
   * skipped.
   */
  const build_map = (map_str) => {
    const map = {};
    for (const pair of map_str.split(',')) {
      const [keys, value] = pair.split('|');
      if (!keys || value === undefined) continue;
      const replacement = value.replace('\\u002C', ',');
      for (const key of keys.split('')) map[key] = replacement;
    }
    return map;
  };

  // Key groups are split one UTF-16 unit at a time, so each key must be a
  // single precomposed character. Digraphs, ligatures and fullwidth forms are
  // written as \u escapes so they cannot be silently decomposed into several
  // characters by copy/paste or compatibility normalization.
  // In `alt`, the apostrophe, comma and backslash entries are keyed by their
  // fullwidth forms (U+FF07, U+FF0C, U+FF3C): the raw ASCII characters would
  // end the string literal, collide with the pair delimiter, and escape the
  // pipe separator respectively.
  const map_strings = {
    diacritics: '⁰₀⓪⓿0|0,¹₁①⓵❶➀➊1|1,²₂②⓶❷➁➋2|2,³₃③⓷❸➂➌3|3,⁴₄④⓸❹➃➍4|4,⁵₅⑤⓹❺➄➎5|5,⁶₆⑥⓺❻➅➏6|6,⁷₇⑦⓻❼➆➐7|7,⁸₈⑧⓼❽➇➑8|8,⁹₉⑨⓽❾➈➒9|9,⑩⓾❿➉➓|10,⑪⓫|11,⑫⓬|12,⑬⓭|13,⑭⓮|14,⑮⓯|15,⑯⓰|16,⑰⓱|17,⑱⓲|18,⑲⓳|19,⑳⓴|20,ΑÀÁÂÃÄÅĀĂĄƏǍǞǠǺȀȂȦȺᴀḀẠẢẤẦẨẪẬẮẰẲẴẶⒶA|A,ÆǢǼᴁ|AE,ΒƁƂɃʙᴃḂḄḆⒷB|B,ÇĆĈĊČƇȻʗᴄḈⒸC|C,ÈΕÉÊËĒĔĖĘĚƎƐȄȆȨɆᴇḔḖḘḚḜẸẺẼẾỀỂỄỆⒺⱻE|E,ÌÍÎÏĨĪĬĮİƖƗǏȈȊɪᵻḬḮỈỊⒾꟾIΙ|I,ÐĎĐƉƊƋᴅᴆḊḌḎḐḒⒹꝹD|D,ÑŃŅŇŊƝǸȠɴᴎṄṆṈṊⓃNΝ|N,ÒÓÔÕÖØŌŎŐƆƟƠǑǪǬǾȌȎȪȬȮȰᴏᴐṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢⓄꝊꝌOΟ|O,ÙÚÛÜŨŪŬŮŰŲƯǓǕǗǙǛȔȖɄᴜᵾṲṴṶṸṺỤỦỨỪỬỮỰⓊU|U,ÝŶŸƳȲɎʏẎỲỴỶỸỾⓎYΥ|Y,ÞꝦ|TH,ß|ss,àáâãäåāăąǎǟǡǻȁȃȧɐəɚᶏᶕḁẚạảấầẩẫậắằẳẵặₐₔⓐⱥⱯa|a,æǣǽᴂ|ae,çćĉċčƈȼɕḉↄⓒꜾꜿc|c,èéêëēĕėęěǝȅȇȩɇɘɛɜɝɞʚᴈᶒᶓᶔḕḗḙḛḝẹẻẽếềểễệₑⓔⱸe|e,ìíîïĩīĭįıǐȉȋɨᴉᵢᵼᶖḭḯỉịⁱⓘi|i,ðďđƌȡɖɗᵭᶁᶑḋḍḏḑḓⓓꝺd|d,ñńņň\u0149ŋƞǹȵɲɳᵰᶇṅṇṉṋⁿⓝn|n,òóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɔɵᴖᴗᶗṍṏṑṓọỏốồổỗộớờởỡợₒⓞⱺꝋꝍo|o,ùúûüũūŭůűųưǔǖǘǚǜȕȗʉᵤᶙṳṵṷṹṻụủứừửữựⓤu|u,ýÿŷƴȳɏʎẏẙỳỵỷỹỿⓨy|y,þᵺꝧ|th,ĜĞĠĢƓǤǥǦǧǴɢʛḠⒼꝽꝾG|G,ĝğġģǵɠɡᵷᵹᶃḡⓖꝿg|g,ĤĦȞʜḢḤḦḨḪⒽⱧⱵHΗ|H,ĥħȟɥɦʮʯḣḥḧḩḫẖⓗⱨⱶh|h,\u0132|IJ,\u0133|ij,ĴɈᴊⒿJ|J,ĵǰȷɉɟʄʝⓙⱼj|j,ĶƘǨᴋḰḲḴⓀⱩꝀꝂꝄKΚ|K,ķƙǩʞᶄḱḳḵⓚⱪꝁꝃꝅk|k,ĸɋʠⓠꝗꝙq|q,ĹĻĽĿŁȽʟᴌḶḸḺḼⓁⱠⱢꝆꝈꞀL|L,ĺļľŀłƚȴɫɬɭᶅḷḹḻḽⓛⱡꝇꝉꞁl|l,Œɶ|OE,œᴔ|oe,ŔŖŘȐȒɌʀʁᴙᴚṘṚṜṞⓇⱤꝚꞂR|R,ŕŗřȑȓɍɼɽɾɿᵣᵲᵳᶉṙṛṝṟⓡꝛꞃr|r,ŚŜŞŠȘṠṢṤṦṨⓈꜱꞅS|S,śŝşšſșȿʂᵴᶊṡṣṥṧṩẜẝⓢꞄs|s,ŢŤŦƬƮȚȾᴛṪṬṮṰⓉꞆTΤ|T,ţťŧƫƭțȶʇʈᵵṫṭṯṱẗⓣⱦt|t,ŴǷᴡẀẂẄẆẈⓌⱲW|W,ŵƿʍẁẃẅẇẉẘⓦⱳw|w,ŹŻŽƵȜȤᴢẐẒẔⓏⱫꝢZΖ|Z,źżžƶȝȥɀʐʑᵶᶎẑẓẕⓩⱬꝣz|z,ƀƃɓᵬᶀḃḅḇⓑb|b,ƑḞⒻꜰꝻꟻF|F,ƒᵮᶂḟẛⓕꝼf|f,ƕ|hv,ƜᴍḾṀṂⓂⱮꟽꟿMΜ|M,ƤᴘṔṖⓅⱣꝐꝒꝔPΡ|P,ƥᵱᵽᶈṕṗⓟꝑꝓꝕꟼp|p,ƲɅᴠṼṾỼⓋꝞꝨV|V,\u01C4\u01F1|DZ,\u01C5\u01F2|Dz,\u01C6\u01F3ʣʥ|dz,\u01C7|LJ,\u01C8|Lj,\u01C9|lj,\u01CA|NJ,\u01CB|Nj,\u01CC|nj,Ƕ|HV,Ȣᴕ|OU,ȣ|ou,ȸ|db,ȹ|qp,ɊⓆꝖꝘQ|Q,ɯɰɱᵯᶆḿṁṃⓜm|m,ʋʌᵥᶌṽṿⓥⱱⱴꝟv|v,ʦ|ts,ʨ|tc,ʪ|ls,ʫ|lz,ᵫ|ue,ᶍẋẍₓⓧx|x,ẊẌⓍXΧ|X,ẞ|SS,Ỻ|LL,ỻ|ll,⑴|(1),⑵|(2),⑶|(3),⑷|(4),⑸|(5),⑹|(6),⑺|(7),⑻|(8),⑼|(9),⑽|(10),⑾|(11),⑿|(12),⒀|(13),⒁|(14),⒂|(15),⒃|(16),⒄|(17),⒅|(18),⒆|(19),⒇|(20),⒈|1.,⒉|2.,⒊|3.,⒋|4.,⒌|5.,⒍|6.,⒎|7.,⒏|8.,⒐|9.,⒑|10.,⒒|11.,⒓|12.,⒔|13.,⒕|14.,⒖|15.,⒗|16.,⒘|17.,⒙|18.,⒚|19.,⒛|20.,⒜|(a),⒝|(b),⒞|(c),⒟|(d),⒠|(e),⒡|(f),⒢|(g),⒣|(h),⒤|(i),⒥|(j),⒦|(k),⒧|(l),⒨|(m),⒩|(n),⒪|(o),⒫|(p),⒬|(q),⒭|(r),⒮|(s),⒯|(t),⒰|(u),⒱|(v),⒲|(w),⒳|(x),⒴|(y),⒵|(z),Ꜩ|TZ,ꜩ|tz,Ꜳ|AA,ꜳ|aa,Ꜵ|AO,ꜵ|ao,Ꜷ|AU,ꜷ|au,ꜸꜺ|AV,ꜹꜻ|av,Ꜽ|AY,ꜽ|ay,Ꝏ|OO,ꝏ|oo,Ꝡ|VY,ꝡ|vy,\uFB00|ff,\uFB01|fi,\uFB02|fl,\uFB03|ffi,\uFB04|ffl,\uFB06|st',
    alt: '«»“”„″‶❝❞❮❯"|",‐‑‒–—⁻₋-|-,‘’‚‛′‵‹›❛´❜\uFF07|\u0027,‸^|^,‼|!!,⁄/|/,⁅❲[|[,⁆❳]|],⁇|??,⁈|?!,⁉|!?,⁎*|*,⁏;|;,⁒%|%,⁓~˜|~,⁺₊+|+,⁼₌=|=,⁽₍❨❪(⟨|(,⁾₎❩❫)⟩|),❬❰<|<,❭❱>|>,❴{|{,❵}|},⸨|((,⸩|)),!|!,#|#,$|$,&|&,\uFF0C|\\u002C,¸.|.,:|:,?|?,@|@,\uFF3C|\u005C,_|_',
  };

  // Default whitelist for include(). The two rows behave identically; they
  // are kept apart only to group the characters visually.
  const allowed_strings = [
    '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~', // ASCII punctuation (QWERTY keyboard)
    '¢£¤¥¦§©¬®°±µ·º¼½¾×÷ΓΔΘΛΞΠΣΦΨΩαβθπφ†‡•…‰€™←↑→↓↔⇐⇒⇔−∗∝∼≅≈≠≡≤≥⋅◊', // other alt characters
  ];

  // A non-empty user_allowed replaces the default whitelist entirely.
  const allowed_chars = (user_allowed && user_allowed.length)
    ? user_allowed
    : allowed_strings.join('');
  const allowed = new Set(allowed_chars.split(''));

  // Built-in maps first so that a same-named user map overrides them.
  const maps = {
    ...Object.fromEntries(
      Object.entries(map_strings).map(([name, map_str]) => [name, build_map(map_str)])
    ),
    ...user_maps,
  };

  // Replaces each non-ASCII character that has a truthy entry in `map`;
  // characters without one are left unchanged.
  const replace_mapped = (string, map) =>
    string.replace(non_ascii, (ch) => map[ch] || ch);

  const api = (string) => ({
    string,
    diacritics: () => api(replace_mapped(string, maps.diacritics)),
    alt: () => api(replace_mapped(string, maps.alt)),
    // The whitelist is applied to every non-word, non-space character, ASCII
    // punctuation included, so a custom user_allowed is honoured uniformly.
    include: () => api(string.replace(not_word_or_space, (ch) => (allowed.has(ch) ? ch : ''))),
  });

  return api;
};
In many circumstances this could be replaced, and simplified, by String.prototype.normalize(), which accepts the four normalization forms demonstrated below, where const junk = "ŔŖŘȐȒɌʀʁᴙᴚṘṚṜṞⓇⱤꝚꞂR";
NFC -- Canonical Decomposition, followed by Canonical Composition.
junk.normalize('NFC').replace(/[\u0300-\u036f]/g, ''); // ŔŖŘȐȒɌʀʁᴙᴚṘṚṜṞⓇⱤꝚꞂR none replaced
NFKC -- Compatibility Decomposition, followed by Canonical Composition.
junk.normalize('NFKC').replace(/[\u0300-\u036f]/g, ''); // ŔŖŘȐȒɌʀʁᴙᴚṘṚṜṞRⱤꝚꞂR 2 replaced, where ŔŖŘȐȒɌʀʁᴙᴚṘṚṜṞⱤꝚꞂ are unmatched
NFD -- Canonical Decomposition.
junk.normalize('NFD').replace(/[\u0300-\u036f]/g, ''); // RRRRRɌʀʁᴙᴚRRRRⓇⱤꝚꞂR 9 replaced, where ɌʀʁᴙᴚⓇⱤꝚꞂR are unmatched
NFKD -- Compatibility Decomposition.
junk.normalize('NFKD').replace(/[\u0300-\u036f]/g, ''); // RRRRRɌʀʁᴙᴚRRRRRⱤꝚꞂR with 10 replaced (the 9 from NFD plus Ⓡ), where ɌʀʁᴙᴚⱤꝚꞂ are unmatched
It looks like there are many attempts at this in this Stack Overflow post, which may contain a better solution for the diacritics portion.
Example:
map_strings is an object of strings in the format xxxxxxxx|y,xx|y,xx|y: a comma-delimited list of key-value pairs, each pair pipe-delimited, where the 'x' part is the set of characters to replace and 'y' is the text to replace them with. These strings are used to build the character maps while keeping the function as small as possible.
allowed_strings is an array of strings that are joined to form the allowed character set (the array is only used to separate the characters visually; every entry behaves identically).