Last active
April 11, 2018 11:29
-
-
Save beauterre/2d4e64329bc72d0f9f77f03b4c030a57 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//(copy freely, but include credits)
//whitelisted htmlEntities in javascript by Hjalmar Snoep
//
// Makes a piece of untrusted text safe to insert into HTML.
//  - plain ASCII letters, digits, space and the punctuation !#$%()*+,-./:;?@ pass through unchanged
//  - characters in `whitelist`, plus & " and ', are replaced by numeric entities (&#NNN;)
//  - everything else (control characters, backticks, brackets, unlisted symbols, ...) is removed
// str:     the raw text; null/undefined are treated as "", other values are stringified
// returns: the sanitised string
function htmlEntities(str) {
  var whitelist="<=>©€£×÷−═‘’“”“”„¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
  // & must be escaped as well, otherwise typed text such as "&#9824;" or "&lt;" is
  // decoded by the browser and slips past the whitelist; quotes are escaped so the
  // result is also safe inside attribute values.
  var escaped=whitelist+"&\"'";
  var input=(str==null)?"":String(str);
  str="";
  // single left-to-right pass instead of splicing the string once per hit
  for(var x=0; x<input.length; x++)
  {
    var ch=input.charAt(x);
    if(escaped.indexOf(ch)!==-1)
    {
      str+='&#'+input.charCodeAt(x)+';';
    }
    else
    {
      str+=ch;
    }
  }
  // entities only consist of & # digits ; so they survive this final strip
  str=str.replace(/[^ -;a-zA-Z?@]/g,"");
  return str;
}
/* 432 bytes, (0.42 Kb) of overhead including the credits and an extensive whitelist :) */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Alternative whitelists for htmlEntities(). Pick ONE of these declarations:
// they all assign the same variable, so if several are kept the last one wins.

// Includes < = >, so htmlEntities() turns them into numeric entities instead of removing them.
var whitelist="<=>©€£×÷−═‘’“”“”„¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"; // allow just about anything, but is still safe
// Omits < = >, so htmlEntities() strips them from the output altogether.
var whitelist="©€£×÷−═‘’“”“”„¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"; // instant feedback to hackers
// NOTE(review): Í, Ü and ¡ are not in this list - confirm whether that is intentional.
var whitelist="¿ñüáéíóúÑÁÉÓÚ"; // Spanish, you can save a few bytes, if you know what language to include.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype HTML> | |
<html> | |
<head><title>DIACRITICS SAFE WHITELISTING</title><style>body{font-family: sans-serif;padding: 25px; line-height: 1.3em;}</style></head> | |
<body> | |
<h1>Foreign input test.</h1> | |
<hr> | |
<div style="border: 1px solid #ccc; margin: 15px; padding: 15px;"><label style="color: #ccc; font-size: 0.8em; position: relative; top: -25px; background-color: #fff;">Anything you type below, will appear here, but stripped of anything harmful</label> | |
<div id="safe_preview"></div> </div> | |
Type here: <br> | |
<textarea style="width: 100%" id="input"></textarea> | |
<hr> | |
<h2>Test phrases</h2> | |
Click these test phrases if you are lazy like me: | |
<ul> | |
<li>Diacritics from Greek diakritikós, ("distinguishing"), from diakrī́nō, ("to distinguish"). Means acutes ( ´ ) and graves ( ` ) and words like naïve, Noël, saké, breathèd, façade, là.</li> | |
<li>ALLOWS ENGLISH: I'm happy to see you include the '</li> | |
<li>ALLOWS GERMAN: tschüs oder Tschüs, tschüss oder Tschüss ("Abschiedsgruß")</li> | |
<li>ALLOWS FRENCH: Garçon, j’ai une première édition de ‘À la recherche du temps perdu.’ Tu veux le voir ?</li> | |
<li>ALLOWS DANISH: Enhver har ret til undervisning. Undervisningen skal være gratis, i det mindste på de elementære og grundlæggende trin.</li> | |
<li>ALLOWS ICELANDIC: Bað, Átvagl, Ísland,Góður, Þór, Tækifæri, Röð.</li> | |
<li>ALLOWS CURRENCIES: € $ ¥ ฿ ¢ Ð E£ £ ₱</li> | |
<li>ALLOWS NICE SUMS: 5÷ of ( 7 × 8 ) + ( 7 - 2) ═ ? </li> | |
<li>PREVENTS: &lt;strong&gt;HTML MARKUP&lt;/strong&gt;</li>
<li>PREVENTS:&lt;script&gt;window.alert("you are hacked")&lt;/script&gt;</li>
<li>Other stuff to test : ♤ ♧ ♥ ♢ ← ↑ → ↓ ↔ « » ‹ › ◊ ¡ ¿ € £ ¤ ¥ ¢ ‰ ¶ “ ” „ ¦ ‡ † § © ™ ® ¹ ² ³ ¼ ½ ¾ · • ª º ¨ × ÷ − √ ∞ ∩ ∫ ± ¬ ~ ≈ ≠ ≡ ◊ ø Ø ≤ ≥ Δ Ω α β π µ ð ∂ ∏ ∑ ƒ. </li> | |
</ul> | |
<code><pre id="showCode" style="background: #ccc; color: #888; margin: 25px;padding: 25px;"></pre></code> | |
<h3>GENERAL INFORMATION</h3> | |
<ol> | |
<li>accepts basic ASCII, punctuation and anything in whitelist</li> | |
<li>accepts capitals and lowercase, numbers</li> | |
<li>accepted range of (normal) exotic characters = !"#$%&'()*+,-./:;?@ </li> | |
<li>accepted range of diacritics = ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ</li> | |
<li>whitelisted characters will be encoded as numeric html-entities like: &amp;#000;</li>
<li>In most fonts there is almost no visual distinction between the curly start quote ‘ and the back-tick `,
but they are different characters: the back-tick is ASCII, the curly quote is not. Curly start quotes are allowed here because they have no meaning in programming languages.
Backticks can be VERY dangerous, so please don't whitelist them here.</li>
<li>Similarly, ═ can be used instead of =. They look the same, but ═ is NOT an ASCII character!</li>
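<li>Never lets a raw &lt;, &gt; or = through: they are converted to numeric entities when whitelisted (as in this demo) and stripped otherwise, which makes it rather hard to put any kind of code in there :)</li>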
<li>It does allow for most Western European languages, and for some Mediterranean and Eastern European ones.</li>
<li>str=str.replace(/[^ -;a-zA-Z?@]/g,"");<br>MEANS: strip anything left over that was neither whitelisted nor run-of-the-mill ASCII!</li>
</ol> | |
<script> | |
// whitelisted htmlEntities in javascript by Hjalmar Snoep.
//
// Makes a piece of untrusted text safe to insert into HTML.
//  - plain ASCII letters, digits, space and the punctuation !#$%()*+,-./:;?@ pass through unchanged
//  - characters in `whitelist`, plus & " and ', are replaced by numeric entities (&#NNN;)
//  - everything else (control characters, backticks, brackets, unlisted symbols, ...) is removed
// str:     the raw text; null/undefined are treated as "", other values are stringified
// returns: the sanitised string
function htmlEntities(str) {
  var whitelist="<=>©€£×÷−═‘’“”“”„¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";
  // & must be escaped as well, otherwise typed text such as "&#9824;" or "&lt;" is
  // decoded by the browser and slips past the whitelist; quotes are escaped so the
  // result is also safe inside attribute values.
  var escaped=whitelist+"&\"'";
  var input=(str==null)?"":String(str);
  str="";
  // single left-to-right pass instead of splicing the string once per hit
  for(var x=0; x<input.length; x++){
    var ch=input.charAt(x);
    if(escaped.indexOf(ch)!==-1)
    {
      var code=input.charCodeAt(x);
      str+='&#'+code+';';
    }
    else
    {
      str+=ch;
    }
  }
  // entities only consist of & # digits ; so they survive this final strip
  str=str.replace(/[^ -;a-zA-Z?@]/g,"");
  return str;
}
// demo: wires the textarea, the clickable test phrases and the source listing
// together once the page has finished loading.
window.addEventListener("load",function(){
  // look up every element first, so no handler can run before its target exists
  var input=document.getElementById("input");
  var show=document.getElementById("safe_preview");
  var showCode=document.getElementById("showCode");
  var li=document.getElementsByTagName("li");

  // Renders the sanitised textarea content into the preview box.
  function updatePreview()
  {
    show.innerHTML=htmlEntities(input.value);
  }

  // Copies the clicked list item into the textarea and refreshes the preview.
  function showPhrase(e)
  {
    // innerHTML serialises <, > and & inside text as &lt; &gt; &amp; - undo that
    // so the textarea holds the literal phrase (&amp; last, to avoid decoding twice).
    input.value=e.currentTarget.innerHTML
      .replace(/&lt;/g,"<")
      .replace(/&gt;/g,">")
      .replace(/&amp;/g,"&");
    updatePreview();
  }

  // "input" also fires on paste, cut and drag-and-drop, which "keyup" misses
  input.addEventListener("input",updatePreview);

  // Show the function's own source and its size. textContent is used because the
  // source is plain text that contains < and & and must not be parsed as markup.
  var code="//(copy freely, but include credits)\n//whitelisted htmlEntities in javascript by Hjalmar Snoep \n"+htmlEntities+"";
  showCode.textContent=code+"\n"+(code.length)+" bytes, ("+(Math.round((code.length/1024)*100)/100)+" Kb) of overhead.";

  // every list item on the page doubles as a clickable test phrase
  for(var i=0;i<li.length;i++)
  {
    li[i].style.cursor="pointer";
    li[i].addEventListener("click",showPhrase);
  }
});
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment