Created
June 25, 2015 16:53
-
-
Save AccaliaDeElementia/08e0c2319239d8038742 to your computer and use it in GitHub Desktop.
Updated sanitize with new tests (SockBot/es6-dev)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Strip content that should not be scanned from raw post text.
 *
 * The pipeline normalizes newlines, drops low-ASCII control characters,
 * removes GFM-fenced code blocks, neutralizes bbcode tags inside inline code
 * spans, and finally removes bbcode [quote]...[/quote] blocks (including
 * nested and unbalanced ones).
 *
 * The control characters \x01-\x04 are used as temporary markers. This is
 * safe because every such character is removed from the input before any
 * marker is inserted.
 *
 * @param {string} text - Raw post text.
 * @returns {string} The text with code fences and quoted blocks removed.
 */
function sanitize(text) {
    const marked = text
        // Normalize newlines
        .replace(/\r\n?/g, '\n')
        // Remove low-ASCII control chars except \t (\x09) and \n (\x0a)
        .replace(/[\x00-\x08\x0b-\x1f]/g, '')
        // Remove GFM-fenced code blocks
        .replace(/^```.*\n(?:.*\n)*?```(?:\n|$)/gm, '')
        // Disable bbcode tags inside inline code blocks by splitting the
        // opening bracket from the tag name with a \x01 marker
        .replace(/`[^`\n]*`/g, (code) => code.replace(/\[/g, '[\x01'))
        // Ease recognition of bbcode [quote] and [quote=whatever] start
        // tags by prefixing them with \x02
        .replace(/\[quote(?:=[^[\]]*)?]/ig, '\x02$&')
        // Ease recognition of bbcode [/quote] end tags by suffixing
        // them with \x03
        .replace(/\[\/quote]/ig, '$&\x03');

    // Repeatedly strip non-nested quoted blocks until a pass changes
    // nothing; this removes nested blocks from the innermost outward.
    // Each removed block leaves a \x04 marker behind.
    let previous;
    let current = marked;
    do {
        previous = current;
        current = previous.replace(/\x02[^\x02\x03]*\x03/g, '\x04');
    } while (current !== previous);

    return current
        // Remove any leftover unbalanced quoted text, treating places
        // where blocks were removed as if they were the missing end tags
        .replace(/\x02[^\x04]*\x04/g, '')
        // Remove leftover recognition helpers
        .replace(/[\x01-\x04]/g, '');
}
/**
 * Run a function against a table of test cases and accumulate a report of
 * the cases whose result differs from the expectation.
 *
 * @param {function(*): *} func - Function under test; called with each input.
 * @param {Array<Array>} testcases - Entries of [testname, input, expected].
 * @param {function(*, *, *, *): *} failed - Called as
 *     failed(testname, input, expected, actual) for each mismatch; its
 *     return value is appended to the report with `+=`.
 * @param {*} [initial=''] - Starting value of the report.
 * @returns {*} The accumulated report; equal to `initial` when every case
 *     passes.
 */
function failures(func, testcases, failed, initial = '') {
    return testcases.reduce((report, [testname, input, expected]) => {
        const actual = func(input);
        // Strict comparison: a case passes only on an exact match.
        if (actual !== expected) {
            report += failed(testname, input, expected, actual);
        }
        return report;
    }, initial);
}
// Escape the characters that are significant inside HTML text so that test
// inputs and outputs are displayed literally inside the <pre> elements of the
// failure report instead of being parsed as markup.
const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

// Run every sanitize() test case and render the failure report into the
// element with id "test"; when the report is empty (no failures) show
// "Passed" instead. Each case is [testname, input, expected].
document.getElementById('test').innerHTML = failures(sanitize,
    [
        ['Full quote empty', '[quote="accalia, post:108, topic:49440, full:true"][/quote]', ''],
        ['Full quote with text', '[quote="accalia, post:108, topic:49440, full:true"]\nthis is ' +
            'a quote[/quote]', ''
        ],
        ['Full quote with multiline text', '[quote="accalia, post:108, topic:49440, full:true"]\r\n' +
            'this is a quote\r\n\r\n[/quote]', ''
        ],
        ['Partial quote empty', '[quote="accalia, post:108, topic:49440"][/quote]', ''],
        ['Partial quote with text', '[quote="accalia, post:108, topic:49440"]this is a quote[/quote]', ''],
        ['Partial quote with multiline text', '[quote="accalia, post:108, topic:49440"]\n' +
            'this is a quote [/quote]', ''
        ],
        ['Topic quote with multiline text', '[quote="accalia, post:108"]\nthis is a quote\n\r\n[/quote]', ''],
        ['Topic quote empty', '[quote="accalia, post:108"][/quote]', ''],
        ['Username quote empty', '[quote="accalia"][/quote]', ''],
        ['Username quote with text', '[quote="accalia"]this is a quote\r\n\r\n[/quote]', ''],
        ['Unquoted username quote empty', '[quote=accalia][/quote]', ''],
        ['Unquoted username quote', '[quote=accalia]this is a quote[/quote]', ''],
        ['Bare quote empty', '[quote][/quote]', ''],
        ['Bare quote', '[quote]\n\nthis is a quote[/quote]', ''],
        ['Only open quote', '[quote="accalia, post:108, topic:49440, full:true"]',
            '[quote="accalia, post:108, topic:49440, full:true"]'
        ],
        ['Malformed open quote', '[quote="accalia, post:108, topic:49440, full:true"this is a quote[/quote]',
            '[quote="accalia, post:108, topic:49440, full:true"this is a quote[/quote]'
        ],
        ['Malformed close quote', '[quote="accalia, post:108, topic:49440, full:true"]this is a quote/quote]',
            '[quote="accalia, post:108, topic:49440, full:true"]this is a quote/quote]'
        ],
        ['Malformed close quote 2', '[quote="accalia, post:108, topic:49440"][/quote',
            '[quote="accalia, post:108, topic:49440"][/quote'
        ],
        ['Malformed open quote 2', '[quote="accalia, post:108, topic:49440"]this is a quote [quote]',
            '[quote="accalia, post:108, topic:49440"]this is a quote [quote]'
        ],
        ['Only close quote', '[/quote]', '[/quote]'],
        // NOTE(review): this test name duplicates the one two entries above.
        ['Malformed open quote 2', '[quoteaccalia, post:108"][/quote]', '[quoteaccalia, post:108"][/quote]'],
        ['Only a quote', '[quote=a]i am stripped[/quote]', ''],
        ['Embedded quote', 'this is\n[quote=accalia][/quote] \r\na quote', 'this is\n \na quote'],
        ['Nested quote simple', 'this[quote]nope[quote]no[/quote]nada[/quote] survives', 'this survives'],
        ['Unmatched quote block', 'i am not a [quote]', 'i am not a [quote]'],
        ['Unmatched quote in valid quote', 'this[quote]nope[quote]no[quote]nada[/quote] survives',
            'this survives'
        ],
        ['Multiple quotes', '[quote="accalia, post:108, topic:49440, full:true"]\nthis is a quote[/quote] ' +
            'inner words [quote="accalia, post:108, topic:49440, full:true"]\nthis is another quote[/quote]',
            ' inner words '
        ],
        ['Multiple quotes 2', 'before words\n[quote="accalia, post:108, topic:49440, full:true"]\nthis is a ' +
            'quote[/quote] inner words [quote="accalia, post:108, topic:49440, full:true"]\nthis is another ' +
            'quote[/quote]\nafter words',
            'before words\n inner words \nafter words'
        ],
        ['Only a GFM with embedded backticks', '```\n``\n```', ''],
        ['Empty Line in a GFM block', '```\n\n```', ''],
        ['Empty GFM block', '```\n```', ''],
        ['GFM with space for typehint', '``` \nfoo();\n```', ''],
        ['Only a GFM code block', '```\ncode\n```', ''],
        ['GFM with text before', 'before\n```\ncode\n```', 'before\n'],
        ['GFM with text before 2', 'before2\n```\ncode\n```\n', 'before2\n'],
        ['GFM with text between', '```\ncode\n```\nbetween\n```\ncode\n```', 'between\n'],
        ['GFM with text between 2', '```\ncode\n```\nbetween2\n```\ncode\n```\n', 'between2\n'],
        ['GFM with text after', '```\ncode\n```\nafter', 'after'],
        ['Only a type hinted GFM', '```type\ncode\n```', ''],
        ['GFM with unpaired tripletic', '```\ncode\n```\nnotcode\n```\n', 'notcode\n```\n'],
        ['GFM with before/after text', 'before\n```\n```\nafter', 'before\nafter'],
        ['Multiple GFM blocks', 'before\n```\n```\nmiddle\n```\nafter', 'before\nmiddle\n```\nafter'],
        ['Multiple GFM blocks 2', 'before\n```\n1\n```\nmiddle\n```\n2\n```\nafter', 'before\nmiddle\nafter'],
        ['GFM with trailing text', '```\n```test', '```\n```test'],
        ['GFM wit trailing space on close', '```\n``` ', '```\n``` '],
        ['GFM without line breaks', '```foo```', '```foo```'],
        ['GFM missing with close not on new line', '```\ntest();```', '```\ntest();```'],
        ['GFM with space before open', ' ```\ntest();\n```', ' ```\ntest();\n```'],
        ['Inline code block', 'this `is` code', 'this `is` code'],
        ['inline code block in quote', 'a[quote]`code`[/quote]b', 'ab'],
        ['Inline code with quote inside', '`[quote]a[/quote]`', '``'],
        ['Not an inline code with quote "inside"', '`\n[quote][/quote]\n`', '`\n\n`'],
        ['Newline Normalization - windows single', 'this is normal\r\n', 'this is normal\n'],
        ['Newline Normalization - windows multiple', 'this is normal\r\n\r\n', 'this is normal\n\n'],
        ['Newline Normalization - mac', 'this\ris normal\r', 'this\nis normal\n'],
        ['Newline normalization - mixed', 'this\ris\nnormal\r\n', 'this\nis\nnormal\n'],
        ['Newline Normalization - windows multiple 2', 'this\r\nis normal\r\n', 'this\nis normal\n'],
        ['One backtick around code', 'One backtick around `code`', 'One backtick around `code`'],
        ['Bare quote in inline code', '`[quote][/quote]`', '`[quote][/quote]`'],
        ['Bare quote with backtic', '`[quote][/quote]', ''],
        ['"inline" code on multiline', 'a `code\ncode` b', 'a `code\ncode` b'],
        ['multiline inline code', '`\ncode\n`', '`\ncode\n`'],
        ['multiline with aftertext', '`name\ncode`\ntext', '`name\ncode`\ntext'],
        ['double backtick inline code', '``inline code``', '``inline code``'],
        ['double backtick with quote', '``[quote][/quote]``', '``[quote][/quote]``'],
        ['inline code before quote', '``code``[quote][/quote]``', '``code``'],
        ['double backtick with embedded singletick', ' ``code with ` embedded``', ' ``code with ` embedded``'],
        ['double backtick with embedded singletick', ' `` ` [quote][/quote] ``', ' `` ` ``'], // may need newline
        ['multiline double backtick', 'before ``code\ncode2`` after', 'before ``code\ncode2`` after'],
        ['multiline double backtick with quote', 'before ``[quote]\n[/quote]`` after',
            'before ``\n`` after' // may need newline
        ],
        ['multiline double backtick 2', 'before ``\ncode\ncode2\n`` after', 'before ``\ncode\ncode2\n`` after'],
        ['multiline double backtick with quote 2', 'before ``\n[quote]\n[/quote]\n`` after',
            'before ``\n\n`` after' // may need newline
        ],
        ['multiline double backtick 3', 'before ``javascript\ncode\ncode2\n`` after',
            'before ``javascript\ncode\ncode2\n`` after'
        ],
        ['multiline double backtick with quote 2', 'before ``javascript\n[quote]\n[/quote]\n`` after',
            'before ``javascript\n\n`` after' // may need newline
        ],
        ['inline triple backtick', '```code```', '```code```'],
        ['inline triple backtick with quote', '```[quote][/quote]```', '``````'], // may need newline
        ['inline triple backtick with singletick', '```code with ` embedded```', '```code with ` embedded```'],
        ['inline triple backtick with doubletick', '```code with `` embedded```', '```code with `` embedded```'],
        ['inline triple backtick with linebreak', '```code with\nlinebreak```', '```code with\nlinebreak```'],
        ['inline triple backtick with singletick and quote', '```code with ` [quote][/quote]embedded```',
            '```code with ` embedded```'
        ],
        ['inline triple backtick with doubletick and quote', '```code with `` [quote][/quote]embedded```',
            '```code with `` embedded```'
        ],
        ['inline triple backtick with linebreak and quote', '```code with\n[quote][/quote]linebreak```',
            '```code with\nlinebreak```'
        ],
        ['tripletick with embedded tripletick', '```\ncode```\n```', ''],
        ['language hinted tripletick with embedded tripletick', '```ruby\ncode```\n```', ''],
        ['quadruple tick', '````code````', '````code````'],
        ['quadruple tick with singletick', '````code ` code2````', '````code ` code2````'],
        ['quadruple tick with doubletick', '````code `` code2````', '````code `` code2````'],
        ['quadruple tick with tripletick', '````code ``` code2````', '````code ``` code2````'],
        ['quadruple tick with newline', '````code\ncode2````', '````code\ncode2````'],
        // NOTE(review): despite its name this case contains no [quote] tag
        // and repeats the 'quadruple tick' input — confirm the intended input.
        ['quadruple tick with quote', '````code````', '````code````'],
        ['quadruple tick with singletick and quote', '````code ` [quote][/quote]code2````',
            '````code ` code2````'
        ],
        ['quadruple tick with doubletick and quote', '````code `` [quote][/quote]code2````',
            '````code `` code2````'
        ],
        ['quadruple tick with tripletick and quote', '````code ``` [quote][/quote]code2````',
            '````code ``` code2````'
        ],
        ['quadruple tick with newline and quote', '````code\n[quote][/quote]code2````',
            '````code\ncode2````'
        ]
    ],
    // Format one failed case as an HTML fragment; all dynamic values are
    // escaped because the result is assigned to innerHTML.
    (testname, input, expected, instead) =>
        '<h2>' + escapeHtml(testname) + '</h2><br>' +
        '<h3>Supplied:</h3><pre>' + escapeHtml(input) + '</pre><br>' +
        '<h3>Expected:</h3><pre>' + escapeHtml(expected) + '</pre><br>' +
        '<h3>Got instead:</h3><pre>' + escapeHtml(instead) + '</pre><br>'
) || '<h2>Passed</h2>';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment