Skip to content

Instantly share code, notes, and snippets.

@AccaliaDeElementia
Created June 25, 2015 16:53
Show Gist options
  • Save AccaliaDeElementia/08e0c2319239d8038742 to your computer and use it in GitHub Desktop.
Save AccaliaDeElementia/08e0c2319239d8038742 to your computer and use it in GitHub Desktop.
Updated sanitize with new tests (SockBot/es6-dev)
/**
 * Strip GFM fenced code blocks and bbcode [quote]...[/quote] blocks
 * (including nested ones) from a piece of text.
 *
 * Control characters \x01-\x04 are used internally as temporary markers;
 * they are safe to use because all low-ASCII control characters other than
 * tab and newline are removed from the input before any marker is inserted.
 *
 * @param {string} text - raw text to clean up
 * @returns {string} the text with newlines normalized to \n, control
 *     characters removed, and fenced code / quoted blocks stripped
 */
function sanitize(text) {
    // Normalize Windows (\r\n) and old Mac (\r) line endings to \n.
    const normalized = text.replace(/\r\n?/g, '\n');

    // Drop low-ASCII control chars, keeping only \t (\x09) and \n (\x0a).
    const printable = normalized.replace(/[\x00-\x08\x0b-\x1f]/g, '');

    // Drop GFM-fenced code blocks (opening and closing fence on own lines).
    const unfenced = printable.replace(/^```.*\n(?:.*\n)*?```(?:\n|$)/gm, '');

    // Inside single-line inline code spans, break every "[" with an \x01
    // marker so the bbcode tag patterns below cannot match there.
    const shielded = unfenced.replace(
        /`[^`\n]*`/g,
        (span) => span.replace(/\[/g, '[\x01')
    );

    // Prefix [quote] / [quote=whatever] start tags with \x02 ...
    const withOpeners = shielded.replace(/\[quote(?:=[^[\]]*)?]/ig, '\x02$&');

    // ... and suffix [/quote] end tags with \x03.
    let current = withOpeners.replace(/\[\/quote]/ig, '$&\x03');

    // Collapse innermost (non-nested) quote blocks into an \x04 placeholder
    // until a pass changes nothing; this peels nesting from the inside out.
    for (;;) {
        const collapsed = current.replace(/\x02[^\x02\x03]*\x03/g, '\x04');
        if (collapsed === current) {
            break;
        }
        current = collapsed;
    }

    // An opener that is still present but is followed by a placeholder is
    // unbalanced: treat the placeholder as its missing end tag and drop
    // everything in between.
    const withoutDangling = current.replace(/\x02[^\x04]*\x04/g, '');

    // Finally remove every leftover helper marker.
    return withoutDangling.replace(/[\x01-\x04]/g, '');
}
/**
 * Run each test case through `func` and accumulate a report of the cases
 * whose result is not strictly equal to the expected value.
 *
 * @param {Function} func - function under test; called with the case input
 * @param {Array} testcases - list of [testname, input, expected] entries
 * @param {Function} failed - called as failed(testname, input, expected,
 *     actual) for each failing case; its return value is appended to the
 *     report
 * @param {string} [initial=''] - starting value of the report
 * @returns {string} `initial` followed by the output of `failed` for every
 *     failing case, in test-case order
 */
function failures(func, testcases, failed, initial = '') {
    let report = initial;
    testcases.forEach((testcase) => {
        const testname = testcase[0];
        const input = testcase[1];
        const expected = testcase[2];
        const actual = func(input);
        if (actual === expected) {
            // Passing cases contribute nothing to the report.
            return;
        }
        report += failed(testname, input, expected, actual);
    });
    return report;
}
// Browser-side test harness: runs sanitize() against every
// [testname, input, expected] case below and writes an HTML report of the
// failing cases into the element with id "test". When no case fails the
// report is empty, so '<h2>Passed</h2>' is shown instead.
document.getElementById('test').innerHTML = failures(sanitize,
    [
        // --- bbcode quote blocks ---
        ['Full quote empty', '[quote="accalia, post:108, topic:49440, full:true"][/quote]', ''],
        ['Full quote with text', '[quote="accalia, post:108, topic:49440, full:true"]\nthis is ' +
            'a quote[/quote]', ''
        ],
        ['Full quote with multiline text', '[quote="accalia, post:108, topic:49440, full:true"]\r\n' +
            'this is a quote\r\n\r\n[/quote]', ''
        ],
        ['Partial quote empty', '[quote="accalia, post:108, topic:49440"][/quote]', ''],
        ['Partial quote with text', '[quote="accalia, post:108, topic:49440"]this is a quote[/quote]', ''],
        ['Partial quote with multiline text', '[quote="accalia, post:108, topic:49440"]\n' +
            'this is a quote [/quote]', ''
        ],
        ['Topic quote with multiline text', '[quote="accalia, post:108"]\nthis is a quote\n\r\n[/quote]', ''],
        ['Topic quote empty', '[quote="accalia, post:108"][/quote]', ''],
        ['Username quote empty', '[quote="accalia"][/quote]', ''],
        ['Username quote with text', '[quote="accalia"]this is a quote\r\n\r\n[/quote]', ''],
        ['Unquoted username quote empty', '[quote=accalia][/quote]', ''],
        ['Unquoted username quote', '[quote=accalia]this is a quote[/quote]', ''],
        ['Bare quote empty', '[quote][/quote]', ''],
        ['Bare quote', '[quote]\n\nthis is a quote[/quote]', ''],
        ['Only open quote', '[quote="accalia, post:108, topic:49440, full:true"]',
            '[quote="accalia, post:108, topic:49440, full:true"]'
        ],
        ['Malformed open quote', '[quote="accalia, post:108, topic:49440, full:true"this is a quote[/quote]',
            '[quote="accalia, post:108, topic:49440, full:true"this is a quote[/quote]'
        ],
        ['Malformed close quote', '[quote="accalia, post:108, topic:49440, full:true"]this is a quote/quote]',
            '[quote="accalia, post:108, topic:49440, full:true"]this is a quote/quote]'
        ],
        ['Malformed close quote 2', '[quote="accalia, post:108, topic:49440"][/quote',
            '[quote="accalia, post:108, topic:49440"][/quote'
        ],
        ['Malformed open quote 2', '[quote="accalia, post:108, topic:49440"]this is a quote [quote]',
            '[quote="accalia, post:108, topic:49440"]this is a quote [quote]'
        ],
        ['Only close quote', '[/quote]', '[/quote]'],
        // Renamed from a second 'Malformed open quote 2' so failure headings are unambiguous.
        ['Malformed open quote 3', '[quoteaccalia, post:108"][/quote]', '[quoteaccalia, post:108"][/quote]'],
        ['Only a quote', '[quote=a]i am stripped[/quote]', ''],
        ['Embedded quote', 'this is\n[quote=accalia][/quote] \r\na quote', 'this is\n \na quote'],
        ['Nested quote simple', 'this[quote]nope[quote]no[/quote]nada[/quote] survives', 'this survives'],
        ['Unmatched quote block', 'i am not a [quote]', 'i am not a [quote]'],
        ['Unmatched quote in valid quote', 'this[quote]nope[quote]no[quote]nada[/quote] survives',
            'this survives'
        ],
        ['Multiple quotes', '[quote="accalia, post:108, topic:49440, full:true"]\nthis is a quote[/quote] ' +
            'inner words [quote="accalia, post:108, topic:49440, full:true"]\nthis is another quote[/quote]',
            ' inner words '
        ],
        ['Multiple quotes 2', 'before words\n[quote="accalia, post:108, topic:49440, full:true"]\nthis is a ' +
            'quote[/quote] inner words [quote="accalia, post:108, topic:49440, full:true"]\nthis is another ' +
            'quote[/quote]\nafter words',
            'before words\n inner words \nafter words'
        ],

        // --- GFM fenced code blocks ---
        ['Only a GFM with embedded backticks', '```\n``\n```', ''],
        ['Empty Line in a GFM block', '```\n\n```', ''],
        ['Empty GFM block', '```\n```', ''],
        ['GFM with space for typehint', '``` \nfoo();\n```', ''],
        ['Only a GFM code block', '```\ncode\n```', ''],
        ['GFM with text before', 'before\n```\ncode\n```', 'before\n'],
        ['GFM with text before 2', 'before2\n```\ncode\n```\n', 'before2\n'],
        ['GFM with text between', '```\ncode\n```\nbetween\n```\ncode\n```', 'between\n'],
        ['GFM with text between 2', '```\ncode\n```\nbetween2\n```\ncode\n```\n', 'between2\n'],
        ['GFM with text after', '```\ncode\n```\nafter', 'after'],
        ['Only a type hinted GFM', '```type\ncode\n```', ''],
        ['GFM with unpaired tripletic', '```\ncode\n```\nnotcode\n```\n', 'notcode\n```\n'],
        ['GFM with before/after text', 'before\n```\n```\nafter', 'before\nafter'],
        ['Multiple GFM blocks', 'before\n```\n```\nmiddle\n```\nafter', 'before\nmiddle\n```\nafter'],
        ['Multiple GFM blocks 2', 'before\n```\n1\n```\nmiddle\n```\n2\n```\nafter', 'before\nmiddle\nafter'],
        ['GFM with trailing text', '```\n```test', '```\n```test'],
        ['GFM wit trailing space on close', '```\n``` ', '```\n``` '],
        ['GFM without line breaks', '```foo```', '```foo```'],
        ['GFM missing with close not on new line', '```\ntest();```', '```\ntest();```'],
        ['GFM with space before open', ' ```\ntest();\n```', ' ```\ntest();\n```'],

        // --- inline code and newline normalization ---
        ['Inline code block', 'this `is` code', 'this `is` code'],
        ['inline code block in quote', 'a[quote]`code`[/quote]b', 'ab'],
        ['Inline code with quote inside', '`[quote]a[/quote]`', '``'],
        ['Not an inline code with quote "inside"', '`\n[quote][/quote]\n`', '`\n\n`'],
        ['Newline Normalization - windows single', 'this is normal\r\n', 'this is normal\n'],
        ['Newline Normalization - windows multiple', 'this is normal\r\n\r\n', 'this is normal\n\n'],
        ['Newline Normalization - mac', 'this\ris normal\r', 'this\nis normal\n'],
        ['Newline normalization - mixed', 'this\ris\nnormal\r\n', 'this\nis\nnormal\n'],
        ['Newline Normalization - windows multiple 2', 'this\r\nis normal\r\n', 'this\nis normal\n'],
        ['One backtick around code', 'One backtick around `code`', 'One backtick around `code`'],
        ['Bare quote in inline code', '`[quote][/quote]`', '`[quote][/quote]`'],
        ['Bare quote with backtic', '`[quote][/quote]', ''],
        ['"inline" code on multiline', 'a `code\ncode` b', 'a `code\ncode` b'],
        ['multiline inline code', '`\ncode\n`', '`\ncode\n`'],
        ['multiline with aftertext', '`name\ncode`\ntext', '`name\ncode`\ntext'],

        // --- double backticks ---
        ['double backtick inline code', '``inline code``', '``inline code``'],
        ['double backtick with quote', '``[quote][/quote]``', '``[quote][/quote]``'],
        ['inline code before quote', '``code``[quote][/quote]``', '``code``'],
        ['double backtick with embedded singletick', ' ``code with ` embedded``', ' ``code with ` embedded``'],
        // Renamed from a second 'double backtick with embedded singletick'.
        ['double backtick with embedded singletick and quote', ' `` ` [quote][/quote] ``', ' `` ` ``'], //may need newline
        ['multiline double backtick', 'before ``code\ncode2`` after', 'before ``code\ncode2`` after'],
        ['multiline double backtick with quote', 'before ``[quote]\n[/quote]`` after',
            'before ``\n`` after' //may need newline
        ],
        ['multiline double backtick 2', 'before ``\ncode\ncode2\n`` after', 'before ``\ncode\ncode2\n`` after'],
        ['multiline double backtick with quote 2', 'before ``\n[quote]\n[/quote]\n`` after',
            'before ``\n\n`` after' //may need newline
        ],
        ['multiline double backtick 3', 'before ``javascript\ncode\ncode2\n`` after',
            'before ``javascript\ncode\ncode2\n`` after'
        ],
        // Renamed from a second 'multiline double backtick with quote 2'.
        ['multiline double backtick with quote 3', 'before ``javascript\n[quote]\n[/quote]\n`` after',
            'before ``javascript\n\n`` after' //may need newline
        ],

        // --- triple backticks used inline ---
        ['inline triple backtick', '```code```', '```code```'],
        ['inline triple backtick with quote', '```[quote][/quote]```', '``````'], //may need newline
        ['inline triple backtick with singletick', '```code with ` embedded```', '```code with ` embedded```'],
        ['inline triple backtick with doubletick', '```code with `` embedded```', '```code with `` embedded```'],
        ['inline triple backtick with linebreak', '```code with\nlinebreak```', '```code with\nlinebreak```'],
        ['inline triple backtick with singletick and quote', '```code with ` [quote][/quote]embedded```',
            '```code with ` embedded```'
        ],
        ['inline triple backtick with doubletick and quote', '```code with `` [quote][/quote]embedded```',
            '```code with `` embedded```'
        ],
        ['inline triple backtick with linebreak and quote', '```code with\n[quote][/quote]linebreak```',
            '```code with\nlinebreak```'
        ],
        ['tripletick with embedded tripletick', '```\ncode```\n```', ''],
        ['language hinted tripletick with embedded tripletick', '```ruby\ncode```\n```', ''],

        // --- quadruple backticks ---
        ['quadruple tick', '````code````', '````code````'],
        ['quadruple tick with singletick', '````code ` code2````', '````code ` code2````'],
        ['quadruple tick with doubletick', '````code `` code2````', '````code `` code2````'],
        ['quadruple tick with tripletick', '````code ``` code2````', '````code ``` code2````'],
        ['quadruple tick with newline', '````code\ncode2````', '````code\ncode2````'],
        // The input previously contained no quote at all, making this case a
        // duplicate of 'quadruple tick'; it now exercises what its name says.
        ['quadruple tick with quote', '````code[quote][/quote]````', '````code````'],
        ['quadruple tick with singletick and quote', '````code ` [quote][/quote]code2````',
            '````code ` code2````'
        ],
        ['quadruple tick with doubletick and quote', '````code `` [quote][/quote]code2````',
            '````code `` code2````'
        ],
        ['quadruple tick with tripletick and quote', '````code ``` [quote][/quote]code2````',
            '````code ``` code2````'
        ],
        ['quadruple tick with newline and quote', '````code\n[quote][/quote]code2````',
            '````code\ncode2````'
        ]
    ],
    // Formats one failing case as an HTML fragment for the report.
    (testname, input, expected, instead) => {
        // The values are inserted via innerHTML, so HTML metacharacters must
        // be escaped or a case containing "<", ">" or "&" would corrupt the
        // report instead of being displayed verbatim.
        const escapeHtml = (value) => String(value).
            replace(/&/g, '&amp;').
            replace(/</g, '&lt;').
            replace(/>/g, '&gt;');
        return '<h2>' + escapeHtml(testname) + '</h2><br>' +
            '<h3>Supplied:</h3><pre>' + escapeHtml(input) + '</pre><br>' +
            '<h3>Expected:</h3><pre>' + escapeHtml(expected) + '</pre><br>' +
            '<h3>Got instead:</h3><pre>' + escapeHtml(instead) + '</pre><br>';
    }
) || '<h2>Passed</h2>';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment