Created
April 8, 2018 18:45
-
-
Save Rudokhvist/c394ec117daae62ea7effbe53f9b9cb9 to your computer and use it in GitHub Desktop.
AntiZalgo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name Anti-Zalgo (Complex) NRGsoft
// @namespace https://github.com/Goodlookinguy/pwvnrg/raw/master/anti-zalgo/antizalgo-complex.js
// @version 1.0b
// @description Strip certain ranges of text that can be used to create Zalgo text from forums
// @author NRGLG
// @run-at document-end
// @include http://*/*
// @include https://*/*
// @grant none
// ==/UserScript==
// Anti-Zalgo: removes combining marks and similar "stacking" characters from
// every text node under document.body.
//
// The pure helpers below live at script scope (outside the IIFE) so they can be
// exercised without a DOM.
// NOTE(review): userscript managers are expected to wrap the script in their own
// scope; if this file is ever loaded as a plain script tag, move the helpers
// back inside the IIFE to avoid leaking globals.

/**
 * Sorts a list of inclusive [first, last] code point ranges by their start and
 * merges ranges that overlap or touch, so a binary search over the result is
 * always correct no matter how the table is edited later.
 *
 * @param {Array<Array<number>>} ranges - Inclusive [first, last] pairs, any order.
 * @returns {Array<Array<number>>} New array of disjoint ranges in ascending order.
 */
function normalizeRanges(ranges)
{
    var sorted = ranges.slice().sort(function (a, b)
    {
        return a[0] - b[0];
    });
    var merged = [];
    for (var i = 0; i < sorted.length; i++)
    {
        var current = sorted[i];
        var last = merged.length > 0 ? merged[merged.length - 1] : null;
        if (last !== null && current[0] <= last[1] + 1)
        {
            // Overlapping or adjacent: extend the previous range instead of adding one.
            if (current[1] > last[1])
                last[1] = current[1];
        }
        else
        {
            // Copy the pair so the caller's input is never mutated.
            merged.push([current[0], current[1]]);
        }
    }
    return merged;
}

// more specific filters
// following the Unicode categorization
// this list is incomplete at the moment
// Each entry is an inclusive [first, last] range of Unicode code points.
var blockedRanges = normalizeRanges([
    ////////////////////////////////////////////////
    // European Scripts
    [0xFB1E, 0xFB1E], // Alphabetic Presentation Forms
    [0x0483, 0x0489], // Cyrillic
    [0x2DE0, 0x2DFF], // Cyrillic Extended-A
    [0xA66F, 0xA672], // Cyrillic Extended-B
    [0xA674, 0xA67D], // Cyrillic Extended-B
    [0xA69E, 0xA69F], // Cyrillic Extended-B
    [0x10376, 0x1037A], // Old Permic
    [0x101FD, 0x101FD], // Phaistos Disc
    ////////////////////////////////////////////////
    // Combining Marks
    [0x0300, 0x036F], // Combining Diacritical Marks
    [0x1AB0, 0x1ABE], // Combining Diacritical Marks Extended
    [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
    [0x20D0, 0x20F0], // Combining Diacritical Marks for Symbols
    [0xFE20, 0xFE2F], // Combining Half Marks
    ////////////////////////////////////////////////
    // African Scripts
    [0xA6F0, 0xA6F1], // Bamum
    [0x16AF0, 0x16AF4], // Bassa Vah
    [0x2CEF, 0x2CF1], // Coptic
    [0x102E0, 0x102E0], // Coptic Epact Numbers
    [0x1E8D0, 0x1E8D6], // Mende Kikakui
    [0x07EB, 0x07F3], // N'Ko
    [0x2D7F, 0x2D7F], // Tifinagh
    ////////////////////////////////////////////////
    // Middle Eastern Scripts
    [0x0610, 0x061A], // Arabic
    [0x064B, 0x065F], // Arabic
    [0x06D6, 0x06DC], // Arabic
    [0x06DF, 0x06E4], // Arabic
    [0x06E7, 0x06E8], // Arabic
    [0x06EA, 0x06ED], // Arabic
    [0x08E4, 0x08FF], // Arabic Extended-A
    [0x0591, 0x05BD], // Hebrew
    [0x05BF, 0x05BF], // Hebrew
    [0x05C1, 0x05C2], // Hebrew
    [0x05C4, 0x05C5], // Hebrew
    [0x05C7, 0x05C7], // Hebrew
    [0x0859, 0x085B], // Mandaic
    [0x0816, 0x0819], // Samaritan
    [0x081B, 0x0823], // Samaritan
    [0x0825, 0x0827], // Samaritan
    [0x0829, 0x082D], // Samaritan
    [0x0711, 0x0711], // Syriac
    [0x0730, 0x074A], // Syriac
    ////////////////////////////////////////////////
    // Central Asian Scripts
    [0x10AE5, 0x10AE6], // Manichaean
    [0x18A9, 0x18A9], // Mongolian
    [0x0F00, 0x0FDA], // Tibetan (I'm going to block this whole language)
    ////////////////////////////////////////////////
    // South Asian Scripts
    [0x1171D, 0x1172B], // Ahom
    [0x0980, 0x0983], // Bengali
    [0x09BC, 0x09BC], // Bengali
    [0x09BE, 0x09C4], // Bengali
    [0x09C7, 0x09C8], // Bengali
    [0x09CB, 0x09CE], // Bengali
    [0x09D7, 0x09D7], // Bengali
    [0x09E2, 0x09E3], // Bengali
    [0x11000, 0x11002], // Brahmi
    [0x11038, 0x11046], // Brahmi
    [0x11100, 0x11102], // Chakma
    [0x11127, 0x11134], // Chakma
    [0x0900, 0x0903], // Devanagari
    [0x093A, 0x094F], // Devanagari
    [0x0951, 0x0957], // Devanagari
    [0x0962, 0x0963], // Devanagari
    [0xA8E0, 0xA8F0], // Devanagari Extended
    [0x11300, 0x11303], // Grantha
    [0x1133C, 0x1133C], // Grantha
    [0x1133E, 0x1134D], // Grantha
    [0x11357, 0x11357], // Grantha
    [0x11362, 0x11374], // Grantha
    [0x0A81, 0x0A83], // Gujarati
    [0x0ABC, 0x0ABC], // Gujarati
    [0x0ABE, 0x0ACD], // Gujarati
    [0x0AE2, 0x0AE3], // Gujarati
    [0x0A01, 0x0A03], // Gurmukhi
    [0x0A3C, 0x0A51], // Gurmukhi
    [0x0A70, 0x0A71], // Gurmukhi
    [0x0A75, 0x0A75], // Gurmukhi
    [0x11080, 0x11082], // Kaithi
    [0x110B0, 0x110BA], // Kaithi
    [0x0C81, 0x0C83], // Kannada
    [0x0CBC, 0x0CBC], // Kannada
    [0x0CBE, 0x0CD6], // Kannada
    [0x0CE2, 0x0CE3], // Kannada
    // assorted
    [0x1920, 0x193B], // Limbu
    [0x0D01, 0x0D03], // Malayalam
    [0x0D3E, 0x0D57], // Malayalam
    [0x0D62, 0x0D63], // Malayalam
    [0x0D82, 0x0D83], // Sinhala
    [0x0DCA, 0x0DDF], // Sinhala
    [0x0DF2, 0x0DF3], // Sinhala
    [0x0C00, 0x0C03], // Telugu
    [0x0C3E, 0x0C56], // Telugu
    [0x0C62, 0x0C63], // Telugu
    [0x1CD0, 0x1CD2], // Vedic Extensions
    [0x1CD4, 0x1CE8], // Vedic Extensions
    [0x1CED, 0x1CED], // Vedic Extensions
    [0x1CF2, 0x1CF4], // Vedic Extensions
    [0x1CF8, 0x1CF9], // Vedic Extensions
    ////////////////////////////////////////////////
    // Southeast Asian Scripts
    [0x0E31, 0x0E31], // Thai
    [0x0E34, 0x0E3A], // Thai
    [0x0E47, 0x0E4F] // Thai
    ////////////////////////////////////////////////
    // Indonesia & Oceania Scripts
    ////////////////////////////////////////////////
    // East Asian Scripts
    ////////////////////////////////////////////////
    // American Scripts
    ////////////////////////////////////////////////
    // Other
]);

/**
 * Tells whether a code point falls inside one of the blocked ranges.
 * Uses a plain binary search over the sorted, disjoint `blockedRanges` table.
 *
 * @param {number} codePoint - Unicode code point (not a UTF-16 code unit).
 * @returns {boolean} true when the code point must be stripped.
 */
function isBlockedCodePoint(codePoint)
{
    var low = 0;
    var high = blockedRanges.length - 1;
    while (low <= high)
    {
        var mid = (low + high) >>> 1;
        var range = blockedRanges[mid];
        if (codePoint < range[0])
            high = mid - 1;
        else if (codePoint > range[1])
            low = mid + 1;
        else
            return true;
    }
    return false;
}

/**
 * Returns `text` with every blocked code point removed.
 *
 * Surrogate pairs are decoded by hand (ES5-compatible) so that the
 * supplementary-plane ranges in the table (above 0xFFFF) can actually match;
 * comparing raw UTF-16 code units against them never would.
 *
 * @param {string} text - Text to clean.
 * @returns {string} The cleaned text; the very same string when nothing was removed.
 */
function stripZalgo(text)
{
    // Stays null until the first blocked character is found, so clean strings
    // are returned untouched without building a copy.
    var kept = null;
    var i = 0;
    while (i < text.length)
    {
        var code = text.charCodeAt(i);
        var width = 1;
        // Combine a high surrogate with a following low surrogate into one code point.
        if (code >= 0xD800 && code <= 0xDBFF && i + 1 < text.length)
        {
            var lowUnit = text.charCodeAt(i + 1);
            if (lowUnit >= 0xDC00 && lowUnit <= 0xDFFF)
            {
                code = ((code - 0xD800) << 10) + (lowUnit - 0xDC00) + 0x10000;
                width = 2;
            }
        }
        // If in ASCII and ASCII extended limit, don't bother
        // this will drastically decrease processing time
        if (code >= 256 && isBlockedCodePoint(code))
        {
            if (kept === null)
                kept = text.substring(0, i);
        }
        else if (kept !== null)
        {
            kept += text.substring(i, i + width);
        }
        i += width;
    }
    return kept === null ? text : kept;
}

(function ()
{
    // Nothing to do without a DOM (or before a body exists).
    if (typeof document === 'undefined' || !document.body)
        return;

    // Native text-node walk; no dependency on the page shipping jQuery.
    var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
    var node;
    while ((node = walker.nextNode()))
    {
        var original = node.nodeValue;
        var cleaned = stripZalgo(original);
        // Only write back when something changed, to avoid needless DOM mutations.
        if (cleaned !== original)
            node.nodeValue = cleaned;
    }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment