Skip to content

Instantly share code, notes, and snippets.

@Rudokhvist
Created April 8, 2018 18:45
Show Gist options
  • Save Rudokhvist/c394ec117daae62ea7effbe53f9b9cb9 to your computer and use it in GitHub Desktop.
Save Rudokhvist/c394ec117daae62ea7effbe53f9b9cb9 to your computer and use it in GitHub Desktop.
AntiZalgo
// ==UserScript==
// @name Anti-Zalgo (Complex) NRGsoft
// @namespace https://github.com/Goodlookinguy/pwvnrg/raw/master/anti-zalgo/antizalgo-complex.js
// @version 1.0b
// @description Strip certain ranges of text that can be used to create Zalgo text from forums
// @author NRGLG
// @run-at document-end
// @include http://*/*
// @include https://*/*
// @grant none
// ==/UserScript==
(function()
{
// more specific filters
// following the Unicode categorization
// this list is incomplete at the moment
var blockedRanges = [
////////////////////////////////////////////////
// European Scripts
[0xFB1E, 0xFB1E], // Alphabetic Presentation Forms
[0x0483, 0x0489], // Cyrillic
[0x2DE0, 0x2DFF], // Cyrillic Extended-A
[0xA66F, 0xA672], // Cyrillic Extended-B
[0xA674, 0xA67D], // Cyrillic Extended-B
[0xA69E, 0xA69F], // Cyrillic Extended-B
[0x10376, 0x1037A], // Old Permic
[0x101FD, 0x101FD], // Phaistos Disc
////////////////////////////////////////////////
// Combining Marks
[0x0300, 0x036F], // Combining Diacritical Marks
[0x1AB0, 0x1ABE], // Combining Diacritical Marks Extended
[0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
[0x20D0, 0x20F0], // Combining Diacritical Marks for Symbols
[0xFE20, 0xFE2F], // Combining Half Marks
////////////////////////////////////////////////
// African Scripts
[0xA6F0, 0xA6F1], // Bamum
[0x16AF0, 0x16AF4], // Bassa Vah
[0x2CEF, 0x2CF1], // Coptic
[0x102E0, 0x102E0], // Coptic Epact Numbers
[0x1E8D0, 0x1E8D6], // Mende Kikakui
[0x07EB, 0x07F3], // N'Ko
[0x2D7F, 0x2D7F], // Tifinagh
////////////////////////////////////////////////
// Middle Eastern Scripts
[0x0610, 0x061A], // Arabic
[0x064B, 0x065F], // Arabic
[0x06D6, 0x06DC], // Arabic
[0x06DF, 0x06E4], // Arabic
[0x06E7, 0x06E8], // Arabic
[0x06EA, 0x06ED], // Arabic
[0x08E4, 0x08FF], // Arabic Extended-A
[0x0591, 0x05BD], // Hebrew
[0x05BF, 0x05BF], // Hebrew
[0x05C1, 0x05C2], // Hebrew
[0x05C4, 0x05C5], // Hebrew
[0x05C7, 0x05C7], // Hebrew
[0x0859, 0x085B], // Mandaic
[0x0816, 0x0819], // Samaritan
[0x081B, 0x0823], // Samaritan
[0x0825, 0x0827], // Samaritan
[0x0829, 0x082D], // Samaritan
[0x0711, 0x0711], // Syriac
[0x0730, 0x074A], // Syriac
////////////////////////////////////////////////
// Central Asian Scripts
[0x10AE5, 0x10AE6], // Manichaean
[0x18A9, 0x18A9], // Mongolian
[0x0F00, 0x0FDA], // Tibetan (I'm going to block this whole language)
////////////////////////////////////////////////
// South Asian Scripts
[0x1171D, 0x1172B], // Ahom
[0x0980, 0x0983], // Bengali
[0x09BC, 0x09BC], // Bengali
[0x09BE, 0x09C4], // Bengali
[0x09C7, 0x09C8], // Bengali
[0x09CB, 0x09CE], // Bengali
[0x09D7, 0x09D7], // Bengali
[0x09E2, 0x09E3], // Bengali
[0x11000, 0x11002], // Brahmi
[0x11038, 0x11046], // Brahmi
[0x11100, 0x11102], // Chakma
[0x11127, 0x11134], // Chakma
[0x0900, 0x0903], // Devanagari
[0x093A, 0x094F], // Devanagari
[0x0951, 0x0957], // Devanagari
[0x0962, 0x0963], // Devanagari
[0xA8E0, 0xA8F0], // Devanagari Extended
[0x11300, 0x11303], // Grantha
[0x1133C, 0x1133C], // Grantha
[0x1133E, 0x1134D], // Grantha
[0x11357, 0x11357], // Grantha
[0x11362, 0x11374], // Grantha
[0x0A81, 0x0A83], // Gujarati
[0x0ABC, 0x0ABC], // Gujarati
[0x0ABE, 0x0ACD], // Gujarati
[0x0AE2, 0x0AE3], // Gujarati
[0x0A01, 0x0A03], // Gurmukhi
[0x0A3C, 0x0A51], // Gurmukhi
[0x0A70, 0x0A71], // Gurmukhi
[0x0A75, 0x0A75], // Gurmukhi
[0x11080, 0x11082], // Kaithi
[0x110B0, 0x110BA], // Kaithi
[0x0C81, 0x0C83], // Kannada
[0x0CBC, 0x0CBC], // Kannada
[0x0CBE, 0x0CD6], // Kannada
[0x0CE2, 0x0CE3], // Kannada
// assorted
[0x1920, 0x193B], // Limbu
[0x0D01, 0x0D03], // Malayalam
[0x0D3E, 0x0D57], // Malayalam
[0x0D62, 0x0D63], // Malayalam
[0x0D82, 0x0D83], // Sinhala
[0x0DCA, 0x0DDF], // Sinhala
[0x0DF2, 0x0DF3], // Sinhala
[0x0C00, 0x0C03], // Telugu
[0x0C3E, 0x0C56], // Telugu
[0x0C62, 0x0C63], // Telugu
[0x1CD0, 0x1CD2], // Vedic Extensions
[0x1CD4, 0x1CE8], // Vedic Extensions
[0x1CED, 0x1CED], // Vedic Extensions
[0x1CF2, 0x1CF4], // Vedic Extensions
[0x1CF8, 0x1CF9], // Vedic Extensions
////////////////////////////////////////////////
// Southeast Asian Scripts
[0x0E31, 0x0E31], // Thai
[0x0E34, 0x0E3A], // Thai
[0x0E47, 0x0E4F] // Thai
////////////////////////////////////////////////
// Indonesia & Oceania Scripts
////////////////////////////////////////////////
// East Asian Scripts
////////////////////////////////////////////////
// American Scripts
////////////////////////////////////////////////
// Other
];
// even out the arrays with this since nulls wouldn't show up anyways
if ((blockedRanges.length % 2) == 1)
blockedRanges.unshift([0, 0]);
blockedRanges.sort(function(a, b)
{
if (a[0] < b[0]) return -1;
if (a[0] > b[0]) return 1;
return 0;
});
jQuery('body').find('*').contents().filter(function () { return this.nodeType === 3 && jQuery.trim(this.nodeValue) !== ''; }).each(function(index,value)
{
var text = value.textContent;
var finalText = '';
var dirty = false;
for (var i = 0; i < text.length; i++)
{
var charCode = text.charCodeAt(i);
// If in ASCII and ASCII extended limit, don't bother
// this will drastically decrease processing time
if (charCode >= 256)
{
var filterIndex = (blockedRanges.length / 2) | 0;
var size = (blockedRanges.length / 4) | 0;
var lastSize = size;
while (true)
{
if (blockedRanges[filterIndex][1] < charCode)
{
filterIndex = (filterIndex + size) | 0;
size = Math.ceil(size / 2) | 0;
}
else if (blockedRanges[filterIndex][0] > charCode)
{
filterIndex = (filterIndex - size) | 0;
size = Math.ceil(size / 2) | 0;
}
else
{
break;
}
if (lastSize == size) break;
lastSize = size;
}
var filterA = blockedRanges[Math.max(filterIndex - 1, 0)];
var filterB = blockedRanges[filterIndex];
if ((charCode >= filterB[0] && charCode <= filterB[1]) ||
(charCode >= filterA[0] && charCode <= filterA[1]))
{
charCode = 0;
if (!dirty)
{
dirty = true;
finalText = text.substr(0, i);
}
}
}
if (dirty && charCode > 0)
finalText += text.charAt(i);
}
if (dirty)
value.textContent = finalText;
});
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment