Skip to content

Instantly share code, notes, and snippets.

@ElliottLandsborough
Last active August 29, 2019 10:38
Show Gist options
  • Save ElliottLandsborough/549c4654dabb232313c06aa748b9ae43 to your computer and use it in GitHub Desktop.
Save ElliottLandsborough/549c4654dabb232313c06aa748b9ae43 to your computer and use it in GitHub Desktop.
PHP 7.3 advanced bad word filter
<?php
/**
* The Scunthorpe problem
*
* PHP version 7.3
*
* @category LolPHP
* @package LolPHP
* @author Elliott Landsborough <elliott.landsborough@gmail.com>
* @license MIT https://opensource.org/licenses/MIT
* @link https://github.com/ElliottLandsborough
*/
namespace App;
/**
* CensorEngine Class
*
* So I used to work for an agency maintaining a website
* with a target audience of high average age.
*
* Certain pages had comment forms for user discussions etc.
*
* Before realising that every comment would have to be moderated
* before being published, this solution was implemented.
*
* More info about the Scunthorpe problem:
* https://en.wikipedia.org/wiki/Scunthorpe_problem
*
* According to the commit logs, the first iteration of this was
* committed on Mon May 19 14:19:32 2014.
*
* It is still in the codebase as of now (2019-08-29)
*
* @category LolPHP
* @package LolPHP
* @author Elliott Landsborough <elliott.landsborough@gmail.com>
* @license MIT https://opensource.org/licenses/MIT
* @link https://github.com/ElliottLandsborough
*/
class CensorEngine
{
    /**
     * Regex alternation for each latin letter, listing the look-alike
     * ("leet") spellings that are treated as that letter.
     *
     * Every value is a complete capturing group so the values can simply be
     * concatenated into one pattern per bad word.
     *
     * Differences from the first iteration of this table:
     * - 'l' now also accepts "l." (it previously only had "1.", which looks
     *   like a typo since every other letter uses "<letter>\."); "1." is kept.
     * - 'd' also accepts the decoded partial-differential sign, because the
     *   text is entity-decoded before matching and "&part;" can then no
     *   longer occur literally.
     *
     * @var array
     */
    private const LEET_REPLACE = [
        'a' => '(a|a\.|a\-|4|@|Á|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ)',
        'b' => '(b|b\.|b\-|8|\|3|ß|Β|β)',
        'c' => '(c|c\.|c\-|Ç|ç|¢|€|<|\(|{|©)',
        'd' => '(d|d\.|d\-|&part;|∂|\|\)|Þ|þ|Ð|ð)',
        'e' => '(e|e\.|e\-|3|€|È|è|É|é|Ê|ê|∑)',
        'f' => '(f|f\.|f\-|ƒ)',
        'g' => '(g|g\.|g\-|6|9)',
        'h' => '(h|h\.|h\-|Η)',
        'i' => '(i|i\.|i\-|!|\||\]\[|]|1|∫|Ì|Í|Î|Ï|ì|í|î|ï)',
        'j' => '(j|j\.|j\-)',
        'k' => '(k|k\.|k\-|Κ|κ)',
        'l' => '(l|l\.|1\.|l\-|!|\||\]\[|]|£|∫|Ì|Í|Î|Ï)',
        'm' => '(m|m\.|m\-)',
        'n' => '(n|n\.|n\-|η|Ν|Π)',
        'o' => '(o|o\.|o\-|0|Ο|ο|Φ|¤|°|ø)',
        'p' => '(p|p\.|p\-|ρ|Ρ|¶|þ)',
        'q' => '(q|q\.|q\-)',
        'r' => '(r|r\.|r\-|®)',
        's' => '(s|s\.|s\-|5|\$|§)',
        't' => '(t|t\.|t\-|Τ|τ)',
        'u' => '(u|u\.|u\-|υ|µ)',
        'v' => '(v|v\.|v\-|υ|ν)',
        'w' => '(w|w\.|w\-|ω|ψ|Ψ)',
        'x' => '(x|x\.|x\-|Χ|χ)',
        'y' => '(y|y\.|y\-|¥|γ|ÿ|ý|Ÿ|Ý)',
        'z' => '(z|z\.|z\-|Ζ)',
    ];

    /**
     * An array of all the bad words we are looking for
     *
     * @var array
     */
    protected $badwords = [];

    /**
     * Constructor: fills the word list via addWords().
     */
    public function __construct()
    {
        $this->addWords();
    }

    /**
     * Builds a mask of $len characters picked at random from $chars, never
     * using the same pool position twice in a row.
     *
     * @param string $chars Pool of characters to pick from
     * @param int    $len   Number of characters to generate
     *
     * @return string Empty when the pool is empty or $len is not positive;
     *                a plain repetition when the pool holds one character
     */
    protected function randCensor($chars, $len): ?string
    {
        $pool = $this->splitChars((string) $chars);
        $len = (int) $len;

        if ($len <= 0 || $pool === []) {
            return '';
        }

        // With a single candidate the "differs from the previous pick" rule
        // can never be satisfied, so repeating is the only possible output.
        if (count($pool) === 1) {
            return str_repeat($pool[0], $len);
        }

        $last = count($pool) - 1;
        $previous = -1;
        $out = '';

        for ($i = 0; $i < $len; $i++) {
            do {
                $index = mt_rand(0, $last);
            } while ($index === $previous);

            $previous = $index;
            $out .= $pool[$index];
        }

        return $out;
    }

    /**
     * CensorString replaces bad words in a string with a specified character
     *
     * The input is passed through html_entity_decode() first, so both
     * returned strings are decoded text: escape them again before writing
     * them into HTML.
     *
     * Each match is replaced by a mask as long as the listed word (not as
     * long as the matched text). Longer words are tried before shorter ones
     * so that a short word cannot mask only part of a longer one.
     *
     * Patterns are matched without the "u" modifier: case-insensitivity only
     * applies to ASCII letters.
     *
     * @param string $string     The string to be censored
     * @param string $censorChar The character to censor with; when it holds
     *                           several characters each mask is a random mix
     *                           of them, an empty string removes the words
     *
     * @return array 'orig' holds the entity-decoded input, 'clean' the
     *               censored text (null if preg_replace() reports an error)
     */
    public function censorString($string, $censorChar = '*'): ?array
    {
        $censorChar = (string) $censorChar;
        // Count characters, not bytes, so one multi-byte character is repeated
        // instead of being split into random bytes.
        $useRepeat = count($this->splitChars($censorChar)) <= 1;

        $words = array_map('strval', array_values($this->badwords));
        // preg_replace() applies the patterns in order: longest first, with
        // an alphabetical tie-break to keep the order deterministic.
        usort(
            $words,
            static function (string $a, string $b): int {
                return strlen($b) <=> strlen($a) ?: strcmp($a, $b);
            }
        );

        $patterns = [];
        $replacements = [];

        foreach ($words as $word) {
            $length = count($this->splitChars($word));
            if ($length === 0) {
                continue;
            }

            $mask = $useRepeat
                ? str_repeat($censorChar, $length)
                : $this->randCensor($censorChar, $length);

            $patterns[] = $this->buildPattern($word);
            // "$1" / "\1" in a replacement are back-references; escape them
            // so the mask is always inserted literally.
            $replacements[] = addcslashes((string) $mask, '\\$');
        }

        $original = html_entity_decode($string);

        return [
            'orig' => $original,
            'clean' => preg_replace($patterns, $replacements, $original),
        ];
    }

    /**
     * Turns one bad word into a case-insensitive regular expression in which
     * every ASCII letter is widened to its look-alike alternation.
     *
     * The substitution is done in a single pass. A sequential
     * str_ireplace() over all letters would also rewrite letters that occur
     * inside alternations inserted earlier (e.g. the "part" in "&part;").
     *
     * @param string $word The bad word
     *
     * @return string
     */
    private function buildPattern(string $word): string
    {
        $body = preg_replace_callback(
            '/[a-z]|[^a-z]+/i',
            static function (array $match): string {
                $chunk = $match[0];

                // Letters map to their alternation, anything else is matched
                // literally.
                return self::LEET_REPLACE[strtolower($chunk)]
                    ?? preg_quote($chunk, '/');
            },
            $word
        );

        return '/' . $body . '/i';
    }

    /**
     * Splits a string into characters, treating it as UTF-8 when it is
     * valid UTF-8 and falling back to single bytes otherwise.
     *
     * @param string $text The string to split
     *
     * @return array List of characters, empty for an empty string
     */
    private function splitChars(string $text): array
    {
        if ($text === '') {
            return [];
        }

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        return $chars === false ? str_split($text) : $chars;
    }

    /**
     * Adds bad words to class variable
     *
     * NOTE(review): 'bastard ' and 'ejaculating ' carry a trailing space, so
     * they only match when followed by a space and the space is masked too.
     * This looks like a typo but is kept as-is - confirm before changing.
     *
     * @return void
     */
    protected function addWords(): void
    {
        array_push(
            $this->badwords,
            'assfuck',
            'assfucker',
            'asshat',
            'asshole',
            'assholes',
            'asshore',
            'assjockey',
            'asskiss',
            'asskisser',
            'assklown',
            'asslick',
            'asslicker',
            'asslover',
            'assmonkey',
            'assmunch',
            'assmuncher',
            'asspacker',
            'asspirate',
            'asspuppies',
            'assranger',
            'asswhore',
            'asswipe',
            'arsehole',
            'ballsack',
            'bastard ',
            'bitch',
            'blowjob',
            'bollock',
            'bugger',
            'bullshit',
            'clitoris',
            'cockhead',
            'cockknob',
            'cocklicker',
            'cocklover',
            'cocknob',
            'cockqueen',
            'cockrider',
            'cocksman',
            'cocksmith',
            'cocksmoker',
            'cocksucer',
            'cocksuck',
            'cocksucked',
            'cocksucker',
            'cocksucking',
            'crackwhore',
            'crack-whore',
            'cunt',
            'dickbrain',
            'dickforbrains',
            'dickhead',
            'dickless',
            'dicklick',
            'dicklicker',
            'ejaculate',
            'ejaculated',
            'ejaculating ',
            'ejaculation',
            'fagging',
            'faggot',
            'fagot',
            'felcher',
            'felching',
            'fellatio',
            'fetish',
            'fuck',
            'fucck',
            'fudgepacker',
            'genital',
            'gypo',
            'gypp',
            'handjob',
            'hymen',
            'limpdick',
            'mastabate',
            'masterbate',
            'molest',
            'mothafuck',
            'mothafucka',
            'mothafuckaz',
            'mothafucked',
            'mothafucker',
            'mothafuckin',
            'mothafucking',
            'mothafuckings',
            'motherfuck',
            'motherfucked',
            'motherfucker',
            'motherfuckin',
            'motherfucking',
            'motherfuckings',
            'motherlovebone',
            'muffdive',
            'muffdiver',
            'muffindiver',
            'mufflikcer',
            'nigga',
            'orgasm',
            'penis',
            'pornography',
            'pussie',
            'pussyeater',
            'pussyfucker',
            'pussylicker',
            'pussylips',
            'pussylover',
            'pussypounder',
            'raghead',
            'retard',
            'retarded',
            'shagging',
            'shit',
            'skank',
            'slut',
            'slutty',
            'slutwear',
            'slutwhore',
            'testicle',
            'vulva'
        );
    }
}
// Demo run: censor a sample sentence and dump the resulting array.
$censorEngine = new CensorEngine();
echo print_r($censorEngine->censorString('You are a whoreish asspirate.'), true);
/**
* Expected output:
*
* Array
* (
* [orig] => You are a whoreish asspirate.
* [clean] => You are a whoreish *********.
* )
*
* The original wordlist from 19/05/2014:
*
* 'analplug', 'analsex', 'arse', 'assassin','balls', 'bimbo', 'bloody',
* 'bloodyhell', 'blowjob', 'bollocks', 'boner', 'boobies', 'boobs',
* 'bugger', 'bukkake', 'bullshit', 'chink', 'clit', 'clitoris',
* 'cocksucker', 'condom', 'coon', 'crap', 'cumshot', 'damm', 'dammit',
* 'damn', 'dickhead', 'doggystyle', 'f0ck', 'fags', 'fanny', 'fck',
* 'fcker', 'fckr', 'fcku', 'fcuk', 'fucker', 'fuckface', 'fuckr',
* 'fuct', 'genital', 'genitalia', 'genitals', 'glory hole', 'gloryhole',
* 'gobshite', 'godammet', 'godammit', 'goddammet', 'goddammit',
* 'goddamn', 'gypo', 'hitler', 'hooker', 'hore', 'horny', 'jesussucks',
* 'jizzum', 'kaffir', 'kill', 'killer', 'killin', 'killing', 'lesbo',
* 'masturbate', 'milf', 'molest', 'moron', 'motherfuck', 'mthrfckr',
* 'murder', 'murderer', 'nazi', 'negro', 'nigga', 'niggah', 'nonce',
* 'paedo', 'paedophile', 'paki', 'pecker', 'pedo', 'pedofile',
* 'pedophile', 'phuk', 'pig', 'pimp', 'poof', 'porn', 'prick', 'pron',
* 'prostitute', 'raped', 'rapes', 'rapist', 'schlong', 'screw',
* 'scrotum', 'shag', 'shemale', 'shite', 'shiz', 'slag', 'spastic',
* 'spaz', 'sperm', 'spunk', 'stripper', 'tart', 'terrorist', 'tits',
* 'tittyfuck', 'tosser', 'turd', 'vaginal', 'vibrator', 'wanker',
* 'weed', 'wetback', 'whor', 'whore', 'wog', 'wtf', 'xxx', 'anal',
* 'anus', 'ass', 'bastard', 'bitch', 'boob', 'cock', 'cum', 'cunt',
* 'dick', 'dildo', 'dyke', 'fag', 'faggot', 'fuck', 'fuk', 'handjob',
* 'homo', 'jizz', 'kike', 'kunt', 'muff', 'nigger', 'penis', 'piss',
* 'poop', 'pussy', 'queer', 'rape', 'semen', 'sex', 'shit', 'slut',
* 'titties', 'twat', 'vagina', 'vulva', 'wank'
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment