Last active
August 29, 2015 14:19
-
-
Save arthurkushman/2c0e7c723d887e3613e5 to your computer and use it in GitHub Desktop.
Anti-spam filter based on counters (no more than n messages per period t, tolerated k times) plus protection from repeated image/text content via MD5 hash comparison (hash of the file bytes in the case of an image). Additional parameters can be passed in to guard against accidental repetition of text messages and images within a separate period.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Counter- and repetition-based anti-spam filter backed by the `anti_spam` table.
 *
 * One row per user keeps the current rate window (time_start/counter/failed),
 * the MD5 of the previous text and image (salt/salt_img), a repetition score
 * (same_counter) and an optional "same content per period" counter.
 *
 * NOTE(review): isSpam() calls $this->checkSimilarity() and
 * $this->spamWordsIndex(), which are not defined in this class as shown here;
 * they must be provided elsewhere (e.g. by a subclass) — confirm.
 */
class AntiSpam {

    // ANTI-SPAM CONSTS
    const SPAM_TIME_LIMIT = 60, // LENGTH OF THE RATE WINDOW IN SECONDS
            SPAM_COUNTER_LIMIT = 10, // NOT MORE THAN n MESSAGES PER TIME_LIMIT ^
            FAILS_LIMIT = 10, // NOT MORE THAN n FAILS (COUNTER_LIMIT ^ REACHED) BEFORE BLOCKING
            SAME_MSGS_LIMIT = 200, // REPETITION SCORE ABOVE WHICH THE USER IS BLOCKED
            TEXT_NOT_CHECKED = 45, // MIN SYMBOLS TO CHECK
            SIMILARITY_THRESHOLD = 8, // AMOUNT OF SIMILAR BLOCKS PREV TO NEXT MSG
            SIMILAR_BLOCKS_MSG = 10, // FIND THAT AMOUNT OF BLOCKS
            INDEX_DIVISION = 10; // SEARCH INDEX DIVISION

    /**
     * Database handle used for every query; expected to expose the PDO API
     * (exec() and prepare()). Declared explicitly so that assigning it from
     * outside does not create a dynamic property.
     *
     * @var PDO|null
     */
    public $dbConn;

    /**
     * Decides whether the incoming message should be treated as spam and
     * updates the per-user counters accordingly.
     *
     * Side effects: creates the `anti_spam` table if missing, inserts/updates
     * the user's row there and, once a limit is exceeded, sets blocked=1 for
     * the user in the `users` table.
     *
     * @param int    $userId       id of the posting user
     * @param string $text         text of the post/message
     * @param string $img          path to the attached image, usually
     *                             $_FILES['image']['tmp_name']; anything that
     *                             is not an existing file means "no image"
     * @param int    $counterLimit max messages allowed per SPAM_TIME_LIMIT
     * @param bool   $countFails   whether accumulated failures may block the user
     * @param array  $addParams    optional "same content per period" limits,
     *                             ex.: array('period_same_counter' => 3,
     *                             'period_same_time' => 15 * 60); ignored
     *                             unless both keys are present
     * @return bool true when the message is considered spam, false otherwise
     * @throws RuntimeException when a statement cannot be prepared
     */
    public function isSpam($userId, $text, $img, $counterLimit = self::SPAM_COUNTER_LIMIT,
            $countFails = true, $addParams = []) {
        $text = (string) $text;
        $now = time();
        $period = $now - self::SPAM_TIME_LIMIT;

        // The text hash ignores spaces and is taken before trimming.
        $saltText = md5(str_replace(' ', '', $text));
        $text = trim($text);

        // Without a real file a unique value is stored so it never matches a previous image.
        $saltImg = uniqid();
        $hasImage = is_string($img) && $img !== '' && is_file($img);
        if ($hasImage) {
            $fileHash = md5_file($img);
            if ($fileHash === false) {
                $hasImage = false; // unreadable file: treat as "no image"
            } else {
                $saltImg = $fileHash;
            }
        }

        $this->ensureTable();
        $row = $this->loadState($userId);

        if (empty($row)) {
            // first message of this user: create the row and start from clean counters
            $this->runStatement("INSERT INTO anti_spam SET user_id=:user_id, text=:text, time_start=:time_start, "
                    . "counter=0, salt=:salt, salt_img='', same_counter=0, period_same_counter=0, "
                    . "period_time_start=:period_time_start", array(
                ':user_id' => $userId,
                ':text' => $text,
                ':time_start' => $now,
                ':salt' => $saltText,
                ':period_time_start' => $now,
            ));
            $row = array(
                'time_start' => $now,
                'text' => '',
                'counter' => 0,
                'failed' => 0,
                'salt' => '',
                'salt_img' => '',
                'same_counter' => 0,
                'period_same_counter' => 0,
                'period_time_start' => $now,
            );
        }

        // PDO may hand numeric columns back as strings; normalise before doing arithmetic.
        foreach (array('time_start', 'counter', 'failed', 'same_counter', 'period_same_counter', 'period_time_start') as $key) {
            $row[$key] = isset($row[$key]) ? (int) $row[$key] : 0;
        }
        $prevSalt = isset($row['salt']) ? (string) $row['salt'] : '';
        $prevSaltImg = isset($row['salt_img']) ? (string) $row['salt_img'] : '';
        $prevText = isset($row['text']) ? (string) $row['text'] : '';

        // Same content as the previous message? ("0" is a valid, non-empty text.)
        $isRepeat = ($text !== '' && $prevSalt === $saltText) || ($hasImage && $prevSaltImg === $saltImg);

        // check if same text/pics for the caller-supplied period
        $sameForPeriod = false;
        if (is_array($addParams) && isset($addParams['period_same_time'], $addParams['period_same_counter'])) {
            $periodSame = $now - $addParams['period_same_time'];
            if ($row['period_time_start'] < $periodSame) { // period lapsed
                $row['period_time_start'] = $now;
                $row['period_same_counter'] = 0;
            } elseif ($isRepeat) { // prev and curr are the same
                $row['period_same_counter'] += 1;
                // N or more identical messages in T period
                $sameForPeriod = $row['period_same_counter'] >= $addParams['period_same_counter'];
            }
        }

        // limits already exceeded before this message: block the user immediately
        $tooManyFails = $countFails === true && $row['failed'] > self::FAILS_LIMIT;
        $tooManyRepeats = $row['same_counter'] > self::SAME_MSGS_LIMIT;
        if ($tooManyFails || $tooManyRepeats) {
            // 1 - rate limit failed too many times, 2 - too many same/similar msgs
            $spamIssue = $row['failed'] > self::FAILS_LIMIT ? 1 : 2;
            $this->runStatement("UPDATE users SET blocked=1, spam_issue=:spam_issue, spam_text=:spam_text "
                    . "WHERE id=:id", array(
                ':spam_issue' => $spamIssue,
                ':spam_text' => $text,
                ':id' => $userId,
            ));
            // the anti_spam row is intentionally left untouched here
            return true;
        }

        // repetition score: +2 for an exact repeat, otherwise decay by one,
        // because spammers can alternate 1/1 1/2 1/n msgs
        if ($isRepeat) {
            $row['same_counter'] += 2;
        } elseif ($row['same_counter'] > 0) {
            $row['same_counter'] -= 1;
        }
        // compare prev to next msgs by blocks
        if ($this->checkSimilarity($prevText, $text) > self::SIMILARITY_THRESHOLD) {
            $row['same_counter'] += 3;
        }
        $wordsIndex = $this->spamWordsIndex($text);
        if ($wordsIndex >= self::INDEX_DIVISION) {
            $row['same_counter'] += (int) round($wordsIndex / self::INDEX_DIVISION);
        }

        $limitHit = false;
        if ($row['time_start'] < $period) {
            // rate window lapsed: start a new one and forgive one failure
            $row['time_start'] = $now;
            $row['counter'] = 0;
            if ($row['failed'] > 0) {
                $row['failed'] -= 1;
            }
        } else {
            // still inside the window: count the message, register a failure at the limit
            $row['counter'] += 1;
            if ($row['counter'] >= $counterLimit) {
                $row['counter'] = 0; // reset because of failed increase
                $row['failed'] += 1;
                $limitHit = true;
            }
        }

        // NOTE(review): `text` is VARCHAR(128) in the table created above; how
        // longer messages are stored depends on the server's SQL mode — confirm.
        $this->runStatement("UPDATE anti_spam SET text=:text, time_start=:time_start, counter=:counter, "
                . "failed=:failed, salt=:salt, salt_img=:salt_img, same_counter=:same_counter, "
                . "period_same_counter=:period_same_counter, period_time_start=:period_time_start "
                . "WHERE user_id=:user_id", array(
            ':text' => $text,
            ':time_start' => $row['time_start'],
            ':counter' => $row['counter'],
            ':failed' => $row['failed'],
            ':salt' => $saltText,
            ':salt_img' => $saltImg,
            ':same_counter' => $row['same_counter'],
            ':period_same_counter' => $row['period_same_counter'],
            ':period_time_start' => $row['period_time_start'],
            ':user_id' => $userId,
        ));

        return $limitHit || $sameForPeriod;
    }

    /**
     * Creates the anti_spam table when it does not exist yet, including every
     * column that isSpam() reads and writes.
     */
    private function ensureTable() {
        $this->dbConn->exec("CREATE TABLE IF NOT EXISTS anti_spam("
                . "user_id INT UNSIGNED NOT NULL, "
                . "text VARCHAR(128) NOT NULL, "
                . "time_start INT UNSIGNED NOT NULL, "
                . "counter INT UNSIGNED NOT NULL, "
                . "failed MEDIUMINT UNSIGNED NOT NULL DEFAULT 0, "
                . "salt VARCHAR(32) NOT NULL DEFAULT '', "
                . "salt_img VARCHAR(32) NOT NULL DEFAULT '', "
                . "same_counter INT UNSIGNED NOT NULL DEFAULT 0, "
                . "period_same_counter INT UNSIGNED NOT NULL DEFAULT 0, "
                . "period_time_start INT UNSIGNED NOT NULL DEFAULT 0, "
                . "INDEX BTREE(user_id))ENGINE=MEMORY CHARSET utf8 COLLATE utf8_general_ci");
    }

    /**
     * Loads the stored anti-spam row of a user.
     *
     * @param int $userId
     * @return array the row as an associative array, or an empty array when none exists
     */
    private function loadState($userId) {
        $stmt = $this->runStatement("SELECT time_start, text, counter, failed, salt, salt_img, "
                . "same_counter, period_same_counter, period_time_start "
                . "FROM anti_spam "
                . "WHERE user_id=:user_id", array(':user_id' => $userId));
        $row = $stmt->fetch(PDO::FETCH_ASSOC);
        return is_array($row) ? $row : array();
    }

    /**
     * Prepares and executes a statement with bound parameters, so that no
     * user-supplied value is ever spliced into the SQL text.
     *
     * @param string $sql    SQL with named placeholders
     * @param array  $params placeholder => value map
     * @return PDOStatement the executed statement
     * @throws RuntimeException when the statement cannot be prepared
     */
    private function runStatement($sql, array $params) {
        $stmt = $this->dbConn->prepare($sql);
        if ($stmt === false) {
            throw new RuntimeException('AntiSpam: unable to prepare SQL statement');
        }
        $stmt->execute($params);
        return $stmt;
    }

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Examples:
Note: you will need a `users` table with at least three columns used for blocking (`blocked`, `spam_issue`, `spam_text`), keyed by `id`.
Alternatively, you can change that block of code to use your own table and field names.