Skip to content

Instantly share code, notes, and snippets.

@arthurkushman
Last active August 29, 2015 14:19
Show Gist options
  • Save arthurkushman/2c0e7c723d887e3613e5 to your computer and use it in GitHub Desktop.
Save arthurkushman/2c0e7c723d887e3613e5 to your computer and use it in GitHub Desktop.
Anti-spam filter based on counters (n messages per period t, k times) plus protection from repeated image/text content via MD5 hash comparison (of the file bytes in the case of an image); additional parameters can be passed to guard against accidental repetition of text messages and images
/**
 * Counter- and repetition-based anti-spam filter.
 *
 * Per-user state is kept in the `anti_spam` table (created on demand). A user who
 * trips the limits is flagged in the `users` table (columns blocked, spam_issue,
 * spam_text).
 *
 * NOTE(review): isSpam() calls $this->checkSimilarity() and $this->spamWordsIndex(),
 * which are not defined in this excerpt and must be provided elsewhere - confirm.
 */
class AntiSpam {
    // ANTI-SPAM CONSTS
    const SPAM_TIME_LIMIT = 60, // NOT MORE THAN n SECONDS
        SPAM_COUNTER_LIMIT = 10, // NOT MORE THAN n MESSAGES PER TIME_LIMIT ^
        FAILS_LIMIT = 10, // NOT MORE THAN n FAILS PER COUNTER_LIMIT ^
        SAME_MSGS_LIMIT = 200, // LIMIT TO BLOCK
        TEXT_NOT_CHECKED = 45, // MIN SYMBOLS TO CHECK
        SIMILARITY_THRESHOLD = 8, // AMOUNT OF SIMILAR BLOCKS PREV TO NEXT MSG
        SIMILAR_BLOCKS_MSG = 10, // FIND THAT AMOUNT OF BLOCKS
        INDEX_DIVISION = 10; // SEARCH INDEX DIVISION

    /**
     * Database connection used for every query; it has to be assigned before
     * isSpam() is called.
     *
     * @var PDO
     */
    public $dbConn;

    /**
     * Registers one message of a user and tells whether it must be treated as spam.
     *
     * @param int      $userId       id of the posting user
     * @param string   $text         post, message etc. text
     * @param string   $img          path of the image, usually $_FILES['image']['tmp_name']
     * @param int      $counterLimit limit of n messages per SPAM_TIME_LIMIT seconds
     * @param bool|int $countFails   true  - block after more than FAILS_LIMIT failed periods,
     *                               int   - block after more than that many failed periods,
     *                               false - never block because of failed periods
     * @param array    $addParams    ex.: ['period_same_counter' => 3, 'period_same_time' => 15 * 60]
     *
     * @return bool true when the message is spam (or the user got blocked), false otherwise
     */
    public function isSpam($userId, $text, $img, $counterLimit = self::SPAM_COUNTER_LIMIT,
            $countFails = true, $addParams = []) {
        $userId = (int) $userId;
        $sameForPeriod = false;

        // create only if not exists; every column read or written below is declared here
        $this->dbConn->exec("CREATE TABLE IF NOT EXISTS anti_spam("
                . "user_id INT UNSIGNED NOT NULL, "
                . "text VARCHAR(128) NOT NULL, "
                . "time_start INT UNSIGNED NOT NULL, "
                . "counter INT UNSIGNED NOT NULL, "
                . "failed MEDIUMINT UNSIGNED NOT NULL DEFAULT 0, "
                . "salt VARCHAR(32) NOT NULL DEFAULT '', "
                . "salt_img VARCHAR(32) NOT NULL DEFAULT '', "
                . "same_counter INT UNSIGNED NOT NULL DEFAULT 0, "
                . "period_same_counter INT UNSIGNED NOT NULL DEFAULT 0, "
                . "period_time_start INT UNSIGNED NOT NULL DEFAULT 0, "
                . "INDEX BTREE(user_id))ENGINE=MEMORY CHARSET utf8 COLLATE utf8_general_ci");

        $stmt = $this->dbConn->prepare("SELECT time_start, text, counter, failed, salt, salt_img, "
                . "same_counter, period_same_counter, period_time_start "
                . "FROM anti_spam "
                . "WHERE user_id=:user_id", array(PDO::ATTR_CURSOR => PDO::CURSOR_FWDONLY));
        $stmt->execute(array(':user_id' => $userId));
        $row = $stmt->fetch(PDO::FETCH_ASSOC);

        $now = time();
        $period = $now - self::SPAM_TIME_LIMIT;

        $text = (string) $text;
        $saltText = md5(str_replace(' ', '', $text)); // hash ignores spaces
        $text = trim($text);

        // without a readable image the hash is a unique placeholder that never matches
        $saltImg = uniqid();
        if (!empty($img) && is_file($img)) {
            $fileHash = md5_file($img);
            if ($fileHash !== false) {
                $saltImg = $fileHash;
            }
        }

        if (empty($row)) {
            // insert if hasn't been written/added yet
            $this->runStatement("INSERT INTO anti_spam SET user_id=:user_id, text=:text, "
                    . "time_start=:time_start, counter=0, salt=:salt, "
                    . "same_counter=0, period_same_counter=0, period_time_start=:period_time_start", [
                ':user_id' => $userId,
                ':text' => $text,
                ':time_start' => $now,
                ':salt' => $saltText,
                ':period_time_start' => $now,
            ]);
            // a brand new user has no previous message to compare with
            $row = [
                'time_start' => $now,
                'text' => '',
                'counter' => 0,
                'failed' => 0,
                'salt' => '',
                'salt_img' => '',
                'same_counter' => 0,
                'period_same_counter' => 0,
                'period_time_start' => $now,
            ];
        }

        // previous and current message carry the same text or the same image
        $sameText = $text !== '' && (string) $row['salt'] === $saltText;
        $sameImg = !empty($img) && (string) $row['salt_img'] === (string) $saltImg;
        $isRepeat = $sameText || $sameImg;

        // check if same pics/text for period
        if (!empty($addParams)) {
            $periodSameTime = isset($addParams['period_same_time']) ? $addParams['period_same_time'] : 0;
            $periodSameLimit = isset($addParams['period_same_counter']) ? $addParams['period_same_counter'] : 0;
            if ($row['period_time_start'] < $now - $periodSameTime) { // period lapsed
                $row['period_time_start'] = $now;
                $row['period_same_counter'] = 0;
            } else if ($isRepeat) {
                $row['period_same_counter'] += 1;
                if ($row['period_same_counter'] >= $periodSameLimit) {
                    // more than N same in T period
                    $sameForPeriod = true;
                }
            }
        }

        // $countFails may be a plain switch (bool) or the fails limit itself (int)
        $failsLimit = is_int($countFails) ? $countFails : self::FAILS_LIMIT;
        $failsCounted = $countFails === true || is_int($countFails);
        $tooManyFails = $failsCounted && $row['failed'] > $failsLimit;
        $tooManySame = $row['same_counter'] > self::SAME_MSGS_LIMIT;

        // limits exceeded - immediately block the user
        if ($tooManyFails || $tooManySame) {
            // 1 - failed N times X times in T interval, 2 - N same msgs
            $spamIssue = $tooManyFails ? 1 : 2;
            // block user in users table
            $this->runStatement("UPDATE users SET blocked=1, spam_issue=:spam_issue, spam_text=:spam_text "
                    . "WHERE id=:user_id", [
                ':spam_issue' => $spamIssue,
                ':spam_text' => $text,
                ':user_id' => $userId,
            ]);
            return true;
        }

        // same msgs by md5 hash of prev->next msg
        if ($isRepeat) {
            $row['same_counter'] += 2;
        } else if ($row['same_counter'] > 0) {
            // count down one by one, because spammers can alternate 1/1 1/2 1/n msgs
            $row['same_counter'] -= 1;
        }
        $similarity = $this->checkSimilarity($row['text'], $text); // compare prev to next msgs by blocks
        if ($similarity > self::SIMILARITY_THRESHOLD) {
            $row['same_counter'] += 3;
        }
        $wordsIndex = $this->spamWordsIndex($text);
        if ($wordsIndex >= self::INDEX_DIVISION) {
            $row['same_counter'] += (int) round($wordsIndex / self::INDEX_DIVISION);
        }

        if ($row['time_start'] < $period) {
            // the period lapsed - start a new one and forgive one failed period
            $row['time_start'] = $now;
            $row['counter'] = 0;
            if ($row['failed'] > 0) {
                $row['failed'] = $row['failed'] - 1;
            }
        } else {
            // period hasn't lapsed - counter+1, and when it reaches the limit - failed+1
            $row['counter'] = $row['counter'] + 1;
            if ($row['counter'] >= $counterLimit) {
                $row['counter'] = 0; // reset because of failed increase
                $row['failed'] = $row['failed'] + 1;
                $this->saveState($userId, $text, $saltText, $saltImg, $row);
                return true;
            }
        }

        $this->saveState($userId, $text, $saltText, $saltImg, $row);

        return $sameForPeriod; // false - not a spam - ok
    }

    /**
     * Writes the current counters and hashes of a user back to anti_spam.
     */
    private function saveState($userId, $text, $saltText, $saltImg, array $row) {
        return $this->runStatement("UPDATE anti_spam SET text=:text, "
                . "time_start=:time_start, counter=:counter, "
                . "failed=:failed, salt=:salt, salt_img=:salt_img, same_counter=:same_counter, "
                . "period_same_counter=:period_same_counter, period_time_start=:period_time_start "
                . "WHERE user_id=:user_id", [
            ':text' => $text,
            ':time_start' => (int) $row['time_start'],
            ':counter' => (int) $row['counter'],
            ':failed' => (int) $row['failed'],
            ':salt' => $saltText,
            ':salt_img' => (string) $saltImg,
            ':same_counter' => (int) $row['same_counter'],
            ':period_same_counter' => (int) $row['period_same_counter'],
            ':period_time_start' => (int) $row['period_time_start'],
            ':user_id' => (int) $userId,
        ]);
    }

    /**
     * Runs a write statement with bound parameters, so that user supplied text
     * never becomes part of the SQL itself.
     */
    private function runStatement($sql, array $params) {
        $stmt = $this->dbConn->prepare($sql);
        if ($stmt === false) {
            return false; // connection is in silent error mode and prepare failed
        }
        foreach ($params as $name => $value) {
            $stmt->bindValue($name, $value, is_int($value) ? PDO::PARAM_INT : PDO::PARAM_STR);
        }
        return $stmt->execute();
    }
}
@arthurkushman
Copy link
Author

Examples:

$userId = 123;
$msgText = 'some text goes here';
// usually you need to check the image before upload;
// PHP stores the temporary path of an uploaded file under the 'tmp_name' key
$tmpImage = $_FILES['image']['tmp_name'];
// not more than n messages per SPAM_TIME_LIMIT
$counterLimit = 9;
// true - block the user after more than FAILS_LIMIT failed periods
$countFails = true;
// means not more than n same messages per t period
// may be needed in situations where users accidentally post the same content
$addParams = array('period_same_counter' => 1, 'period_same_time' => 5 * 60);

// usage
// NOTE: isSpam() queries through $antiSpam->dbConn (a PDO connection),
// which has to be available before the first call
$antiSpam = new AntiSpam();
if (!$antiSpam->isSpam($userId, $msgText, $tmpImage, $counterLimit, $countFails, $addParams)) {

    // not spam: send, set message, post, comment

}

// this time the method returns true - text and image are the same as in the previous call
if (!$antiSpam->isSpam($userId, $msgText, $tmpImage, $counterLimit, $countFails, $addParams)) {

    // not reached for the repeated content

}

!!! You'll need a users table with at least 3 columns for blocking !!!

  1. blocked
  2. spam_issue
  3. spam_text

Or you can change this block of code to use your own table fields.

      // block user in users table
      $this->dbConn->exec("UPDATE users SET blocked=1, spam_issue=$spamIssue, spam_text='$text' "
              . "WHERE id=$userId");

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment