Last active
December 9, 2016 21:37
-
-
Save Jolly-Pirate/641f1410ff2b3b6eeeb3 to your computer and use it in GitHub Desktop.
Wpoison PHP implementation, to thwart address harvesting web crawlers that spammers use to collect e-mail addresses from web pages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Put this file in the same folder as the script.
RewriteEngine On
# Edit "email" and "members" below to your liking. The word in the rule's
# pattern must match the script's filename without its ".php" extension,
# so rename the script too if you change it.
#
# Variant using the original Wpoison name:
#RewriteRule ^wpoison(/.*)?$ /members/wpoison.php [L]
#
# Suggested example. A RewriteCond only applies to the single RewriteRule
# that follows it, so both URL forms ("email" and "email/anything") are
# handled by ONE rule; that way the "not an existing file / directory"
# checks guard both of them. The pattern is anchored with ^ so that only
# "email" itself matches, not any path that merely ends in "email".
RewriteCond %{REQUEST_FILENAME} !-f
RewriteCond %{REQUEST_FILENAME} !-d
RewriteRule ^email(/.*)?$ /members/email.php [L]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
v1.0 2016-01-18 | |
This script is inspired by the perl script Wpoison http://www.monkeys.com/wpoison/ | |
I don't run CGI/PERL scripts, so I decided to write a php equivalent | |
which behaves in the same manner as the original Wpoison. | |
I added a benchmark for server tweaking. You can keep it or comment it out. | |
REQUIREMENTS: | |
PHP 5+ | |
Dictionary file from ftp://ftp.monkeys.com/pub/wpoison/words.gz (or your own, one word per line) | |
INSTALL: | |
Put the .htaccess and this script in a folder, e.g. /var/www/html/members
Rename the script to email.php for this example. | |
I use members/email.php to make it look attractive to spammer web crawlers,
but it can be anything you want. Make sure you edit the .htaccess file accordingly. | |
Unpack the words.gz into the same folder. | |
USAGE: | |
Put a link on your main page (or any other page of your website) with a href to: | |
http://yourdomain.com/members/email or http://yourdomain.com/members/email/ | |
both will work, thanks to the .htaccess rules | |
NOTE: | |
The <BIG> tag is not supported in HTML5. | |
It's a small detail though, if it doesn't work for you, remove it or use CSS instead. | |
*/ | |
$start_time = microtime(true); // Benchmark start

// Random page colours. sprintf('%06X') always yields a full six-digit
// "#RRGGBB" value; plain dechex() drops leading zeros and would produce
// malformed colours such as "#1A3F". The range covers every 24-bit colour.
$randomBGCOLOR = sprintf('#%06X', mt_rand(0, 0xFFFFFF));
$randomTEXT = sprintf('#%06X', mt_rand(0, 0xFFFFFF));
$randomLINK = sprintf('#%06X', mt_rand(0, 0xFFFFFF));
$randomVLINK = sprintf('#%06X', mt_rand(0, 0xFFFFFF));

// Document head: the title is itself a short run of random words.
echo
"<HTML>
<HEAD>
<TITLE>";
randomwords(3, 7);
echo
"</TITLE>
<META NAME='ROBOTS' CONTENT='NOINDEX, NOFOLLOW'>
</HEAD>
<BODY BGCOLOR=$randomBGCOLOR TEXT=$randomTEXT LINK=$randomLINK VLINK=$randomVLINK>
<BIG>
";

// Page body: paragraphs of random words interleaved with bogus e-mail
// addresses and links back into this script.
// Edit to your liking, check the benchmark to tweak performance according to your server
randomwords(10, 30);
print ("<P>\n");
randomemails(10, 30);
print ("<P>\n");
randomwords(10, 30);
print ("<P>\n");
randomlinks(5, 10);
print ("<P>\n");
randomwords(10, 30);
print ("<P>\n");

// Benchmark end. Convert to milliseconds BEFORE formatting so no arithmetic
// is done on a formatted string; the empty thousands separator keeps the
// number plain even for very slow pages.
$elapsed_ms = (microtime(true) - $start_time) * 1000;
echo "This page was generated in " . number_format($elapsed_ms, 1, '.', '') . "ms.\n";
print ("</BIG>\n</BODY>\n</HTML>\n");
// end of the HTML part
// Functions | |
/**
 * Echo a run of random dictionary words, each followed by a single space.
 *
 * Words are drawn (with repetition) from the file "words" in the current
 * working directory, which must contain one word per line. If that file is
 * missing, unreadable or empty, nothing is echoed.
 *
 * @param int $min Minimum number of words to emit (values below 0 are treated as 0).
 * @param int $max Maximum number of words to emit (values below $min are treated as $min).
 * @return void
 */
function randomwords($min, $max) {
    // The page calls this function several times; cache the dictionary so
    // the file is read and split only once per request.
    static $words = null;
    if ($words === null) {
        $loaded = is_readable("words")
            ? file("words", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
            : false;
        $words = ($loaded === false) ? array() : $loaded;
    }
    if (count($words) === 0) {
        return; // no dictionary available
    }

    $min = max(0, (int) $min);
    $max = max($min, (int) $max);

    // Index range comes from the actual file, not a hard-coded line count,
    // so any dictionary size works and the first word can be chosen too.
    $last = count($words) - 1;

    // Draw the word count ONCE. Re-rolling it in the loop condition would
    // skew the distribution towards short runs.
    $count = mt_rand($min, $max);

    $string = "";
    for ($a = 0; $a < $count; $a++) {
        // FILE_IGNORE_NEW_LINES strips "\n"; rtrim also removes the "\r"
        // left behind by dictionaries with Windows line endings.
        $string .= rtrim($words[mt_rand(0, $last)], "\r") . " ";
    }
    echo $string;
}
/**
 * Echo a random number of links that point back into this script.
 *
 * Each link has the form <A HREF="{script}/{word}">{random words}</A><BR>,
 * where {script} is $_SERVER["SCRIPT_NAME"] without its file extension and
 * {word} is one random dictionary word. Words come from the file "words" in
 * the current working directory (one word per line); if it is missing,
 * unreadable or empty, nothing is echoed.
 *
 * The same $min/$max bounds are used both for the number of links and for
 * the number of words in each link's text.
 *
 * @param int $min Lower bound (values below 0 are treated as 0).
 * @param int $max Upper bound (values below $min are treated as $min).
 * @return void
 */
function randomlinks($min, $max) {
    // SCRIPT_NAME (e.g. /folder/script.php) includes any subfolders the
    // script lives in. Strip only an extension on the LAST path segment, so
    // a dot inside a directory name is left alone and an extension-less
    // script name is kept whole.
    $script = isset($_SERVER["SCRIPT_NAME"]) ? $_SERVER["SCRIPT_NAME"] : "";
    $scriptname = preg_replace('/\.[^.\/]*$/', '', $script);

    $lines = is_readable("words")
        ? file("words", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
        : false;
    if ($lines === false || count($lines) === 0) {
        return; // no dictionary available
    }

    $min = max(0, (int) $min);
    $max = max($min, (int) $max);
    $last = count($lines) - 1; // real dictionary size, not a hard-coded count

    // Both counts are drawn once, outside their loop conditions, so every
    // value in [$min, $max] is equally likely.
    $links = mt_rand($min, $max);
    for ($b = 0; $b < $links; $b++) {
        $string = ""; // link text is rebuilt from scratch for every link
        $textwords = mt_rand($min, $max);
        for ($a = 0; $a < $textwords; $a++) {
            $string .= rtrim($lines[mt_rand(0, $last)], "\r") . " ";
        }

        // One word for the URL path segment, percent-encoded so spaces,
        // quotes or non-ASCII characters cannot break the HREF attribute.
        $word = rtrim($lines[mt_rand(0, $last)], "\r");

        echo "<A HREF=\"" . $scriptname . "/" . rawurlencode($word) . "\">" . $string . "</A><BR>\n";
    }
}
/**
 * Echo a random number of bogus mailto: links, one per line.
 *
 * Each address is {local}@{domain}.{tld}, where {local} and {domain} are
 * 5-12 distinct lowercase letters (a prefix of a shuffled alphabet) and
 * {tld} is picked at random from the list below. Every address is emitted
 * as <A HREF="mailto:ADDR">ADDR</A><BR>.
 *
 * @param int $min Minimum number of addresses (values below 0 are treated as 0).
 * @param int $max Maximum number of addresses (values below $min are treated as $min).
 * @return void
 */
function randomemails($min, $max) {
    // array of possible top-level domains
    $tld = array("com", "biz", "info", "org", "gov", "net",
        "uk", "su",
        "af", "al", "dz", "as", "ad", "ao", "ai", "aq", "ag", "ar", "am", "aw", "au",
        "at", "az", "bs", "bh", "bd", "bb", "by", "be", "bz", "bj", "bm", "bt", "bo",
        "ba", "bw", "bv", "br", "io", "bn", "bg", "bf", "bi", "kh", "cm", "ca", "cv",
        "ky", "cf", "td", "cl", "cn", "cx", "cc", "co", "km", "cg", "ck", "cr", "ci",
        "hr", "cu", "cy", "cz", "dk", "dj", "dm", "do", "tp", "ec", "eg", "sv", "gq",
        "er", "ee", "et", "fk", "fo", "fj", "fi", "fr", "fx", "gf", "pf", "tf", "ga",
        "gm", "ge", "de", "gh", "gi", "gr", "gl", "gd", "gp", "gu", "gt", "gn", "gw",
        "gy", "ht", "hm", "hn", "hk", "hu", "is", "in", "id", "ir", "iq", "ie", "il",
        "it", "jm", "jp", "jo", "kz", "ke", "ki", "kp", "kr", "kw", "kg", "la", "lv",
        "lb", "ls", "lr", "ly", "li", "lt", "lu", "mo", "mk", "mg", "mw", "my", "mv",
        "ml", "mt", "mh", "mq", "mr", "mu", "yt", "mx", "fm", "md", "mc", "mn", "ms",
        "ma", "mz", "mm", "na", "nr", "np", "nl", "an", "nc", "nz", "ni", "ne", "ng",
        "nu", "nf", "mp", "no", "om", "pk", "pw", "pa", "pg", "py", "pe", "ph", "pn",
        "pl", "pt", "pr", "qa", "re", "ro", "ru", "rw", "kn", "lc", "vc", "ws", "sm",
        "st", "sa", "sn", "sc", "sl", "sg", "sk", "si", "sb", "so", "za", "gs", "es",
        "lk", "sh", "pm", "sd", "sr", "sj", "sz", "se", "ch", "sy", "tw", "tj", "tz",
        "th", "tg", "tk", "to", "tt", "tn", "tr", "tm", "tc", "tv", "ug", "ua", "ae",
        "gb", "us", "um", "uy", "uz", "vu", "va", "ve", "vn", "vg", "vi", "wf", "eh",
        "ye", "yu", "zr", "zm", "zw");
    $lastTld = count($tld) - 1; // hoisted: the list never changes inside the loop

    $min = max(0, (int) $min);
    $max = max($min, (int) $max);

    // Draw the number of addresses ONCE. Re-rolling it in the loop condition
    // would skew the distribution towards the low end of the range.
    $total = mt_rand($min, $max);

    for ($j = 0; $j < $total; $j++) {
        // min and max random length can be changed of course, as well as the characters list for the shuffle
        $a = substr(str_shuffle("abcdefghijklmnopqrstuvwxyz"), 0, mt_rand(5, 12));
        $a .= "@";
        $a .= substr(str_shuffle("abcdefghijklmnopqrstuvwxyz"), 0, mt_rand(5, 12));
        $a .= ".";
        $a .= $tld[mt_rand(0, $lastTld)];
        echo "<A HREF=\"mailto:" . $a . "\">" . $a . "</A><BR>\n";
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment