Last active
January 30, 2018 10:59
-
-
Save JamoCA/7d0b14894387b2fc2260 to your computer and use it in GitHub Desktop.
Here's a raw proof-of-concept script written in ColdFusion that identifies & blocks fake Googlebots. This can be easily expanded to cache DNS responses & log new bots.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<cfscript>
/*
 * Fake-Googlebot detector (proof of concept).
 *
 * Based on http://googlewebmastercentral.blogspot.com/2006/09/how-to-verify-googlebot.html
 * For requests whose User-Agent claims to be a Google crawler:
 *   1. reverse-resolve the client IP to its PTR host name(s);
 *   2. require a host name ending in ".googlebot.com" or ".google.com";
 *   3. forward-resolve that host name and require one of its addresses to
 *      equal the client IP.
 * A request that fails step 2 or 3 is flagged with badBot = 1.
 *
 * Page variables set here:
 *   badBot       - 1 when the request claims to be Googlebot but fails verification
 *   blockBadBots - set to 1 to answer flagged requests with a 404 and abort
 *   ip           - client address (cgi.remote_addr)
 *   userAgent    - client User-Agent header
 *   domainName   - comma-separated PTR records (only set for claimed Google bots)
 *   ipLookup     - comma-separated addresses obtained from the forward lookup
 */
badBot = 0;
blockBadBots = 0;
ip = cgi.remote_addr;
userAgent = CGI.Http_User_Agent;

/* Sample request values */
//userAgent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
//ip = "179.179.65.180"; //bad
//ip = "66.249.67.29"; //good

/* Note: This script uses the DNS helper component instantiated below as "InetUtils",
   available for download at
   http://www.phillnacelli.net/blog/index.cfm/2007/2/21/DNS-Lookup-in-ColdFusion
   (NOTE(review): confirm the .cfc file name matches the name used in createObject). */
if (findNoCase("googlebot", userAgent) OR findNoCase("Mediapartners-Google", userAgent)){
	inetUtils = createObject("component","InetUtils").init();
	try {
		dnsRecords = inetUtils.reverseDNS(ip); // Perform Reverse DNS Lookup
		domainName = ArrayToList(dnsRecords);
		ipLookup = "";

		/* The request claims to be Google: treat it as bad until one PTR record
		   passes both the domain check and the forward-confirmation check. */
		badBot = 1;
		javaInet = createObject("java","java.net.InetAddress");

		for (i = 1; i lte arrayLen(dnsRecords) and badBot; i = i + 1){
			/* PTR answers are often fully qualified ("host.googlebot.com."); drop the root dot. */
			hostName = reReplace(trim(dnsRecords[i]), "\.$", "");

			/* Dots are escaped and a leading dot is required so that look-alikes such as
			   "evilgooglebot.com" or "googlebot.com.evil.net" are rejected. */
			if (reFindNoCase("\.(googlebot|google)\.com$", hostName)){
				try {
					/* getHostAddress() yields dotted text; getAddress() would return a raw
					   byte[] that cannot be compared with the textual client IP. */
					addresses = javaInet.getAllByName(hostName);
					for (j = 1; j lte arrayLen(addresses); j = j + 1){
						resolvedIp = addresses[j].getHostAddress();
						ipLookup = listAppend(ipLookup, resolvedIp);
						if (resolvedIp eq ip){
							badBot = 0; /* good; forward lookup confirms the client IP */
							break;
						}
					}
				} catch (any forwardError) {
					/* The client controls its own PTR record, so a Google-looking name that
					   does not resolve is unverified: leave badBot = 1 for this record. */
				}
			}
		}
	} catch (any e) {
		/* Deliberate best-effort: if the reverse lookup itself fails (DNS outage,
		   missing component, no PTR record) do not flag the request. */
		badBot = 0;
	}
}

/* Diagnostic output. userAgent and domainName come from the client / its DNS and are
   untrusted, so they are HTML-escaped (encodeForHTML() is the CF10+ alternative). */
writeoutput("<p><b>userAgent:</b> #HTMLEditFormat(userAgent)#</p>");
writeoutput("<p><b>ip:</b> #HTMLEditFormat(ip)#</p>");
if (isdefined("domainName")){
	writeoutput("<p><b>Reverse DNS:</b> #HTMLEditFormat(domainName)#</p>");
}
if (isdefined("ipLookup") and len(ipLookup)){
	writeoutput("<p><b>IP from DNS:</b> #HTMLEditFormat(ipLookup)#</p>");
}
writeoutput("<p><b>badBot:</b> #YesNoFormat(badBot)#</p>");
</cfscript>
<!--- When blocking is enabled, answer flagged requests with a 404 and stop processing. --->
<cfif badBot and blockBadBots>
	<cfcontent type="text/html; charset=UTF-8"><cfheader statuscode="404" statustext="File Not Found"><CFABORT>
</cfif>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment