Skip to content

Instantly share code, notes, and snippets.

@daniil4udo
Created June 9, 2019 07:47
Show Gist options
  • Save daniil4udo/9ea78130bd17e56a40153a01d1c3eb54 to your computer and use it in GitHub Desktop.
Save daniil4udo/9ea78130bd17e56a40153a01d1c3eb54 to your computer and use it in GitHub Desktop.
Example of an .htaccess file to hide your site from crawlers

Example of an .htaccess file to hide your site from crawlers

.htaccess examples for Apache (mod_rewrite & mod_setenvif) and an NGINX configuration

to hide your site or PBN from crawlers like Ahrefs, Riddler, Detectify etc.

htaccess-hide-your-PBN-mod_rewrite


# BEGIN Hide My Links
# Answers 403 Forbidden to every request whose User-Agent header contains one
# of the crawler names listed below.
#
# - Patterns are unanchored regular expressions, so no leading/trailing ".*"
#   is needed to get "contains" semantics.
# - [NC] makes each match case-insensitive, the same way the mod_setenvif
#   (BrowserMatchNoCase) and nginx (~*) variants of this rule set match.
#   Without it, agents that spell their name with different casing than the
#   pattern (e.g. "rogerbot", "Exabot", "DotBot", "Sogou") are not blocked.
# - [OR] chains the conditions; the last condition must NOT carry [OR].
<IfModule mod_rewrite.c>
	RewriteEngine on
	RewriteCond %{HTTP_USER_AGENT} "AhrefsBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "MJ12bot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "Riddler" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "trovitBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "aiHitBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "Detectify" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "rogerBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "MegaIndex\.ru/2\.0" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "YandexBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "ia_archiver" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "bingbot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "dotbot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "Baiduspider" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "archive\.org_bot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "BLEXBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "LinkpadBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "spbot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "FlipboardProxy" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "Serpstatbot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "exabot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "SemrushBot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "boitho" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "psbot" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "sogou" [NC,OR]
	RewriteCond %{HTTP_USER_AGENT} "Slurp" [NC]
	# Match any URL, leave it unchanged ("-") and force a 403 response ([F]).
	RewriteRule ".*" "-" [F]
</IfModule>
# END Hide My Links

htaccess-hide-your-PBN-mod_setenvif


# BEGIN Hide My Links
# Denies access to every request whose User-Agent header matches one of the
# crawler patterns listed below.
#
# Requires Apache 2.4 or later: SetEnvIfExpr is not available in 2.2.
# Because the block is 2.4-only anyway, access control uses the native
# "Require" directives instead of the legacy Order/Allow/Deny trio, which on
# 2.4 only works when mod_access_compat happens to be loaded.
# NOTE: "Require" in .htaccess needs "AllowOverride AuthConfig" (or "All").
#
# The crawler list mirrors the mod_rewrite variant of this rule set; patterns
# are case-insensitive regular expressions, so literal dots are escaped.
<IfModule mod_setenvif.c>
	# Step 1: flag the request (hml_agent) when the User-Agent matches.
	BrowserMatchNoCase "AhrefsBot" hml_agent
	BrowserMatchNoCase "MJ12bot" hml_agent
	BrowserMatchNoCase "Riddler" hml_agent
	BrowserMatchNoCase "trovitBot" hml_agent
	BrowserMatchNoCase "aiHitBot" hml_agent
	BrowserMatchNoCase "Detectify" hml_agent
	BrowserMatchNoCase "rogerBot" hml_agent
	BrowserMatchNoCase "MegaIndex\.ru/2\.0" hml_agent
	BrowserMatchNoCase "YandexBot" hml_agent
	BrowserMatchNoCase "ia_archiver" hml_agent
	BrowserMatchNoCase "bingbot" hml_agent
	BrowserMatchNoCase "dotbot" hml_agent
	BrowserMatchNoCase "Baiduspider" hml_agent
	BrowserMatchNoCase "archive\.org_bot" hml_agent
	BrowserMatchNoCase "BLEXBot" hml_agent
	BrowserMatchNoCase "LinkpadBot" hml_agent
	BrowserMatchNoCase "spbot" hml_agent
	BrowserMatchNoCase "FlipboardProxy" hml_agent
	BrowserMatchNoCase "Serpstatbot" hml_agent
	BrowserMatchNoCase "exabot" hml_agent
	BrowserMatchNoCase "SemrushBot" hml_agent
	BrowserMatchNoCase "boitho" hml_agent
	BrowserMatchNoCase "psbot" hml_agent
	BrowserMatchNoCase "sogou" hml_agent
	BrowserMatchNoCase "Slurp" hml_agent
	# Step 2: set hml_block when hml_agent evaluates as true (-T).
	SetEnvIfExpr "-T reqenv('hml_agent')" hml_block
	# Step 3: allow everyone except requests flagged with hml_block.
	# A negated Require is only valid inside <RequireAll> next to a positive one.
	<RequireAll>
		Require all granted
		Require not env hml_block
	</RequireAll>
</IfModule>
# END Hide My Links

htaccess-hide-your-PBN-NGNIX


# Maps the request's User-Agent to $hml_agent: 1 when it matches one of the
# crawler patterns below, 0 otherwise.
# "~*" marks a case-insensitive regular expression; literal dots are escaped
# so that "." does not match an arbitrary character.
# The crawler list mirrors the Apache mod_rewrite variant of this rule set.
# Must be placed in the http {} context (map is not allowed inside server {}).
map $http_user_agent $hml_agent {
	default 0;
	~*AhrefsBot 1;
	~*MJ12bot 1;
	~*Riddler 1;
	~*trovitBot 1;
	~*aiHitBot 1;
	~*Detectify 1;
	~*rogerBot 1;
	~*MegaIndex\.ru/2\.0 1;
	~*YandexBot 1;
	~*ia_archiver 1;
	~*bingbot 1;
	~*dotbot 1;
	~*Baiduspider 1;
	~*archive\.org_bot 1;
	~*BLEXBot 1;
	~*LinkpadBot 1;
	~*spbot 1;
	~*FlipboardProxy 1;
	~*Serpstatbot 1;
	~*exabot 1;
	~*SemrushBot 1;
	~*boitho 1;
	~*psbot 1;
	~*sogou 1;
	~*Slurp 1;
}

# Maps the request URI to $hml_page. Only a default of 0 is defined, so the
# variable is always 0 as written.
# NOTE(review): presumably a placeholder for per-URI rules; confirm whether
# any server block is expected to read $hml_page.
map $request_uri $hml_page {
	default 0;
}

# Maps the client address to $hml_ip. Only a default of 0 is defined, so the
# variable is always 0 as written.
# NOTE(review): presumably a placeholder for per-IP rules; confirm whether
# any server block is expected to read $hml_ip.
map $remote_addr $hml_ip {
	default 0;
}

server {
	# $hml collects one letter per matched condition; nginx "if" cannot
	# combine conditions directly, so flags are concatenated and compared.
	set $hml "";

	# "A" = the User-Agent matched the $hml_agent map.
	if ($hml_agent) { set $hml "${hml}A"; }

	# Reject the request when the collected flags equal "A".
	if ($hml = "A") { return 403; }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment