Skip to content

Instantly share code, notes, and snippets.

@zachflower
Last active December 28, 2015 20:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zachflower/7557435 to your computer and use it in GitHub Desktop.
Save zachflower/7557435 to your computer and use it in GitHub Desktop.
Tor Page Crawler
<?php
/**
* Tor Page Crawler
*
* Download web pages anonymously using the Tor network.
* Usage Example:
* $tor = new Tor();
*
* $tor->setUrl('http://zacharyflower.com');
* $tor->setUserAgent('Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36');
* $tor->setReferer('http://twitter.com');
*
* $response = $tor->download();
*/
class Tor {
    /** @var string|null URL to download; must be set before calling download(). */
    private $url = null;

    /** @var string|null Optional User-Agent header value. */
    private $user_agent = null;

    /** @var string|null Optional Referer header value. */
    private $referer = null;

    /** @var int Number of requests issued by the current download() call. */
    private $tries = 0;

    /** @var int Number of retries allowed after the first failed request. */
    private $max_tries = 10;

    /**
     * Ask the local Tor control port for a new identity (SIGNAL NEWNYM).
     *
     * Connects to 127.0.0.1:9051, authenticates with the hard-coded control
     * password and sends the NEWNYM signal. The replies are read so that each
     * command is acknowledged before the next one is sent, but their content
     * is not inspected: this remains a best-effort operation.
     *
     * @throws Exception When the control port cannot be reached.
     */
    private function scrambleIP(){
        $ip = '127.0.0.1';
        $port = 9051;
        $auth = 'password';
        $command = 'signal NEWNYM';

        $fp = fsockopen($ip, $port, $error_number, $err_string, 10);
        if ( !$fp ) {
            throw new Exception('Unable to connect to Tor service.');
        }

        fwrite($fp, "AUTHENTICATE \"".$auth."\"\n");
        fread($fp, 512);
        fwrite($fp, $command."\n");
        fread($fp, 512);

        fclose($fp);
    }

    /**
     * Set the User-Agent header sent with the request.
     *
     * @param string|null $user_agent Header value; an empty value disables the header.
     */
    public function setUserAgent($user_agent = null){
        $this->user_agent = $user_agent;
    }

    /**
     * Set the URL to download.
     *
     * @param string|null $url Target URL.
     */
    public function setUrl($url = null){
        $this->url = $url;
    }

    /**
     * Set the Referer header sent with the request.
     *
     * @param string|null $referer Header value; an empty value disables the header.
     */
    public function setReferer($referer = null){
        $this->referer = $referer;
    }

    /**
     * Set how many times a failed download is retried.
     *
     * Non-numeric values are ignored and leave the current setting untouched.
     *
     * @param int|string $max_tries Number of retries after the first attempt.
     */
    public function setMaxTries($max_tries = 10){
        if ( is_numeric($max_tries) ) {
            $this->max_tries = (int)$max_tries;
        }
    }

    /**
     * Download the configured URL through the local Tor SOCKS5 proxy.
     *
     * Any response whose HTTP status is not 200 triggers a request for a new
     * Tor identity followed by another attempt, until the retry budget
     * (max_tries retries after the first attempt) is used up.
     *
     * @return string Response body of the first request answered with HTTP 200.
     *
     * @throws Exception When no URL is set, on a cURL transport error, when the
     *                   Tor control port is unreachable, or when every attempt
     *                   returned a non-200 status.
     */
    public function download() {
        if ( empty($this->url) ) {
            throw new Exception('URL Required');
        }

        // Each download() call gets a full retry budget; without this reset the
        // counter from earlier calls would eat into (or exhaust) the budget.
        $this->tries = 0;

        while ( true ) {
            $this->tries++;

            list($response, $http_code) = $this->request();
            if ( $http_code == 200 ) {
                return $response;
            }

            if ( $this->tries > $this->max_tries ) {
                throw new Exception('Too many failed download attempts.');
            }

            // Switch to a new Tor circuit before trying again.
            $this->scrambleIP();
        }
    }

    /**
     * Perform a single HTTP request through the Tor SOCKS5 proxy.
     *
     * The cURL handle is local to this method, so it is not kept open across
     * retries.
     *
     * @return array Two elements: the response body and the HTTP status code.
     *
     * @throws Exception On a cURL transport error.
     */
    private function request() {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $this->url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        if ( !empty($this->referer) ) {
            curl_setopt($ch, CURLOPT_REFERER, $this->referer);
        }
        if ( !empty($this->user_agent) ) {
            curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
        }
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_PROXY, "127.0.0.1:9050");
        curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);

        $response = curl_exec($ch);
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        if ( curl_errno($ch) ) {
            throw new Exception('cURL Error: '.curl_error($ch));
        }

        return array($response, $http_code);
    }
}
@ruafozy
Copy link

ruafozy commented Jan 4, 2015

I'm just reading this quickly, but: shouldn't "scramble_ip()" be
"$this->scrambleIP()"?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment