Skip to content

Instantly share code, notes, and snippets.

@deoxxa
Created November 22, 2010 06:45
Show Gist options
  • Save deoxxa/709617 to your computer and use it in GitHub Desktop.
Save deoxxa/709617 to your computer and use it in GitHub Desktop.
SCARY STUFF
/**
 * Scrape every HTTP tracker of a torrent and store the swarm statistics.
 *
 * Flow: load the torrent via torrent_get(), bail out if it was scraped less
 * than 1800 seconds ago, announce the scrape through api_ping(), then (unless
 * the 'no_scrape' configuration key is set) query all trackers in parallel
 * over non-blocking sockets for at most 15 seconds, parse the bencoded
 * replies with torrent_parse() and write the numbers to the
 * `torrents_trackers` and `torrents` tables.
 *
 * This is a deliberately small hand-rolled HTTP client: only plain http://
 * trackers are contacted and HTTP status codes are not inspected.
 *
 * @param int $torrent_id ID of the torrent to scrape.
 * @return void Nothing is returned; results are only visible in the database
 *              (or on the output stream when $http_debug is enabled).
 */
function torrent_scrape($torrent_id) {
    // Without this import $config is an undefined local and the 'no_scrape'
    // switch below could never trigger.
    // NOTE(review): assumes $config is the application's global configuration array - confirm.
    global $config;

    $times = array();
    $times['start'] = microtime(1);
    $times['last'] = microtime(1);

    $torrents = torrent_get(get_str_build(array('extended' => '', 'torrent_id' => $torrent_id)));

    $times['query'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Return if the torrent_id doesn't exist
    if (!isset($torrents) || !is_array($torrents) || !count($torrents)) { return; }
    $torrent = $torrents[0];

    // Only scrape torrents every 1800 seconds
    if (($torrent['scrape_time'] + 1800) > time()) { return; }

    api_ping('torrent_scrape', $torrent_id);

    // This will be set if the user has an external application that runs off the ping API doing scrapes.
    if (isset($config['no_scrape'])) {
        return;
    }

    // Set this to 1 if you want some verbose output for testing
    $http_debug = 0;
    if ($http_debug) { header('Content-Type: text/plain'); }

    $trackers = (isset($torrent['trackers']) && is_array($torrent['trackers'])) ? $torrent['trackers'] : array();

    // Per-tracker state, keyed by tracker ID; entries are removed as soon as a tracker fails.
    $scrape = array();
    // Maps the integer ID of a socket resource back to its tracker ID.
    $handles = array();

    foreach ($trackers as $tracker_id => $tracker) {
        if (!isset($tracker['tracker'])) { continue; }

        // See -> http://wiki.theory.org/BitTorrentSpecification#Tracker_.27scrape.27_Convention
        $scrape_url = _torrent_scrape_url($tracker['tracker'], $torrent['info_hash']);
        if ($scrape_url === null) { continue; }

        // Bad URL: without a host and a path there is nothing to connect to or request.
        $url = parse_url($scrape_url);
        if (!is_array($url) || !isset($url['host']) || !isset($url['path'])) { continue; }

        // The request below is plain HTTP over TCP, so udp:// and https:// trackers cannot be served.
        if (isset($url['scheme']) && strtolower($url['scheme']) !== 'http') { continue; }

        // parse_url() omits absent components instead of providing defaults.
        $port = isset($url['port']) ? $url['port'] : 80;
        $path = $url['path'].((isset($url['query']) && strlen($url['query'])) ? '?'.$url['query'] : '');

        $scrape[$tracker_id] = array(
            'id'       => $tracker_id,
            'url'      => array('host' => $url['host'], 'port' => $port, 'path' => $path),
            'req'      => sprintf(
                "GET %s HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\nUser-Agent: Jishaku/0.1\r\n\r\n",
                $path, $url['host'], $port
            ),
            'response' => '',
            'headers'  => '',
            'body'     => '',
            'array'    => array(),
            'sent'     => 0,
            'read'     => 0,
            'error'    => 0,
            'fh'       => null,
        );
    }

    $times['setup'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Resolve DNS
    foreach (array_keys($scrape) as $tracker_id) {
        $tracker = $trackers[$tracker_id];
        $host = $scrape[$tracker_id]['url']['host'];

        // A tracker addressed by a literal IPv4 address has no A record to look up.
        if (filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false) {
            $scrape[$tracker_id]['url']['ip'] = $host;
            continue;
        }

        if ($http_debug) { printf("init: getting ip for %s\n", $tracker['tracker']); flush(); }
        $dns = dns_get_record($host, DNS_A);
        if (is_array($dns) && count($dns) && isset($dns[0]['ip'])) {
            if ($http_debug) { printf("init: got ip for %s (%s)\n", $tracker['tracker'], $dns[0]['ip']); flush(); }
            $scrape[$tracker_id]['url']['ip'] = $dns[0]['ip'];
        } else {
            if ($http_debug) { printf("init: couldn't get ip for %s\n", $tracker['tracker']); flush(); }
            unset($scrape[$tracker_id]);
        }
    }

    $times['dns'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Open the sockets
    foreach (array_keys($scrape) as $tracker_id) {
        $tracker = $trackers[$tracker_id];
        if ($http_debug) { printf("init: opening socket for %s\n", $tracker['tracker']); flush(); }

        $fh = @stream_socket_client(
            sprintf("tcp://%s:%s", $scrape[$tracker_id]['url']['ip'], $scrape[$tracker_id]['url']['port']),
            $errno, $errstr, 0, STREAM_CLIENT_ASYNC_CONNECT
        );

        // A failed open yields false; drop the tracker instead of registering a bogus handle.
        if (!is_resource($fh)) {
            unset($scrape[$tracker_id]);
            continue;
        }

        $scrape[$tracker_id]['fh'] = $fh;
        $handles[(int)$fh] = $tracker_id;
    }

    $times['open-sockets'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    $http_timeout = 15;
    $http_start = time();

    // Drive all sockets for at most $http_timeout seconds
    while (time() < ($http_start + $http_timeout)) {
        $read = array();
        $write = array();
        $error = array();

        foreach ($scrape as $tracker_id => $state) {
            // is_resource() is false for closed handles, so finished trackers drop out here.
            if (!is_resource($state['fh'])) { continue; }
            if (!$state['sent']) {
                $write[] = $state['fh'];
            } elseif (!$state['read']) {
                $read[] = $state['fh'];
            }
            // TODO: This might be bad?
            // BUG:0000039
            $error[] = $state['fh'];
        }

        // Nothing left to send or receive
        if (!count($read) && !count($write)) { break; }

        // On failure the arrays are left untouched and must not be mistaken for ready sets.
        if (@stream_select($read, $write, $error, 1) === false) { break; }

        if ($http_debug) {
            printf("http: %s sockets ready for reading\n", count($read));
            printf("http: %s sockets ready for writing\n", count($write));
            printf("http: %s sockets with errors\n", count($error));
            flush();
        }

        foreach ($error as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];
            if ($http_debug) { printf("http: error on %s\n", $tracker['tracker']); flush(); }
            @fclose($fh);
            // Forget the handle so the read/write loops below don't touch a closed socket.
            unset($handles[(int)$fh]);
            $scrape[$tracker_id]['error'] = 1;
        }

        foreach ($read as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];
            if ($http_debug) { printf("http: activity on %s\n", $tracker['tracker']); flush(); }

            $buf = @fread($fh, 1024);
            // Strict checks: a chunk consisting of just "0" is data, not end-of-stream.
            if ($buf !== false && $buf !== '') {
                if ($http_debug) { printf("http: read %s bytes from %s\n", strlen($buf), $tracker['tracker']); flush(); }
                $scrape[$tracker_id]['response'] .= $buf;
            } else {
                if ($http_debug) { printf("http: closed socket for %s\n", $tracker['tracker']); flush(); }
                @fclose($fh);
                unset($handles[(int)$fh]);
                $scrape[$tracker_id]['read'] = 1;
            }
        }

        foreach ($write as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];
            if ($http_debug) { printf("http: ready to write on %s\n", $tracker['tracker']); flush(); }

            $written = @fwrite($fh, $scrape[$tracker_id]['req']);
            if ($written === false || $written === 0) {
                // Connection failed: release the socket and give up on this tracker.
                @fclose($fh);
                unset($scrape[$tracker_id]);
                unset($handles[(int)$fh]);
                continue;
            }
            $scrape[$tracker_id]['sent'] = 1;
        }
    }

    // Release sockets that were still open when the deadline expired.
    foreach ($scrape as $state) {
        if (is_resource($state['fh'])) { @fclose($state['fh']); }
    }

    $times['http'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);
    unset($handles);

    // Split the headers/body of the HTTP response
    foreach (array_keys($scrape) as $tracker_id) {
        $parts = explode("\r\n\r\n", $scrape[$tracker_id]['response'], 2);
        if (count($parts) < 2) {
            // Empty or truncated response
            unset($scrape[$tracker_id]);
            continue;
        }
        list($headers, $body) = $parts;

        // An HTTP/1.1 server may answer with a chunked body; strip the chunk framing before parsing.
        if (preg_match('/^Transfer-Encoding:.*\bchunked\b/mi', $headers)) {
            $body = _torrent_scrape_dechunk($body);
            if ($body === false) {
                unset($scrape[$tracker_id]);
                continue;
            }
        }

        $scrape[$tracker_id]['headers'] = $headers;
        $scrape[$tracker_id]['body'] = $body;
    }

    $times['split'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Parse the response bodies
    foreach (array_keys($scrape) as $tracker_id) {
        $tracker = $trackers[$tracker_id];
        if (strlen($scrape[$tracker_id]['body'])) {
            $scrape[$tracker_id]['array'] = torrent_parse($scrape[$tracker_id]['body']);
        }

        // If it didn't parse properly, discard it
        if (!is_array($scrape[$tracker_id]['array']) || !count($scrape[$tracker_id]['array'])) {
            if ($http_debug) { printf("parse: couldn't parse data for %s\n", $tracker['tracker']); flush(); }
            unset($scrape[$tracker_id]);
        } elseif ($http_debug) {
            printf("parse: parsed data for %s\n", $tracker['tracker']); flush();
            $scrape[$tracker_id]['response'] = '<removed>';
        }
    }

    $times['parse'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);
    $times['total'] = microtime(1) - $times['start'];
    unset($times['last']);
    unset($times['start']);

    if ($http_debug) {
        var_dump($scrape);
        var_dump($times);
        exit();
    }

    // Store the per-tracker numbers. Every value is formatted with %d, so nothing
    // from the tracker reaches the SQL text as anything but an integer.
    $updated = false;
    foreach ($scrape as $tracker_id => $state) {
        // A reply without a 'files' dictionary (e.g. a failure message) carries no statistics.
        if (!isset($state['array']['files']) || !is_array($state['array']['files']) || !count($state['array']['files'])) { continue; }

        // Only one info_hash was requested, so the first entry is the one we want.
        $stats = reset($state['array']['files']);
        if (!is_array($stats)) { continue; }

        $torrents_trackers_update_query = sprintf(
            "UPDATE `torrents_trackers` SET `scrape_time` = %d, `seeds` = %d, `leechers` = %d, `completed` = %d WHERE `torrent_id` = %d AND `tracker_id` = %d LIMIT 1;",
            time(),
            isset($stats['complete']) ? $stats['complete'] : 0,
            isset($stats['incomplete']) ? $stats['incomplete'] : 0,
            isset($stats['downloaded']) ? $stats['downloaded'] : 0,
            $torrent['torrent_id'], $tracker_id
        );
        mysql_query($torrents_trackers_update_query);
        $updated = true;
    }

    // The aggregate only depends on the rows written above, so refresh it once
    // instead of once per tracker.
    if ($updated) {
        $torrents_update_query = sprintf(
            "UPDATE `torrents` SET `scrape_time` = %d, `seeds` = (SELECT MAX(`seeds`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1), `leechers` = (SELECT MAX(`leechers`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1), `completed` = (SELECT MAX(`completed`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1) WHERE `torrent_id` = %d LIMIT 1;",
            time(), $torrent_id, $torrent_id, $torrent_id, $torrent_id
        );
        mysql_query($torrents_update_query);
    }
}

/**
 * Derive the scrape URL for an announce URL following the tracker scrape convention.
 *
 * Only an "announce" that directly follows the last '/' is replaced, so host
 * names such as announce.example.com are left alone. The info_hash parameter
 * is appended with '&' when the URL already carries a query string.
 *
 * @param string $announce  Announce URL of the tracker.
 * @param string $info_hash Info hash of the torrent; it is URL-encoded here.
 * @return string|null Scrape URL, or null when the tracker does not follow the convention.
 */
function _torrent_scrape_url($announce, $info_hash) {
    $announce = (string)$announce;
    $slash = strrpos($announce, '/');
    if ($slash === false || substr($announce, $slash + 1, 8) !== 'announce') {
        return null;
    }
    $url = substr($announce, 0, $slash + 1).'scrape'.substr($announce, $slash + 9);
    $separator = (strpos($url, '?') === false) ? '?' : '&';
    return $url.$separator.'info_hash='.urlencode($info_hash);
}

/**
 * Decode an HTTP body that uses the "chunked" transfer coding.
 *
 * @param string $body Raw body as received, including chunk-size lines.
 * @return string|false Decoded payload, or false when the framing is malformed or incomplete.
 */
function _torrent_scrape_dechunk($body) {
    $decoded = '';
    $offset = 0;
    $length = strlen($body);

    while ($offset < $length) {
        $eol = strpos($body, "\r\n", $offset);
        if ($eol === false) { return false; }

        // The size line may carry chunk extensions after a ';'.
        $size_parts = explode(';', (string)substr($body, $offset, $eol - $offset), 2);
        $size_hex = trim($size_parts[0]);
        if (!preg_match('/^[0-9a-fA-F]+$/', $size_hex)) { return false; }

        $size = hexdec($size_hex);
        // The zero-sized chunk terminates the body; trailers are ignored.
        if ($size == 0) { return $decoded; }

        $offset = $eol + 2;
        if ($offset + $size > $length) { return false; }
        $decoded .= substr($body, $offset, $size);
        // Skip the chunk data and the CRLF that follows it.
        $offset += $size + 2;
    }

    // Ran out of data before the terminating chunk arrived.
    return false;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment