Skip to content

Instantly share code, notes, and snippets.

@melvyn-sopacua
Created August 24, 2014 06:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save melvyn-sopacua/f668994f7e44b2c24692 to your computer and use it in GitHub Desktop.
Save melvyn-sopacua/f668994f7e44b2c24692 to your computer and use it in GitHub Desktop.
Test a local site using its sitemap
<?php
// vim: ts=4 sw=4 noet tw=78 fo=croqn
/*
* Tests a site's performance by using its sitemap.
*
* The SiteFetcher class runs multiple URLs in parallel. In theory, you can
* instantiate multiple SiteFetcher objects, but unless you fork() this
* program, they will be running in sequence, not in parallel.
*
* Note that the settings are conservative defaults that may still put your
* machine in a high load if the site's performance is abysmal.
*/
/**
 * Fetches batches of URLs in parallel through a cURL multi handle and
 * reports how long each request took.
 *
 * Failed transfers are printed as errors, requests slower than $fastCutOff
 * are printed with their total time, and fast requests only print a status
 * line ending in a carriage return (so the next line overwrites it).
 */
class SiteFetcher
{
	// Maximum number of URLs requested simultaneously.
	// The fill counter starts at 1, so a batch is dispatched once
	// ($maxThreads - 1) requests have been queued.
	// (Using a number that leaves one URL out, tests FPC performance.)
	public $maxThreads = 5;
	// Visit one URL this many times (in one or two runs)
	public $threadsPerUrl = 2;
	// Acceptable performance cutoff in seconds
	public $fastCutOff = 0.500;
	// Don't bother anymore after this many seconds
	public $tooSlowCutOff = 20.000;
	// cURL multi handle holding the current batch
	protected $_curl = null;
	// Fill counter of the current batch: 1 + number of queued requests
	protected $_filled = 1;
	// Number of transfers still active, as reported by curl_multi_exec()
	protected $_is_running = -1;
	// When true, progress information is written to STDERR
	protected $_debug = false;

	public function __construct()
	{
		$this->_curl = curl_multi_init();
	}

	/**
	 * Add a URL for processing; runs the batch itself when the bucket is
	 * full.
	 *
	 * The URL is queued $threadsPerUrl times. The bucket may fill up in the
	 * middle of those, in which case the remaining requests for this URL
	 * end up in the next batch.
	 *
	 * @param string $url URL to request
	 * @throws RuntimeException when a cURL handle cannot be created or the
	 *                          automatically started batch fails
	 */
	public function addUrl($url)
	{
		if( $this->_debug )
			fprintf(STDERR, "Adding URL %s: ", $url);

		// CURLOPT_TIMEOUT takes whole seconds. Round up, so a fractional
		// cutoff below one second is not truncated to 0 (= no timeout).
		$timeout = (int) ceil($this->tooSlowCutOff);

		for( $i = 0; $i < $this->threadsPerUrl; $i++ )
		{
			$ch = curl_init($url);
			if( $ch === false )
			{
				throw new RuntimeException(
					sprintf('Unable to create a cURL handle for %s', $url)
				);
			}
			curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
			curl_multi_add_handle($this->_curl, $ch);
			$this->_filled++;
			if( $this->_debug )
				fprintf(STDERR, "%d", $i + 1);

			// ">=" rather than "==": the bucket must still be flushed when
			// maxThreads is 1 or lower, or was lowered below the current
			// fill. With "==" it would then keep growing forever.
			if( $this->_filled >= $this->maxThreads )
			{
				if( $this->_debug )
					fprintf(STDERR, "\n->Running set automatically\n");
				$this->noMoreUrls();
			}
		}
		if( $this->_debug )
			fprintf(STDERR, "\n");
	}

	/**
	 * Bucket is full or consumer signals the end: run the queued requests,
	 * report on them and start with a fresh, empty bucket.
	 *
	 * Does nothing when no requests are queued.
	 *
	 * @throws RuntimeException when cURL reports a multi handle error
	 */
	public function noMoreUrls()
	{
		if( $this->_filled <= 1 )
		{
			// Nothing queued (e.g. the last addUrl() just flushed the
			// bucket), so there is nothing to run or report.
			if( $this->_debug )
				fprintf(STDERR, "No URLs queued, nothing to run\n");
			return;
		}
		if( $this->_debug )
			fprintf(STDERR, "Running URLs\n");
		$this->_run();
		if( $this->_debug )
			fprintf(STDERR, "Reporting and tearing down multi\n");
		$this->_teardown();
	}

	/**
	 * Switch debug output on STDERR on or off.
	 *
	 * @param mixed $val any value; its truthiness decides
	 */
	public function setDebugMode($val)
	{
		$this->_debug = (bool) $val;
	}

	/**
	 * Drive the multi handle until no transfer is active anymore.
	 *
	 * @throws RuntimeException when curl_multi_exec() reports an error
	 */
	protected function _run()
	{
		if( ($rc = $this->_exec()) != CURLM_OK )
			throw new RuntimeException(curl_multi_strerror($rc));

		while( $this->_is_running )
		{
			// curl_multi_select() returns -1 when it could not wait for
			// activity. Back off briefly in that case, instead of spinning
			// at full CPU until the transfers finish.
			if( curl_multi_select($this->_curl, 1.0) === -1 )
				usleep(10000);

			if( ($rc = $this->_exec()) != CURLM_OK )
				throw new RuntimeException(curl_multi_strerror($rc));
		}
	}

	/**
	 * Report on every finished transfer, release its handle and replace
	 * the multi handle with a fresh one for the next batch.
	 */
	protected function _teardown()
	{
		while( ($info = curl_multi_info_read($this->_curl)) !== false )
		{
			$handle = $info['handle'];
			$url = curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
			$time = curl_getinfo($handle, CURLINFO_TOTAL_TIME);
			if( $info['result'] != CURLE_OK )
			{
				printf("ERROR: %s: %s\n", $url,
					curl_strerror($info['result'])
				);
			}
			elseif( $time > $this->fastCutOff )
			{
				printf("%-69s: % 9.6f\n", $url, $time);
			}
			else
			{
				// Trailing \r: the next line printed overwrites this one.
				printf("%-76s: OK\r", $url);
			}
			curl_multi_remove_handle($this->_curl, $handle);
			// Free the easy handle right away where handles are still
			// resources (before PHP 8). As of PHP 8 they are objects that
			// are released automatically.
			if( is_resource($handle) )
				curl_close($handle);
		}
		curl_multi_close($this->_curl);
		$this->_filled = 1;
		$this->_is_running = -1;
		$this->_curl = curl_multi_init();
	}

	/**
	 * Call curl_multi_exec() for as long as cURL asks to be called again.
	 * Updates $_is_running with the number of active transfers.
	 *
	 * @return int the CURLM_* status of the last curl_multi_exec() call
	 */
	protected function _exec()
	{
		do {
			$rc = curl_multi_exec($this->_curl, $this->_is_running);
		} while( $rc == CURLM_CALL_MULTI_PERFORM );
		return $rc;
	}
}
// Sitemap to test: the first command line argument, or ./sitemap.xml when
// none is given.
$sitemap = isset($argv[1]) ? $argv[1] : './sitemap.xml';
$root = simplexml_load_file($sitemap);
if( $root === false )
{
	// Tell the user what went wrong instead of finishing without output.
	fprintf(STDERR, "Unable to load sitemap %s\n", $sitemap);
	exit(1);
}
$fetcher = new SiteFetcher();
foreach( $root->url as $element )
{
	// <loc> may be padded with whitespace or be empty; skip the latter.
	$loc = trim((string)$element->loc);
	if( $loc === '' )
		continue;
	$fetcher->addUrl($loc);
}
// Run whatever is left in the last, partially filled bucket.
$fetcher->noMoreUrls();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment