Last active
October 5, 2017 17:35
-
-
Save garbetjie/d9ef3eb95fc5db33316d4b6799ddc07a to your computer and use it in GitHub Desktop.
Test script for memory exhaustion in `GuzzleHttp\Pool` with large request pools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
To set up: | |
1. Install composer. | |
2. Run `php composer.phar require guzzlehttp/guzzle`
3. Run this test script with one of the following arguments: | |
3.1. php guzzle-pool-test.php curl (to use GuzzleHttp\Handler\CurlHandler) | |
3.2. php guzzle-pool-test.php multicurl (to use GuzzleHttp\Handler\CurlMultiHandler)
3.3. php guzzle-pool-test.php stream (to use GuzzleHttp\Handler\StreamHandler) | |
Changeable variables: | |
$interval: Sets the number of requests that must have run before printing out memory usage (default=100) | |
$concurrency: The number of concurrent requests to run in the pool (default=50)
*/ | |
use GuzzleHttp\Client; | |
use GuzzleHttp\Handler\CurlHandler; | |
use GuzzleHttp\Handler\CurlMultiHandler; | |
use GuzzleHttp\Handler\StreamHandler; | |
use GuzzleHttp\HandlerStack; | |
use GuzzleHttp\Middleware; | |
use GuzzleHttp\Pool; | |
use Psr\Http\Message\ResponseInterface; | |
require __DIR__ . '/vendor/autoload.php'; | |
// Select the Guzzle handler named by the first command-line argument.
// A missing argument is treated like an unrecognised one, so the usage
// message is shown instead of an "undefined index" warning.
$handlerName = isset($argv[1]) ? $argv[1] : '';

switch ($handlerName) {
    case 'curl':
        $handler = new CurlHandler();
        break;
    case 'multicurl':
        $handler = new CurlMultiHandler();
        break;
    case 'stream':
        $handler = new StreamHandler();
        break;
    default:
        // Usage errors go to STDERR with a non-zero exit status so that
        // calling shells and scripts can detect the failure.
        fwrite(STDERR, "one of curl, multicurl or stream required as first argument.\n");
        exit(1);
}
// Build the handler stack by hand on top of the selected handler, adding the
// http_errors, allow_redirects, cookies and prepare_body middleware.
$stack = new HandlerStack($handler);
$stack->push(Middleware::httpErrors(), 'http_errors');
$stack->push(Middleware::redirect(), 'allow_redirects');
$stack->push(Middleware::cookies(), 'cookies');
$stack->push(Middleware::prepareBody(), 'prepare_body');

// Changeable variables (see the header comment).
$interval = 100;
$concurrency = 50;

$client = new Client(['handler' => $stack]);

echo sprintf("Using Guzzle handler %s\n", get_class($handler));
echo sprintf("Printing memory usage every %d requests\n", $interval);
echo "Fetching package list... ";

// Download the list of package names that drives the request pool.
$packageList = json_decode(
    $client->get('https://packagist.org/packages/list.json')
        ->getBody()
        ->getContents()
);

// json_decode() returns null when the body is not valid JSON; fail with a
// clear message rather than dereferencing null or counting a non-array.
if (!is_object($packageList)
    || !isset($packageList->packageNames)
    || !is_array($packageList->packageNames)
) {
    fwrite(STDERR, "failed. Response did not contain a packageNames list.\n");
    exit(1);
}

$packageNames = $packageList->packageNames;
echo 'done. (' . count($packageNames) . " packages)\n\n";
// Generator that builds one GET request per package name, creating each
// request object only when the consumer asks for the next item.
$requests = function ($packageNames) {
    foreach ($packageNames as $name) {
        $uri = 'https://packagist.org/p/' . $name . '.json';

        yield new GuzzleHttp\Psr7\Request('GET', $uri);
    }
};
// Number of requests completed so far, whether fulfilled or rejected.
$counter = 0;

// Shared progress reporter used by both pool callbacks: counts the completed
// request and, every $interval requests, prints the peak memory usage in MB.
// Both outcomes use the same newline-terminated message so output lines
// never run together.
$reportProgress = function () use (&$counter, $interval) {
    $counter++;

    if ($counter % $interval === 0) {
        echo sprintf(
            "Processed %s requests. Memory used: %s MB\n",
            number_format($counter),
            number_format(memory_get_peak_usage() / 1024 / 1024, 3)
        );
    }
};

// Run every generated request through the pool, keeping at most $concurrency
// requests in flight, and block until the whole pool has completed.
(new Pool(
    $client,
    $requests($packageNames),
    [
        'concurrency' => $concurrency,
        'fulfilled' => function (ResponseInterface $response, $index) use ($reportProgress) {
            $reportProgress();
        },
        'rejected' => function ($reason, $index) use ($reportProgress) {
            $reportProgress();
        },
    ]
))->promise()->wait();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I suppose the problem may occur for the same reason that there is an IMPORTANT notice above the Pool::batch method.
You may get better results if you replace Pool::batch(...) with this:
`$pool = new Pool($client, $requests($packageNames), [
    'concurrency' => $concurrency,
    // Count each successful response; print memory usage every $interval requests.
    'fulfilled' => function (ResponseInterface $response, $index) use (&$counter, $interval) {
        $counter++;
        if ($counter % $interval === 0) {
            echo sprintf(
                "Processed %s requests. Memory used: %s MB\n",
                number_format($counter),
                number_format(memory_get_peak_usage()/1024/1024, 3)
            );
        }
    },
    // Failed requests are counted the same way; the message ends with a
    // newline so it matches the output of the fulfilled callback.
    'rejected' => function($reason, $index) use (&$counter, $interval) {
        $counter++;
        if ($counter % $interval === 0) {
            echo sprintf(
                "Processed %s requests. Memory used: %s MB\n",
                number_format($counter),
                number_format(memory_get_peak_usage()/1024/1024, 3)
            );
        }
    }
]);
// Initiate the transfers and create a promise
$promise = $pool->promise();
// Force the pool of requests to complete.
$response = $promise->wait();`
Are there any reasons Pool::batch() is better than this?