Skip to content

Instantly share code, notes, and snippets.

@schmengler
Last active April 6, 2021 07:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schmengler/c29b210535b04b23e9e6 to your computer and use it in GitHub Desktop.
Save schmengler/c29b210535b04b23e9e6 to your computer and use it in GitHub Desktop.
Draw Elements From Large PHP Array: Benchmark Script
<?php
require 'Timer.php';
$testData = array(
// $k | count($a)
// -------+---------
array( 10, 1000),
array( 10, 10000),
array( 10, 100000),
array( 10, 1000000),
array( 100, 1000),
array( 100, 10000),
array( 100, 100000),
array( 100, 1000000),
array( 1000, 10000),
array( 5000, 10000),
array( 9000, 10000),
array( 1000, 100000),
array( 10000, 100000),
array( 50000, 100000),
array( 90000, 100000),
);
$functions = array(
'value_by_array_rand',
'shuffle_and_splice',
'random_keys',
'richard_durstenfeld',
'fisher_yates_by_value',
'fisher_yates_by_reference',
'richard_durstenfeld_fixed_array',
'fisher_yates_fixed_array',
);
if (isset($argv[1])) {
$functions = array($argv[1]);
}
echo "|------------------------------------------------------------|\n";
printf($format = "| %-25.25s | %8.8s | %9.9s | %7s |\n", 'function', '$k', '$t', 'time');
echo "|---------------------------+----------+-----------+---------|\n";
foreach ($functions as $function) {
foreach ($testData as $data) {
list($k, $n) = $data;
$time = test($function, range(1, $n), $k);
printf($format, $function, $k, $n, PHP_Timer::secondsToTimeString($time));;
}
}
echo "|---------------------------+----------+-----------+---------|\n";
echo "Total memory usage: ", number_format(memory_get_peak_usage()), " Bytes";
function test($f, array $a, $k)
{
PHP_Timer::start();
$result = $f($a, $k);
return PHP_Timer::stop();
}
function value_by_array_rand($array, $k)
{
$keys = array_rand($array, $k);
$result = array();
foreach ($keys as $key) {
$result[] = $array[$key];
}
return $result;
}
function shuffle_and_splice(&$array, $k)
{
shuffle($array);
return array_splice($array, 0, $k);
}
function random_keys($array, $k)
{
$randomArray = [];
while (count($randomArray) < $k) {
$randomKey = mt_rand(0, count($array)-1);
$randomArray[$randomKey] = $array[$randomKey];
}
return $randomArray;
}
function richard_durstenfeld($array, $n)
{
$array_keys = array_keys($array);
$array_length = count($array_keys);
$max_index = $array_length -1;
$iterations = min($n, $array_length);
$random_array = array();
while($iterations--) {
$index = mt_rand(0, $max_index);
$value = $array_keys[$index];
$array_keys[$index] = $array_keys[$max_index];
array_push($random_array, $array[$value]);
$max_index--;
}
return $random_array;
}
function fisher_yates_by_value( $a, $n )
{
$N = count($a);
$n = min($n, $N);
$picked = array_fill(0, $n, 0);
// partially shuffle the array, and generate unbiased selection simultaneously
// this is a variation on fisher-yates-knuth shuffle
for ($i=0; $i<$n; $i++) // O(n) times
{
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1
$value = $a[ $selected ];
$a[ $selected ] = $a[ $N ];
$a[ $N ] = $value;
$picked[ $i ] = $value;
}
return $picked;
}
function fisher_yates_by_reference( &$a, $n )
{
$N = count($a);
$n = min($n, $N);
$picked = array_fill(0, $n, 0); $backup = array_fill(0, $n, 0);
// partially shuffle the array, and generate unbiased selection simultaneously
// this is a variation on fisher-yates-knuth shuffle
for ($i=0; $i<$n; $i++) // O(n) times
{
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1
$value = $a[ $selected ];
$a[ $selected ] = $a[ $N ];
$a[ $N ] = $value;
$backup[ $i ] = $selected;
$picked[ $i ] = $value;
}
// restore partially shuffled input array from backup
// optional step, if needed it can be ignored, e.g $a is passed by value, hence copied
for ($i=$n-1; $i>=0; $i--) // O(n) times
{
$selected = $backup[ $i ];
$value = $a[ $N ];
$a[ $N ] = $a[ $selected ];
$a[ $selected ] = $value;
$N++;
}
return $picked;
}
function richard_durstenfeld_fixed_array($array, $n)
{
$array_length = count($array);
$max_index = $array_length -1;
$iterations = min($n, $array_length);
$array_keys = SplFixedArray::fromArray(range(0, $max_index), false);
$random_array = array();
while($iterations--) {
$index = mt_rand(0, $max_index);
$value = $array_keys[$index];
$array_keys[$index] = $array_keys[$max_index];
array_push($random_array, $array[$value]);
$max_index--;
}
return $random_array;
}
function fisher_yates_fixed_array( $array, $n )
{
$a = SplFixedArray::fromArray($array, false);
$N = count($a);
$n = min($n, $N);
$picked = array_fill(0, $n, 0);
// partially shuffle the array, and generate unbiased selection simultaneously
// this is a variation on fisher-yates-knuth shuffle
for ($i=0; $i<$n; $i++) // O(n) times
{
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1
$value = $a[ $selected ];
$a[ $selected ] = $a[ $N ];
$a[ $N ] = $value;
$picked[ $i ] = $value;
}
return $picked;
}
@simtabi
Copy link

simtabi commented Mar 28, 2021

Hi schmengler,

Stumbled on this, great piece. But a quick question, what if an array has named keys/index keys? tried it out and it fails.

@schmengler
Copy link
Author

@simtabi yes, the functions assume continuous numeric indexes. Background: https://www.schmengler-se.de/en/2015/09/efficiently-draw-random-elements-from-large-php-array/

Problem definition

  • Given an array $array of $n integers, draw $k distinct elements, with $k << $n
  • The array is indexed numerically from 0 to $n-1 and does not contain duplicates.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment