Skip to content

Instantly share code, notes, and snippets.

@abiusx
Last active August 5, 2018 00:36
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save abiusx/060f25c10ff6ccf88f2dc8c8fbcfae0d to your computer and use it in GitHub Desktop.
Deep Copy an array (or variable) in PHP
<?php
/**
* PHP Deep Copy
* @version 3.1.1
* _zval_id_accurate() now receives its zval pool as an argument instead of
* keeping it in a static variable.
* If is_ref() and zval_id() are available in PHP core, they are used;
* otherwise the PHP-based implementations of these functions defined
* in this file are used.
*
* A warning is raised when attempting to copy uncloneable objects or resources.
* Resources and uncloneable objects cannot be deep copied; the original
* is returned instead.
* http://php.net/manual/en/resource.php
*/
/**
 * Tells whether the element stored under a given key of an array is a PHP reference.
 *
 * PHP has no built-in is_ref(), so this inspects var_dump() output: var_dump
 * prints an '&' in front of an element's value when that element is a reference.
 * It relies on the stock textual var_dump format.
 *
 * @param array      $container the array holding the element, e.g. get_defined_vars()
 * @param int|string $varname   the key (variable name) of the element to test
 * @return boolean true if the element exists and is dumped as a reference, false otherwise
 */
function _is_ref($container = [], $varname = null)
{
    if (!is_array($container) || (!is_int($varname) && !is_string($varname))) {
        return false;
    }

    // Normalise the key the way PHP arrays do ("5" becomes int 5) so the strict
    // comparison below cannot miss the element because of its key type.
    $probe = [$varname => true];
    $key = key($probe);
    if (!array_key_exists($key, $container)) {
        return false;
    }

    // Keep only the element under test so the dump has exactly one entry.
    // $container is our own by-value copy, so the caller's array is untouched.
    foreach ($container as $k => $v) {
        if ($k !== $key) {
            unset($container[$k]);
        }
    }

    ob_start();
    var_dump($container);
    $dump = ob_get_clean();

    // A single referenced element is rendered as:
    //   array(1) {\n  [key]=>\n  &value...
    // Integer keys are printed bare, string keys in double quotes.
    $label = is_int($key) ? (string)$key : '"' . $key . '"';
    $expected = "array(1) {\n  [" . $label . "]=>\n  &";

    return strncmp($dump, $expected, strlen($expected)) === 0;
}
/**
 * This is a hacky is_ref function for a variable of the calling scope.
 *
 * A variable's name cannot be recovered from its value, so this re-reads the
 * source line of the call (located with debug_backtrace()), extracts the
 * variable name from the call text and delegates to _is_ref(). The call must
 * therefore be written literally, on a single line, as:
 *     _is_ref_plain(get_defined_vars(), $someVariable)
 *
 * @param array $defined_vars should send get_defined_vars() to this. enforced.
 * @param mixed $var any variable, should be a variable and not an expression
 * @return boolean false (with a notice) when the call text cannot be read or parsed
 */
function _is_ref_plain($defined_vars = [], $var = null)
{
    // Only the immediate call site is needed; skip argument capture.
    $frames = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $caller = isset($frames[0]) ? $frames[0] : [];

    // Fetch the source line of the call, if the file is actually available
    // (it is not for eval()'d code, for instance).
    $line = null;
    if (isset($caller['file'], $caller['line']) && is_readable($caller['file'])) {
        $source = file($caller['file']);
        if ($source !== false && isset($source[$caller['line'] - 1])) {
            $line = $source[$caller['line'] - 1];
        }
    }

    $pattern = '/' . preg_quote(__FUNCTION__, '/')
        . '\(\s*get_defined_vars\s*\(\s*\)\s*,\s*\$(.*?)\)/i';

    if ($line === null || !preg_match($pattern, $line, $match)) {
        trigger_error("You need to provide two arguments to '".__FUNCTION__.
            "', namely get_defined_vars() and a variable");
        return false; //an expression is not a reference
    }

    // trim() tolerates whitespace between the variable and the closing parenthesis.
    return _is_ref($defined_vars, trim($match[1]));
}
/**
 * Lightweight zval id.
 *
 * Arrays are tagged in place with a '___VISITED___' entry holding their id, so
 * a tagged array is recognised on later calls; the tag is left in the array.
 * Anything that is not an array cannot be tagged and gets a fresh id each time.
 *
 * @param mixed &$zval the variable to identify; arrays are modified
 * @return integer
 */
function _zval_id_light(&$zval)
{
    static $id = 0;

    // Non-arrays are always treated as new zvals.
    if (!is_array($zval)) {
        return $id++;
    }

    // First visit of this array: stamp it with the next id.
    if (!isset($zval['___VISITED___'])) {
        $zval['___VISITED___'] = $id++;
    }

    return $zval['___VISITED___'];
}
/**
 * This is a more accurate zval_id implementation.
 *
 * Every new zval is stored by reference in the pool. To identify a variable it
 * is temporarily overwritten with a marker; a pool entry that now shows the
 * marker is bound to the same zval, so its id is returned. Otherwise the
 * variable is added to the pool under a new id. The variable's value is
 * restored before returning.
 *
 * The marker is a fresh object compared by identity, so no real value held by
 * a pooled variable (including the string '___VISITED___') can be mistaken for it.
 *
 * @param mixed &$zval
 * @param array &$zvals the zval pool. for every set of variables, use a new pool
 * @return integer
 */
function _zval_id_accurate(&$zval, &$zvals)
{
    if ($zvals === null) {
        $zvals = [];
    }

    $backup = $zval;
    $marker = new stdClass;
    $zval = $marker;

    // Writing through $zval is visible through every pool entry bound to it.
    $id = null;
    foreach ($zvals as $k => $v) {
        if ($v === $marker) {
            $id = $k;
            break;
        }
    }

    $zval = $backup;

    if ($id === null) {
        $id = count($zvals);
        $zvals[$id] = &$zval;
    }

    return $id;
}
/**
 * PHP-based zval id used by deep_copy(); currently forwards to _zval_id_accurate().
 *
 * @param mixed &$zval  the variable to identify
 * @param array &$zvals the zval pool handed through to _zval_id_accurate()
 * @return integer
 */
function _zval_id(&$zval, &$zvals)
{
    $id = _zval_id_accurate($zval, $zvals);

    return $id;
}
/**
 * An alias to deep_copy that does not need to be invoked by reference.
 * Deep copies a zval (PHP variable) into $clone.
 *
 * @param mixed &$variable the variable to clone
 * @param mixed &$clone    receives the clone
 * @return null
 */
function zval_clone(&$variable, &$clone)
{
    // Assign by value on purpose: "$clone = &deep_copy(...)" would only rebind
    // the local parameter name and leave the caller's variable untouched.
    $clone = deep_copy($variable);
}
/**
* Deep Copy a PHP Variable
* Note: you need to access the result of this function by reference
* ($copy=&deep_copy($var);), or an implicit copy will be made.
*
* Arrays are copied element by element (recursively); objects are copied with
* the clone operator (their properties are not traversed by this function);
* uncloneable objects and resources are not copied: a notice is raised and the
* original value is used.
*
* @param mixed &$variable the variable to copy
* @param array &$object_pool for internal use: spl_object_hash => copy of that object
* @param array &$zval_pool for internal use: zval id => copy already made for that zval
* @param integer $depth for internal use: recursion depth, only used in notices
* @param array &$id_zvals for internal use: pool handed to _zval_id() when zval_id() does not exist
* @return mixed a deep copy of the variable (returned by reference)
*/
function &deep_copy(&$variable,&$object_pool=[],&$zval_pool=[],$depth=0,&$id_zvals=[])
{
// Identify the zval: use zval_id() when such a function exists, otherwise the
// PHP-based _zval_id() with its own pool.
if (function_exists("zval_id"))
$id=zval_id($variable);
else
$id=_zval_id($variable,$id_zvals);
// This zval has been copied before: hand back the same copy.
if (isset($zval_pool[$id]))
return $zval_pool[$id];
if (is_array($variable))
{
$res=[];
// Registered before recursing, so a nested occurrence of this same zval
// resolves to this copy instead of recursing again.
$zval_pool[$id]=&$res; //the copy array
foreach ($variable as $k=>$v)
{
$t=&deep_copy($variable[$k],$object_pool,$zval_pool,$depth+1,$id_zvals);
// Bind the copied element by reference when the source element is a
// reference, otherwise assign it by value. is_ref() is used when such a
// function exists, the PHP-based _is_ref() otherwise.
if (function_exists("is_ref"))
if (is_ref($variable[$k]))
$res[$k]=&$t;
else
$res[$k]=$t;
else //php-based is_ref
if (_is_ref($variable,$k))
$res[$k]=&$t;
else
$res[$k]=$t;
}
return $res;
}
elseif (is_object($variable))
{
// One copy per object instance, keyed by its spl_object_hash().
$hash=spl_object_hash($variable);
if (isset($object_pool[$hash]))
$res=$object_pool[$hash];
else
{
$reflection=new ReflectionObject($variable);
if ($reflection->isCloneable()===false)
{
// Cannot be cloned: raise a notice and use the original object as its own "copy".
trigger_error("Attempting to deep copy an unclonable object (depth={$depth})");
$res=$object_pool[$hash]=$variable;
}
else
$res=$object_pool[$hash]=clone $variable;
}
$zval_pool[$id]=&$res;
return $res;
}
else
{
// Everything else is copied by plain assignment; for resources a notice is
// raised first, and the copy is the same resource value.
if (is_resource($variable))
trigger_error("Attempting to deep copy a resource of type '".get_resource_type($variable)."' (depth={$depth})");
$zval_pool[$id]=$variable; //copy
return $zval_pool[$id];
}
}
<?php
require "deepcopy.php";
//------------ TESTS ---------------
// Runs test_zval_id() and then terminates the script: die() exits as soon as the
// test returns, so no later top-level statement runs while this line is active.
die(test_zval_id());
/**
 * Dumps the ids _zval_id() assigns to a variable, to a by-value copy of it,
 * and to a reference bound to that copy, all using one shared pool.
 */
function test_zval_id()
{
    $original = 2;
    $copy = $original;
    $alias = &$copy;

    // The pool starts out as null; the id function receives it by reference.
    $ids = null;

    var_dump(_zval_id($original, $ids));
    var_dump(_zval_id($copy, $ids));
    var_dump(_zval_id($alias, $ids));
}
/**
 * Dumps what _is_ref() reports for a plain variable and for a reference.
 *
 * $a is a plain value; $dulldozer is bound by reference to $b. The intended
 * result is therefore false for 'a' and true for 'dulldozer'.
 */
function is_ref_test()
{
    $a = 2;
    $b = $a;
    $dulldozer = &$b;

    // _is_ref() looks the element up by key, so it must be given the variable's
    // name, not its value.
    var_dump(_is_ref(get_defined_vars(), 'a'));
    var_dump(_is_ref(get_defined_vars(), 'dulldozer'));
}
// Manual test selector: die() ends the script as soon as its test returns, so
// only the first call in this list can ever run. Reorder the lines (or comment
// some out) to choose which test is executed.
die(test_thorough());
die(test_binary_tree_reference_value());
die(test_2array_ref());
die(test_resource());
die(test_array_ref());
die(test_value_ref());
die(test_object_ref());
/**
 * Copies an array mixing an object and a string, each stored by value, as a
 * copy and as a reference, then modifies the copy and the original separately
 * and dumps both so the two can be compared.
 */
function test_thorough()
{
    $object = new stdClass;
    $object->data = "o data";
    $objectRef = &$object;
    $objectCopy = $object;

    $value = "value";
    $valueCopy = $value . " copy";
    $valueRef = &$value;

    // First row: by-value entries. Second row: the same variables by reference.
    $original = [
        $object, $objectRef, $objectCopy, $value, $valueCopy, $valueRef,
        &$object, &$objectRef, &$objectCopy, &$value, &$valueCopy, &$valueRef,
    ];

    $copy = &deep_copy($original);

    $copy[0]->data .= " (deep_copy)";
    $copy[3] .= " (deep_copy)";
    $copy[4] .= " (deep_copy)";

    $original[0]->data .= " (original)";
    $original[3] .= " (original)";
    $original[4] .= " (original)";

    var_dump($original);
    echo str_repeat("-", 80), PHP_EOL;
    var_dump($copy);
}
/**
 * Copies an array holding an open file handle and reads one line through the
 * original and one through the copy.
 *
 * deep_copy() does not duplicate resources, so both arrays hold the same
 * handle and the second fgets() continues where the first one stopped.
 */
function test_resource()
{
    // tempnam() expects a directory and a prefix string.
    $file = tempnam(sys_get_temp_dir(), "dc");
    if ($file === false) {
        trigger_error("Could not create a temporary file for test_resource()");
        return;
    }
    file_put_contents($file, "hello\nthere");

    $f = fopen($file, "rt");
    if ($f === false) {
        unlink($file);
        trigger_error("Could not open the temporary file for test_resource()");
        return;
    }

    $a = [2, 5, $f];
    $a2 = &deep_copy($a);

    var_dump($a);
    var_dump($a2);
    echo fgets($a[2]), PHP_EOL;
    echo fgets($a2[2]), PHP_EOL;

    // Release the handle and remove the temporary file.
    fclose($f);
    unlink($file);
}
/**
 * Copies an array that references two other arrays, each of which references
 * it back, then changes 'data' on the original and on the copy and prints both.
 */
function test_2array_ref()
{
    $left = ['data' => 'b'];
    $right = ['data' => 'c'];
    $root = [
        'data' => 'a',
        'b' => &$left,
        'c' => &$right,
    ];

    // Close the cycles: both children point back at the root.
    $right['a'] = &$root;
    $left['a'] = &$root;

    $copy = &deep_copy($root);

    $root['data'] = 'original a';
    $copy['data'] = 'a2';

    print_r($root);
    print_r($copy);
}
/**
 * Copies an array that holds the same inner array twice by reference and once
 * by value, then writes through index 3 of the copy and of the original.
 *
 * Author's notes: does not work with version 2.2 (three copies are made);
 * passes on 2.4.
 */
function test_array_ref()
{
    $inner = [4, 5, 6];
    $original = [1, 2, 3, &$inner, &$inner, $inner];

    $copy = &deep_copy($original);

    $copy[3][0] = 44;
    $original[3][0] = 55;

    var_dump($copy);
    var_dump($original);
}
/**
 * Builds an array holding a reference to itself plus a by-value copy of
 * itself, dumps its serialize()/unserialize() round trip, then deep copies it.
 *
 * Author's notes: infinite recursion with version 2.2; does not pass on any
 * version.
 */
function test_binary_tree_reference_value()
{
    $tree = [];
    $tree['ref'] = &$tree;
    $tree['val'] = $tree;

    var_dump(serialize($tree));
    var_dump(unserialize(serialize($tree)));

    echo "Ready: ";
    $copy = &deep_copy($tree);
    echo "done.\n";

    var_dump($copy);
}
/**
 * Copies an array that contains a by-value copy of itself and two references
 * to itself, then changes 'data' in the copy and in the copy's 'a_val' entry.
 *
 * Author's note: works fine with version 2.2.
 */
function test_array_double_ref()
{
    $source = ['data' => 'initial data'];
    $source['a_val'] = $source;
    $source['a1'] = &$source;
    $source['a2'] = &$source;

    $copy = &deep_copy($source);

    $copy['data'] = 'copy data';
    $copy['a_val']['data'] = 'copy val data';

    var_dump($source);
    var_dump($copy);
}
/**
 * Copies an array of string values stored by value and by reference, then
 * writes to the copy and to the original and dumps both, printing the
 * expectations next to the dumps.
 */
function test_value_ref()
{
    $value = 'value';
    $valueCopy = $value;
    $valueRef = &$value;

    $original = [
        'val' => $value,
        'val copy' => $valueCopy,
        'val ref copy' => $valueRef,
        'valref' => &$value,
        'val copy ref' => &$valueCopy,
        'val ref ref' => &$valueRef,
    ];

    $copy = &deep_copy($original);

    $copy['valref'] = "copy";
    $original['val'] = "original";
    $original['val copy'] = "original copy";
    $original['val ref ref'] = "original reference";

    var_dump($original);
    echo str_repeat("-", 80), PHP_EOL;

    echo "3,5 values should be 'copy' here, different from the original, but the same:", PHP_EOL;
    var_dump($copy);
    echo str_repeat("-", 80), PHP_EOL;

    echo "3 and 5 should be 'copy reference', 2 should be 'copy value':", PHP_EOL;
    $copy['val copy'] = "copy value";
    $copy['val ref ref'] = 'copy reference';
    var_dump($copy);
    echo str_repeat("-", 80), PHP_EOL;
}
/**
 * Copies an array that holds one object by value and by reference, then
 * writes to the copy and to the original and dumps both.
 *
 * The helper class announces construction, cloning and destruction, so the
 * output also shows how many instances were created and when they go away.
 * The class is declared when this function runs.
 */
function test_object_ref()
{
    class test
    {
        public static $count = 0;
        public $id = 0;
        public $data = "initial data";

        public function __construct()
        {
            $this->id = self::$count++;
            echo "Construct {$this->id}", PHP_EOL;
        }

        public function __clone()
        {
            $this->id = self::$count++;
            echo "Clone {$this->id}", PHP_EOL;
        }

        public function __destruct()
        {
            echo "Destruct {$this->id}", PHP_EOL;
            self::$count--;
        }
    }

    $object = new test();
    $objectCopy = $object;
    $objectRef = &$object;

    $original = [$object, $objectCopy, $objectRef, &$object, &$objectCopy, &$objectRef];
    $copy = &deep_copy($original);

    $copy[0]->data = "copy";
    $original[0]->data = "original";
    $original[1] = "original value";
    $original[5] = "original reference";

    var_dump($original);
    echo str_repeat("-", 80), PHP_EOL;

    echo "All values should be 'copy' here, different from the original, but the same:", PHP_EOL;
    var_dump($copy);
    echo str_repeat("-", 80), PHP_EOL;

    echo "3 and 5 should be copy reference, just like the original:", PHP_EOL;
    echo "2 should be 'copy reference' and 4 should be 'copy' as well", PHP_EOL;
    $copy[1] = "copy value";
    $copy[5] = 'copy reference';
    var_dump($copy);
    echo str_repeat("-", 80), PHP_EOL;
}
@axot
Copy link

axot commented Dec 4, 2016

Hi, I read your article, it is awesome. This is a better way to deep copy a variable than serialize/unserialize.
I'm curious about this: did you benchmark this method?
If deep copy is fast enough, I want to build a local cache as a secondary cache to APC using a static array, since that is much faster than apc_fetch.

@abiusx
Copy link
Author

abiusx commented Aug 5, 2018

Hello Axot,
I have a deep copy implementation for PHP 7 as an extension. I wanted to use this for state isolation (concolic execution), and the pure PHP implementation is about 10x slower than the native C implementation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment