-
-
Save soyuka/4468eab47aceb6abd1bf to your computer and use it in GitHub Desktop.
<?php | |
namespace Fry; | |
use JsonStreamingParser\Listener; | |
/**
 * Streaming-parser listener that rebuilds the JSON structure incrementally and
 * hands data to a callback as soon as an object located directly inside the
 * root array has been fully parsed.
 *
 * The callback fires when an object ends while both the array depth and the
 * object depth are 1 (see end_object()). Documents whose root is not an array
 * never trigger the callback.
 */
class ObjectListener implements Listener
{
    /** @var string|null Key announced by key() that has not been consumed yet **/
    private $_key;

    /** @var int Array deep level **/
    private $array_level = 0;

    /** @var int Object deep level **/
    private $object_level = 0;

    /** @var array Reference to the array currently being filled (it represents an object or an array) **/
    private $pointer;

    /**
     * @var array $array_pointers Stores references to the arrays being built, indexed by array depth
     * @var array $object_pointers Stores references to the objects being built, indexed by object depth
     * They are used to move the current pointer back when a structure ends.
     */
    private $array_pointers, $object_pointers;

    /**
     * @var array Reset by start_document(); never read inside this class.
     * Declared explicitly so that start_document() no longer creates a dynamic
     * property (deprecated since PHP 8.2). Kept public because the dynamically
     * created property was publicly visible.
     */
    public $keys = array();

    /** @var array Main array that stores the structure currently being built **/
    private $stack = array();

    /** @var callable|null Per-item callback and optional end-of-document callback **/
    private $callback, $end_callback;

    /**
     * @param callable      $callback     called when an object of the root array has been fully parsed
     * @param callable|null $end_callback optional, called without arguments at the end of the document;
     *                                    it is silently ignored when it is not callable
     *
     * @throws \InvalidArgumentException if $callback isn't callable
     */
    public function __construct($callback, $end_callback = null)
    {
        if (!is_callable($callback)) {
            throw new \InvalidArgumentException("Callback should be a callable function");
        }

        $this->callback = $callback;
        $this->end_callback = $end_callback;
    }

    /**
     * Position notifications are ignored.
     *
     * @return void
     */
    public function file_position($line, $char)
    {
    }

    /**
     * Document start
     * Resets every variable and places the pointer on the stack
     *
     * @return void
     */
    public function start_document()
    {
        $this->stack = array();
        $this->array_pointers = array();
        $this->array_level = 0;
        $this->object_level = 0;
        $this->object_pointers = array();
        $this->keys = array();
        $this->_key = null;
        $this->pointer =& $this->stack;
    }

    /**
     * Document end (EOF)
     * Resets the internal state, then calls the end callback when one was given.
     *
     * @return void
     */
    public function end_document()
    {
        // release memory
        $this->start_document();

        if (is_callable($this->end_callback)) {
            call_user_func($this->end_callback);
        }
    }

    /**
     * Start object
     * Appends a new empty array (representing the object) to the array tracked
     * for the current array depth and makes it the current pointer.
     *
     * Note: the object is always appended by numeric index; a pending key is
     * not used as its index.
     *
     * @return void
     */
    public function start_object()
    {
        // A key announced for this object must not leak to a later keyless value
        $this->_key = null;

        //Increase the object level
        $this->object_level++;

        //Point on the current array
        $this->pointer =& $this->array_pointers[$this->array_level];

        //Get the current index
        $array_index = isset($this->pointer) ? count($this->pointer) : 0;

        //Build an array on this index
        $this->pointer[$array_index] = array();

        //Pointer is now this new array
        $this->pointer =& $this->pointer[$array_index];

        //Store it
        $this->object_pointers[$this->object_level] =& $this->pointer;
    }

    /**
     * End Object
     * Moves the pointer back to the array of the current array depth. When the
     * object that just ended sits directly in the root array, the callback is
     * invoked with the root array as built so far, then the first element of
     * that array is dropped.
     *
     * @return void
     */
    public function end_object()
    {
        $this->pointer =& $this->array_pointers[$this->array_level];

        //We've reached a full object on the root array, callback
        if ($this->array_level === 1 && $this->object_level === 1) {
            call_user_func($this->callback, $this->stack[0]);
            array_shift($this->stack[0]); //release this item from memory
        }

        $this->object_level--;
    }

    /**
     * Start array
     * Creates a new array under the pending key (or at the next numeric index
     * when there is no pending key) and makes it the current pointer.
     *
     * @return void
     */
    public function start_array()
    {
        $this->array_level++;

        //If we have a key it's our index ("0" and "" are valid keys, hence the strict check)
        if (null !== $this->_key) {
            $index = $this->_key;
            $this->_key = null;
        } else {
            $index = isset($this->pointer) ? count($this->pointer) : 0;
        }

        //This is our array, point on it
        $this->pointer[$index] = array();
        $this->pointer =& $this->pointer[$index];

        //Store the pointer
        $this->array_pointers[$this->array_level] =& $this->pointer;
    }

    /**
     * End array
     * Moves the pointer back to the object tracked for the current object depth.
     *
     * @todo, according to both levels, point to the nearest one array or object
     * @return void
     */
    public function end_array()
    {
        //Point on the last known object
        $this->pointer =& $this->object_pointers[$this->object_level];
        $this->array_level--;
    }

    /**
     * Called when a key is found
     * @param string $key
     * @return void
     */
    public function key($key)
    {
        $this->_key = $key;
    }

    /**
     * Called when a value is found
     * Stores the value under the pending key, or appends it when no key is pending.
     *
     * @param mixed $value may be a string, integer, boolean, null
     * @return null
     */
    public function value($value)
    {
        if (null !== $this->_key) {
            $this->pointer[$this->_key] = $value;
            // The key is consumed: a following keyless value must be appended, not overwrite it
            $this->_key = null;
            return;
        }

        $this->pointer[] = $value;
    }

    /**
     * Whitespace notifications are ignored.
     *
     * @return void
     */
    public function whitespace($whitespace)
    {
    }
}
<?php | |
// Example usage: stream a JSON file through ObjectListener and dump every item
// handed to the callback.
$testfile = __DIR__.'/example.json'; //https://gist.github.com/soyuka/a1d83ff9ff1a6c5cc269

$listener = new ObjectListener(function($obj) {
    var_dump($obj);
});

$stream = fopen($testfile, 'r');

// fopen() returns false on failure; stop here instead of passing false to the parser
if (false === $stream) {
    throw new RuntimeException('Unable to open ' . $testfile);
}

try {
    $parser = new JsonStreamingParser_Parser($stream, $listener);
    $parser->parse();
} catch (Exception $e) {
    fclose($stream);
    throw $e;
}

// Close the stream on the success path as well, so the handle is not leaked
fclose($stream);
Hi, thanks for the gist! Just one hint: the method names of the JsonStreamingParser have changed to camelCase and no longer use underscores.
@Siedlerchr Thanks for the hint!
Please share the new code.
Forked and updated here
For anyone wondering where `JsonStreamingParser\Listener` comes from, you can find the package in Salsify's JsonStreamingParser repo. However, there is no interface `JsonStreamingParser\Listener`, which is used in the code above. Instead, I ended up using `JsonStreamingParser\Listener\GeoJsonListener` in place of `ObjectListener`. My code ended up looking like this:
// Stream a JSON file through GeoJsonListener and dump every item handed to the callback.
$testfile = __DIR__.'/sample.json';

$listener = new \JsonStreamingParser\Listener\GeoJsonListener(function($obj) {
    var_dump($obj);
});

$stream = fopen($testfile, 'r');

// fopen() returns false on failure; stop here instead of passing false to the parser
if (false === $stream) {
    throw new RuntimeException('Unable to open ' . $testfile);
}

try {
    $parser = new \JsonStreamingParser\Parser($stream, $listener);
    $parser->parse();
} catch (Exception $e) {
    fclose($stream);
    throw $e;
}

// Close the stream on the success path as well, so the handle is not leaked
fclose($stream);
You can generate sample JSON from this website. It generates properly formatted JSON. I don't know if GeoJsonListener resolves edge cases that the code above claims to handle, but in my personal use cases, GeoJsonListener works fine.
Actually, @MAXakaWIZARD hosted something that looks like an improved version of my code at https://github.com/MAXakaWIZARD/JsonCollectionParser; everything else can be found at https://github.com/salsify/jsonstreamingparser.
This project, https://github.com/halaxa/json-machine, simplifies parsing big JSON so much that the only thing you need to parse a big JSON stream or file is a simple `foreach`. It doesn't get easier than that :)
Hello, where can I download JsonStreamingParser_Parser from? Is it just the new \JsonStreamingParser\Parser?