Created
December 7, 2016 17:08
-
-
Save jan-krueger/da0b8d4746d9f17a03ba2a8670262e2b to your computer and use it in GitHub Desktop.
My first attempt at writing a simple parser. It does not work properly, but I now know how to start if I ever do something comparable. Just archiving.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once 'ParserState.enum.php'; | |
/**
 * A small hand-written state machine that turns a JSON-like object literal
 * into nested PHP arrays.
 *
 * Supported input: a root object (`{ ... }`) whose keys are double-quoted
 * strings and whose values are double-quoted strings, numbers (digits, an
 * optional leading `-`, an optional fraction), `true`/`false`, `null`,
 * nested objects and arrays.
 *
 * Known limitations: strings end at the first `"` (no backslash escapes),
 * numbers have no exponent notation, and anything after the closing brace
 * of the root object is ignored.
 *
 * Errors are reported with var_dump() and stop parsing; whatever was parsed
 * up to that point stays available through getResult().
 */
class Parser {
    /**
     * The JavaScript object (trimmed source text).
     *
     * @var string
     */
    private $object;

    /**
     * All chars of the source text, one (byte-sized) string per entry.
     *
     * @var array
     */
    private $chars = [];

    /**
     * The current index into $chars.
     *
     * @var int
     */
    private $charIndex = 0;

    /**
     * The state to determine what we have to expect next
     * (one of the ParserState constants).
     *
     * @var int|float
     */
    private $state = ParserState::OBJECT_EXPECT_START;

    /**
     * The state that was active before the last swapState() call.
     * Recorded by swapState(); the parser itself does not consult it.
     *
     * @var int|float|null
     */
    private $prevState = null;

    /**
     * Accumulates the key or value that is currently being read.
     *
     * @var mixed
     */
    private $buffer = '';

    /**
     * Keys leading from the root of $result to the container being filled.
     * Empty while the root object itself is being filled.
     *
     * @var array
     */
    private $path = [];

    /**
     * One flag per entry of $path: true when the container at that depth is
     * an array (list), false when it is an object.
     *
     * @var bool[]
     */
    private $pathIsList = [];

    /**
     * The key the next value of the current object will be stored under.
     *
     * @var string|null
     */
    private $valueIndex;

    /**
     * The parsed structure.
     *
     * @var array
     */
    private $result = [];

    /**
     * Number of loop iterations since the last consumed char; used to detect
     * a state machine that no longer makes progress.
     *
     * @var int
     */
    private $overflow = 0;

    const BUFFER_STRING = 1001;
    const BUFFER_ARRAY = 1000;

    /**
     * @param string $object The object literal to parse.
     *
     * @throws InvalidArgumentException When $object is not a string.
     */
    public function __construct($object)
    {
        if (!is_string($object)) {
            throw new InvalidArgumentException('Parser expects the object literal as a string.');
        }

        $this->object = trim($object);
        // str_split('') returns [''] before PHP 8.2, which would look like a
        // one-character input; treat empty text as "no chars" on every version.
        $this->chars = $this->object === '' ? [] : str_split($this->object);
        $this->charIndex = 0;
    }

    /**
     * Returns what has been parsed so far.
     *
     * @return array
     */
    public function getResult() {
        return $this->result;
    }

    /**
     * Runs the state machine over the whole input and fills $result.
     *
     * Every call starts from a clean state, so parse() may be called again.
     * Parsing stops at the closing brace of the root object or at the first
     * error (reported through var_dump()).
     *
     * @return void
     */
    public function parse()
    {
        $this->reset();
        $length = count($this->chars);

        while ($this->charIndex < $length) {
            // Safety net: several iterations without consuming a char mean
            // the state machine is stuck.
            $this->overflow++;
            if ($this->overflow >= 10) {
                var_dump("OVERFLOW ERROR");
                return;
            }

            $c = $this->getCurrentChar();

            switch ($this->state) {
                // ############
                // #  OBJECT  #
                // ############
                case ParserState::OBJECT_EXPECT_START:
                    // An object (root or nested) has to start with a '{'.
                    if ($c !== '{') {
                        var_dump('Expected: {');
                        return;
                    }
                    $this->consumeChar();

                    if ($this->predictNextChar() === '}') {
                        // Empty object: its (empty) container already exists,
                        // so only the closing brace is left to handle.
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else {
                        // Now, we are waiting for the name...
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_START);
                    }
                    break;

                // ############
                // # VARIABLE #
                // ############
                case ParserState::VARIABLE_NAME_EXPECT_START:
                    if ($this->skipWhitespace()) {
                        break;
                    }

                    // The opening -"- starts the name of the identifier.
                    if ($c === '"') {
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_VALUE);
                        $this->consumeChar();
                    } else {
                        var_dump('Unexpected start of variable name.');
                        return;
                    }
                    break;

                case ParserState::VARIABLE_NAME_EXPECT_VALUE:
                    $this->setBufferType(Parser::BUFFER_STRING);

                    // Whitespace is part of the name here and must be kept.
                    if ($c === '"') {
                        // The closing -"- ends the name: register the key.
                        $this->appendIndex();
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_END);
                        $this->consumeChar();
                    } else {
                        $this->buffer .= $c;
                        $this->consumeChar();
                    }
                    break;

                case ParserState::VARIABLE_NAME_EXPECT_END:
                    if ($this->skipWhitespace()) {
                        break;
                    }

                    if ($c === ':') {
                        $this->consumeChar();
                        $this->swapState(ParserState::DETERMINE_VALUE_TYPE);
                    } else {
                        var_dump('Expected: : -> ' . $c);
                        return;
                    }
                    break;

                // #############
                // # DETERMINE #
                // #############
                case ParserState::DETERMINE_VALUE_TYPE:
                    if ($this->skipWhitespace()) {
                        break;
                    }

                    if ($c === '"') {
                        // String: the opening quote is not part of the value.
                        $this->swapState(ParserState::VALUE_PARSE_STRING);
                        $this->consumeChar();
                    } else if (ctype_digit($c) || $c === '-') {
                        // Number: the first char belongs to the literal, so
                        // it is left for the numeric state to consume.
                        $this->swapState(ParserState::VALUE_PARSE_NUMERIC);
                    } else if (strtolower($c) === 't' || strtolower($c) === 'f') {
                        $this->swapState(ParserState::VALUE_PARSE_BOOLEAN);
                    } else if (strtolower($c) === 'n') {
                        $this->swapState(ParserState::VALUE_PARSE_NULL);
                    } else if ($c === '{') {
                        // The '{' is consumed later by OBJECT_EXPECT_START.
                        $this->swapState(ParserState::VALUE_PARSE_OBJECT);
                    } else if ($c === '[') {
                        $this->swapState(ParserState::VALUE_PARSE_ARRAY);
                        $this->consumeChar();
                    } else {
                        var_dump('Expected: string, numeric, boolean, object or array value');
                        return;
                    }
                    break;

                case ParserState::DETERMINE_NEXT_STEP:
                    if ($this->skipWhitespace()) {
                        break;
                    }

                    if ($c === ',') {
                        // Inside an array the next thing is a value, inside
                        // an object it is the next variable name.
                        $this->consumeChar();
                        $this->swapState(
                            $this->isInsideList()
                                ? ParserState::DETERMINE_VALUE_TYPE
                                : ParserState::VARIABLE_NAME_EXPECT_START
                        );
                    } else if ($c === '}') {
                        if ($this->isInsideList()) {
                            var_dump('Unexpected end. ' . $c);
                            return;
                        }
                        // The root object has been closed: we are done.
                        if (count($this->path) === 0) {
                            return;
                        }
                        $this->closeContainer();
                        $this->consumeChar();
                    } else if ($c === ']') {
                        if (!$this->isInsideList()) {
                            var_dump('Unexpected end. ' . $c);
                            return;
                        }
                        $this->closeContainer();
                        $this->consumeChar();
                    } else {
                        var_dump('Unexpected end. ' . $c);
                        return;
                    }
                    break;

                // ###############
                // #VALUE PARSING#
                // ###############
                case ParserState::VALUE_PARSE_STRING:
                    $this->setBufferType(Parser::BUFFER_STRING);

                    if ($c === '"') {
                        // Closing quote: store the collected string.
                        $this->appendValue();
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                        $this->consumeChar();
                    } else {
                        $this->buffer .= $c;
                        $this->consumeChar();
                    }
                    break;

                case ParserState::VALUE_PARSE_NUMERIC:
                    // Collected as a string and converted to int/float once
                    // the literal is complete.
                    $this->setBufferType(Parser::BUFFER_STRING);

                    $isSign = $c === '-' && $this->buffer === '';
                    if (ctype_digit($c) || $c === '.' || $isSign) {
                        $this->buffer .= $c;
                        $this->consumeChar();
                        break;
                    }

                    // A separator, a closing bracket or whitespace ends the
                    // literal; that char is left for DETERMINE_NEXT_STEP.
                    if ($c === ',' || $c === ']' || $c === '}' || $this->isWhitespace($c)) {
                        $number = $this->toNumber($this->buffer);
                        if ($number === null) {
                            var_dump('Unknown numeric type.');
                            return;
                        }
                        $this->buffer = $number;
                        $this->appendValue();
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                        break;
                    }

                    var_dump('Expected digit.');
                    return;

                case ParserState::VALUE_PARSE_BOOLEAN:
                    // "true" has four chars, "false" five.
                    $word = $this->readKeyword(strtolower($c) === 't' ? 4 : 5);
                    if ($word === 'true') {
                        $this->buffer = true;
                    } else if ($word === 'false') {
                        $this->buffer = false;
                    } else {
                        var_dump('Unexpected value. Expected true or false');
                        return;
                    }
                    $this->appendValue();
                    $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    break;

                case ParserState::VALUE_PARSE_NULL:
                    if ($this->readKeyword(4) === 'null') {
                        $this->buffer = null;
                        $this->appendValue();
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else {
                        var_dump('Unexpected value. Expected null');
                        return;
                    }
                    break;

                case ParserState::VALUE_PARSE_ARRAY:
                    // The '[' has already been consumed by DETERMINE_VALUE_TYPE.
                    $this->openContainer(true);
                    if ($this->predictNextChar() === ']') {
                        // Empty array: only the closing bracket is left.
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else {
                        $this->swapState(ParserState::DETERMINE_VALUE_TYPE);
                    }
                    break;

                case ParserState::VALUE_PARSE_OBJECT:
                    // The '{' is still the current char; OBJECT_EXPECT_START
                    // validates and consumes it.
                    $this->openContainer(false);
                    $this->swapState(ParserState::OBJECT_EXPECT_START);
                    break;

                default:
                    var_dump('Default: ' . ParserState::toString($this->state) . ' -> ' . $c);
                    return;
            }
        }

        // The loop is only left this way when the root object was never closed.
        var_dump('Unexpected end of input.');
    }

    /**
     * Puts the parser back into its initial state.
     */
    private function reset() {
        $this->charIndex = 0;
        $this->state = ParserState::OBJECT_EXPECT_START;
        $this->prevState = null;
        $this->buffer = null;
        $this->path = [];
        $this->pathIsList = [];
        $this->valueIndex = null;
        $this->result = [];
        $this->overflow = 0;
    }

    /**
     * Returns the char at the current charIndex.
     *
     * @return string
     */
    private function getCurrentChar() {
        return $this->chars[$this->charIndex];
    }

    /**
     * Looks ahead, starting at the current char, for the first char that is
     * not whitespace. Nothing is consumed.
     *
     * @return string|null The char, or null when only whitespace (or nothing) is left.
     */
    private function predictNextChar() {
        $length = count($this->chars);
        for ($i = $this->charIndex; $i < $length; $i++) {
            if (!$this->isWhitespace($this->chars[$i])) {
                return $this->chars[$i];
            }
        }
        return null;
    }

    /**
     * Go to the next char.
     */
    private function consumeChar() {
        // Progress was made, so the stuck-detection counter starts over.
        $this->overflow = 0;
        $this->charIndex++;
    }

    /**
     * Consumes up to $length chars and returns them lower-cased.
     * Stops early at the end of the input instead of reading past it.
     *
     * @param int $length Number of chars to read.
     *
     * @return string
     */
    private function readKeyword($length) {
        $word = '';
        $end = count($this->chars);
        for ($i = 0; $i < $length && $this->charIndex < $end; $i++) {
            $word .= $this->getCurrentChar();
            $this->consumeChar();
        }
        return strtolower($word);
    }

    /**
     * Converts a collected numeric literal.
     *
     * @param mixed $literal The buffered text.
     *
     * @return int|float|null An int when the value is integral, a float
     *                        otherwise, null when the text is not a number.
     */
    private function toNumber($literal) {
        if (!is_numeric($literal)) {
            return null;
        }
        $integer = intval($literal);
        // Both sides are numeric here, so this is a numeric comparison.
        if ($integer == $literal) {
            return $integer;
        }
        return floatval($literal);
    }

    /**
     * Takes the buffer as the key for the next value and registers that key
     * (with a null value) in the current object.
     */
    private function appendIndex() {
        $this->valueIndex = $this->buffer;
        $this->buffer = null;

        if (is_string($this->valueIndex)) {
            $part = &$this->getCurrentArrayPart();
            $part[$this->valueIndex] = null;
            return;
        }

        var_dump('Expected: valid appendable index => ' . $this->valueIndex);
    }

    /**
     * Stores the buffer in the current container and empties the buffer.
     * Inside an array the value is appended, inside an object it is stored
     * under the key set by appendIndex().
     *
     * @return int|string|null The key the value was stored under, null on failure.
     */
    private function appendValue()
    {
        $part = &$this->getCurrentArrayPart();

        if ($this->isInsideList()) {
            $part[] = $this->buffer;
            // Lists are only ever appended to, so the new key is the last one.
            $key = count($part) - 1;
        } else if (is_string($this->valueIndex)) {
            $part[$this->valueIndex] = $this->buffer;
            $key = $this->valueIndex;
        } else {
            var_dump(
                'Expected: valid appendable value => '
                . var_export($this->valueIndex, true)
                . ' -> '
                . var_export($this->buffer, true)
            );
            return null;
        }

        $this->buffer = null;
        return $key;
    }

    /**
     * Stores an empty container as the next value and makes it the container
     * that subsequent keys/values are written to.
     *
     * @param bool $isList true for an array, false for an object.
     */
    private function openContainer($isList) {
        $this->buffer = [];
        // The value has to be stored BEFORE the path grows, otherwise it
        // would end up inside itself.
        $key = $this->appendValue();
        $this->path[] = $key;
        $this->pathIsList[] = $isList;
        $this->valueIndex = null;
    }

    /**
     * Leaves the innermost nested container and returns to its parent.
     */
    private function closeContainer() {
        array_pop($this->path);
        array_pop($this->pathIsList);
        $this->valueIndex = null;
    }

    /**
     * Tells whether the container currently being filled is an array.
     * The root (empty path) is always an object.
     *
     * @return bool
     */
    private function isInsideList() {
        $depth = count($this->pathIsList);
        return $depth > 0 && $this->pathIsList[$depth - 1];
    }

    /**
     * Returns a reference to the container currently being filled: $result
     * itself for an empty path, otherwise the element $path leads to.
     *
     * @return array
     */
    private function &getCurrentArrayPart() {
        $part = &$this->result;
        foreach ($this->path as $key) {
            $part = &$part[$key];
        }
        return $part;
    }

    /**
     * Switches to another state; the buffer always starts out empty there.
     *
     * @param int|float $state One of the ParserState constants.
     */
    private function swapState($state) {
        $this->clearBuffer();
        $this->prevState = $this->state;
        $this->state = $state;
    }

    /**
     * Tells whether $c is a whitespace char (space, tab, LF or CR).
     *
     * @param string $c
     *
     * @return bool
     */
    private function isWhitespace($c) {
        // Double quotes are required: '\t' in single quotes would be the two
        // chars backslash and t.
        return $c === ' ' || $c === "\t" || $c === "\n" || $c === "\r";
    }

    /**
     * Checks a char for whitespace and optionally steps over it.
     *
     * @param string|null $c       The char to check; defaults to the current char.
     * @param bool        $consume Whether to advance past a whitespace char.
     *
     * @return bool true when the char is whitespace.
     */
    private function skipWhitespace($c = null, $consume = true) {
        if ($c === null) {
            $c = $this->getCurrentChar();
        }

        if (!$this->isWhitespace($c)) {
            return false;
        }

        if ($consume) {
            $this->consumeChar();
        }
        return true;
    }

    /**
     * Empties the buffer.
     */
    private function clearBuffer() {
        $this->buffer = null;
    }

    /**
     * Initialises an empty buffer for the given kind of value.
     * A buffer that already holds something is left untouched.
     *
     * @param int $type Parser::BUFFER_STRING or Parser::BUFFER_ARRAY.
     */
    private function setBufferType($type) {
        if ($this->buffer !== null) {
            return;
        }

        switch ($type) {
            case Parser::BUFFER_STRING:
                $this->buffer = '';
                break;
            case Parser::BUFFER_ARRAY:
                $this->buffer = [];
                break;
            default:
                var_dump('Invalid buffer type : ' . $type);
                break;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * The states of the Parser state machine.
 *
 * The values only serve as identifiers; the integer part groups related
 * states (1.x variable name, 2.x value parsing, 3.x decisions).
 */
class ParserState {
    /**
     * A -{- at the start of an object.
     */
    const OBJECT_EXPECT_START = 0;

    /**
     * A -"- before the name of a variable starts.
     */
    const VARIABLE_NAME_EXPECT_START = 1.0;

    /**
     * Here we will actually parse the name.
     */
    const VARIABLE_NAME_EXPECT_VALUE = 1.1;

    /**
     * If a variable identifier ends we expect a ':'
     */
    const VARIABLE_NAME_EXPECT_END = 1.2;

    /**
     * Parse the name of a variable.
     */
    const VARIABLE_NAME_PARSE = 1.3;

    /**
     * What are we going to parse next:
     * - string
     * - integer/float
     * - boolean
     * - object
     * - array
     * - null
     */
    const VALUE_PARSE_STRING = 2.1;
    const VALUE_PARSE_NUMERIC = 2.2;
    const VALUE_PARSE_BOOLEAN = 2.3;
    const VALUE_PARSE_OBJECT = 2.4;
    const VALUE_PARSE_ARRAY = 2.5;
    const VALUE_PARSE_NULL = 2.6;

    /**
     * Now we have to determine if we deal with:
     * - string
     * - integer/float
     * - boolean
     * - object
     * - array
     */
    const DETERMINE_VALUE_TYPE = 3.0;

    /**
     * After parsing a value, we have to figure
     * out what to do next. This depends on the next char:
     * - -,- we will look for the next variable name
     * - -}- we are closing an object
     * - -]- we are closing an array
     */
    const DETERMINE_NEXT_STEP = 3.1;

    /**
     * Returns the name of the constant that has the given value.
     *
     * @param mixed $x A state value (int, float or numeric string).
     *
     * @return string|null The constant name, or null when $x is not numeric
     *                     or no constant has that value.
     */
    public static function toString($x) {
        // The constant table never changes, so it is read only once.
        static $constants = null;

        // A loose comparison would map null, false and (before PHP 8) ''
        // to OBJECT_EXPECT_START (0); only real numbers can be states.
        if (!is_numeric($x)) {
            return null;
        }

        if ($constants === null) {
            $reflection = new ReflectionClass(__CLASS__);
            $constants = $reflection->getConstants();
        }

        // Comparing as floats keeps toString(1) and toString(1.0) equivalent.
        $needle = (float) $x;
        foreach ($constants as $name => $value) {
            if ((float) $value === $needle) {
                return $name;
            }
        }

        return null;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment