Skip to content

Instantly share code, notes, and snippets.

@jan-krueger
Created December 7, 2016 17:08
Show Gist options
  • Save jan-krueger/da0b8d4746d9f17a03ba2a8670262e2b to your computer and use it in GitHub Desktop.
Save jan-krueger/da0b8d4746d9f17a03ba2a8670262e2b to your computer and use it in GitHub Desktop.
My first attempt at creating a simple parser. It does not work properly, but I now know how to start if I ever do something comparable. Just archiving.
<?php
require_once 'ParserState.enum.php';
/**
 * A small hand-written state machine that parses a flat, JSON-like object
 * literal such as {"name": "value", "count": 3, "ok": true, "none": null}
 * into a PHP array.
 *
 * Usage: construct with the text, call parse(), then read getResult().
 *
 * Errors are reported with var_dump() and abort parsing; parse() also prints
 * a debug trace (state name + intermediate result) on every loop iteration.
 *
 * Known limitations of the state machine:
 *  - Nested objects and arrays are not handled end-to-end: $lastIndex is never
 *    assigned (so null is pushed onto $path) and $path is never popped.
 *  - Strings end at the first double quote; escape sequences are not handled.
 *  - Numbers must start with a digit (no sign, no exponent).
 *  - Whitespace inside a quoted key name is dropped.
 */
class Parser
{
    /**
     * The trimmed input text.
     *
     * @var string
     */
    private $object;

    /**
     * The input split into one-character strings by str_split().
     *
     * @var array
     */
    private $chars = [];

    /**
     * Index into $chars of the character currently being examined.
     *
     * @var int
     */
    private $charIndex = 0;

    /**
     * The current ParserState constant; decides what is expected next.
     *
     * @var int|float
     */
    private $state = ParserState::OBJECT_EXPECT_START;

    /**
     * The state that was active before the last swapState() call.
     *
     * @var int|float|null
     */
    private $prevState = null;

    /**
     * Accumulator for the key or value being read. null means "empty";
     * setBufferType() re-initialises it to '' or [].
     *
     * @var mixed
     */
    private $buffer = '';

    /**
     * Keys leading from the root of $result to the container being filled.
     *
     * @var array
     */
    private $path = [];

    /**
     * The key most recently read by appendIndex(); values are stored under it.
     *
     * @var string|null
     */
    private $valueIndex;

    /**
     * Pushed onto $path when an array/object value starts.
     * NOTE(review): nothing in this class ever assigns it, so it is always null.
     *
     * @var mixed
     */
    private $lastIndex;

    /**
     * The parsed structure.
     *
     * @var array
     */
    private $result = [];

    /**
     * Number of consecutive loop iterations without consuming a character.
     * Reset by consumeChar(); parse() aborts when it reaches 10.
     *
     * @var int
     */
    private $overflow = 0;

    /**
     * Index of the last character that was passed to countBrackets(), so a
     * character that is looked at by several states is only counted once.
     *
     * @var int
     */
    private $lastCountedIndex = -1;

    /**
     * Number of currently open '{' (outside of strings).
     *
     * @var int
     */
    private $curlyBrackets = 0;

    /**
     * Number of currently open '[' (outside of strings).
     *
     * @var int
     */
    private $squareBrackets = 0;

    /** Buffer kind for setBufferType(): start with an empty string. */
    const BUFFER_STRING = 1001;

    /** Buffer kind for setBufferType(): start with an empty array. */
    const BUFFER_ARRAY = 1000;

    /**
     * @param string $object The object notation to parse.
     *
     * @throws InvalidArgumentException When $object is not a string.
     */
    public function __construct($object)
    {
        if (!is_string($object)) {
            throw new InvalidArgumentException(
                'Parser expects the object notation as a string, ' . gettype($object) . ' given.'
            );
        }

        $this->object = trim($object);
        $this->chars = str_split($this->object);
        $this->charIndex = 0;
    }

    /**
     * Returns whatever parse() has built so far.
     *
     * @return array
     */
    public function getResult()
    {
        return $this->result;
    }

    /**
     * Runs the state machine over the input and fills $result.
     *
     * Each loop iteration looks at the current character and lets the active
     * state decide whether to consume it, switch state, or both. On a syntax
     * error a message is var_dump()ed and the method returns early, leaving
     * the partial result in place.
     *
     * @return void
     */
    public function parse()
    {
        // Start from a clean slate so the same instance can be parsed again.
        $this->resetState();
        $total = count($this->chars);

        for ($this->charIndex = 0; $this->charIndex < $total;) {
            $this->overflow++;
            $c = $this->getCurrentChar();

            // Guard against states that keep looking at the same character
            // without ever consuming it.
            if ($this->overflow >= 10) {
                var_dump("OVERFLOW ERROR");
                return;
            }

            // Brackets inside key names / string values are plain text. Every
            // other character is counted exactly once, even when several
            // states inspect it before it is consumed.
            $insideString = $this->state === ParserState::VALUE_PARSE_STRING
                || $this->state === ParserState::VARIABLE_NAME_EXPECT_VALUE;
            if (!$insideString && $this->lastCountedIndex !== $this->charIndex) {
                $this->countBrackets($c);
                $this->lastCountedIndex = $this->charIndex;
            }

            // Debug trace: current state name and intermediate result.
            echo ParserState::toString($this->state);
            var_dump($this->result);

            switch ($this->state) {
                // ############
                // #  OBJECT  #
                // ############
                case ParserState::OBJECT_EXPECT_START:
                    // An object has to open with '{'.
                    if (!($c === '{')) {
                        // @TODO: Error handling
                        var_dump('Expected: {');
                        return;
                    }
                    $this->consumeChar();

                    if ($this->predictNextChar() === '}') {
                        // Empty object: there is no key/value to store, so go
                        // straight to handling the closing bracket.
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else {
                        // Otherwise the first key name has to follow.
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_START);
                    }
                    break;

                // ############
                // # VARIABLE #
                // ############
                case ParserState::VARIABLE_NAME_EXPECT_START:
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    // The opening quote starts the key name.
                    if ($c === '"') {
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_VALUE);
                        $this->consumeChar();
                    } else {
                        var_dump('Unexpected start of variable name.');
                        return;
                    }
                    break;

                case ParserState::VARIABLE_NAME_EXPECT_VALUE:
                    $this->setBufferType(Parser::BUFFER_STRING);
                    // NOTE(review): this drops whitespace that is part of the
                    // key name ("my key" becomes "mykey").
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    if ($c === '"') {
                        // Closing quote: the buffer holds the complete key.
                        $this->appendIndex();
                        $this->clearBuffer();
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_END);
                        $this->consumeChar();
                    } else {
                        $this->buffer .= $c;
                        $this->consumeChar();
                    }
                    break;

                case ParserState::VARIABLE_NAME_EXPECT_END:
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    // A key must be followed by ':'.
                    if ($c === ':') {
                        $this->consumeChar();
                        $this->swapState(ParserState::DETERMINE_VALUE_TYPE);
                    } else {
                        var_dump('Expected: : -> ' . $c);
                        return;
                    }
                    break;

                // #############
                // # DETERMINE #
                // #############
                case ParserState::DETERMINE_VALUE_TYPE:
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    // The first character of the value selects the value parser.
                    if ($c === '"') {
                        $this->swapState(ParserState::VALUE_PARSE_STRING);
                        $this->consumeChar();
                    } else if (ctype_digit($c)) {
                        // The digit itself is consumed by the numeric state.
                        $this->swapState(ParserState::VALUE_PARSE_NUMERIC);
                    } else if (strtolower($c) === 't' || strtolower($c) === 'f') {
                        $this->swapState(ParserState::VALUE_PARSE_BOOLEAN);
                    } else if (strtolower($c) === 'n') {
                        $this->swapState(ParserState::VALUE_PARSE_NULL);
                    } else if ($c === '{') {
                        $this->swapState(ParserState::VALUE_PARSE_OBJECT);
                    } else if ($c === '[') {
                        $this->swapState(ParserState::VALUE_PARSE_ARRAY);
                        $this->consumeChar();
                    } else {
                        var_dump('Expected: string, numeric, boolean, object or array value');
                        // Stop here instead of re-reporting the same character
                        // until the overflow guard kicks in.
                        return;
                    }
                    break;

                case ParserState::DETERMINE_NEXT_STEP:
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    if ($c === ',') {
                        // Another key/value pair follows.
                        $this->consumeChar();
                        $this->swapState(ParserState::VARIABLE_NAME_EXPECT_START);
                    } else if ($c === '}') {
                        var_dump($this->curlyBrackets);
                        // All objects closed: parsing is complete.
                        if ($this->curlyBrackets === 0) {
                            return;
                        }
                        // NOTE(review): closing a nested object is not
                        // implemented; the character is never consumed, so the
                        // overflow guard ends the run.
                    } else if ($c === ']') {
                        if ($this->squareBrackets === 0) {
                            $this->appendValue();
                            $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                        }
                        $this->consumeChar();
                    } else if ($this->prevState === ParserState::VALUE_PARSE_ARRAY) {
                        // NOTE(review): array elements are not implemented;
                        // nothing is consumed here, so the overflow guard ends
                        // the run.
                    } else {
                        var_dump('Unexpected end. ' . $c);
                        // Stop here instead of spinning until the overflow guard.
                        return;
                    }
                    break;

                // #################
                // # VALUE PARSING #
                // #################
                case ParserState::VALUE_PARSE_STRING:
                    $this->setBufferType(Parser::BUFFER_STRING);
                    if ($c === '"') {
                        // Closing quote: store the collected string.
                        $this->appendValue();
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                        $this->consumeChar();
                    } else {
                        $this->buffer .= $c;
                        $this->consumeChar();
                    }
                    break;

                case ParserState::VALUE_PARSE_NUMERIC:
                    // Digits are collected as a string and converted to int or
                    // float once the terminator is seen.
                    $this->setBufferType(Parser::BUFFER_STRING);
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    if ($c === ',' || $c === ']' || $c === '}') {
                        if (intval($this->buffer) == $this->buffer) {
                            $this->buffer = intval($this->buffer);
                        } else if (floatval($this->buffer) == $this->buffer) {
                            $this->buffer = floatval($this->buffer);
                        } else {
                            var_dump('Unknown numeric type.');
                            return;
                        }
                        $this->appendValue();
                        $this->clearBuffer();
                        // The terminator is left for DETERMINE_NEXT_STEP.
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else if (ctype_digit($c) || $c === '.') {
                        $this->buffer .= $c;
                        $this->consumeChar();
                    } else {
                        var_dump('Expected digit.');
                        return;
                    }
                    break;

                case ParserState::VALUE_PARSE_BOOLEAN:
                    $this->setBufferType(Parser::BUFFER_STRING);
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    // Read as many characters as the literal is long:
                    // 4 for "true", 5 for "false".
                    if (strtolower($c) === 't') {
                        for ($x = 0; $x < 4; $x++) {
                            $this->buffer .= $this->getCurrentChar();
                            $this->consumeChar();
                        }
                    } else if (strtolower($c) === 'f') {
                        for ($x = 0; $x < 5; $x++) {
                            $this->buffer .= $this->getCurrentChar();
                            $this->consumeChar();
                        }
                    }
                    // NOTE(review): anything that is not a recognised "true"
                    // spelling silently becomes false.
                    $this->buffer = filter_var($this->buffer, FILTER_VALIDATE_BOOLEAN);
                    $this->appendValue();
                    $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    break;

                case ParserState::VALUE_PARSE_NULL:
                    $this->setBufferType(Parser::BUFFER_STRING);
                    if ($this->skipWhitespace()) {
                        break;
                    }
                    // Read the four characters of "null".
                    if (strtolower($c) === 'n') {
                        for ($x = 0; $x < 4; $x++) {
                            $this->buffer .= $this->getCurrentChar();
                            $this->consumeChar();
                        }
                    }
                    if (strtolower($this->buffer) === 'null') {
                        $this->buffer = null;
                        $this->appendValue();
                        $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    } else {
                        var_dump('Unexpected value. Expected null');
                        return;
                    }
                    break;

                case ParserState::VALUE_PARSE_ARRAY:
                    // NOTE(review): $lastIndex is never assigned, so null is
                    // pushed here; array parsing is unfinished.
                    $this->path[] = $this->lastIndex;
                    $this->swapState(ParserState::DETERMINE_NEXT_STEP);
                    break;

                case ParserState::VALUE_PARSE_OBJECT:
                    $this->setBufferType(Parser::BUFFER_ARRAY);
                    // NOTE(review): $lastIndex is never assigned, so null is
                    // pushed here; nested-object parsing is unfinished.
                    $this->path[] = $this->lastIndex;
                    $this->appendValue();
                    $this->swapState(ParserState::OBJECT_EXPECT_START);
                    break;

                default:
                    var_dump('Default: ' . ParserState::toString($this->state) . ' -> ' . $c);
                    return;
            }
        }
    }

    /**
     * Puts every piece of per-run state back to its initial value.
     *
     * @return void
     */
    private function resetState()
    {
        $this->state = ParserState::OBJECT_EXPECT_START;
        $this->prevState = null;
        $this->buffer = '';
        $this->path = [];
        $this->valueIndex = null;
        $this->lastIndex = null;
        $this->result = [];
        $this->overflow = 0;
        $this->lastCountedIndex = -1;
        $this->curlyBrackets = 0;
        $this->squareBrackets = 0;
    }

    /**
     * Returns the character at the current index, or null when the index has
     * run past the end of the input (possible inside the fixed-length
     * look-ahead loops for true/false/null).
     *
     * @return string|null
     */
    private function getCurrentChar()
    {
        return isset($this->chars[$this->charIndex]) ? $this->chars[$this->charIndex] : null;
    }

    /**
     * Looks ahead, without consuming anything, for the next character that is
     * not whitespace, starting at the current index.
     *
     * @return string|null The character, or null when only whitespace is left.
     */
    private function predictNextChar()
    {
        $total = count($this->chars);
        for ($i = $this->charIndex; $i < $total; $i++) {
            if (!$this->skipWhitespace($this->chars[$i], false)) {
                return $this->chars[$i];
            }
        }
        return null;
    }

    /**
     * Advances to the next character and resets the overflow guard.
     *
     * @return void
     */
    private function consumeChar()
    {
        $this->overflow = 0;
        $this->charIndex++;
    }

    /**
     * Takes the buffer as the current key name and registers that key, with a
     * null placeholder value, in the container currently being filled.
     *
     * @return void
     */
    private function appendIndex()
    {
        $this->valueIndex = $this->buffer;
        $this->buffer = null;

        if (is_string($this->valueIndex)) {
            $this->getCurrentArrayPart()[$this->valueIndex] = null;
            return;
        }

        // @TODO: Error handling
        var_dump('Expected: valid appendable index => ' . $this->valueIndex);
    }

    /**
     * Stores the buffer under the current key in the container currently
     * being filled, then empties the buffer.
     *
     * @return void
     */
    private function appendValue()
    {
        if (is_string($this->valueIndex) || $this->valueIndex === null) {
            $this->getCurrentArrayPart()[$this->valueIndex] = $this->buffer;
            $this->buffer = null;
            return;
        }

        // @TODO: Error handling
        var_dump('Expected: valid appendable value => ' . $this->valueIndex . ' -> ' . $this->buffer);
    }

    /**
     * Returns, by reference, the container inside $result that $path points
     * at. With an empty path that is $result itself.
     *
     * @return array|null
     */
    private function &getCurrentArrayPart()
    {
        $node = &$this->result;
        foreach ($this->path as $key) {
            // Walking by reference creates missing levels on the fly.
            $node = &$node[$key];
        }
        return $node;
    }

    /**
     * Switches to a new state, remembering the previous one and emptying the
     * buffer.
     *
     * @param int|float $state A ParserState constant.
     *
     * @return void
     */
    private function swapState($state)
    {
        $this->clearBuffer();
        $this->prevState = $this->state;
        $this->state = $state;
    }

    /**
     * Tells whether a character is whitespace (space, tab, line feed or
     * carriage return) and optionally consumes the current character.
     *
     * @param string|null $c       Character to test; defaults to the current one.
     * @param bool        $consume Whether to advance past the whitespace.
     *
     * @return bool True when the character is whitespace.
     */
    private function skipWhitespace($c = null, $consume = true)
    {
        if ($c === null) {
            $c = $this->getCurrentChar();
        }

        // Double quotes are required: in single quotes "\t" and "\n" would be
        // a literal backslash followed by a letter.
        $isWhitespace = $c === ' ' || $c === "\t" || $c === "\n" || $c === "\r";
        if (!$isWhitespace) {
            return false;
        }

        if ($consume) {
            $this->consumeChar();
        }
        return true;
    }

    /**
     * Marks the buffer as empty.
     *
     * @return void
     */
    private function clearBuffer()
    {
        $this->buffer = null;
    }

    /**
     * Initialises an empty (null) buffer to the given kind; a buffer that
     * already holds something is left untouched.
     *
     * @param int $type Parser::BUFFER_STRING or Parser::BUFFER_ARRAY.
     *
     * @return void
     */
    private function setBufferType($type)
    {
        if ($this->buffer !== null) {
            return;
        }

        switch ($type) {
            case Parser::BUFFER_STRING:
                $this->buffer = '';
                break;
            case Parser::BUFFER_ARRAY:
                $this->buffer = [];
                break;
            default:
                var_dump('Invalid buffer type : ' . $type);
                break;
        }
    }

    /**
     * Updates the open-bracket counters for one character.
     *
     * @param string|null $c The character to inspect.
     *
     * @return void
     */
    private function countBrackets($c)
    {
        switch ($c) {
            case '{':
                $this->curlyBrackets++;
                break;
            case '}':
                $this->curlyBrackets--;
                break;
            case '[':
                $this->squareBrackets++;
                break;
            case ']':
                $this->squareBrackets--;
                break;
        }
    }
}
<?php
/**
 * Numeric identifiers for the states of the Parser state machine.
 *
 * The integer part groups related states (1.x key name, 2.x value parsing,
 * 3.x decisions); the values themselves are only ever compared for equality.
 */
class ParserState
{
    /**
     * A -{- is expected at the start of an object.
     */
    const OBJECT_EXPECT_START = 0;

    /**
     * A -"- is expected before the name of a variable starts.
     */
    const VARIABLE_NAME_EXPECT_START = 1.0;

    /**
     * The characters of the name are being collected.
     */
    const VARIABLE_NAME_EXPECT_VALUE = 1.1;

    /**
     * The name has ended; a -:- is expected.
     */
    const VARIABLE_NAME_EXPECT_END = 1.2;

    /**
     * Parse the name of a variable.
     * Not referenced by the Parser class in this file.
     */
    const VARIABLE_NAME_PARSE = 1.3;

    /**
     * One state per kind of value being parsed:
     * string, integer/float, boolean, object, array, null.
     */
    const VALUE_PARSE_STRING = 2.1;
    const VALUE_PARSE_NUMERIC = 2.2;
    const VALUE_PARSE_BOOLEAN = 2.3;
    const VALUE_PARSE_OBJECT = 2.4;
    const VALUE_PARSE_ARRAY = 2.5;
    const VALUE_PARSE_NULL = 2.6;

    /**
     * A value is about to start; its first character decides which of the
     * VALUE_PARSE_* states comes next.
     */
    const DETERMINE_VALUE_TYPE = 3.0;

    /**
     * A value has just been parsed; the next character decides what follows:
     *  - -,- another variable name
     *  - -}- end of an object (@TODO)
     *  - -]- end of an array (@TODO)
     */
    const DETERMINE_NEXT_STEP = 3.1;

    /**
     * Returns the name of the constant whose value equals $x.
     *
     * Only numeric input can match. The comparison is done on floats, so 1
     * and 1.0 both resolve to VARIABLE_NAME_EXPECT_START, while null, booleans
     * and non-numeric strings never match (a loose == would map null and
     * false to OBJECT_EXPECT_START).
     *
     * @param mixed $x The state value to look up.
     *
     * @return string|null The constant name, or null when nothing matches.
     */
    public static function toString($x)
    {
        if (!is_numeric($x)) {
            return null;
        }

        $needle = (float) $x;
        $reflection = new ReflectionClass(__CLASS__);

        foreach ($reflection->getConstants() as $name => $value) {
            if ((float) $value === $needle) {
                return $name;
            }
        }

        return null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment