Skip to content

Instantly share code, notes, and snippets.

@domwebber
Last active April 28, 2022 11:20
Show Gist options
  • Save domwebber/2f91cb792facc0e218fda8155c2e0f44 to your computer and use it in GitHub Desktop.
Save domwebber/2f91cb792facc0e218fda8155c2e0f44 to your computer and use it in GitHub Desktop.
PHP Business Logic Lexer

This code uses doctrine/lexer.

Note: input sanitisation is not implemented here and should be added before accepting untrusted queries.

This is a proof-of-concept query language for use with Doctrine ORM. As-is, this project is not recommended for production use.

Example use:

use Salesportal\Query\Lexer;
use Salesportal\Query\Parser;
// Query string to parse: two comparisons joined by a logical "or".
$input = 'name = "Dom" or username = "domwebber"';
// The parser is constructed with the lexer it should tokenise with.
$parser = new Parser( new Lexer );
// parse() returns the root node of the parsed tree, or throws a ParseException.
$result = $parser->parse( $input );
var_dump( $result );

Doctrine ORM integration:

use Doctrine\Common\Collections\Criteria;

/**
 * List the column names that a query is allowed to filter on.
 *
 * @return string[]
 */
function getFilterable(): array {
    $filterable_columns = [];
    $filterable_columns[] = "name";
    $filterable_columns[] = "email";

    return $filterable_columns;
}

/**
 * List the column names that may never be used in a query filter.
 * These are subtracted from getFilterable() before an identifier is accepted
 * (e.g. names reserved for ordering or pagination).
 *
 * @return string[]
 */
function getRestrictedColumnNames(): array {
    //The original return statement lacked its terminating semicolon (parse error)
    return [
        "page",
        "order"
    ];
}

/**
 * Recursively translate a parsed query node into a criteria expression.
 *
 * Comparison and Expression nodes translate both of their children first and
 * then combine the results through the expression builder; Value nodes yield
 * their raw value and Identifier nodes yield the column name once it has been
 * checked against the filterable columns.
 *
 * @param mixed $expression_builder Builder exposing eq/neq/lt/lte/gt/gte/contains/andX/orX
 * @param Node $parsed Node to translate
 * @return mixed Builder expression, raw value or column name
 * @throws Exception When the operator, column or node type is not supported
 */
function criteriaFilter( $expression_builder, Node $parsed ){
    if( $parsed instanceof Comparison ){
        //Operands are translated before the operator is inspected
        $lhs = criteriaFilter( $expression_builder, $parsed->getLeft() );
        $rhs = criteriaFilter( $expression_builder, $parsed->getRight() );

        //Normalised comparison operator => expression builder method
        $builder_methods = [
            Lexer::EXPR_EQUALS              => "eq",
            Lexer::EXPR_NOT_EQUALS          => "neq",
            Lexer::EXPR_LESS_THAN           => "lt",
            Lexer::EXPR_LESS_THAN_EQUALS    => "lte",
            Lexer::EXPR_GREATER_THAN        => "gt",
            Lexer::EXPR_GREATER_THAN_EQUALS => "gte",
            Lexer::EXPR_CONTAINS            => "contains"
        ];

        $operator = strtolower( $parsed->getValue() );
        if( !isset( $builder_methods[ $operator ] ) ){
            throw new Exception( "Uncaught comparison operator" );
        }

        $method = $builder_methods[ $operator ];
        return $expression_builder->$method( $lhs, $rhs );
    }

    if( $parsed instanceof Expression ){
        $lhs = criteriaFilter( $expression_builder, $parsed->getLeft() );
        $rhs = criteriaFilter( $expression_builder, $parsed->getRight() );

        $operator = $parsed->getValue();
        if( $operator == Lexer::EXPR_AND ){
            return $expression_builder->andX( $lhs, $rhs );
        }
        if( $operator == Lexer::EXPR_OR ){
            return $expression_builder->orX( $lhs, $rhs );
        }
        //Any other logic operator falls through to the generic failure below

    }elseif( $parsed instanceof Value ){
        //Note: Sanitisation could be added here
        return $parsed->getValue();

    }elseif( $parsed instanceof Identifier ){
        //Restricted names (e.g. ones used for ordering or pagination) are never filterable
        $allowed_columns = array_diff( getFilterable(), getRestrictedColumnNames() );
        $column = $parsed->getValue();

        if( in_array( $column, $allowed_columns ) ){
            return $parsed->getValue();
        }

        throw new Exception( "Invalid column in query" );
    }

    //This point *shouldn't* be reached
    throw new Exception( "An unknown error occurred." );
}

// Builder used by criteriaFilter() to create the individual comparison/logic expressions.
$expression_builder = Criteria::expr();
// Criteria object that receives the translated filter.
$criteria = Criteria::create();

//Translate the parsed expression into a criteria filter
// NOTE(review): $parsed_expression is not defined in this snippet; presumably it is
// the tree returned by Parser::parse() - confirm against the caller.
$expression = criteriaFilter( $expression_builder, $parsed_expression );
// Only attach the filter when the translation produced a truthy result.
if( $expression ){
    $criteria->where( $expression );

}  
<?php
namespace Salesportal\Query\AST;
/**
 * AST node with a left and a right child.
 * Base class for nodes that combine two operands; the node's own value
 * (inherited from Node) is used by the parser to hold the operator.
 */
class BinaryTree extends Node {
    /**
     * Left-hand child, null until assigned.
     *
     * @var mixed
     */
    protected $left;

    /**
     * Right-hand child, null until assigned.
     *
     * @var mixed
     */
    protected $right;

    /**
     * Retrieve the left-hand child.
     *
     * @return mixed
     */
    public function getLeft(){
        return $this->left;
    }

    /**
     * Assign the left-hand child.
     *
     * @param mixed $left
     * @return $this
     */
    public function setLeft( $left ){
        $this->left = $left;

        return $this;
    }

    /**
     * Retrieve the right-hand child.
     *
     * @return mixed
     */
    public function getRight(){
        return $this->right;
    }

    /**
     * Assign the right-hand child.
     *
     * @param mixed $right
     * @return $this
     */
    public function setRight( $right ){
        $this->right = $right;

        return $this;
    }
}
<?php
namespace Salesportal\Query\AST;
/**
 * Binary tree node representing a comparison.
 * Adds nothing to BinaryTree; the class acts as a marker that is checked
 * with instanceof.
 */
class Comparison extends BinaryTree {
}
<?php
namespace Salesportal\Query\AST;
/**
 * Binary tree node representing a logical expression.
 */
class Expression extends BinaryTree {
    /**
     * Retrieve the operator of this expression.
     * Convenience alias for getValue(), named to read naturally in an
     * expression/comparison setting.
     *
     * @since 1.0.0
     *
     * @return mixed
     */
    public function getOperator(){
        $operator = $this->getValue();

        return $operator;
    }
}
<?php
namespace Salesportal\Query\AST;
/**
 * Leaf node holding an identifier.
 * Adds nothing to Node; the class acts as a marker that is checked with
 * instanceof, and the identifier text is stored as the node value.
 */
class Identifier extends Node {
}
<?php
namespace Salesportal\Query;
use function is_numeric;
use function stripos;
use DateTime;
use DateTimeZone;
use Doctrine\Common\Lexer\AbstractLexer;
/**
* Basic Query Filter Lexer.
* Extended Doctrine Lexer port for BasicQueryFilter\Parser.
* Uses inspiration from Apache Lucene and smallhomelab\lucene-query-parser.
*
* @since 1.0.0
* @author Dom Webber <dom.webber@hotmail.com>
* @see https://github.com/ralphschindler/basic-query-filter
* @see https://github.com/smallhomelab/lucene-query-parser
*/
class Lexer extends AbstractLexer {
    //Generic tokens
    public const T_NONE = 0;
    public const T_WHITESPACE = 1;
    public const T_INTEGER = 2;
    public const T_STRING = 3;
    public const T_DATETIME = 4;
    public const T_FLOAT = 5;
    public const T_OPEN_PARENTHESIS = 6;
    public const T_CLOSE_PARENTHESIS = 7;

    //Identifier tokens
    public const T_IDENTIFIER = 100;
    public const T_IDENTIFIER_SEPARATOR = 101;

    //Keyword tokens.
    //NOTE: the parser compares these numbers when ordering operators, so
    //renumbering them changes operator precedence.
    public const T_AND = 200;
    public const T_OR = 201;
    public const T_EQUALS = 202;
    public const T_NOT_EQUALS = 203;
    public const T_LESS_THAN = 204;
    public const T_LESS_THAN_EQUALS = 205;
    public const T_GREATER_THAN = 206;
    public const T_GREATER_THAN_EQUALS = 207;
    public const T_CONTAINS = 208;

    //Normalised (word) form of every operator
    public const EXPR_AND = "and";
    public const EXPR_OR = "or";
    public const EXPR_EQUALS = "eq";
    public const EXPR_NOT_EQUALS = "ne";
    public const EXPR_LESS_THAN = "lt";
    public const EXPR_LESS_THAN_EQUALS = "lte";
    public const EXPR_GREATER_THAN = "gt";
    public const EXPR_GREATER_THAN_EQUALS = "gte";
    public const EXPR_CONTAINS = "contains";

    //Token patterns
    public const REGEX_DOUBLE_QUOTED_STRING = "(?:\"[^\"]+\")";
    public const REGEX_SINGLE_QUOTED_STRING = "(?:'[^']+')";
    public const REGEX_FIELD = "[a-z0-9_]+";
    public const REGEX_NUMBER = "(?:[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?";
    public const REGEX_COMPARISON_OPERATOR = "!?=|<=?|>=?";

    //Token type groups used by the is*() helpers
    public const GROUP_VALUES = [
        self::T_INTEGER,
        self::T_STRING,
        self::T_DATETIME,
        self::T_FLOAT
    ];
    public const GROUP_LOGIC_OPERATORS = [
        self::T_AND,
        self::T_OR
    ];
    public const GROUP_COMPARISON_OPERATORS = [
        self::T_EQUALS,
        self::T_NOT_EQUALS,
        self::T_LESS_THAN,
        self::T_LESS_THAN_EQUALS,
        self::T_GREATER_THAN,
        self::T_GREATER_THAN_EQUALS,
        self::T_CONTAINS
    ];

    /**
     * @inheritDoc
     */
    protected function getCatchablePatterns(){
        //Order matters: the patterns are tried in sequence at each position
        //(see the AbstractLexer documentation). The number pattern has to come
        //before the field pattern, otherwise "[a-z0-9_]+" consumes the integer
        //part of "12.25" and the decimal is split into "12", "." and "25".
        return [
            self::REGEX_DOUBLE_QUOTED_STRING,
            self::REGEX_SINGLE_QUOTED_STRING,
            self::REGEX_NUMBER,
            self::REGEX_FIELD,
            self::REGEX_COMPARISON_OPERATOR
        ];
    }

    /**
     * @inheritDoc
     */
    protected function getNonCatchablePatterns(){
        return [
        ];
    }

    /**
     * Determine whether a token is an identifier.
     *
     * @since 1.0.0
     *
     * @param int $code
     * @return bool
     */
    public static function isIdentifier( int $code ): bool {
        return $code === self::T_IDENTIFIER;
    }

    /**
     * Determines whether a token is either a comparison or a logic operator.
     *
     * @since 1.0.0
     *
     * @param int $code
     * @return bool
     */
    public static function isOperator( int $code ): bool {
        return self::isComparisonOperator( $code ) || self::isLogicOperator( $code );
    }

    /**
     * Determines whether a token is a comparison operator.
     *
     * @since 1.0.0
     *
     * @param int $code
     * @return bool
     */
    public static function isComparisonOperator( int $code ): bool {
        return in_array( $code, self::GROUP_COMPARISON_OPERATORS, true );
    }

    /**
     * Determine whether a token code is a logic operator.
     *
     * @since 1.0.0
     *
     * @param int $code
     * @return bool
     */
    public static function isLogicOperator( int $code ): bool {
        return in_array( $code, self::GROUP_LOGIC_OPERATORS, true );
    }

    /**
     * Determine whether a token is a value.
     *
     * @since 1.0.0
     *
     * @param int $code
     * @return bool
     */
    public static function isValue( int $code ): bool {
        return in_array( $code, self::GROUP_VALUES, true );
    }

    /**
     * Classify a raw token and return its T_* type.
     * The value is taken by reference: for quoted strings the surrounding
     * quotes are stripped so the token carries the bare string content.
     *
     * @inheritDoc
     */
    protected function getType( &$value ){
        //Classify number values, sub-classifying floats
        if( is_numeric( $value ) ){
            $is_float = strpos( $value, "." ) !== false || stripos( $value, "e" ) !== false;
            return $is_float ? self::T_FLOAT : self::T_INTEGER;
        }

        //Classify single-quoted and double-quoted strings. A stray, unmatched
        //quote character is not a string and falls through to T_NONE.
        $quote = $value[ 0 ];
        if( ( $quote === "\"" || $quote === "'" ) && strlen( $value ) >= 2 && substr( $value, -1 ) === $quote ){
            //Strip the quotes from the start and end character places
            $value = str_replace( $quote . $quote, $quote, substr( $value, 1, strlen( $value ) - 2 ) );

            //Recognise quoted ISO8601 datetime strings
            $parsed_datetime = DateTime::createFromFormat( DateTime::ISO8601, $value, new DateTimeZone( "UTC" ) );
            if( $parsed_datetime instanceof DateTime ){
                return self::T_DATETIME;
            }

            //Fallback to standard string format
            return self::T_STRING;
        }

        //Classify parenthesises and operators (words are matched case-insensitively)
        $keyword_types = [
            "("                            => self::T_OPEN_PARENTHESIS,
            ")"                            => self::T_CLOSE_PARENTHESIS,
            self::EXPR_AND                 => self::T_AND,
            "&&"                           => self::T_AND,
            self::EXPR_OR                  => self::T_OR,
            "||"                           => self::T_OR,
            self::EXPR_EQUALS              => self::T_EQUALS,
            "="                            => self::T_EQUALS,
            self::EXPR_NOT_EQUALS          => self::T_NOT_EQUALS,
            "!="                           => self::T_NOT_EQUALS,
            self::EXPR_LESS_THAN           => self::T_LESS_THAN,
            "<"                            => self::T_LESS_THAN,
            self::EXPR_LESS_THAN_EQUALS    => self::T_LESS_THAN_EQUALS,
            "<="                           => self::T_LESS_THAN_EQUALS,
            self::EXPR_GREATER_THAN        => self::T_GREATER_THAN,
            ">"                            => self::T_GREATER_THAN,
            self::EXPR_GREATER_THAN_EQUALS => self::T_GREATER_THAN_EQUALS,
            ">="                           => self::T_GREATER_THAN_EQUALS,
            self::EXPR_CONTAINS            => self::T_CONTAINS
        ];
        $keyword = strtolower( $value );
        if( isset( $keyword_types[ $keyword ] ) ){
            return $keyword_types[ $keyword ];
        }

        //Classify whitespace
        if( trim( $value ) === "" ){
            return self::T_WHITESPACE;
        }

        //Classify field separators
        if( $value === "." ){
            return self::T_IDENTIFIER_SEPARATOR;
        }

        //Classify identifiers (must start with a letter)
        if( ctype_alpha( $value[ 0 ] ) ){
            return self::T_IDENTIFIER;
        }

        //Default to none-type
        return self::T_NONE;
    }
}
<?php
namespace Salesportal\Query\AST;
/**
 * Base AST node: a container for a single value.
 */
class Node {
    /**
     * Value carried by the node, null until assigned.
     *
     * @var mixed
     */
    protected $value;

    /**
     * Read the node's value.
     *
     * @since 1.0.0
     *
     * @return mixed
     */
    public function getValue(){
        return $this->value;
    }

    /**
     * Assign the node's value.
     * Returns the node itself so calls can be chained.
     *
     * @since 1.0.0
     *
     * @param mixed $value
     * @return $this
     */
    public function setValue( $value ){
        $this->value = $value;

        return $this;
    }
}
<?php
namespace Salesportal\Query;
use RuntimeException;
/**
 * Exception type for query parsing failures.
 * Adds nothing to RuntimeException; it exists so callers can catch parsing
 * failures specifically.
 */
class ParseException extends RuntimeException {
}
<?php
namespace Salesportal\Query;
use function in_array;
use Salesportal\Query\ParseException;
use Salesportal\Query\Lexer;
use Salesportal\Query\AST\Node;
use Salesportal\Query\AST\BinaryTree;
use Salesportal\Query\AST\Comparison;
use Salesportal\Query\AST\Expression;
use Salesportal\Query\AST\Identifier;
use Salesportal\Query\AST\Value;
use DateTime;
use DateTimeZone;
/**
* Simple Business Logic Parser.
* Basic Query Filter Parser. Extended Doctrine Lexer port for BasicQueryFilter\Parser.
* Uses inspiration from Apache Lucene and smallhomelab\lucene-query-parser.
*
* @since 1.0.0
* @author Dom Webber <dom.webber@hotmail.com>
* @see https://github.com/ralphschindler/basic-query-filter
* @see https://github.com/smallhomelab/lucene-query-parser
*/
class Parser {
    /**
     * Set the maximum number of tokens able to be looked through.
     * Every lexer token counts, including whitespace tokens.
     *
     * @since 1.0.0
     *
     * @var int
     */
    protected const MAX_TOKENS = 250;

    /**
     * Query Lexer.
     *
     * @since 1.0.0
     *
     * @var Lexer
     */
    protected Lexer $lexer;

    /**
     * Constructor.
     *
     * @since 1.0.0
     *
     * @param Lexer $lexer
     */
    public function __construct( Lexer $lexer ){
        $this->lexer = $lexer;
    }

    /**
     * Parse a query string.
     * The tokens are first reordered into postfix form (see parseExpression())
     * and then evaluated with an operand stack: operands are pushed as nodes and
     * every operator pops its two operands and pushes the combined node.
     *
     * @since 1.0.0
     * @see http://www.solomonlrussell.com/spring16/cs2/ClassSource/Week6/stackcode.html
     *
     * @throws ParseException When the query is malformed
     * @param string $query_string
     * @return BinaryTree Root of the parsed tree
     */
    public function parse( string $query_string ){
        $this->lexer->setInput( $query_string );
        $this->lexer->moveNext();

        $operand_stack = [];

        //Loop through the postfix ordered tokens
        foreach( $this->parseExpression() as $token ){
            $type = $token[ "type" ];

            if( Lexer::isValue( $type ) ){
                $operand_stack[] = $this->createValueNode( $token );

            }elseif( Lexer::isIdentifier( $type ) ){
                $operand_stack[] = ( new Identifier )->setValue( $token[ "value" ] );

            }elseif( Lexer::isOperator( $type ) ){
                if( Lexer::isComparisonOperator( $type ) ){
                    $node = new Comparison;
                    $operator = $this->normalizeComparisonOperator( $token );
                }else{
                    $node = new Expression;
                    $operator = $this->normalizeLogicOperator( $token );
                }

                //Postfix order: the second operand was pushed last, so it is popped first.
                //A missing operand pops as null and is rejected by validate().
                $node->setRight( array_pop( $operand_stack ) );
                $node->setLeft( array_pop( $operand_stack ) );
                $node->setValue( $operator );

                $operand_stack[] = $node;

            }else{
                //Anything else (e.g. a parenthesis that was never closed) is invalid here
                throw new ParseException( "Unexpected value \"" . $token[ "value" ] . "\" at position " . $token[ "position" ] . "." );
            }
        }

        //A well-formed query reduces to exactly one node
        if( count( $operand_stack ) !== 1 ){
            throw new ParseException( "An unknown parsing exception occurred." );
        }

        $tree = $this->validate( reset( $operand_stack ) );
        if( !( $tree instanceof BinaryTree ) ) throw new ParseException( "Query must be an expression" );

        return $tree;
    }

    /**
     * Build the Value node for a value token, casting by token type.
     *
     * @param mixed $token Lexer token (read through its "type" and "value" keys)
     * @return Value
     */
    protected function createValueNode( $token ): Value {
        $node = new Value;

        switch( $token[ "type" ] ){
            case Lexer::T_INTEGER:
                return $node->setValue( (int) $token[ "value" ] );

            case Lexer::T_DATETIME:
                return $node->setValue( DateTime::createFromFormat( DateTime::ISO8601, $token[ "value" ], new DateTimeZone( "UTC" ) ) );

            case Lexer::T_FLOAT:
                return $node->setValue( (float) $token[ "value" ] );
        }

        //Strings are stored as-is
        return $node->setValue( $token[ "value" ] );
    }

    /**
     * Map a comparison operator token to its normalized Lexer::EXPR_* form.
     *
     * @throws ParseException When the token type is not a comparison operator
     * @param mixed $token Lexer token (read through its "type" key)
     * @return string
     */
    protected function normalizeComparisonOperator( $token ): string {
        switch( $token[ "type" ] ){
            case Lexer::T_EQUALS:
                return Lexer::EXPR_EQUALS;
            case Lexer::T_NOT_EQUALS:
                return Lexer::EXPR_NOT_EQUALS;
            case Lexer::T_LESS_THAN:
                return Lexer::EXPR_LESS_THAN;
            case Lexer::T_LESS_THAN_EQUALS:
                return Lexer::EXPR_LESS_THAN_EQUALS;
            case Lexer::T_GREATER_THAN:
                return Lexer::EXPR_GREATER_THAN;
            case Lexer::T_GREATER_THAN_EQUALS:
                return Lexer::EXPR_GREATER_THAN_EQUALS;
            case Lexer::T_CONTAINS:
                return Lexer::EXPR_CONTAINS;
        }

        throw new ParseException( "Invalid comparison operator." );
    }

    /**
     * Map a logic operator token to its normalized Lexer::EXPR_* form.
     *
     * @throws ParseException When the token type is not a logic operator
     * @param mixed $token Lexer token (read through its "type" key)
     * @return string
     */
    protected function normalizeLogicOperator( $token ): string {
        switch( $token[ "type" ] ){
            case Lexer::T_AND:
                return Lexer::EXPR_AND;
            case Lexer::T_OR:
                return Lexer::EXPR_OR;
        }

        throw new ParseException( "Invalid logic operator." );
    }

    /**
     * Parse a query expression into an array.
     * Uses a shunting-yard algorithm to calculate the postfix logical order for expression
     * evaluation and query parsing.
     *
     * @since 1.0.0
     * @see https://en.m.wikipedia.org/wiki/Shunting-yard_algorithm
     *
     * @throws ParseException On too many tokens, an unknown token or an unmatched ")"
     * @return mixed[] Lexer tokens in postfix order
     */
    protected function parseExpression(): array {
        $output_stack = [];
        $operator_stack = [];
        $token_count = 0;

        //While there are tokens to be read
        while( $this->lexer->lookahead !== null ){
            $token_count++;
            if( $token_count >= self::MAX_TOKENS ){
                throw new ParseException( "Maximum number of query tokens reached." );
            }

            $current_token = $this->lexer->lookahead;
            $current_type = $current_token[ "type" ];

            if( Lexer::isIdentifier( $current_type ) || Lexer::isValue( $current_type ) ){
                //Operands go straight to the output
                $output_stack[] = $current_token;

            }elseif( Lexer::isOperator( $current_type ) ){
                //Move stacked operators to the output while they take precedence
                //over the current one. Precedence is the numeric token type: a
                //larger type on the stack is emitted first, and on a tie only
                //comparison operators (left-associative) are emitted. An open
                //parenthesis is not an operator, so it stops the loop.
                while( count( $operator_stack ) > 0 ){
                    $top_type = end( $operator_stack )[ "type" ];
                    if( !Lexer::isOperator( $top_type ) ){
                        break;
                    }

                    $outranks = $top_type > $current_type;
                    $ties_left_associative = $top_type === $current_type && Lexer::isComparisonOperator( $top_type );
                    if( !$outranks && !$ties_left_associative ){
                        break;
                    }

                    $output_stack[] = array_pop( $operator_stack );
                }
                $operator_stack[] = $current_token;

            }elseif( $current_type === Lexer::T_OPEN_PARENTHESIS ){
                $operator_stack[] = $current_token;

            }elseif( $current_type === Lexer::T_CLOSE_PARENTHESIS ){
                //Emit everything stacked since the matching open parenthesis
                while( count( $operator_stack ) > 0 && end( $operator_stack )[ "type" ] !== Lexer::T_OPEN_PARENTHESIS ){
                    $output_stack[] = array_pop( $operator_stack );
                }

                //Running out of stack means this ")" has no matching "("
                if( count( $operator_stack ) === 0 ){
                    throw new ParseException( "Unexpected token at position " . $current_token[ "position" ] );
                }

                //Discard the open parenthesis itself
                array_pop( $operator_stack );

            }elseif( $current_type !== Lexer::T_WHITESPACE ){
                throw new ParseException( "Unexpected token at position " . $current_token[ "position" ] );
            }

            //Increment to next token
            $this->lexer->moveNext();
        }

        //Flush the remaining operators. An open parenthesis left here was never
        //closed; parse() rejects it as an unexpected value.
        while( count( $operator_stack ) > 0 ){
            $output_stack[] = array_pop( $operator_stack );
        }

        return $output_stack;
    }

    /**
     * Validates the setup of a binary tree.
     * A comparison must be "identifier operator value"; a logical expression
     * must have two children that are neither bare identifiers nor bare values,
     * and its children are validated recursively.
     *
     * @since 1.0.0
     *
     * @throws ParseException
     * @param Node $node
     * @return Node The node that was passed in
     */
    protected function validate( Node $node ): Node {
        if( $node instanceof Comparison ){
            if( !( $node->getLeft() instanceof Identifier ) ){
                throw new ParseException( "Expression must have an identifier on the left side" );
            }
            if( !( $node->getRight() instanceof Value ) ){
                throw new ParseException( "Expression must have a value on the right side" );
            }

        }elseif( $node instanceof Expression ){
            $left = $node->getLeft();
            $right = $node->getRight();

            if( $left === null || $right === null ){
                throw new ParseException( "Expression doesn't have valid logical order" );
            }

            //Check that the expression doesn't use an identifier or a value directly
            if( $left instanceof Identifier || $right instanceof Identifier ){
                throw new ParseException( "Expression cannot use an identifier directly in a logical expression." );
            }elseif( $left instanceof Value || $right instanceof Value ){
                throw new ParseException( "Expression cannot use a value directly in a logical expression." );
            }

            $this->validate( $left );
            $this->validate( $right );
        }

        return $node;
    }
}
<?php
/**
* Salesportal.
*
* @since 1.0.0
* @package Salesportal
* @copyright 2021 Lacey Tech Solutions
* @link https://lacey-tech.com
*/
namespace Salesportal\Tests\Query;
use DateTime;
use DateTimeZone;
use Generator;
use PHPUnit\Framework\TestCase;
use Salesportal\Query\Parser;
use Salesportal\Query\Lexer;
use Salesportal\Query\Token;
use Salesportal\Query\ParseException;
use Salesportal\Query\AST\Node;
use Salesportal\Query\AST\BinaryTree;
use Salesportal\Query\AST\Comparison;
use Salesportal\Query\AST\Expression;
use Salesportal\Query\AST\Identifier;
use Salesportal\Query\AST\Value;
/**
* Tests the Query parser.
* This tests that the logic strings are correctly
* parsed and assumed into the correct formats and node
* structure.
*
* @since 1.0.0
* @author Dom Webber <dom.webber@hotmail.com>
*/
final class ParserTest extends TestCase
{
/**
* The Lexer instance to use with the Parser.
*
* @since 1.0.0
*
* @var Lexer
*/
private $lexer;
/**
 * Prepare the fixture for each test.
 * Loads the application bootstrap file, which returns a container, and
 * fetches the Lexer instance from it.
 *
 * @since 1.0.0
 *
 * @return void
 */
protected function setUp(): void
{
    $bootstrap_file = __DIR__ . "/../../app/bootstrap.php";
    $this->lexer = (require $bootstrap_file)->get(Lexer::class);
}
/**
 * Data provider for valid parsing tests.
 * Every data set is [ query string, expected parsed tree ].
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public function validParseProvider(): Generator
{
    $timestamp = "2021-01-08T01:43:34+0000";

    yield "Equals comparison with an integer" => [
        "field = 12",
        self::buildComparison("field", Lexer::EXPR_EQUALS, 12)
    ];
    //NOTE(review): the Lexer class shown alongside this test has no null/boolean
    //literal type, so bare null/true look like identifiers to it - confirm these
    //data sets against the lexer version under test.
    yield "Equals comparison to null" => [
        "author = null",
        self::buildComparison("author", Lexer::EXPR_EQUALS, null)
    ];
    yield "Bracket-ed equals comparison to a single-quoted string" => [
        "(name = 'Dom')",
        self::buildComparison("name", Lexer::EXPR_EQUALS, "Dom")
    ];
    yield "Not-equals comparison to float" => [
        "height != 12.25",
        self::buildComparison("height", Lexer::EXPR_NOT_EQUALS, 12.25)
    ];
    yield "Less than comparison to DateTime" => [
        "created < '2021-01-08T01:43:34+0000'",
        self::buildComparison("created", Lexer::EXPR_LESS_THAN, self::buildDateTime($timestamp))
    ];
    yield "Equals comparison to boolean true" => [
        "enabled = true",
        self::buildComparison("enabled", Lexer::EXPR_EQUALS, true)
    ];
    yield "Less than equals comparison to float" => [
        "price <= 12.20",
        self::buildComparison("price", Lexer::EXPR_LESS_THAN_EQUALS, 12.20)
    ];
    yield "Greater than comparison to DateTime" => [
        "modified > \"2021-01-08T01:43:34+0000\"",
        self::buildComparison("modified", Lexer::EXPR_GREATER_THAN, self::buildDateTime($timestamp))
    ];
    yield "Greater than equals comparison to scientific \"e\" notation" => [
        "distance >= 6.022e23",
        self::buildComparison("distance", Lexer::EXPR_GREATER_THAN_EQUALS, 6.022e23)
    ];
    //The lexer defines a "contains" operator (Lexer::EXPR_CONTAINS); there is no
    //"like" keyword and no Lexer::EXPR_LIKE constant.
    yield "Contains comparison to single-quoted string" => [
        "description contains 'impressive'",
        self::buildComparison("description", Lexer::EXPR_CONTAINS, "impressive")
    ];
    yield "Complex query with a right-side subquery" => [
        "username = 'domwebber' or ( email = 'dom.webber@hotmail.com' and telephone = '01234567890' )",
        self::buildExpression(
            self::buildComparison("username", Lexer::EXPR_EQUALS, "domwebber"),
            Lexer::EXPR_OR,
            self::buildExpression(
                self::buildComparison("email", Lexer::EXPR_EQUALS, "dom.webber@hotmail.com"),
                Lexer::EXPR_AND,
                self::buildComparison("telephone", Lexer::EXPR_EQUALS, "01234567890")
            )
        )
    ];
    yield "Complex query with subqueries on both sides" => [
        "( name != 'Dom' or time != '2021-01-08T01:43:34+0000' ) or ( time = '2021-01-08T01:43:34+0000' and enabled = true )",
        self::buildExpression(
            self::buildExpression(
                self::buildComparison("name", Lexer::EXPR_NOT_EQUALS, "Dom"),
                Lexer::EXPR_OR,
                self::buildComparison("time", Lexer::EXPR_NOT_EQUALS, self::buildDateTime($timestamp))
            ),
            Lexer::EXPR_OR,
            self::buildExpression(
                self::buildComparison("time", Lexer::EXPR_EQUALS, self::buildDateTime($timestamp)),
                Lexer::EXPR_AND,
                self::buildComparison("enabled", Lexer::EXPR_EQUALS, true)
            )
        )
    ];
}
/**
 * Build the expected tree for "identifier operator value".
 *
 * @param string $field Identifier on the left side
 * @param string $operator Normalized Lexer::EXPR_* operator
 * @param mixed $value Value on the right side
 * @return Comparison
 */
private static function buildComparison(string $field, string $operator, $value): Comparison
{
    return (new Comparison())
        ->setLeft((new Identifier())->setValue($field))
        ->setValue($operator)
        ->setRight((new Value())->setValue($value));
}
/**
 * Build the expected tree for "left operator right" with two sub-trees.
 *
 * @param BinaryTree $left
 * @param string $operator Normalized Lexer::EXPR_* logic operator
 * @param BinaryTree $right
 * @return Expression
 */
private static function buildExpression(BinaryTree $left, string $operator, BinaryTree $right): Expression
{
    return (new Expression())
        ->setLeft($left)
        ->setValue($operator)
        ->setRight($right);
}
/**
 * Build the expected DateTime for an ISO8601 timestamp, created the same way
 * the parser creates it.
 *
 * @param string $timestamp
 * @return DateTime
 */
private static function buildDateTime(string $timestamp): DateTime
{
    return DateTime::createFromFormat(DateTime::ISO8601, $timestamp, new DateTimeZone("UTC"));
}
/**
 * Test the Query Parser with valid logical expressions.
 * This tests that queries are represented after parsing as expected and that
 * the queries succeed in parsing.
 *
 * @since 1.0.0
 * @dataProvider validParseProvider
 *
 * @param string $input Query string to parse
 * @param BinaryTree $expectation Tree the parser is expected to produce
 * @return void
 */
public function testValidParse(string $input, BinaryTree $expectation): void
{
    $parser = new Parser($this->lexer);
    $output = $parser->parse($input);
    //assertEquals compares by value like ==, but reports which part of the
    //tree differs instead of a bare "false is not true"
    $this->assertEquals($expectation, $output);
}
/**
 * Data provider for the invalid parsing tests.
 * Every data set holds a single query string that the parser must reject.
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public function invalidParseProvider(): Generator
{
    yield from [
        "Empty string" => [""],
        "Missing right parenthesis" => ["name = 'Dom' or (email = 'dom.webber@hotmail.com' and telephone = '01234567890' "],
        "Misplaced right parenthesis" => ["name = 'Dom' or email = 'dom.webber@hotmail.com' )"],
        "Misformatted float in scientific notation" => ["value = 6.022e"],
        "Misformatted float" => ["value = 12."],
        "Missing ending quote in double-quoted string" => ["name = \"Dom"],
        "Missing start quote in double-quoted string" => ["name = Dom\""],
        "Missing ending quote in single-quoted string" => ["name = 'Dom"],
        "Missing start quote in single-quoted string" => ["name = Dom'"],
        "Mispaced left parenthesis" => ["( name = 'Dom'"],
        "Incorrect order" => ["'Dom' = name"],
        "Logic operator instead of a comparison operator" => ["name and 'Dom'"],
        "Logic operator instead of a comparison operator and incorrect order" => ["'Dom' and name"],
        "Incorrect order in subquery" => ["name = 'Dom' and ( name or 'Dom' )"],
        "Misplaced comparison operator" => ["name = < 12"],
        "Missing value in comparison" => ["name ="],
        "Misplaced logic operator" => ["name = 'Dom' and"],
        "Invalid tokens and symbols" => ["name = 'Dom' $@*&"],
        "Value in expression in subquery" => ["name = 'Dom' and ( null or null )"],
        "Empty subquery parenthesises" => ["name = 'Dom' and ()"],
        "Misplaced logic operator and misplaced value" => ["and 'Dom'"],
        "Missing identifier and value in comparison" => ["="],
        "Complex query with missing subquery end closing parenthesis" => ["( name != 'Dom' or time != '2021-01-08T01:43:34+0000' ) or ( time = '2021-01-08T01:43:34+0000' and enabled = true "],
        //Long enough to run into the parser's token limit
        "Exhausting token limit" => [str_repeat("enabled = false and ", 63) . "disabled = true"],
    ];
}
/**
 * Test the Query Parser with invalid logical expressions.
 * This tests that incorrectly formatted and invalid query expressions fail. This
 * ensures that no precedence assumptions are made in the case that brackets are
 * missing and that expressions in the wrong order are not accepted into the system.
 *
 * @since 1.0.0
 * @dataProvider invalidParseProvider
 *
 * @param string $input Query string the parser must reject
 * @return void
 */
public function testInvalidParse(string $input): void
{
    $parser = new Parser($this->lexer);
    //The expectation has to be registered before the call that throws
    $this->expectException(ParseException::class);
    //The return value is irrelevant: the call is expected to throw
    $parser->parse($input);
}
/**
 * Data provider for the valid comparison operator normalization tests.
 * Every data set is [ expected normalized operator, token to normalize ].
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public function validNormalizeComparisonOperatorProvider(): Generator
{
    //Data set name => [ expected operator, raw token value, token type ]
    $cases = [
        "Equals comparison operator word" => [Lexer::EXPR_EQUALS, "eq", Lexer::T_EQUALS],
        "Equals comparison operator symbol" => [Lexer::EXPR_EQUALS, "=", Lexer::T_EQUALS],
        "Not equals comparison operator word" => [Lexer::EXPR_NOT_EQUALS, "ne", Lexer::T_NOT_EQUALS],
        "Not equals comparison operator symbol" => [Lexer::EXPR_NOT_EQUALS, "!=", Lexer::T_NOT_EQUALS],
        "Less than comparison operator word" => [Lexer::EXPR_LESS_THAN, "lt", Lexer::T_LESS_THAN],
        //The symbol data set previously repeated the word "lt"
        "Less than comparison operator symbol" => [Lexer::EXPR_LESS_THAN, "<", Lexer::T_LESS_THAN],
        "Less than equals comparison operator word" => [Lexer::EXPR_LESS_THAN_EQUALS, "lte", Lexer::T_LESS_THAN_EQUALS],
        "Less than equals comparison operator symbol" => [Lexer::EXPR_LESS_THAN_EQUALS, "<=", Lexer::T_LESS_THAN_EQUALS],
        "Greater than comparison operator word" => [Lexer::EXPR_GREATER_THAN, "gt", Lexer::T_GREATER_THAN],
        "Greater than comparison operator symbol" => [Lexer::EXPR_GREATER_THAN, ">", Lexer::T_GREATER_THAN],
        "Greater than equals comparison operator word" => [Lexer::EXPR_GREATER_THAN_EQUALS, "gte", Lexer::T_GREATER_THAN_EQUALS],
        "Greater than equals comparison operator symbol" => [Lexer::EXPR_GREATER_THAN_EQUALS, ">=", Lexer::T_GREATER_THAN_EQUALS],
        //The lexer names this operator "contains"; it defines no EXPR_LIKE/T_LIKE constants
        "Contains comparison operator (only exists as word)" => [Lexer::EXPR_CONTAINS, "contains", Lexer::T_CONTAINS],
    ];

    foreach ($cases as $name => [$expectation, $value, $type]) {
        yield $name => [
            $expectation,
            new Token([
                "value" => $value,
                "type" => $type,
                "position" => 0
            ])
        ];
    }
}
/**
 * Test the comparison operator normalization with valid values.
 *
 * @since 1.0.0
 * @dataProvider validNormalizeComparisonOperatorProvider
 *
 * @param string $expectation Normalized operator the token should map to
 * @param Token $token Token to normalize
 * @return void
 */
public function testValidNormalizeComparisonOperator(string $expectation, Token $token): void
{
    //Anonymous Parser subclass that makes the normalization method callable from the test.
    //NOTE(review): normalizeComparisonOperator() is presumably a non-public Parser
    //method - confirm it exists on the Parser version under test.
    $exposed_parser = new class ($this->lexer) extends Parser
    {
        /**
         * Forward to the Comparison Operator normalization method.
         *
         * @since 1.0.0
         *
         * @param Token $token
         * @return string
         */
        public function testNormalizeComparisonOperator(Token $token): string
        {
            return $this->normalizeComparisonOperator($token);
        }
    };

    $normalized = $exposed_parser->testNormalizeComparisonOperator($token);
    $this->assertEquals($expectation, $normalized);
}
/**
 * Data provider for the invalid comparison operator normalization tests.
 *
 * Declared static because it reads no instance state; PHPUnit 10+ expects data
 * providers to be static, and older PHPUnit versions accept static providers too.
 *
 * Each dataset is a one-element argument list: [Token $token].
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public static function invalidNormalizeComparisonOperatorProvider(): Generator
{
    yield "Non-matching token type" => [
        new Token([
            "value" => "invalid",
            //Per the dataset name, -1 stands in for a type that matches no comparison operator
            "type" => -1,
            "position" => 0,
        ]),
    ];
}
/**
 * Verify that normalizing an invalid comparison-operator token raises a ParseException.
 *
 * @since 1.0.0
 * @dataProvider invalidNormalizeComparisonOperatorProvider
 *
 * @param Token $token Token that the normalization is expected to reject.
 * @return void
 */
public function testInvalidNormalizeComparisonOperator(Token $token): void
{
    //Anonymous Parser subclass that opens up the normalization method to the test
    $exposedParser = new class ($this->lexer) extends Parser
    {
        /**
         * Forward a token to the Comparison Operator normalization method.
         *
         * @since 1.0.0
         *
         * @param Token $candidate Token to normalize.
         * @return string Whatever Parser::normalizeComparisonOperator() returns.
         */
        public function callNormalizeComparisonOperator(Token $candidate): string
        {
            $normalized = $this->normalizeComparisonOperator($candidate);

            return $normalized;
        }
    };

    //The expectation is registered only after construction, so it covers just the call below
    $this->expectException(ParseException::class);

    //Feed the invalid token through the exposed method
    $exposedParser->callNormalizeComparisonOperator($token);
}
/**
 * Data provider for the valid logic operator normalization.
 *
 * Declared static because it reads no instance state; PHPUnit 10+ expects data
 * providers to be static, and older PHPUnit versions accept static providers too.
 *
 * Each dataset is [expected Lexer::EXPR_* value, Token to normalize], covering
 * the word and symbol spellings of both logic operators.
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public static function validNormalizeLogicOperatorProvider(): Generator
{
    yield "And operator word" => [
        Lexer::EXPR_AND,
        new Token([
            "value" => "and",
            "type" => Lexer::T_AND,
            "position" => 0,
        ]),
    ];

    yield "And operator symbol" => [
        Lexer::EXPR_AND,
        new Token([
            "value" => "&&",
            "type" => Lexer::T_AND,
            "position" => 0,
        ]),
    ];

    yield "Or operator word" => [
        Lexer::EXPR_OR,
        new Token([
            "value" => "or",
            "type" => Lexer::T_OR,
            "position" => 0,
        ]),
    ];

    yield "Or operator symbol" => [
        Lexer::EXPR_OR,
        new Token([
            "value" => "||",
            "type" => Lexer::T_OR,
            "position" => 0,
        ]),
    ];
}
/**
 * Test the logic operator normalization with valid values.
 *
 * An anonymous Parser subclass exposes normalizeLogicOperator() through a
 * public wrapper so the test can invoke it.
 *
 * @since 1.0.0
 * @dataProvider validNormalizeLogicOperatorProvider
 *
 * @param string $expectation Operator string the normalization is expected to return.
 * @param Token  $input_token Token passed to the normalization method.
 * @return void
 */
public function testValidNormalizeLogicOperator(string $expectation, Token $input_token): void
{
    //Extend the parser
    $parser = new class ($this->lexer) extends Parser
    {
        /**
         * Provide testing access to the Logic Operator normalization method.
         *
         * @since 1.0.0
         *
         * @param Token $token Token to normalize.
         * @return string Result of Parser::normalizeLogicOperator().
         */
        public function testNormalizeLogicOperator(Token $token): string
        {
            return $this->normalizeLogicOperator($token);
        }
    };

    //Run the test method
    $output = $parser->testNormalizeLogicOperator($input_token);

    //Both values are strings, so compare strictly (type and value) instead of loosely
    $this->assertSame($expectation, $output);
}
/**
 * Data provider for the invalid logic operator normalization tests.
 *
 * Declared static because it reads no instance state; PHPUnit 10+ expects data
 * providers to be static, and older PHPUnit versions accept static providers too.
 *
 * Each dataset is a one-element argument list: [Token $input_token].
 *
 * @since 1.0.0
 *
 * @return Generator
 */
public static function invalidNormalizeLogicOperatorProvider(): Generator
{
    yield "Non-matching token type" => [
        new Token([
            "value" => "invalid",
            //Per the dataset name, -1 stands in for a type that matches no logic operator
            "type" => -1,
            "position" => 0,
        ]),
    ];
}
/**
 * Verify that normalizing an invalid logic-operator token raises a ParseException.
 *
 * @since 1.0.0
 * @dataProvider invalidNormalizeLogicOperatorProvider
 *
 * @param Token $input_token Token that the normalization is expected to reject.
 * @return void
 */
public function testInvalidNormalizeLogicOperator(Token $input_token): void
{
    //Anonymous Parser subclass that opens up the normalization method to the test
    $exposedParser = new class ($this->lexer) extends Parser
    {
        /**
         * Forward a token to the Logic Operator normalization method.
         *
         * @since 1.0.0
         *
         * @param Token $candidate Token to normalize.
         * @return string Whatever Parser::normalizeLogicOperator() returns.
         */
        public function callNormalizeLogicOperator(Token $candidate): string
        {
            $normalized = $this->normalizeLogicOperator($candidate);

            return $normalized;
        }
    };

    //The expectation is registered only after construction, so it covers just the call below
    $this->expectException(ParseException::class);

    //Feed the invalid token through the exposed method
    $exposedParser->callNormalizeLogicOperator($input_token);
}
}
<?php
namespace Salesportal\Query\AST;
/**
 * AST node type named Value.
 *
 * Inherits everything from Node and declares no members of its own.
 * NOTE(review): presumably marks a literal operand in a parsed query - confirm against the Parser.
 */
class Value extends Node
{
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment