Last active
August 31, 2017 16:16
-
-
Save vierbergenlars/6186002 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace vierbergenlars\Norch\QueryParser; | |
class Lexer
{
    /**
     * Static-only utility class; it should not be instantiated.
     */
    private function __construct()
    {
    }

    /**
     * Splits a query expression into a flat list of tokens.
     *
     * The input is scanned byte by byte. Special characters:
     *  - `\`  escapes the next character, which is appended literally;
     *  - ` `  (space) terminates the token being built;
     *  - `:`  turns the token being built into a T_FIELD_NAME and makes the
     *         token that follows a T_FIELD_VALUE;
     *  - `^`  turns the token being built into a T_FIELD_NAME and reads the
     *         integer that follows as a T_FIELD_WEIGHT;
     *  - `@`  marks the (still empty) token being built as T_FIELD_SEARCH;
     *  - `"`  starts a quoted string; only allowed at the start of a token,
     *         and the closing quote must be followed by a space or the end
     *         of the input.
     * Every other character is appended to the token being built. Tokens
     * whose type was never set are emitted as T_STRING.
     *
     * A quoted string without a closing quote is accepted and runs to the
     * end of the input.
     *
     * @param string $string The query expression to tokenize
     * @return Token[] The tokens, in input order
     * @throws ParseException When the expression is malformed
     */
    public static function tokenize($string)
    {
        $len = strlen($string);
        $tokens = array();
        $current_token = new Token(Token::T_NONE, 0);
        $i = 0;
        while($i < $len) {
            $c = $string[$i];
            switch($c) {
                case '\\': // Escape character
                    // A trailing backslash has nothing to escape; reading
                    // $string[$len] would be an out-of-range offset.
                    if($i + 1 >= $len)
                        throw new ParseException('Unexpected end of input after escape character', $string, $i);
                    $current_token->addData($string[++$i]);
                    break;
                case ' ':
                    self::push($tokens, $current_token, $i);
                    break;
                case ':':
                    self::expectFieldName($current_token, $string, $i);
                    self::push($tokens, $current_token, $i);
                    // push() replaced $current_token with a fresh token.
                    $current_token->setType(Token::T_FIELD_VALUE);
                    break;
                case '^':
                    self::expectFieldName($current_token, $string, $i);
                    // Keep a handle on the field name: push() swaps
                    // $current_token for a fresh token.
                    $field_token = $current_token;
                    self::push($tokens, $current_token, $i);
                    $current_token->setType(Token::T_FIELD_WEIGHT);
                    self::readInt($current_token, $string, $i);
                    $weight = $current_token->getData();
                    // Without this check an empty weight token would stay
                    // typed T_FIELD_WEIGHT and swallow the text that follows.
                    if($weight === null || $weight === '-')
                        throw new ParseException('Expected T_FIELD_WEIGHT, got nothing', $string, $i);
                    self::push($tokens, $current_token, $i);
                    if($i + 1 < $len && $string[$i + 1] == ':') // Peek one ahead. Duplicate T_FIELD_NAME token if a T_FIELD_VALUE follows.
                        $current_token = $field_token;
                    break;
                case '@':
                    if($current_token->getData() !== null)
                        throw new ParseException('Expected nothing, got ' . Token::getName($current_token->getType()), $string, $i);
                    $current_token->setType(Token::T_FIELD_SEARCH);
                    break;
                case '"':
                    if($current_token->getData() !== null)
                        throw new ParseException('Unexpected T_STRING', $string, $i);
                    $current_token->setTypeIfNone(Token::T_STRING);
                    self::readEncString($current_token, $string, $i);
                    if($i + 1 < $len && $string[$i + 1] != ' ') // Peek one ahead. Should be empty
                        throw new ParseException('Unexpected T_STRING', $string, $i + 1);
                    break;
                default:
                    $current_token->addData($c);
            }
            $i++;
        }
        // Flush whatever was still being built when the input ended.
        self::push($tokens, $current_token, $i);
        return $tokens;
    }

    /**
     * Validates that the token being built can serve as a field name and
     * marks it as T_FIELD_NAME.
     *
     * @param Token $current_token The token being built
     * @param string $string The full query expression (for error reporting)
     * @param int $i The current position (for error reporting)
     * @throws ParseException When the token is empty or already has another type
     */
    static private function expectFieldName(Token $current_token, $string, $i)
    {
        if($current_token->getData() === null)
            throw new ParseException('Expected T_FIELD_NAME, got nothing', $string, $i);
        if(!$current_token->isTypeNoneOr(Token::T_FIELD_NAME))
            throw new ParseException('Expected T_FIELD_NAME, got ' . Token::getName($current_token->getType()), $string, $i);
        $current_token->setType(Token::T_FIELD_NAME);
    }

    /**
     * Appends the token being built to the token list and starts a new one.
     *
     * Tokens without data are not emitted; in that case the token being
     * built is kept as is (including any type already set on it).
     *
     * @param Token[] $tokens The token list, modified in place
     * @param Token $current_token The token being built, replaced in place
     * @param int $i The current position, used as start position of the new token
     */
    static private function push(&$tokens, &$current_token, $i)
    {
        if($current_token->getData() === null)
            return;
        $current_token->setTypeIfNone(Token::T_STRING);
        $tokens[] = $current_token;
        $current_token = new Token(Token::T_NONE, $i);
    }

    /**
     * Reads a double-quoted string into the token.
     *
     * On entry $i points at the opening quote. On exit $i points at the
     * closing quote, or equals the input length when no closing quote exists.
     * A backslash escapes the character that follows it.
     *
     * @param Token $current_token The token receiving the string contents
     * @param string $string The full query expression
     * @param int $i The current position, advanced in place
     * @throws ParseException When the input ends right after a backslash
     */
    static private function readEncString(Token $current_token, $string, &$i)
    {
        $len = strlen($string);
        while(++$i < $len) {
            if($string[$i] == '\\') {
                if($i + 1 >= $len)
                    throw new ParseException('Unexpected end of input after escape character', $string, $i);
                $current_token->addData($string[++$i]);
            } else if($string[$i] != '"') {
                $current_token->addData($string[$i]);
            } else {
                break;
            }
        }
    }

    /**
     * Reads an integer (optional leading minus sign, then digits) into the token.
     *
     * On entry $i points at the character before the integer. On exit $i
     * points at the last character consumed, or equals the input length
     * when the integer runs to the end of the input.
     *
     * @param Token $current_token The token receiving the digits
     * @param string $string The full query expression
     * @param int $i The current position, advanced in place
     */
    static private function readInt(Token $current_token, $string, &$i)
    {
        $digits = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
        $len = strlen($string);
        $first = $i + 1;
        while(++$i < $len) {
            $c = $string[$i];
            // A minus sign is only part of the number in first position.
            $is_sign = ($c === '-' && $i === $first);
            if($is_sign || in_array($c, $digits, true)) {
                $current_token->addData($c);
            } else {
                // Step back so the caller's $i++ lands on this character.
                $i--;
                break;
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace vierbergenlars\Norch\QueryParser; | |
use vierbergenlars\Norch\SearchQuery\QueryBuilder; | |
use vierbergenlars\Norch\QueryParser\Token; | |
class Compiler extends QueryBuilder
{
    /**
     * Tokenizes a query expression and applies its parts to this builder.
     *
     * - T_STRING tokens are joined with single spaces and passed to
     *   setSearchQuery();
     * - a T_FIELD_NAME followed by a T_FIELD_VALUE is passed to addFilter();
     * - a T_FIELD_NAME followed by a T_FIELD_WEIGHT is passed to addWeight();
     * - a T_FIELD_SEARCH token is passed to addSearchField().
     *
     * NOTE(review): setSearchQuery(), addFilter(), addWeight() and
     * addSearchField() are not defined here; presumably they are inherited
     * from QueryBuilder -- confirm.
     *
     * @param string $queryExpr The query expression to compile
     * @return $this
     * @throws ParseException When the token stream is not well-formed
     */
    public function updateQuery($queryExpr)
    {
        $tokens = Lexer::tokenize($queryExpr);
        // Collect the free-text terms and join them once at the end; this
        // yields an empty string (never false) when there are none.
        $searchTerms = array();
        while(false !== ($token = current($tokens))) {
            switch($token->getType()) {
                case Token::T_STRING:
                    $searchTerms[] = $token->getData();
                    break;
                case Token::T_FIELD_NAME:
                    // A field name is always consumed together with the
                    // token that follows it.
                    $nextToken = next($tokens);
                    if($nextToken === false)
                        throw new ParseException('Unexpected end of token stream', $queryExpr, $token->getStartPosition());
                    switch($nextToken->getType()) {
                        case Token::T_FIELD_VALUE:
                            $this->addFilter($token->getData(), $nextToken->getData());
                            break;
                        case Token::T_FIELD_WEIGHT:
                            $this->addWeight($token->getData(), $nextToken->getData());
                            break;
                        default:
                            throw new ParseException('Unexpected ' . Token::getName($nextToken->getType()), $queryExpr, $token->getStartPosition());
                    }
                    break;
                case Token::T_FIELD_SEARCH:
                    $this->addSearchField($token->getData());
                    break;
                default:
                    throw new ParseException('Unexpected ' . Token::getName($token->getType()) . ' (This is a lexer bug, please report it)', $queryExpr, $token->getStartPosition());
            }
            next($tokens);
        }
        $this->setSearchQuery(implode(' ', $searchTerms));
        return $this;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace vierbergenlars\Norch\QueryParser; | |
/**
 * A single lexical token: a type, the accumulated text and the position
 * recorded when the token was created.
 */
class Token
{
    const T_NONE = 0;
    const T_FIELD_NAME = 1;
    const T_STRING = 2;
    const T_FIELD_WEIGHT = 3;
    const T_FIELD_VALUE = 4;
    const T_FIELD_SEARCH = 5;

    /** @var int One of the T_* constants */
    protected $type;

    /** @var string|null Accumulated text; null until addData() is called */
    protected $data = null;

    /** @var int Position passed in at construction */
    protected $startPos;

    /**
     * @param int $type One of the T_* constants
     * @param int $startPos Position to report from getStartPosition()
     */
    public function __construct($type, $startPos)
    {
        $this->type = $type;
        $this->startPos = $startPos;
    }

    /**
     * Appends text to the token's data.
     *
     * @param string $data
     */
    public function addData($data)
    {
        $this->data.=$data;
    }

    /**
     * Unconditionally sets the token type.
     *
     * @param int $type One of the T_* constants
     */
    public function setType($type)
    {
        $this->type = $type;
    }

    /**
     * Sets the token type only when it is still T_NONE.
     *
     * @param int $type One of the T_* constants
     */
    public function setTypeIfNone($type)
    {
        if($this->type == self::T_NONE)
            $this->type = $type;
    }

    /**
     * Tells whether the token type is T_NONE or the given type.
     *
     * @param int $type One of the T_* constants
     * @return bool
     */
    public function isTypeNoneOr($type)
    {
        return ($this->type == self::T_NONE || $this->type == $type);
    }

    /**
     * @return int The current token type
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * @return string|null The accumulated text, or null when nothing was added
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * @return int The position given at construction
     */
    public function getStartPosition()
    {
        return $this->startPos;
    }

    /**
     * Returns the name of the class constant holding the given token type.
     *
     * The lookup is strict, so only values identical to a constant match;
     * null, false or arbitrary strings are not mistaken for T_NONE (0).
     *
     * @param int $token A token type
     * @return string The constant name, or 'UNKNOWN_TOKEN' when none matches
     */
    public static function getName($token)
    {
        $refl = new \ReflectionClass(__CLASS__);
        $constants = $refl->getConstants();
        $token_name = array_search($token, $constants, true);
        // array_search() returns false on a miss; compare explicitly rather
        // than relying on the truthiness of the key.
        if($token_name !== false)
            return $token_name;
        return 'UNKNOWN_TOKEN';
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment