Skip to content

Instantly share code, notes, and snippets.

@vierbergenlars
Last active August 31, 2017 16:16
Show Gist options
  • Save vierbergenlars/6186002 to your computer and use it in GitHub Desktop.
Save vierbergenlars/6186002 to your computer and use it in GitHub Desktop.
<?php
namespace vierbergenlars\Norch\QueryParser;
/**
 * Hand-written lexer that turns a search query expression into Token objects.
 */
class Lexer
{
    /**
     * This class should not be instantiated; everything on it is static.
     */
    private function __construct()
    {
    }

    /**
     * Splits a query expression into a flat list of tokens.
     *
     * Syntax handled by the loop below:
     *  - `\x`         adds the character x literally to the current token
     *  - ` `          ends the current token
     *  - `name:value` emits T_FIELD_NAME followed by T_FIELD_VALUE
     *  - `name^12`    emits T_FIELD_NAME followed by T_FIELD_WEIGHT; if a `:`
     *                 follows the weight directly, the same T_FIELD_NAME token
     *                 is emitted a second time in front of the T_FIELD_VALUE
     *  - `@name`      emits T_FIELD_SEARCH
     *  - `"..."`      quoted text; may only start a token and must be followed
     *                 by a space or the end of the input
     *  - any other token that never received a type becomes T_STRING
     *
     * A quoted string that is never closed simply runs to the end of the input.
     *
     * @param string $string The query expression
     * @return Token[] Tokens in the order they were completed
     * @throws ParseException On a dangling escape character, a misplaced
     *                        `:`, `^`, `@` or `"`, or a `^` without a weight
     */
    public static function tokenize($string)
    {
        $len = strlen($string);
        $tokens = array();
        $current_token = new Token(Token::T_NONE, 0);
        $i = 0;
        while ($i < $len) {
            $c = $string[$i];
            switch ($c) {
                case '\\': // Escape character: take the next character literally
                    if ($i + 1 >= $len) {
                        // Nothing left to escape; reading $string[$i + 1] would run past the input.
                        throw new ParseException('Unexpected end of input after escape character', $string, $i);
                    }
                    $current_token->addData($string[++$i]);
                    break;
                case ' ':
                    self::push($tokens, $current_token, $i);
                    break;
                case ':':
                    self::expectFieldName($current_token, $string, $i);
                    $current_token->setType(Token::T_FIELD_NAME);
                    self::push($tokens, $current_token, $i);
                    // push() installed a fresh token; whatever follows is the field's value.
                    $current_token->setType(Token::T_FIELD_VALUE);
                    break;
                case '^':
                    self::expectFieldName($current_token, $string, $i);
                    $current_token->setType(Token::T_FIELD_NAME);
                    // Keep a handle on the name token: push() replaces $current_token.
                    $field_token = $current_token;
                    self::push($tokens, $current_token, $i);
                    $current_token->setType(Token::T_FIELD_WEIGHT);
                    self::readInt($current_token, $string, $i);
                    if ($current_token->getData() === null) {
                        // Without this check the empty weight token would stay current
                        // and swallow the next word as if it were the weight.
                        throw new ParseException('Expected T_FIELD_WEIGHT, got nothing', $string, $i);
                    }
                    self::push($tokens, $current_token, $i);
                    // Peek one ahead. Duplicate T_FIELD_NAME token if a T_FIELD_VALUE follows.
                    if ($i + 1 < $len && $string[$i + 1] == ':') {
                        $current_token = $field_token;
                    }
                    break;
                case '@':
                    if ($current_token->getData() !== null) {
                        throw new ParseException('Expected nothing, got ' . Token::getName($current_token->getType()), $string, $i);
                    }
                    $current_token->setType(Token::T_FIELD_SEARCH);
                    break;
                case '"':
                    if ($current_token->getData() !== null) {
                        // A quote in the middle of a token is not supported.
                        throw new ParseException('Unexpected T_STRING', $string, $i);
                    }
                    // Only untyped tokens become T_STRING; a token already typed
                    // (e.g. T_FIELD_VALUE after `:`) keeps its type.
                    $current_token->setTypeIfNone(Token::T_STRING);
                    self::readEncString($current_token, $string, $i);
                    // Peek one ahead. Should be empty
                    if ($i + 1 < $len && $string[$i + 1] != ' ') {
                        throw new ParseException('Unexpected T_STRING', $string, $i + 1);
                    }
                    break;
                default:
                    $current_token->addData($c);
            }
            $i++;
        }
        // Flush whatever token was still being built when the input ended.
        self::push($tokens, $current_token, $i);
        return $tokens;
    }

    /**
     * Verifies that the current token can serve as a field name.
     *
     * @param Token  $current_token Token that precedes a `:` or `^`
     * @param string $string        Full query expression, for error reporting
     * @param int    $i             Position of the `:` or `^`
     * @throws ParseException When the token has no data or already has a type
     *                        other than T_NONE / T_FIELD_NAME
     */
    private static function expectFieldName(Token $current_token, $string, $i)
    {
        if ($current_token->getData() === null) {
            throw new ParseException('Expected T_FIELD_NAME, got nothing', $string, $i);
        }
        if (!$current_token->isTypeNoneOr(Token::T_FIELD_NAME)) {
            throw new ParseException('Expected T_FIELD_NAME, got ' . Token::getName($current_token->getType()), $string, $i);
        }
    }

    /**
     * Appends the current token to the list and starts a new, untyped one.
     *
     * Does nothing when the current token holds no data, so an empty token
     * keeps whatever type was already assigned to it.
     *
     * @param Token[] $tokens        Token list, appended to in place
     * @param Token   $current_token Replaced in place by a fresh T_NONE token
     * @param int     $i             Position recorded as the new token's start
     */
    private static function push(&$tokens, &$current_token, $i)
    {
        if ($current_token->getData() === null) {
            return;
        }
        $current_token->setTypeIfNone(Token::T_STRING);
        $tokens[] = $current_token;
        $current_token = new Token(Token::T_NONE, $i);
    }

    /**
     * Reads a double-quoted string into the token.
     *
     * On entry $i points at the opening quote. On return it points at the
     * closing quote, or equals the input length when the quote is never closed.
     * A backslash adds the character after it literally (including `"`).
     *
     * @param Token  $current_token Token receiving the string contents
     * @param string $string        Full query expression
     * @param int    $i             Cursor, advanced in place
     * @throws ParseException When a backslash is the last character of the input
     */
    private static function readEncString(Token $current_token, $string, &$i)
    {
        $len = strlen($string);
        while (++$i < $len) {
            $c = $string[$i];
            if ($c === '"') {
                break;
            }
            if ($c === '\\') {
                if ($i + 1 >= $len) {
                    throw new ParseException('Unexpected end of input after escape character', $string, $i);
                }
                $c = $string[++$i];
            }
            $current_token->addData($c);
        }
    }

    /**
     * Reads the run of characters `0-9` and `-` that follows position $i.
     *
     * On return $i points at the last character consumed; it is unchanged when
     * no such character follows, in which case the token receives no data.
     *
     * @param Token  $current_token Token receiving the characters
     * @param string $string        Full query expression
     * @param int    $i             Cursor, advanced in place
     */
    private static function readInt(Token $current_token, $string, &$i)
    {
        $run = strspn($string, '0123456789-', $i + 1);
        if ($run > 0) {
            $current_token->addData(substr($string, $i + 1, $run));
            $i += $run;
        }
    }
}
<?php
namespace vierbergenlars\Norch\QueryParser;
use vierbergenlars\Norch\SearchQuery\QueryBuilder;
use vierbergenlars\Norch\QueryParser\Token;
/**
 * Translates a query expression into calls on the query builder it extends.
 */
class Compiler extends QueryBuilder
{
    /**
     * Tokenizes a query expression and applies every token to this builder.
     *
     * - T_STRING tokens are collected and joined with single spaces into the
     *   search query passed to setSearchQuery().
     * - T_FIELD_NAME followed by T_FIELD_VALUE calls addFilter(name, value).
     * - T_FIELD_NAME followed by T_FIELD_WEIGHT calls addWeight(name, weight).
     * - T_FIELD_SEARCH calls addSearchField(name).
     *
     * @param string $queryExpr The query expression to compile
     * @return $this
     * @throws ParseException When a field name is not followed by a value or
     *                        weight, or when the lexer yields an unexpected token
     */
    public function updateQuery($queryExpr)
    {
        $tokens = Lexer::tokenize($queryExpr);
        $count = count($tokens);
        $searchTerms = array();

        for ($index = 0; $index < $count; $index++) {
            $token = $tokens[$index];
            switch ($token->getType()) {
                case Token::T_STRING:
                    $searchTerms[] = $token->getData();
                    break;
                case Token::T_FIELD_NAME:
                    // A field name is always consumed together with the token after it.
                    if (++$index >= $count) {
                        // Pass the query and position like every other ParseException here.
                        throw new ParseException('Unexpected end of token stream', $queryExpr, $token->getStartPosition());
                    }
                    $nextToken = $tokens[$index];
                    switch ($nextToken->getType()) {
                        case Token::T_FIELD_VALUE:
                            $this->addFilter($token->getData(), $nextToken->getData());
                            break;
                        case Token::T_FIELD_WEIGHT:
                            $this->addWeight($token->getData(), $nextToken->getData());
                            break;
                        default:
                            throw new ParseException('Unexpected ' . Token::getName($nextToken->getType()), $queryExpr, $token->getStartPosition());
                    }
                    break;
                case Token::T_FIELD_SEARCH:
                    $this->addSearchField($token->getData());
                    break;
                default:
                    throw new ParseException('Unexpected ' . Token::getName($token->getType()) . ' (This is a lexer bug, please report it)', $queryExpr, $token->getStartPosition());
            }
        }

        // implode() yields '' when there are no free-text terms; the previous
        // substr('', 1) returned false on PHP versions before 8.0.
        $this->setSearchQuery(implode(' ', $searchTerms));
        return $this;
    }
}
<?php
namespace vierbergenlars\Norch\QueryParser;
/**
 * A single lexical token: a type, the text collected for it, and the position
 * in the input that was recorded when it was created.
 */
class Token
{
    const T_NONE = 0;
    const T_FIELD_NAME = 1;
    const T_STRING = 2;
    const T_FIELD_WEIGHT = 3;
    const T_FIELD_VALUE = 4;
    const T_FIELD_SEARCH = 5;

    /** @var int One of the T_* constants */
    protected $type;

    /** @var string|null Collected text; stays null until addData() is called */
    protected $data = null;

    /** @var int Position passed to the constructor */
    protected $startPos;

    /**
     * @param int $type     One of the T_* constants
     * @param int $startPos Position in the input associated with this token
     */
    public function __construct($type, $startPos)
    {
        $this->type = $type;
        $this->startPos = $startPos;
    }

    /**
     * Appends text to the token's data.
     *
     * @param string $data
     */
    public function addData($data)
    {
        $this->data .= $data;
    }

    /**
     * Unconditionally replaces the token type.
     *
     * @param int $type One of the T_* constants
     */
    public function setType($type)
    {
        $this->type = $type;
    }

    /**
     * Sets the token type only when the token is still T_NONE.
     *
     * @param int $type One of the T_* constants
     */
    public function setTypeIfNone($type)
    {
        if ($this->type == self::T_NONE) {
            $this->type = $type;
        }
    }

    /**
     * Tells whether the token is still untyped or already has the given type.
     *
     * @param int $type One of the T_* constants
     * @return bool
     */
    public function isTypeNoneOr($type)
    {
        return ($this->type == self::T_NONE || $this->type == $type);
    }

    /**
     * @return int One of the T_* constants
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * @return string|null Null when no data was ever added
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * @return int The position passed to the constructor
     */
    public function getStartPosition()
    {
        return $this->startPos;
    }

    /**
     * Returns the constant name for a token type, e.g. 'T_STRING' for 2.
     *
     * The lookup is strict so that values such as null or false are reported
     * as unknown instead of loosely matching T_NONE (0).
     *
     * @param int $token A token type value
     * @return string The constant name, or 'UNKNOWN_TOKEN' when none matches
     */
    public static function getName($token)
    {
        $refl = new \ReflectionClass(__CLASS__);
        $token_name = array_search($token, $refl->getConstants(), true);
        if ($token_name !== false) {
            return $token_name;
        }
        return 'UNKNOWN_TOKEN';
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment