Last active
August 30, 2022 19:13
-
-
Save WinterSilence/7584564280d27980bc08cd1e35c0f2df to your computer and use it in GitHub Desktop.
Extended PhpToken with more `is*` methods + iterator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace EnsoStudio\PhpTokenizer; | |
use ArrayIterator; | |
use FilterIterator; | |
use SeekableIterator; | |
use OutOfBoundsException; | |
use PhpToken; | |
/** | |
* Iterates the PHP tokens (`PhpToken` instances). | |
* | |
* This class permits access to the methods of the inner iterator (`ArrayIterator`) via the `__call()` method.
* | |
* @method PhpToken|null current() | |
* @method int|null key() | |
* @method ArrayIterator getInnerIterator() | |
* @method PhpToken[] getArrayCopy() | |
* @method void seek(int $offset) | |
* @method bool uasort(callable $callback) | |
* @method bool uksort(callable $callback) | |
* @method bool asort(int $flags = SORT_REGULAR) | |
* @method bool ksort(int $flags = SORT_REGULAR) | |
* @method bool natcasesort() | |
* @method bool natsort() | |
* @method PhpToken|null offsetGet(int $offset) | |
* @method void offsetSet(int $offset, PhpToken $value) | |
* @method void offsetUnset(int $offset) | |
*/ | |
class TokenIterator extends FilterIterator implements SeekableIterator
{
    /**
     * @var string Prefix that inverts the filter result: the filter `'!isIgnorable'` is equal to
     * `!$token->isIgnorable()`
     */
    public const NOT = '!';

    /**
     * @var string|null Optional, name of the token method called to check whether a token is acceptable
     */
    protected $filterBy;

    /**
     * @var bool Whether to invert the filter result
     */
    protected $invertedFilter = false;

    /**
     * @var int|string|array|null Optional, either a single value to match the token's ID or textual content, or an
     * array thereof. When not NULL it is passed as the only argument of the filter method.
     */
    protected $filterKind;

    /**
     * @var bool[] The filter results indexed by token key; reset whenever `__call()` mutates the inner iterator
     */
    protected $filterCache = [];

    /**
     * @var int The key of the last token (`-1` when there are no tokens)
     */
    protected $lastKey;

    /**
     * @var int The position of the pinned token
     * @see self::pin()
     */
    protected $firstKey = 0;

    /**
     * @param PhpToken[] $tokens The token instances, re-indexed from zero
     * @param string|null $filter The name of a token method, optionally prefixed with {@see self::NOT}. Defaults
     * to `is` when only `$filterKind` is given.
     * @param int|string|array|null $filterKind Either a single value to match the token's ID, textual content, or
     * an array thereof
     */
    public function __construct(array $tokens, ?string $filter = null, $filterKind = null)
    {
        $innerIterator = new ArrayIterator(\array_values($tokens));
        parent::__construct($innerIterator);

        // `count()` takes no arguments; built-in methods reject extra arguments on PHP 8.
        $this->lastKey = $innerIterator->count() - 1;

        if ($filter !== null || $filterKind !== null) {
            if ($filter === null) {
                $this->filterBy = 'is';
            } else {
                $this->filterBy = \ltrim($filter, self::NOT);
                $this->invertedFilter = $this->filterBy !== $filter;
            }
            $this->filterKind = $filterKind;
        }
    }

    /**
     * Checks whether the current token of the iterator is acceptable.
     *
     * @return bool FALSE when the iterator points to no token
     */
    public function accept(): bool
    {
        $key = $this->key();
        if ($key === null) {
            return false;
        }

        return $this->isAccepted($key, $this->current());
    }

    /**
     * Seeks to a given position in the inner iterator.
     *
     * Note: if you want to iterate the tokens using `foreach`, call `self::pin()` after this.
     *
     * @param int $offset The position to seek to
     * @return void
     * @throws OutOfBoundsException if the $offset is not seekable
     */
    #[\ReturnTypeWillChange]
    public function seek(int $offset)
    {
        $this->getInnerIterator()->seek($offset);
    }

    /**
     * Pins the current element: `self::rewind()` then rewinds to the pinned element instead of the first one.
     *
     * ~~~php
     * // seek to second token
     * $tokenIterator->seek(1);
     * foreach ($tokenIterator as $token) {
     *     // start from first token
     * }
     * foreach ($tokenIterator->pin() as $key => $token) {
     *     // start from second token
     * }
     * ~~~
     *
     * @return $this
     */
    public function pin(): self
    {
        // Without a current element there is nothing to pin: fall back to the first element so that
        // `rewind()` never seeks to NULL.
        $this->firstKey = $this->key() ?? 0;

        return $this;
    }

    /**
     * Resets the pinned element.
     *
     * @return void
     */
    public function unpin()
    {
        $this->firstKey = 0;
    }

    /**
     * Checks whether the iterator points to the last token or past the end.
     *
     * @return bool
     */
    public function isEnd(): bool
    {
        return !$this->valid() || $this->key() === $this->lastKey;
    }

    /**
     * Returns the position of the first accepted token, after the current one, that starts on another line.
     *
     * The position of the iterator is not changed.
     *
     * @return int|null Token position or NULL if there is no such token
     */
    public function findNextLine()
    {
        $current = $this->current();
        if ($current === null) {
            return null;
        }
        $line = $current->line;

        return $this->findForward(static fn ($token): bool => $token->line !== $line);
    }

    /**
     * Finds the next accepted token of the given kind, after the current one, and returns its position.
     *
     * The position of the iterator is not changed.
     *
     * @param int|string|array|null $kind Either a single value to match the token's ID, textual content, or an array
     * thereof
     * @return int|null Token position or NULL if there is no such token
     */
    public function findNextKind($kind)
    {
        return $this->findForward(static fn ($token): bool => $token->is($kind));
    }

    /**
     * Checks whether the token right after the current one is of the given kind.
     *
     * @param int|string|array|null $kind Either a single value to match the token's ID, textual content, or an array
     * thereof
     * @return bool FALSE when there is no next token
     */
    public function isNextKind($kind): bool
    {
        $inner = $this->getInnerIterator();
        $key = $this->key();
        if ($key === null || !$inner->offsetExists($key + 1)) {
            return false;
        }

        return $inner->offsetGet($key + 1)->is($kind);
    }

    /**
     * Finds the closing brace matching the given opening brace and returns its position.
     *
     * The search starts after the current token, which is expected to be the opening brace, and skips nested
     * pairs of the same kind. A value that is not a key of {@see Token::BRACES} is treated as the kind of the
     * closing token itself and the first matching token is returned.
     *
     * @param int|string|array|null $brace Either a single value to match the token's ID, textual content, or an array
     * thereof. {@see Token::BRACES}
     * @return int|null Token position or NULL if there is no such token
     */
    public function findClosingBrace($brace)
    {
        // Arrays cannot be used as array offsets, so only scalar IDs are looked up in the map.
        if ((!\is_int($brace) && !\is_string($brace)) || !isset(Token::BRACES[$brace])) {
            return $this->findNextKind($brace);
        }

        $close = Token::BRACES[$brace];
        $openers = [(int) $brace];
        if ($close === \ord('}')) {
            // `{$` and `${` inside interpolated strings are closed by a plain `}` too.
            $openers[] = \T_CURLY_OPEN;
            $openers[] = \T_DOLLAR_OPEN_CURLY_BRACES;
        }

        $depth = 0;

        return $this->findForward(static function ($token) use ($openers, $close, &$depth): bool {
            if ($token->is($openers)) {
                $depth++;
                return false;
            }
            if (!$token->is($close)) {
                return false;
            }
            if ($depth === 0) {
                return true;
            }
            $depth--;
            return false;
        });
    }

    /**
     * Returns the tokens accepted by the filter, re-indexed from zero.
     *
     * The position of the iterator is not changed.
     *
     * @return PhpToken[]
     */
    public function toArray(): array
    {
        $accepted = [];
        foreach ($this->getInnerIterator()->getArrayCopy() as $key => $token) {
            if ($this->isAccepted($key, $token)) {
                $accepted[] = $token;
            }
        }

        return $accepted;
    }

    /**
     * Returns a new instance with the given filter.
     *
     * @param string $filter The name of a token method, optionally prefixed with {@see self::NOT}
     * @param int|string|array|null $filterKind Either a single value to match the token's ID, textual content, or
     * an array thereof
     * @param bool $filteredTokens If TRUE, the new instance contains only the tokens accepted by the current filter,
     * else all tokens
     * @return static
     */
    public function withFilter(string $filter, $filterKind = null, bool $filteredTokens = true): self
    {
        $tokens = $filteredTokens ? $this->toArray() : $this->getInnerIterator()->getArrayCopy();

        return new static($tokens, $filter, $filterKind);
    }

    /**
     * Returns the key of the inner iterator.
     *
     * The parent implementation is refreshed only by `self::rewind()` and `self::next()`, so it would not reflect
     * `self::seek()`.
     *
     * @return int|null
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->getInnerIterator()->key();
    }

    /**
     * Returns the current token of the inner iterator.
     *
     * The parent implementation is refreshed only by `self::rewind()` and `self::next()`, so it would not reflect
     * `self::seek()`.
     *
     * @return PhpToken|null
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->getInnerIterator()->current();
    }

    /**
     * Checks whether the inner iterator points to a token.
     *
     * Overridden for the same reason as `self::key()` and `self::current()`: the parent answer is stale before
     * the first `self::rewind()` and after `self::seek()`.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->getInnerIterator()->valid();
    }

    /**
     * Rewinds to the first element or to the pinned element.
     *
     * @return void
     * @see self::pin()
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        parent::rewind();
        if ($this->firstKey !== 0) {
            $this->seek($this->firstKey);
        }
    }

    /**
     * Permits access to the methods of the inner iterator (`ArrayIterator`).
     *
     * After a call that changes the inner array (sorting, `offsetSet`, `offsetUnset`, `append`) the filter cache
     * and the last key are refreshed. After a sorting method (`asort`, `ksort`, etc.) the iterator is rewound too.
     *
     * @param string $name The name of the `ArrayIterator` method
     * @param array $arguments The method arguments
     * @return mixed
     */
    public function __call(string $name, array $arguments)
    {
        $inner = $this->getInnerIterator();
        $result = $inner->{$name}(...$arguments);

        // Method names are case-insensitive in PHP.
        $method = \strtolower($name);
        $sorted = \str_contains($method, 'sort');
        if ($sorted || \in_array($method, ['offsetset', 'offsetunset', 'append'], true)) {
            $this->filterCache = [];
            $this->lastKey = \array_key_last($inner->getArrayCopy()) ?? -1;
        }
        if ($sorted) {
            $this->rewind();
        }

        return $result;
    }

    /**
     * This method is called by `var_dump()`/`print_r()` when dumping an object to get the properties that should be
     * shown.
     *
     * @return array
     */
    public function __debugInfo(): array
    {
        return $this->toArray();
    }

    /**
     * Applies the filter to the given token, caching the result by key.
     *
     * @param int $key The key of the token in the inner iterator
     * @param PhpToken $token The token to check
     * @return bool
     */
    private function isAccepted($key, $token): bool
    {
        if ($this->filterBy === null) {
            return true;
        }
        if (!isset($this->filterCache[$key])) {
            // Built-in methods without parameters (e.g. `isIgnorable()`) reject extra arguments, so the kind is
            // passed only when it was given.
            $accepted = $this->filterKind === null
                ? (bool) $token->{$this->filterBy}()
                : (bool) $token->{$this->filterBy}($this->filterKind);
            $this->filterCache[$key] = $this->invertedFilter ? !$accepted : $accepted;
        }

        return $this->filterCache[$key];
    }

    /**
     * Scans the accepted tokens located after the current one without moving the iterator.
     *
     * Assumes consecutive integer keys, as created by the constructor; the scan stops at the first missing key.
     *
     * @param callable $matches Receives a token, returns TRUE to stop the scan on it
     * @return int|null The key of the first matching token or NULL
     */
    private function findForward(callable $matches): ?int
    {
        $start = $this->key();
        if ($start === null) {
            return null;
        }

        $inner = $this->getInnerIterator();
        for ($key = $start + 1; $inner->offsetExists($key); $key++) {
            $token = $inner->offsetGet($key);
            if ($this->isAccepted($key, $token) && $matches($token)) {
                return $key;
            }
        }

        return null;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace EnsoStudio\PhpTokenizer; | |
// IDs of the single-character tokens: each one is the ASCII code of its character, matching the keys of
// `Token::EXTRA_TOKENS`.
const T_BRACE_OPEN = 40;          // (
const T_BRACE_CLOSE = 41;         // )
const T_COMMA = 44;               // ,
const T_SEMICOLON = 59;           // ;
const T_ASSIGN = 61;              // =
const T_SQUARE_BRACE_OPEN = 91;   // [
const T_SQUARE_BRACE_CLOSE = 93;  // ]
const T_CURLY_BRACE_OPEN = 123;   // {
const T_CURLY_BRACE_CLOSE = 125;  // }
// Was 260, which is not the code of `.` and broke the pattern shared by every constant above.
const T_DOT = 46;                 // .
/** | |
* Extended PHP token containing methods to detect the token type/group.
*/ | |
class Token extends \PhpToken
{
    /**
     * The extra tokens as `text => [ID, name]`, note that the IDs can change in PHP 8!
     */
    public const EXTRA_TOKENS = [
        '(' => [T_BRACE_OPEN, 'T_BRACE_OPEN'],
        ')' => [T_BRACE_CLOSE, 'T_BRACE_CLOSE'],
        ',' => [T_COMMA, 'T_COMMA'],
        ';' => [T_SEMICOLON, 'T_SEMICOLON'],
        '=' => [T_ASSIGN, 'T_ASSIGN'],
        '[' => [T_SQUARE_BRACE_OPEN, 'T_SQUARE_BRACE_OPEN'],
        ']' => [T_SQUARE_BRACE_CLOSE, 'T_SQUARE_BRACE_CLOSE'],
        '{' => [T_CURLY_BRACE_OPEN, 'T_CURLY_BRACE_OPEN'],
        '}' => [T_CURLY_BRACE_CLOSE, 'T_CURLY_BRACE_CLOSE'],
        '.' => [T_DOT, 'T_DOT'],
    ];

    /**
     * The braces as open => close pairs of token IDs
     */
    public const BRACES = [
        T_BRACE_OPEN => T_BRACE_CLOSE,
        T_SQUARE_BRACE_OPEN => T_SQUARE_BRACE_CLOSE,
        T_CURLY_BRACE_OPEN => T_CURLY_BRACE_CLOSE,
    ];

    /**
     * @var string|false|null The cached name of the token (`T_*` constant name), FALSE if the token has no name,
     * NULL if the name is not resolved yet
     */
    protected $tokenName;

    /**
     * Returns the name of the token.
     *
     * Single-character tokens listed in {@see self::EXTRA_TOKENS} get the `T_*` name declared there.
     *
     * @return string|null
     */
    public function getTokenName(): ?string
    {
        if (!isset($this->tokenName)) {
            $name = parent::getTokenName();
            // The parent reports a single-character token by the character itself, never by NULL, so both
            // cases have to be mapped through the extra tokens.
            if (($name === null || $name === $this->text) && isset(static::EXTRA_TOKENS[$this->text])) {
                $name = static::EXTRA_TOKENS[$this->text][1];
            }
            $this->tokenName = $name ?? false;
        }

        // Strict check: a falsy name such as `'0'` must not be turned into NULL.
        return $this->tokenName === false ? null : $this->tokenName;
    }

    /**
     * This method is called by `var_dump()`/`print_r()` when dumping an object to get the properties that should be
     * shown.
     *
     * @return array{id: int, text: string, line: int, pos: int, name: string|null}
     */
    public function __debugInfo(): array
    {
        $info = \get_object_vars($this);
        // The internal cache is exposed under the `name` key instead.
        unset($info['tokenName']);
        if (!$this->isNumber() && !$this->isIdentifier()) {
            $info['text'] = \htmlspecialchars($info['text'], ENT_NOQUOTES | ENT_HTML5);
        }
        $info['name'] = $this->getTokenName();

        return $info;
    }

    /**
     * Tells whether the token is the `class`, `interface` or `trait` keyword.
     *
     * @return bool
     */
    public function isClassLike(): bool
    {
        return $this->is([\T_CLASS, \T_INTERFACE, \T_TRAIT]);
    }

    /**
     * Tells whether the token is the `function` or `fn` keyword.
     *
     * @return bool
     */
    public function isFunction(): bool
    {
        $tokens = \defined('\T_FN') ? [\T_FUNCTION, \T_FN] : [\T_FUNCTION];

        return $this->is($tokens);
    }

    /**
     * Tells whether the token is a variable.
     *
     * @return bool
     */
    public function isVariable(): bool
    {
        return $this->is(\T_VARIABLE);
    }

    /**
     * Tells whether the token is a modifier keyword (visibility, `static`, `abstract`, `final` or `global`).
     *
     * @return bool
     */
    public function isModifier(): bool
    {
        return $this->is([\T_PRIVATE, \T_PROTECTED, \T_PUBLIC, \T_STATIC, \T_ABSTRACT, \T_FINAL, \T_GLOBAL]);
    }

    /**
     * Tells whether the token is a type cast (`(type)`).
     *
     * @return bool
     */
    public function isTypeCast(): bool
    {
        return $this->is(
            [\T_UNSET_CAST, \T_STRING_CAST, \T_OBJECT_CAST, \T_INT_CAST, \T_DOUBLE_CAST, \T_BOOL_CAST, \T_ARRAY_CAST]
        );
    }

    /**
     * Tells whether the token is an integer or floating-point number.
     *
     * @return bool
     */
    public function isNumber(): bool
    {
        return $this->is([\T_LNUMBER, \T_DNUMBER]);
    }

    /**
     * Tells whether the token is a string (`T_STRING`, `T_NUM_STRING`, `T_ENCAPSED_AND_WHITESPACE` or
     * `T_CONSTANT_ENCAPSED_STRING`).
     *
     * @return bool
     */
    public function isString(): bool
    {
        return $this->is([\T_STRING, \T_NUM_STRING, \T_ENCAPSED_AND_WHITESPACE, \T_CONSTANT_ENCAPSED_STRING]);
    }

    /**
     * Tells whether the token is a string-identifier (e.g. keywords like `parent`, `self` and function names).
     *
     * @return bool
     */
    public function isIdentifier(): bool
    {
        return $this->is(\T_STRING);
    }

    /**
     * Tells whether the token is a PHP comment.
     *
     * @return bool
     */
    public function isComment(): bool
    {
        return $this->is([\T_DOC_COMMENT, \T_COMMENT]);
    }

    /**
     * Tells whether the token is a DocBlock (a `T_DOC_COMMENT` token whose text matches the DocBlock pattern).
     *
     * @return bool
     */
    public function isDocBlock(): bool
    {
        return $this->is(\T_DOC_COMMENT) && \preg_match('~^\s*\/\*\*\s*.+\*\/\s*$~s', $this->text) === 1;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment