Skip to content

Instantly share code, notes, and snippets.

@wrossmann
Created April 1, 2015 22:34
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save wrossmann/774677bb7e82e4c6e128 to your computer and use it in GitHub Desktop.
String tokenizer class that allows multiple simultaneous token chars and multiple simultaneous tokenizers.
<?php
/**
 * Stateful string tokenizer that splits on any one of a set of delimiter
 * ("token") characters.
 *
 * Each instance carries its own cursor, so several tokenizers can be advanced
 * side by side. Matching is byte-wise: every byte of the token set is treated
 * as an independent delimiter.
 */
class StrTok {
    const INCL_PREV = 1; // include previous token at beginning of string
    const INCL_CUR = 2; // include current token at end of string
    const SKIP_EMPTY = 4; // skip empty strings

    // $strlen is declared explicitly so the constructor does not create a
    // dynamic property (deprecated as of PHP 8.2).
    private $string, $strlen, $tokens, $flags, $curpos;

    /**
     * @param string $string The text to split.
     * @param string $tokens Delimiter characters; each byte is a separate delimiter.
     * @param int    $flags  Bitmask of INCL_PREV, INCL_CUR and SKIP_EMPTY.
     */
    public function __construct($string, $tokens, $flags=0) {
        $this->string = $string;
        $this->strlen = strlen($string);
        $this->tokens = $tokens;
        $this->flags = $flags;
        $this->curpos = 0;
    }

    /**
     * Tells whether the given flag bit is set on this tokenizer.
     *
     * @param int $flag One of the class flag constants.
     * @return bool
     */
    private function checkFlag($flag) {
        return ($flag & $this->flags) !== 0;
    }

    /**
     * Returns the next piece of the string and advances the cursor past the
     * delimiter that ended it.
     *
     * @return string|false The next piece, or false once the cursor has
     *                      reached the end of the string.
     */
    public function tok() {
        // Iterate instead of recursing so that a long run of skipped empty
        // pieces (SKIP_EMPTY) cannot exhaust the call stack.
        while( $this->curpos < $this->strlen ) {
            $start = $this->curpos;

            // strcspn() counts the bytes from $start up to the next delimiter.
            // Unlike a regex character class it needs no quoting, so token
            // sets containing '/' (or an empty token set) are handled safely.
            $tokpos = $start + strcspn($this->string, $this->tokens, $start);

            // With INCL_PREV the piece begins one byte earlier, i.e. on the
            // delimiter that closed the previous piece (nothing precedes
            // offset 0, so the first piece is left untouched).
            if( $this->checkFlag(self::INCL_PREV) && $start > 0 ) {
                $start--;
            }

            if( $tokpos >= $this->strlen ) {
                // token not found, return rest of string
                $this->curpos = $this->strlen + 1;
                $piece = substr($this->string, $start);
            } else {
                // resume just after the delimiter on the next call
                $this->curpos = $tokpos + 1;
                $length = $tokpos - $start + ($this->checkFlag(self::INCL_CUR) ? 1 : 0);
                $piece = substr($this->string, $start, $length);
            }

            if( $piece !== '' || !$this->checkFlag(self::SKIP_EMPTY) ) {
                return $piece;
            }
        }
        return false;
    }
}
// Usage example: split on '*', '^' or '%', keeping each divider at the front
// of the piece that follows it and dropping empty pieces.
$text = "*Here's some text^that is meant*to be separated^based on where%the divider characters^are";
$tokenizer = new StrTok($text, '*^%', StrTok::INCL_PREV | StrTok::SKIP_EMPTY);
for( $piece = $tokenizer->tok(); $piece !== false; $piece = $tokenizer->tok() ) {
    echo $piece, "\n";
}
/* Expected output:
*Here's some text
^that is meant
*to be separated
^based on where
%the divider characters
^are
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment