Created
October 2, 2011 23:39
-
-
Save AzeemMichael/1258112 to your computer and use it in GitHub Desktop.
The string tokenizer class allows an application to break a string into tokens.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * The string tokenizer class allows an application to break a string into tokens.
 *
 * @author Azeem Michael
 * @example The following is one example of the use of the tokenizer. The code:
 * <code>
 * <?php
 * $str = "this is:@\t\n a test!";
 * $delim = " !@:'\t\n\0"; // characters that separate tokens
 * $st = new StringTokenizer($str, $delim);
 * echo 'Total tokens: '.$st->countTokens().'<br/>';
 * while ($st->hasMoreTokens()) {
 *     echo $st->nextToken() . '<br/>';
 * }
 * // prints the following output:
 * // Total tokens: 4
 * // this
 * // is
 * // a
 * // test
 * ?>
 * </code>
 */
class StringTokenizer {
    /**
     * The text being tokenized.
     * @var string
     */
    private $string;

    /**
     * Look-ahead token: the value the next call to nextToken() will return,
     * or false once the string is exhausted.
     * @var string|false
     */
    private $token;

    /**
     * The set of delimiter characters; each single character separates tokens.
     * @var string
     */
    private $delim;

    /**
     * Byte offset in $string at which the search for the token following
     * $token resumes.
     * @var int
     */
    private $cursor = 0;

    /**
     * Constructs a string tokenizer for the specified string.
     *
     * The cursor is kept inside the instance rather than in strtok()'s hidden
     * process-wide state, so several tokenizers can be used at the same time
     * without disturbing one another.
     *
     * @param string $str   String to tokenize
     * @param string $delim The set of delimiters (the characters that separate tokens)
     *                      specified at creation time, default to " \n\r\t\0"
     */
    public function __construct($str, $delim=" \n\r\t\0") {
        $this->string = (string) $str;
        $this->delim  = (string) $delim;
        $this->token  = $this->scanNextToken();
    }

    /**
     * Kept only so the class keeps the same set of public methods.
     *
     * Intentionally empty: PHP frees the object's properties on its own, and
     * the former `unset($this)` is a compile-time fatal error on PHP >= 7.1.
     */
    public function __destruct() {
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method can
     * be called before it returns false. The current position is left untouched,
     * so calling this in the middle of an iteration does not replay tokens.
     *
     * @return int - number of remaining tokens
     */
    public function countTokens() {
        // Remember where we are so the counting pass has no visible effect.
        $savedToken  = $this->token;
        $savedCursor = $this->cursor;

        $counter = 0;
        while ($this->hasMoreTokens()) {
            $counter++;
            $this->nextToken();
        }

        $this->token  = $savedToken;
        $this->cursor = $savedCursor;
        return $counter;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string. It
     * does not move the internal pointer in any way. To move the internal pointer
     * to the next element call nextToken()
     *
     * @return boolean - true if has more tokens, false otherwise
     */
    public function hasMoreTokens() {
        // Strict comparison: tokens such as "0" are falsy but perfectly valid.
        return ($this->token !== false);
    }

    /**
     * Returns the next token from this string tokenizer and advances the internal
     * pointer by one.
     *
     * @return string|false - next element in the tokenized string, or false when
     *                        no tokens are left
     */
    public function nextToken() {
        $hold = $this->token;                    // value to hand back
        $this->token = $this->scanNextToken();   // pre-fetch the following token
        return $hold;
    }

    /**
     * Extracts the token that starts at or after the cursor and moves the cursor
     * just past it. Runs of consecutive delimiters are skipped, so empty tokens
     * are never produced.
     *
     * @return string|false - the token, or false when only delimiters (or nothing) remain
     */
    private function scanNextToken() {
        $length = strlen($this->string);
        if ($this->cursor >= $length) {
            return false;
        }

        // Skip any delimiters in front of the token.
        $start = $this->cursor + strspn($this->string, $this->delim, $this->cursor);
        if ($start >= $length) {
            $this->cursor = $length;
            return false;
        }

        // The token runs up to the next delimiter or the end of the string.
        $size = strcspn($this->string, $this->delim, $start);
        $this->cursor = $start + $size;
        return substr($this->string, $start, $size);
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment