Skip to content

Instantly share code, notes, and snippets.

@mmstick
Last active October 2, 2018 22:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mmstick/3f06f3e6dd84ebec9f74a746945fd80d to your computer and use it in GitHub Desktop.
Save mmstick/3f06f3e6dd84ebec9f74a746945fd80d to your computer and use it in GitHub Desktop.
PHP String Tokenizer: Practicing with PHP
#!/bin/php
<?php
/// TODO:
/// - Implement Backslash escapes
/**
 * A small tagged value: the tag in $kind says how the string in $data
 * should be interpreted (plain text, a variable name, or an error message).
 */
class Token
{
    /** Tag for this token: 0 = Normal; 1 = Variable; 2 = Error; 3 = None */
    public $kind = 0;

    /** The string associated with the tag above; null until something is stored. */
    public $data = null;
}
/**
 * Splits a string into runs of plain text and `${name}` variable references.
 *
 * Call next() repeatedly; every call consumes one token from the input and
 * returns it as a Token whose kind is 0 (Normal), 1 (Variable), 2 (Error)
 * or 3 (None, meaning the input is exhausted).
 */
class Tokenizer {
    // Contains the source string to tokenize from.
    private $data = null;
    // Length of $data as reported by strlen() (i.e. in bytes).
    private $len = 0;
    // Offset of the next unread character in $data.
    private $read = 0;

    /**
     * @param string $data The text to tokenize.
     */
    public function __construct(string $data) {
        $this->len = strlen($data);
        $this->data = $data;
    }

    /**
     * Consumes and returns the next token.
     *
     * - Normal (0):   data is a run of text up to, but excluding, the next '$'
     *                 (a '$' that is the first character of the run is kept).
     * - Variable (1): data is the name between "${" and "}". An empty name
     *                 passes validation and is returned as an empty string.
     * - Error (2):    data is "invalid characters in variable" when the name
     *                 contains anything other than A-Z, a-z, 0-9 or '_', or
     *                 "variable not terminated" when the input ends before '}'.
     * - None (3):     nothing was left to read; data stays null.
     *
     * @return Token
     */
    public function next() {
        $output = new Token;
        $start = $this->read;
        // True only while the previous character was a '$' that opened this
        // token. Kept local so that it can never leak into a later call.
        $afterDollar = false;

        while ($this->read < $this->len) {
            switch ($this->data[$this->read]) {
                case '$':
                    if ($output->kind === 1) {
                        // Inside "${...}": keep scanning, the name check at
                        // the closing '}' will reject it.
                        break;
                    }
                    if ($start !== $this->read) {
                        // Plain text is pending: emit it and leave the '$'
                        // unread so the next call starts on it.
                        $output->data = substr($this->data, $start, $this->read - $start);
                        return $output;
                    }
                    $afterDollar = true;
                    break;
                case '{':
                    // Only a '{' directly after the opening '$' starts a variable.
                    if ($afterDollar) {
                        $output->kind = 1;
                    }
                    $afterDollar = false;
                    break;
                case '}':
                    if ($output->kind === 1) {
                        // Skip the leading "${" to get at the name itself.
                        $nameStart = $start + 2;
                        $name = substr($this->data, $nameStart, $this->read - $nameStart);
                        // Consume the '}' before returning.
                        $this->read++;
                        if (preg_match('/[^A-Za-z0-9_]/', $name)) {
                            $output->data = "invalid characters in variable";
                            $output->kind = 2;
                        } else {
                            $output->data = $name;
                        }
                        return $output;
                    }
                    $afterDollar = false;
                    break;
                default:
                    $afterDollar = false;
            }
            $this->read++;
        }

        if ($output->kind === 1) {
            // Input ended while still inside "${...".
            $output->kind = 2;
            $output->data = "variable not terminated";
        } elseif ($start === $this->len) {
            $output->kind = 3;
        } else {
            // substr() takes a length, not an end offset.
            $output->data = substr($this->data, $start, $this->read - $start);
        }
        return $output;
    }
}
// Runs a Tokenizer over $token_string and prints one line per token:
// "Normal { ... }", "Variable { ... }" or "Error { ... }".
// Printing stops after the first Error token or once a None token (kind 3)
// signals that the input is exhausted.
function tokenize(string $token_string) {
    // Output prefix for each printable token kind; kind 3 (None) prints nothing.
    $prefixes = [
        0 => "Normal { ",
        1 => "Variable { ",
        2 => "Error { ",
    ];

    $tokenizer = new Tokenizer($token_string);
    for (;;) {
        $token = $tokenizer->next();
        $kind = $token->kind;

        if (isset($prefixes[$kind])) {
            echo $prefixes[$kind], $token->data, " }\n";
        }

        // Error and None both end the run.
        if ($kind == 2 || $kind == 3) {
            return;
        }
    }
}
// Everything after the script name on the command line, glued together with
// single spaces, is the text to tokenize.
$token_string = implode(" ", array_slice($argv, 1));
tokenize($token_string);
?>
#!/bin/php
<?php
// Prints the tokens found in $input, one per line: "Variable { name }" for
// each "${name}" and "Normal { text }" for each run of text without a '$'.
//
// The Normal alternative is anchored with \G, so it only matches directly
// where the previous match ended. A '$' that does not start a complete
// "${...}" is matched by neither alternative, so it and the text after it
// (up to the next "${...}") produce no output. An empty "${}" prints nothing.
function tokenize(string $input) {
    preg_match_all('/\${([^}]*)}|\G([^$]+)/', $input, $matches, PREG_PATTERN_ORDER);

    foreach (array_keys($matches[0]) as $mid) {
        $variable = $matches[1][$mid];
        $normal = $matches[2][$mid];

        // Compare against '' rather than using empty(): empty('0') is true,
        // which would silently drop a token whose text is exactly "0".
        if ($variable !== '') {
            echo 'Variable { ', $variable, " }\n";
        } elseif ($normal !== '') {
            echo 'Normal { ', $normal, " }\n";
        }
    }
}
// Everything after the script name on the command line, glued together with
// single spaces, is the text to tokenize.
$token_string = implode(" ", array_slice($argv, 1));
tokenize($token_string);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment