Last active
October 2, 2018 22:26
-
-
Save mmstick/3f06f3e6dd84ebec9f74a746945fd80d to your computer and use it in GitHub Desktop.
PHP String Tokenizer: Practicing with PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/php | |
<?php | |
/// TODO: | |
/// - Implement Backslash escapes | |
/**
 * A tagged value produced by one tokenizer step.
 *
 * The `kind` tag decides how `data` is to be read; the pair behaves like a
 * small algebraic data type.
 */
class Token {
    /** Tag of this token: 0 = Normal, 1 = Variable, 2 = Error, 3 = None. */
    public $kind = 0;

    /** String payload that goes with the tag; stays null until one is assigned. */
    public $data = null;
}
/**
 * Splits a string into plain-text tokens and `${name}` variable tokens.
 *
 * Call next() repeatedly; each call consumes the next token from the source
 * string and returns it as a Token.
 */
class Tokenizer {
    // Contains the source string to tokenize from.
    private $data = null;
    // Length of the source string in bytes.
    private $len = 0;
    // Number of bytes consumed so far; also the index of the next byte to read.
    private $read = 0;
    // Bit 1 is set while the byte just consumed was the '$' that opened the
    // current token, i.e. a following '{' would start a variable.
    private $flags = 0;

    /**
     * @param string $data The source string to tokenize.
     */
    public function __construct(string $data) {
        $this->len = strlen($data);
        $this->data = $data;
    }

    /**
     * Consumes and returns the next token.
     *
     * @return Token kind 0 (Normal) with the plain text, kind 1 (Variable)
     *               with the name found between `${` and `}`, kind 2 (Error)
     *               with a message, or kind 3 (None) once the input is
     *               exhausted.
     */
    public function next() {
        $output = new Token;
        $start = $this->read;
        // A '$' seen while producing an earlier token must never influence
        // this one; without this reset "${}{x}" reported "{x}" as a variable.
        $this->flags = 0;

        while ($this->read < $this->len) {
            switch ($this->data[$this->read]) {
            case '$':
                if ($output->kind === 1) {
                    // Inside ${...}: keep scanning, the name is validated
                    // when the closing brace is reached.
                    break;
                }
                if ($start !== $this->read) {
                    // Plain text precedes this '$': emit it now and leave
                    // the '$' for the next call.
                    $output->data = substr($this->data, $start, $this->read - $start);
                    return $output;
                }
                $this->flags |= 1;
                break;
            case '{':
                // Parenthesised on purpose: `!=` binds tighter than `&`.
                if (($this->flags & 1) !== 0) {
                    $output->kind = 1;
                }
                $this->flags = 0;
                break;
            case '}':
                if ($output->kind === 1) {
                    // Skip the leading "${" to get at the variable name.
                    $name_start = $start + 2;
                    $name = substr($this->data, $name_start, $this->read - $name_start);
                    // Consume the closing brace before returning.
                    $this->read++;
                    if (preg_match("/[^A-Za-z0-9\_]/", $name)) {
                        $output->data = "invalid characters in variable";
                        $output->kind = 2;
                    } else {
                        $output->data = $name;
                    }
                    return $output;
                }
                $this->flags = 0;
                break;
            default:
                // Any other byte means the '$' (if any) did not open a variable.
                $this->flags = 0;
            }
            $this->read++;
        }

        if ($output->kind === 1) {
            // Input ended while still inside "${".
            $output->kind = 2;
            $output->data = "variable not terminated";
            return $output;
        }
        if ($start === $this->len) {
            // Nothing was left to read.
            $output->kind = 3;
            return $output;
        }
        // Trailing plain text; substr() takes a length, not an end offset.
        $output->data = substr($this->data, $start, $this->read - $start);
        return $output;
    }
}
/**
 * Tokenizes $token_string and prints one line per token.
 *
 * Normal and Variable tokens are printed and scanning continues; an Error
 * token is printed and ends the scan; a None token ends the scan silently.
 *
 * @param string $token_string The text to tokenize.
 */
function tokenize(string $token_string) {
    $tokenizer = new Tokenizer($token_string);

    while (true) {
        $token = $tokenizer->next();

        if ($token->kind == 0) {
            echo "Normal { " . $token->data . " }\n";
        } elseif ($token->kind == 1) {
            echo "Variable { " . $token->data . " }\n";
        } elseif ($token->kind == 2) {
            // An error aborts tokenizing after it has been reported.
            echo "Error { " . $token->data . " }\n";
            return;
        } elseif ($token->kind == 3) {
            // End of input.
            return;
        }
    }
}
// Script entry point: glue every command-line argument after the script name
// back together with single spaces and tokenize the result.
$token_string = implode(" ", array_slice($argv, 1));
tokenize($token_string);
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/php | |
<?php | |
/**
 * Tokenizes $input with a single regular expression and prints one line per
 * token: `Variable { name }` for `${name}` and `Normal { text }` for a run of
 * characters that contains no '$'.
 *
 * A match whose captured text is the empty string (e.g. `${}`) prints
 * nothing. Input that neither alternative can match at the current position,
 * such as a '$' that is not followed by '{', is skipped by the regex engine
 * and also prints nothing.
 *
 * @param string $input The text to tokenize.
 */
function tokenize(string $input) {
    // Group 1 captures a variable name, group 2 a run of plain text. With
    // PREG_PATTERN_ORDER the group that did not take part in a match is ''.
    preg_match_all('/\${([^}]*)}|\G([^$]+)/', $input, $matches, PREG_PATTERN_ORDER);

    $mlen = count($matches[0]);
    for ($mid = 0; $mid < $mlen; $mid++) {
        $variable = $matches[1][$mid];
        $normal = $matches[2][$mid];

        // Compare against '' instead of using empty(): empty('0') is true,
        // which silently dropped tokens such as "0" and "${0}".
        if ($variable !== '') {
            echo 'Variable { ', $variable, " }\n";
        } elseif ($normal !== '') {
            echo 'Normal { ', $normal, " }\n";
        }
    }
}
// Script entry point: glue every command-line argument after the script name
// back together with single spaces and tokenize the result.
$token_string = implode(" ", array_slice($argv, 1));
tokenize($token_string);
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment