Skip to content

Instantly share code, notes, and snippets.

@hakre
Created December 30, 2012 20:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hakre/4415105 to your computer and use it in GitHub Desktop.
Save hakre/4415105 to your computer and use it in GitHub Desktop.
Some very rudimentary Tagsoup Example
<?php
/**
 * Tagsoup
 *
 * Very small regex-based tokenizer for markup-like strings. It recognises
 * comments, start tags, end tags and the plain text between them. It does not
 * build a tree and does not check nesting.
 */
class Tagsoup
{
    const NODETYPE_COMMENT = 1;
    const NODETYPE_TEXT = 2;
    const NODETYPE_STARTTAG = 3;
    const NODETYPE_ENDTAG = 4;

    /**
     * Returns the token that begins at $offset in $string.
     *
     * All token patterns are searched from $offset onwards and the earliest
     * match wins. When that match starts after $offset, the bytes in between
     * are returned as a text token instead; when nothing matches at all, the
     * remainder of the string is returned as a text token.
     *
     * Limitations of the patterns: tag and attribute names consist of ASCII
     * letters only (with an optional "prefix:"), and attribute values must be
     * double-quoted and non-empty. Anything else is treated as text.
     *
     * @param string $string input to tokenize
     * @param int $offset byte offset at which the token must start
     * @return array|false array($type, $offset, $length, $text) where $type is
     *                     one of the NODETYPE_* constants, or FALSE when
     *                     $offset is at or beyond the end of $string
     */
    public static function parseAt($string, $offset) {
        // i: case-insensitive names, s: "." also matches newlines (multi-line comments).
        $mode = 'is';
        $tokens = [
            // Lazy ".*?" so a comment ends at the FIRST "-->". The greedy form
            // swallowed everything up to the last "-->" in the whole input.
            self::NODETYPE_COMMENT => '<!--.*?-->',
            self::NODETYPE_STARTTAG => '<(?:[a-z]+:)?[a-z]+(\s+(?:[a-z]+:)?[a-z]+="[^"]+")*\s*>',
            self::NODETYPE_ENDTAG => '<\/(?:[a-z]+:)?[a-z]+\s*>',
        ];
        $rest = self::NODETYPE_TEXT;

        // $found tracks the start of the earliest token seen so far; it starts
        // at the end of the string, meaning "no token found".
        $found = strlen($string);
        $foundToken = false;
        if ($offset >= $found) {
            return $foundToken;
        }

        foreach ($tokens as $name => $token) {
            $r = preg_match("/$token/$mode", $string, $matches, PREG_OFFSET_CAPTURE, $offset);
            if ($r) {
                $position = $matches[0][1];
                if ($position < $found) {
                    $found = $position;
                    $foundToken = array($name, $position, strlen($matches[0][0]), $matches[0][0]);
                }
            }
        }

        // Text precedes the earliest token (or no token exists): emit the text first.
        if ($found > $offset) {
            $buffer = substr($string, $offset, $found - $offset);
            $foundToken = array($rest, $offset, strlen($buffer), $buffer);
        }

        return $foundToken;
    }
}
/**
 * One token of the input: its type, where it starts, how long it is and its
 * raw text. Instances are created through the static factories only (the
 * constructor is protected).
 */
class TagsoupNode extends Tagsoup
{
    protected $type;
    protected $string;
    protected $length;
    protected $offset;

    /**
     * @param int $type one of the NODETYPE_* constants
     * @param int $offset start offset of the token
     * @param int $length length of the token
     * @param string $string raw token text
     */
    protected function __construct($type, $offset, $length, $string) {
        $this->type = $type;
        $this->offset = $offset;
        $this->length = $length;
        $this->string = $string;
    }

    /**
     * Builds a node from a four-element token array in the order
     * array($type, $offset, $length, $string).
     *
     * @param array $array token array with keys 0..3
     * @return TagsoupNode
     * @throws InvalidArgumentException when a field is missing or the type is unknown
     */
    public static function createFromArray(array $array) {
        // Fail with a clear message instead of relying on list() notices for short arrays.
        foreach ([0, 1, 2, 3] as $key) {
            if (!array_key_exists($key, $array)) {
                throw new InvalidArgumentException(sprintf("Missing token field %d.", $key));
            }
        }
        list($type, $offset, $length, $string) = $array;
        return self::create($type, $offset, $length, $string);
    }

    /**
     * Creates the node object that fits the token type: text and comment
     * tokens become a TagsoupNode, start and end tags become a TagsoupTag.
     *
     * @param int $type
     * @param int $offset
     * @param int $length
     * @param string $string
     * @return TagsoupNode
     * @throws InvalidArgumentException when $type is not a known NODETYPE_* value
     */
    public static function create($type, $offset, $length, $string) {
        switch ($type) {
            case self::NODETYPE_TEXT:
            case self::NODETYPE_COMMENT:
                return new TagsoupNode($type, $offset, $length, $string);
            case self::NODETYPE_STARTTAG:
            case self::NODETYPE_ENDTAG:
                return new TagsoupTag($type, $offset, $length, $string);
            default:
                throw new InvalidArgumentException(sprintf("Unknown Type '%s'.", $type));
        }
    }

    /**
     * @return int one of the NODETYPE_* constants
     */
    public function getType() {
        return $this->type;
    }

    /**
     * @return int offset of the first byte of the token
     */
    public function getStart() {
        return $this->offset;
    }

    /**
     * @return int offset just past the token (start + length)
     */
    public function getEnd() {
        return $this->offset + $this->length;
    }

    /**
     * @return int
     */
    public function getLength() {
        return $this->length;
    }

    /**
     * @return string raw token text
     */
    public function __toString() {
        // __toString() must return a string; the cast protects against a
        // non-string value handed to create().
        return (string)$this->string;
    }
}
/**
 * Node for a start or end tag; adds access to the tag name and its
 * namespace prefix.
 */
class TagsoupTag extends TagsoupNode
{
    /**
     * Returns the tag name including an optional "prefix:" part, taken from
     * the beginning of the raw tag text ("<name" or "</name").
     *
     * @return string|null the tag name, or NULL when the raw text does not
     *                     start like a tag
     */
    public function getTagName() {
        // Check the match result before reading $m[1] so malformed text does
        // not raise an undefined-offset notice.
        if (!preg_match('/^<\/?((?:[a-z]+:)?[a-z]+)/is', (string)$this->string, $m)) {
            return null;
        }
        return $m[1];
    }

    /**
     * Returns the part of the tag name before the first colon.
     *
     * @return string|false the prefix, or FALSE when the tag name has no
     *                      colon (or no tag name could be read)
     */
    public function getTagNsPrefix() {
        $tagName = $this->getTagName();
        if (null === $tagName) {
            return false;
        }
        $pos = strpos($tagName, ':');
        if (false === $pos) {
            return false;
        }
        return substr($tagName, 0, $pos);
    }
}
/**
 * Iterates over the tokens of a string, one TagsoupNode per step, starting at
 * a given offset. Keys are a zero-based running index.
 */
class TagsoupIterator implements Iterator
{
    private $string;
    private $startOffset;
    private $offset;
    private $index;

    /**
     * Token parsed at the current offset: NULL while not parsed yet, otherwise
     * the result of Tagsoup::parseAt() (array or FALSE).
     */
    private $token;

    /**
     * @param string $string input to tokenize
     * @param int $startOffset offset of the first token; rewind() returns here
     */
    public function __construct($string, $startOffset = 0) {
        $this->string = $string;
        $this->offset = $this->startOffset = $startOffset;
        $this->index = 0;
        $this->token = null;
    }

    /**
     * @return int offset at which the current token starts
     */
    public function getOffset() {
        return $this->offset;
    }

    /**
     * @return TagsoupNode|null node for the current token, or NULL when the
     *                          iterator is exhausted
     */
    #[\ReturnTypeWillChange]
    public function current() {
        $token = $this->fetchToken();
        return $token ? TagsoupNode::createFromArray($token) : null;
    }

    /**
     * Moves to the offset directly after the current token. Does nothing when
     * the iterator is already exhausted.
     */
    #[\ReturnTypeWillChange]
    public function next() {
        $token = $this->fetchToken();
        if (!$token) {
            return;
        }
        // Token layout: array($type, $offset, $length, $text).
        $this->offset = $token[1] + $token[2];
        $this->token = null;
        $this->index++;
    }

    /**
     * @return int zero-based index of the current token
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->index;
    }

    /**
     * @return bool TRUE while a token exists at the current offset
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        return (bool)$this->fetchToken();
    }

    /**
     * Resets the iterator to the start offset given to the constructor.
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->offset = $this->startOffset;
        $this->index = 0;
        $this->token = null;
    }

    /**
     * Parses the token at the current offset once and remembers it, so that
     * valid(), current() and next() do not each re-run the regex search.
     *
     * @return array|false
     */
    private function fetchToken() {
        if (null === $this->token) {
            $this->token = Tagsoup::parseAt($this->string, $this->offset);
        }
        return $this->token;
    }
}
// Usage example (not executed):
// $test = Tagsoup::parseAt($string, 0);
// var_dump($test, TagsoupNode::createFromArray($test));
/**
 * Forward-only helper on top of a TagsoupIterator.
 *
 * The iterator is rewound once in the constructor and then wrapped in a
 * NoRewindIterator, so the foreach loops inside the methods below do not
 * restart from the beginning.
 * NOTE(review): a loop that is left early appears to leave the inner iterator
 * on the node it stopped at, so the next call would see that node again -
 * confirm against the intended usage.
 */
class TagsoupForwardNavigator extends IteratorIterator
{
    private $it;

    /**
     * @param TagsoupIterator $it token iterator to navigate; it is rewound here
     */
    public function __construct(TagsoupIterator $it) {
        $this->it = $it;
        $it->rewind();
        parent::__construct(new NoRewindIterator($it));
    }

    /**
     * Scans forward for a start tag with the given name.
     *
     * @param string $name tag name to look for (compared with ===)
     * @return TagsoupNode|null the start tag, or NULL when the input ends first
     */
    public function nextStartTag($name) {
        foreach ($this as $candidate) {
            if (!$this->matchesTag($candidate, Tagsoup::NODETYPE_STARTTAG, $name)) {
                continue;
            }
            return $candidate;
        }
        return null;
    }

    /**
     * Collects nodes until an end tag with the given name shows up. The end
     * tag itself is not part of the result.
     *
     * @param string $name tag name of the terminating end tag
     * @return TagsoupNode[] nodes seen before the end tag (all remaining nodes
     *                       when no such end tag exists)
     */
    public function getUntilEndTag($name) {
        $collected = [];
        foreach ($this as $candidate) {
            if ($this->matchesTag($candidate, Tagsoup::NODETYPE_ENDTAG, $name)) {
                return $collected;
            }
            $collected[] = $candidate;
        }
        return $collected;
    }

    /**
     * Scans forward for the first node accepted by $condition.
     *
     * @param callable $condition called as $condition($node, $navigator); a
     *                            truthy result selects the node
     * @return TagsoupNode|null the selected node, or NULL when none is accepted
     */
    public function nextCondition(callable $condition) {
        foreach ($this as $candidate) {
            $accepted = $condition($candidate, $this);
            if ($accepted) {
                return $candidate;
            }
        }
        return null;
    }

    /**
     * Tells whether $node has the given node type and tag name. The tag name
     * is only read when the type matches.
     *
     * @param TagsoupNode $node
     * @param int $type one of the Tagsoup::NODETYPE_* constants
     * @param string $name
     * @return bool
     */
    private function matchesTag($node, $type, $name) {
        if ($node->getType() !== $type) {
            return false;
        }
        return $node->getTagName() === $name;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment