Skip to content

Instantly share code, notes, and snippets.

@xeoncross
Created February 4, 2013 22:28
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save xeoncross/4710324 to your computer and use it in GitHub Desktop.
Save xeoncross/4710324 to your computer and use it in GitHub Desktop.
PHP: Nested Parenthesis Parser
<?php
// @rodneyrehm
// http://stackoverflow.com/a/7917979/99923
/**
 * Parses a string with nested parentheses into a nested array.
 *
 * Every parenthesised group becomes a sub-array; every run of characters
 * between parentheses becomes a string token (whitespace is kept as-is).
 * One pair of parentheses enclosing the whole input is dropped.
 *
 * The parser is lenient: an unmatched ')' wraps everything parsed so far
 * into a sub-array, and groups still open at the end of the input are
 * closed implicitly.
 */
class ParensParser
{
    // scopes that enclose the one currently being filled
    protected $stack = null;
    // tokens and sub-arrays of the current nesting level
    protected $current = null;
    // input string to parse
    protected $string = null;
    // length of the input string in bytes
    // (declared explicitly; it used to be created as a dynamic property)
    protected $length = null;
    // current character offset in string
    protected $position = null;
    // start offset of the pending text token, null when no token is open
    protected $buffer_start = null;

    /**
     * Parse $string into a nested array.
     *
     * @param string $string text with (possibly nested) parentheses
     * @return array tokens (strings) and groups (arrays) in input order;
     *               an empty array for empty input
     */
    public function parse($string)
    {
        // null and false become '', so they still yield an empty result
        $string = (string) $string;
        if ($string === '') {
            // no string, no data
            return array();
        }

        if ($this->isWrappedInParens($string)) {
            // kill outer parens, as they're unnecessary
            $string = substr($string, 1, -1);
        }

        $this->current = array();
        $this->stack = array();
        $this->string = $string;
        $this->length = strlen($this->string);
        // never inherit a pending token from an earlier parse() call
        $this->buffer_start = null;

        // look at each character
        for ($this->position = 0; $this->position < $this->length; $this->position++) {
            switch ($this->string[$this->position]) {
                case '(':
                    $this->push();
                    // push current scope to the stack and begin a new scope
                    array_push($this->stack, $this->current);
                    $this->current = array();
                    break;

                case ')':
                    $this->push();
                    $this->closeScope();
                    break;

                default:
                    // remember the offset to do a string capture later
                    // instead of appending character by character
                    if ($this->buffer_start === null) {
                        $this->buffer_start = $this->position;
                    }
            }
        }

        // flush text that follows the last parenthesis
        $this->push();

        // implicitly close groups that were never closed in the input
        while ($this->stack) {
            $this->closeScope();
        }

        return $this->current;
    }

    /**
     * Append the pending text token, if any, to the current scope.
     */
    protected function push()
    {
        if ($this->buffer_start !== null) {
            // extract string from buffer start to current position
            $buffer = substr($this->string, $this->buffer_start, $this->position - $this->buffer_start);
            // clean buffer
            $this->buffer_start = null;
            // throw token into current scope
            $this->current[] = $buffer;
        }
    }

    /**
     * Finish the current scope and append it to its parent scope.
     *
     * With no parent on the stack (unmatched ')'), a fresh parent is used.
     */
    protected function closeScope()
    {
        $finished = $this->current;
        $parent = array_pop($this->stack);
        $this->current = ($parent === null) ? array() : $parent;
        $this->current[] = $finished;
    }

    /**
     * Tell whether the first '(' of $string is closed by its last character.
     *
     * Only then do the outer parentheses enclose the whole input and may be
     * stripped; '(a) (b)' for instance must be left untouched.
     *
     * @param string $string non-empty input string
     * @return bool
     */
    protected function isWrappedInParens($string)
    {
        $length = strlen($string);
        if ($length < 2 || $string[0] !== '(' || $string[$length - 1] !== ')') {
            return false;
        }

        $depth = 0;
        // stop before the last character: it has to be the closing partner
        for ($i = 0; $i < $length - 1; $i++) {
            if ($string[$i] === '(') {
                $depth++;
            } elseif ($string[$i] === ')') {
                $depth--;
                if ($depth === 0) {
                    // the opening paren got closed before the end
                    return false;
                }
            }
        }

        return $depth === 1;
    }
}
// Demo: parse a bracketed parse tree of a sentence and dump the nested array.
$parser = new ParensParser();
$sentence = '(TOP (S (NP (PRP I)) (VP (VBP love) (NP (NP (DT a) (JJ big) (NN bed)) (PP (IN of) (NP (NNS roses))))) (. .)))';
var_dump($parser->parse($sentence));
@trino
Copy link

trino commented Sep 3, 2015

thank you.

@mstaack
Copy link

mstaack commented Nov 22, 2017

awesome! thanks

@perfunke
Copy link

perfunke commented Jul 3, 2018

Exactly what I need! [
Extracting affiliations from scientific publications like
"Palmén, Rachel (Universitat Oberta de Catalunya (UOC) (Open University of Catalonia), Barcelona, Spain)" ]
Best,
per funke at gmail dot com

@drupol
Copy link

drupol commented Dec 16, 2018

Alternatively, you can use:

    /**
     * Parse a bracketed string into a nested array.
     *
     * Every non-empty top-level `[...]` group of the subject becomes one
     * item. Its 'value' key holds the group's content up to the first nested
     * `[` (or the whole content when there is none); its 'children' key is
     * only set when parsing the group's content recursively yields items.
     * Text outside of brackets is ignored.
     *
     * @param string $subject
     *   The subject string.
     *
     * @return array
     *   The list of parsed items; empty when nothing could be matched.
     */
    private function parse(string $subject)
    {
        $result = [];

        // preg_match_all() returns false on a PCRE error and 0 without a
        // match; in both cases there is nothing to iterate over.
        if (!\preg_match_all('~[^\[\]]+|\[(?<nested>(?R)*)\]~', $subject, $matches)) {
            return $result;
        }

        // Matches of the plain-text alternative leave 'nested' empty. Compare
        // against '' explicitly: array_filter() without a callback would also
        // throw away a group whose content is "0".
        $groups = \array_filter(
            $matches['nested'],
            static function ($match) {
                return '' !== $match;
            }
        );

        foreach ($groups as $match) {
            $item = [];
            $position = \strpos($match, '[');

            if (false !== $position) {
                $item['value'] = \substr($match, 0, $position);
            } else {
                $item['value'] = $match;
            }

            if ([] !== $children = $this->parse($match)) {
                $item['children'] = $children;
            }

            $result[] = $item;
        }

        return $result;
    }

This has been used in phptree.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment