Skip to content

Instantly share code, notes, and snippets.

@sandeepshetty
Last active June 28, 2016 15:17
Show Gist options
  • Save sandeepshetty/5725818 to your computer and use it in GitHub Desktop.
Save sandeepshetty/5725818 to your computer and use it in GitHub Desktop.
Extracting machine tags from a string.
<?php
// Machine tag grammar: #namespace:predicate=value
//
// Namespace and predicate start with a letter or underscore,
// followed by any number of letters, numbers, or underscores.
// If value contains spaces, it needs to be wrapped in quotes
// (either single or double quotes are accepted, and a quoted value
// may span several lines).
// Literal quotes within quotes need to be escaped using a backslash (\)
//
// Sample input for the demo run at the bottom of this file. It mixes
// unquoted, single-quoted and double-quoted values with lines that are
// not machine tags at all (plain hashtags such as "#repost" and free
// text), which have no ":predicate=value" part.
//
// The literal below is a single-quoted PHP string, so inside it \' stands
// for a literal quote and \\ for a literal backslash: the sequence \\\'
// therefore puts the two characters \' into the actual test text.
$test_cases='
#repost
#repost:url=http://example.com/post/2
#repost:content=\'Hello World.
Did I hear you say "Hello"
How are you doing \\\'Foobar\\\'?\'
#repost:published="3 hrs \'ago\'"
#repost:author="Foo \\"Baz\\" Bar"
#repost:author_url=http://example.com/foobar/
#repost:author_photo=http://example.com/foobar/photo
Non-machine-tag content
#machinetags
';
/**
 * Extracts machine tags of the form #namespace:predicate=value from a string.
 *
 * Grammar:
 *  - namespace and predicate start with a letter or underscore, followed by
 *    any number (including zero) of letters, digits or underscores;
 *  - an unquoted value runs up to the next whitespace character;
 *  - a quoted value is wrapped in matching single or double quotes, may
 *    contain whitespace and newlines, and may contain the wrapping quote
 *    character only when it is escaped with a backslash.
 *
 * If a quoted value cannot be closed (no matching end quote), the pattern
 * falls back to reading it as an unquoted value, quote character included.
 *
 * @param string $str Text to scan.
 *
 * @return array Four parallel, zero-indexed lists keyed by:
 *               'namespace'   - namespace of each tag,
 *               'predicate'   - predicate of each tag,
 *               'value'       - value of each tag, without the wrapping
 *                               quotes and with escaped quotes unescaped,
 *               'machinetags' - the raw matched text of each tag.
 *               All four lists are empty when nothing matches or when the
 *               regular expression engine reports an error (in which case
 *               an E_USER_WARNING is raised as well).
 */
function extract_machine_tags($str)
{
    // "*" rather than "+": a one-character namespace or predicate is valid.
    $identifier = '[a-zA-Z_][a-zA-Z0-9_]*';

    $pattern = '/#(?P<namespace>' . $identifier . '):(?P<predicate>' . $identifier . ')='
        // Group 3: optional opening quote.
        . '(["\'])?'
        . '(?P<value>(?(3)'
        // Quoted: any character except the quote or a backslash, or a
        // backslash immediately followed by the quote.
        . '(?:(?:(?!\3|\\\\).|\\\\\\3)*)'
        // Unquoted: everything up to the next whitespace.
        . '|(?:[^\s]+)))'
        // A quoted value must be closed by the same quote character.
        . '(?(3)\\3)/s';

    $matches = array();
    $found = preg_match_all($pattern, $str, $matches);

    if ($found === false) {
        // Engine failure (e.g. backtrack or JIT stack limit on huge input):
        // report it instead of returning silently truncated results.
        trigger_error(
            'extract_machine_tags(): PCRE error code ' . preg_last_error(),
            E_USER_WARNING
        );

        return array(
            'namespace'   => array(),
            'predicate'   => array(),
            'value'       => array(),
            'machinetags' => array(),
        );
    }

    // Unescape only quoted values, and only the quote that wraps them.
    // Escaping is not part of the unquoted syntax, so backslashes in
    // unquoted values (e.g. Windows paths) are left untouched.
    $values = $matches['value'];
    foreach ($values as $index => $value) {
        $quote = $matches[3][$index];
        if ($quote !== '') {
            $values[$index] = str_replace('\\' . $quote, $quote, $value);
        }
    }

    return array(
        'namespace'   => $matches['namespace'],
        'predicate'   => $matches['predicate'],
        'value'       => $values,
        'machinetags' => $matches[0],
    );
}
// Demo run: print the raw sample text first, then the structure parsed
// out of it, so the two can be compared by eye.
echo $test_cases;
echo print_r(extract_machine_tags($test_cases), true);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment