Skip to content

Instantly share code, notes, and snippets.

@num8er
Last active December 3, 2019 08:37
Show Gist options
  • Save num8er/f34288c291fe5c677bf9ae577822dd12 to your computer and use it in GitHub Desktop.
Save num8er/f34288c291fe5c677bf9ae577822dd12 to your computer and use it in GitHub Desktop.
WrapperMatcher (matching anything between opening and closing patterns)
<?php
/**
 * Finds every substring wrapped between an opening and a closing pattern.
 *
 * Results have the same shape as preg_match_all() in PREG_PATTERN_ORDER:
 * index 0 holds the full matches (wrappers included), index 1 holds the
 * wrapped content only.
 *
 * When the two patterns differ, matching is greedy per opening wrapper: an
 * opening wrapper is paired with the LAST closing wrapper that appears before
 * the next opening wrapper (so "N_abc_New_N" yields "abc_New").
 * When the two patterns are identical, consecutive occurrences are paired
 * lazily with a regular expression.
 */
class WrapperMatcher {
    private $startPattern = 'N_';
    private $endPattern = '_N';

    /**
     * WrapperMatcher constructor.
     *
     * @param string $startPattern Left pattern (opening wrapper). Example: "N_ some string _N" $startPattern = 'N_'
     * @param string $endPattern   Right pattern (closing wrapper). Example: "N_ some string _N" $endPattern = '_N'
     *
     * @throws Error When either pattern is empty after trimming.
     */
    public function __construct($startPattern, $endPattern) {
        $startPattern = trim($startPattern);
        // Compare against '' explicitly: a truthiness check would reject the valid pattern '0'.
        if ($startPattern === '') {
            throw new Error('Invalid wrapper start pattern');
        }

        $endPattern = trim($endPattern);
        if ($endPattern === '') {
            throw new Error('Invalid wrapper end pattern');
        }

        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
    }

    /**
     * Records every byte offset at which the start or the end pattern occurs.
     *
     * The scan advances one byte at a time so that no occurrence is skipped,
     * whatever the relative lengths of the two patterns are.
     *
     * @param string $searchString
     *
     * @return array List of ['type' => 'start'|'end', 'pos' => int], ordered by position.
     */
    private function findMarkers($searchString) {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);
        $length = strlen($searchString);

        $markers = [];
        for ($cursor = 0; $cursor < $length; $cursor++) {
            if (substr($searchString, $cursor, $startLength) === $this->startPattern) {
                $markers[] = ['type' => 'start', 'pos' => $cursor];
            }
            if (substr($searchString, $cursor, $endLength) === $this->endPattern) {
                $markers[] = ['type' => 'end', 'pos' => $cursor];
            }
        }

        return $markers;
    }

    /**
     * Pairs each opening wrapper with the last closing wrapper found before
     * the next opening wrapper.
     *
     * @param string $searchString
     *
     * @return array List of [startOffset, endOffset] byte offsets; endOffset
     *               is the offset at which the closing wrapper begins.
     */
    private function lookupPositions($searchString) {
        $markers = $this->findMarkers($searchString);
        $markerCount = count($markers);
        $startLength = strlen($this->startPattern);

        $positions = [];
        foreach ($markers as $index => $marker) {
            if ($marker['type'] !== 'start') {
                continue;
            }

            $contentOffset = $marker['pos'] + $startLength;
            $lastEnd = null;
            for ($next = $index + 1; $next < $markerCount && $markers[$next]['type'] === 'end'; $next++) {
                // A closing wrapper that overlaps its own opening wrapper
                // (e.g. "_N" inside "N_N") cannot close it: the content
                // length would be negative.
                if ($markers[$next]['pos'] >= $contentOffset) {
                    $lastEnd = $markers[$next]['pos'];
                }
            }

            // Opening wrappers without a usable closing wrapper are dropped.
            if ($lastEnd !== null) {
                $positions[] = [$marker['pos'], $lastEnd];
            }
        }

        return $positions;
    }

    /**
     * Cuts the full matches and their inner contents out of the search string.
     *
     * @param string $searchString
     * @param array  $positions List of [startOffset, endOffset] pairs from lookupPositions().
     *
     * @return array [0 => full matches including wrappers, 1 => wrapped contents]
     */
    private function getMatches($searchString, $positions) {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);

        $matches = [[], []];
        foreach ($positions as $position) {
            $span = $position[1] - $position[0];
            $matches[0][] = substr($searchString, $position[0], $span + $endLength);
            $matches[1][] = substr($searchString, $position[0] + $startLength, $span - $startLength);
        }

        return $matches;
    }

    /**
     * Returns all wrapped substrings found in the given string.
     *
     * @param string $searchString
     *
     * @return array [0 => full matches including wrappers, 1 => wrapped contents]
     */
    public function matchForString($searchString) {
        if ($this->startPattern === $this->endPattern) {
            // Quote for the "/" delimiter as well, otherwise a wrapper
            // containing "/" would terminate the expression early.
            $pattern = preg_quote($this->startPattern, '/');
            $regexp = '/'.$pattern.'(.*?)'.$pattern.'/u';
            preg_match_all($regexp, $searchString, $matches);
            // Trim of the full matches; the wrappers themselves were already
            // trimmed by the constructor.
            array_walk($matches[0], function(&$item) {
                $item = trim($item);
            });
            return $matches;
        }

        $positions = $this->lookupPositions($searchString);
        return $this->getMatches($searchString, $positions);
    }
}
// Demo run: each case is [start pattern, end pattern, input string]. Every
// input is matched with a fresh WrapperMatcher and the result is dumped.
$demoCases = [
    ['[N]', '[N]', 'Some text [N]abc_New_New_New[N] other text [N]ghi_jkl[N] and other text:;.#{}()[][N]abc_New_[N].!@'],
    ['[N]', '[/N]', 'Some text [N]abc_New_New_New[/N] other text [N]ghi_jkl[/N] and other text:;.#{}()[][N]abc_New_[N].!@'],
    ['N_', '_N', 'Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@'],
    ['N_', '_N', "في الصيف الماضي ، أنشأ N_Lego_N N_Lego_New_N مجموعة ذات سمة N_Friends_n."],
    ['N_', '_N', "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N "],
    ['N_', '_N', 'Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_&quot;A&quot;_N.!@ N_"Actions Speak Louder Than Words"_N'],
    ['N_', '_N', 'Some N_ free escaped text "string" which has @ny S!mB0l _N'],
    ['N_', '_N', 'this is not working and like I\'m like, you know, three years later, and I\'m like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY'],
];
foreach ($demoCases as list($startPattern, $endPattern, $string)) {
    $matches = (new WrapperMatcher($startPattern, $endPattern))->matchForString($string);
    var_dump($matches);
}
// Leave only $string and $matches behind, as the unrolled version did.
unset($demoCases, $startPattern, $endPattern);
array(2) {
[0]=>
array(3) {
[0]=>
string(21) "[N]abc_New_New_New[N]"
[1]=>
string(13) "[N]ghi_jkl[N]"
[2]=>
string(14) "[N]abc_New_[N]"
}
[1]=>
array(3) {
[0]=>
string(15) "abc_New_New_New"
[1]=>
string(7) "ghi_jkl"
[2]=>
string(8) "abc_New_"
}
}
array(2) {
[0]=>
array(2) {
[0]=>
string(22) "[N]abc_New_New_New[/N]"
[1]=>
string(14) "[N]ghi_jkl[/N]"
}
[1]=>
array(2) {
[0]=>
string(15) "abc_New_New_New"
[1]=>
string(7) "ghi_jkl"
}
}
array(2) {
[0]=>
array(3) {
[0]=>
string(19) "N_abc_New_New_New_N"
[1]=>
string(11) "N_ghi_jkl_N"
[2]=>
string(11) "N_abc_New_N"
}
[1]=>
array(3) {
[0]=>
string(15) "abc_New_New_New"
[1]=>
string(7) "ghi_jkl"
[2]=>
string(7) "abc_New"
}
}
array(2) {
[0]=>
array(2) {
[0]=>
string(8) "N_Lego_N"
[1]=>
string(12) "N_Lego_New_N"
}
[1]=>
array(2) {
[0]=>
string(4) "Lego"
[1]=>
string(8) "Lego_New"
}
}
array(2) {
[0]=>
array(3) {
[0]=>
string(12) "N_Lego_New_N"
[1]=>
string(19) "N_New_Friends_New_N"
[2]=>
string(12) "N_Lego_New_N"
}
[1]=>
array(3) {
[0]=>
string(8) "Lego_New"
[1]=>
string(15) "New_Friends_New"
[2]=>
string(8) "Lego_New"
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(17) "N_Mercedes-Benz_N"
[1]=>
string(15) "N_Chick-fil-A_N"
[2]=>
string(19) "N_abc_New_New_New_N"
[3]=>
string(11) "N_ghi_jkl_N"
[4]=>
string(39) "N_McDonald's*!&HP_"hey"_&quot;A&quot;_N"
[5]=>
string(37) "N_"Actions Speak Louder Than Words"_N"
}
[1]=>
array(6) {
[0]=>
string(13) "Mercedes-Benz"
[1]=>
string(11) "Chick-fil-A"
[2]=>
string(15) "abc_New_New_New"
[3]=>
string(7) "ghi_jkl"
[4]=>
string(35) "McDonald's*!&HP_"hey"_&quot;A&quot;"
[5]=>
string(33) ""Actions Speak Louder Than Words""
}
}
array(2) {
[0]=>
array(1) {
[0]=>
string(53) "N_ free escaped text "string" which has @ny S!mB0l _N"
}
[1]=>
array(1) {
[0]=>
string(49) " free escaped text "string" which has @ny S!mB0l "
}
}
array(2) {
[0]=>
array(4) {
[0]=>
string(6) "N_Ξ_N"
[1]=>
string(6) "N_Я_N"
[2]=>
string(5) "N_$_N"
[3]=>
string(7) "N_❤_N"
}
[1]=>
array(4) {
[0]=>
string(2) "Ξ"
[1]=>
string(2) "Я"
[2]=>
string(1) "$"
[3]=>
string(3) "❤"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment