Last active
December 3, 2019 08:37
-
-
Save num8er/f34288c291fe5c677bf9ae577822dd12 to your computer and use it in GitHub Desktop.
WrapperMatcher (matching anything between opening and closing patterns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Finds every substring enclosed between an opening and a closing wrapper pattern.
 *
 * Matching is byte-wise (strpos/substr), so the content between the wrappers may
 * contain any characters, including multi-byte UTF-8 sequences and line breaks.
 *
 * When the two patterns differ, an opening wrapper is paired with the LAST closing
 * wrapper that appears before the next opening wrapper (or before the end of the
 * string). This lets the content itself contain the closing pattern, e.g.
 * "N_abc_New_N" with wrappers "N_" / "_N" yields the content "abc_New".
 *
 * When the two patterns are identical, occurrences are paired consecutively:
 * 1st with 2nd, 3rd with 4th, and so on.
 */
class WrapperMatcher
{
    private $startPattern = 'N_';
    private $endPattern = '_N';

    /**
     * WrapperMatcher constructor.
     *
     * Both patterns are trimmed before use.
     *
     * @param string $startPattern Left pattern (opening wrapper). Example: "N_ some string _N" $startPattern = 'N_'
     * @param string $endPattern   Right pattern (closing wrapper). Example: "N_ some string _N" $endPattern = '_N'
     *
     * @throws Error When a pattern is empty after trimming.
     */
    public function __construct($startPattern, $endPattern)
    {
        $startPattern = trim($startPattern);
        // Compare against '' explicitly: a truthiness check would reject the valid pattern '0'.
        if ($startPattern === '') {
            throw new Error('Invalid wrapper start pattern');
        }

        $endPattern = trim($endPattern);
        if ($endPattern === '') {
            throw new Error('Invalid wrapper end pattern');
        }

        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
    }

    /**
     * Lists every occurrence of the start and end patterns in byte order.
     *
     * Occurrences may overlap each other (the scan resumes one byte after each
     * token). If both patterns match at the same offset, which is only possible
     * when one is a prefix of the other, the longer pattern wins.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of ['type' => 'start'|'end', 'pos' => int byte offset].
     */
    private function lookupTokens($searchString)
    {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);

        $tokens = [];
        $nextStart = strpos($searchString, $this->startPattern);
        $nextEnd = strpos($searchString, $this->endPattern);

        while ($nextStart !== false || $nextEnd !== false) {
            if ($nextEnd === false) {
                $isStart = true;
            } elseif ($nextStart === false) {
                $isStart = false;
            } elseif ($nextStart === $nextEnd) {
                $isStart = $startLength > $endLength;
            } else {
                $isStart = $nextStart < $nextEnd;
            }

            $pos = $isStart ? $nextStart : $nextEnd;
            $tokens[] = ['type' => $isStart ? 'start' : 'end', 'pos' => $pos];

            // Only re-run strpos for a pattern whose cached hit is now behind the
            // cursor; a cached hit that is still ahead stays valid.
            $cursor = $pos + 1;
            if ($nextStart !== false && $nextStart < $cursor) {
                $nextStart = strpos($searchString, $this->startPattern, $cursor);
            }
            if ($nextEnd !== false && $nextEnd < $cursor) {
                $nextEnd = strpos($searchString, $this->endPattern, $cursor);
            }
        }

        return $tokens;
    }

    /**
     * Pairs wrapper occurrences when the start and end patterns differ.
     *
     * Each start is paired with the last end found before the next start (or the
     * end of the string). Starts without any usable end are dropped, and so are
     * ends that appear before the first start.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of [start offset, end offset] pairs (offsets of the patterns themselves).
     */
    private function lookupPositions($searchString)
    {
        $startLength = strlen($this->startPattern);

        $positions = [];
        $openAt = null;
        $closeAt = null;

        foreach ($this->lookupTokens($searchString) as $token) {
            if ($token['type'] === 'start') {
                if ($closeAt !== null) {
                    $positions[] = [$openAt, $closeAt];
                }
                $openAt = $token['pos'];
                $closeAt = null;
                continue;
            }

            // An end token that overlaps the bytes of the opening wrapper itself
            // (e.g. the "_N" inside "N_N") cannot close that wrapper.
            if ($openAt !== null && $token['pos'] >= $openAt + $startLength) {
                $closeAt = $token['pos'];
            }
        }

        if ($closeAt !== null) {
            $positions[] = [$openAt, $closeAt];
        }

        return $positions;
    }

    /**
     * Pairs wrapper occurrences when the start and end patterns are identical.
     *
     * Occurrences are consumed two at a time without overlap; a trailing unpaired
     * occurrence is ignored.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of [opening offset, closing offset] pairs.
     */
    private function lookupSymmetricPositions($searchString)
    {
        $length = strlen($this->startPattern);

        $positions = [];
        $cursor = 0;

        while (($openAt = strpos($searchString, $this->startPattern, $cursor)) !== false) {
            $closeAt = strpos($searchString, $this->startPattern, $openAt + $length);
            if ($closeAt === false) {
                break;
            }
            $positions[] = [$openAt, $closeAt];
            $cursor = $closeAt + $length;
        }

        return $positions;
    }

    /**
     * Cuts the matched substrings out of the search string.
     *
     * @param string $searchString Text the positions refer to.
     * @param array  $positions    List of [start offset, end offset] pairs.
     *
     * @return array Two parallel lists: index 0 holds each match including both
     *               wrappers, index 1 holds only the content between them.
     */
    private function getMatches($searchString, $positions)
    {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);

        $matches = [[], []];
        foreach ($positions as $position) {
            $span = $position[1] - $position[0];
            $matches[0][] = substr($searchString, $position[0], $span + $endLength);
            $matches[1][] = substr($searchString, $position[0] + $startLength, $span - $startLength);
        }

        return $matches;
    }

    /**
     * Returns every wrapped substring found in the given string.
     *
     * @param string $searchString Text to search.
     *
     * @return array [0 => list of matches including the wrappers,
     *                1 => list of the contents between the wrappers];
     *               both lists are empty when nothing matches.
     */
    public function matchForString($searchString)
    {
        $positions = $this->startPattern === $this->endPattern
            ? $this->lookupSymmetricPositions($searchString)
            : $this->lookupPositions($searchString);

        return $this->getMatches($searchString, $positions);
    }
}
// Demo driver: each case is [opening wrapper, closing wrapper, text to search].
// The cases are run in order and the result of every search is dumped.
$demoCases = [
    // Identical opening and closing wrappers.
    ['[N]', '[N]', 'Some text [N]abc_New_New_New[N] other text [N]ghi_jkl[N] and other text:;.#{}()[][N]abc_New_[N].!@'],
    // Tag-like wrappers; the last "[N]...[N]" has no "[/N]".
    ['[N]', '[/N]', 'Some text [N]abc_New_New_New[/N] other text [N]ghi_jkl[/N] and other text:;.#{}()[][N]abc_New_[N].!@'],
    // Content that itself contains the closing pattern ("_New").
    ['N_', '_N', 'Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@'],
    // Multi-byte (Arabic) text around the wrappers; "N_Friends_n" ends with a lowercase "n".
    ['N_', '_N', "في الصيف الماضي ، أنشأ N_Lego_N N_Lego_New_N مجموعة ذات سمة N_Friends_n."],
    // Content starting with "New" directly after the opening wrapper.
    ['N_', '_N', "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N "],
    // Punctuation and quotes inside the wrapped content.
    ['N_', '_N', 'Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_"A"_N.!@ N_"Actions Speak Louder Than Words"_N'],
    // Free text with spaces next to the wrappers.
    ['N_', '_N', 'Some N_ free escaped text "string" which has @ny S!mB0l _N'],
    // Single multi-byte symbols as content.
    ['N_', '_N', 'this is not working and like I\'m like, you know, three years later, and I\'m like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY'],
];

foreach ($demoCases as $demoCase) {
    $string = $demoCase[2];
    $matcher = new WrapperMatcher($demoCase[0], $demoCase[1]);
    $matches = $matcher->matchForString($string);
    var_dump($matches);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(21) "[N]abc_New_New_New[N]" | |
[1]=> | |
string(13) "[N]ghi_jkl[N]" | |
[2]=> | |
string(14) "[N]abc_New_[N]" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(15) "abc_New_New_New" | |
[1]=> | |
string(7) "ghi_jkl" | |
[2]=> | |
string(8) "abc_New_" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(2) { | |
[0]=> | |
string(22) "[N]abc_New_New_New[/N]" | |
[1]=> | |
string(14) "[N]ghi_jkl[/N]" | |
} | |
[1]=> | |
array(2) { | |
[0]=> | |
string(15) "abc_New_New_New" | |
[1]=> | |
string(7) "ghi_jkl" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(19) "N_abc_New_New_New_N" | |
[1]=> | |
string(11) "N_ghi_jkl_N" | |
[2]=> | |
string(11) "N_abc_New_N" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(15) "abc_New_New_New" | |
[1]=> | |
string(7) "ghi_jkl" | |
[2]=> | |
string(7) "abc_New" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(2) { | |
[0]=> | |
string(8) "N_Lego_N" | |
[1]=> | |
string(12) "N_Lego_New_N" | |
} | |
[1]=> | |
array(2) { | |
[0]=> | |
string(4) "Lego" | |
[1]=> | |
string(8) "Lego_New" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(12) "N_Lego_New_N" | |
[1]=> | |
string(19) "N_New_Friends_New_N" | |
[2]=> | |
string(12) "N_Lego_New_N" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(8) "Lego_New" | |
[1]=> | |
string(15) "New_Friends_New" | |
[2]=> | |
string(8) "Lego_New" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(6) { | |
[0]=> | |
string(17) "N_Mercedes-Benz_N" | |
[1]=> | |
string(15) "N_Chick-fil-A_N" | |
[2]=> | |
string(19) "N_abc_New_New_New_N" | |
[3]=> | |
string(11) "N_ghi_jkl_N" | |
[4]=> | |
string(39) "N_McDonald's*!&HP_"hey"_"A"_N" | |
[5]=> | |
string(37) "N_"Actions Speak Louder Than Words"_N" | |
} | |
[1]=> | |
array(6) { | |
[0]=> | |
string(13) "Mercedes-Benz" | |
[1]=> | |
string(11) "Chick-fil-A" | |
[2]=> | |
string(15) "abc_New_New_New" | |
[3]=> | |
string(7) "ghi_jkl" | |
[4]=> | |
string(35) "McDonald's*!&HP_"hey"_"A"" | |
[5]=> | |
string(33) ""Actions Speak Louder Than Words"" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(1) { | |
[0]=> | |
string(53) "N_ free escaped text "string" which has @ny S!mB0l _N" | |
} | |
[1]=> | |
array(1) { | |
[0]=> | |
string(49) " free escaped text "string" which has @ny S!mB0l " | |
} | |
} | |
array(2) { | |
[0]=> | |
array(4) { | |
[0]=> | |
string(6) "N_Ξ_N" | |
[1]=> | |
string(6) "N_Я_N" | |
[2]=> | |
string(5) "N_$_N" | |
[3]=> | |
string(7) "N_❤_N" | |
} | |
[1]=> | |
array(4) { | |
[0]=> | |
string(2) "Ξ" | |
[1]=> | |
string(2) "Я" | |
[2]=> | |
string(1) "$" | |
[3]=> | |
string(3) "❤" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment