Skip to content

Instantly share code, notes, and snippets.

@num8er
Last active November 27, 2019 07:18
Show Gist options
  • Save num8er/20ce54a99284478c4ccdc06377d3d64d to your computer and use it in GitHub Desktop.
Save num8er/20ce54a99284478c4ccdc06377d3d64d to your computer and use it in GitHub Desktop.
Matching substrings between wrapper markers (with Unicode characters in the input)
<?php
/**
 * Finds every space-delimited token that is wrapped between $start and $end.
 *
 * The input is first normalised: every character that is not a letter, mark,
 * digit or underscore is turned into a space, except for a fixed set of
 * symbols (- ' * ! " &quot; &) which are kept. Runs of whitespace are then
 * collapsed to a single space. Matching is case-insensitive and the wrapped
 * text may itself contain spaces.
 *
 * @param string $string Text to search (expected to be UTF-8).
 * @param string $start  Literal opening wrapper, e.g. 'N_'.
 * @param string $end    Literal closing wrapper, e.g. '_N'.
 *
 * @return array Two lists in preg_match_all() order: index 0 holds the full
 *               matches including the wrappers, index 1 holds the text
 *               between the wrappers. Both lists are empty when nothing
 *               matches or the pattern cannot be evaluated.
 */
function matchStringsBetweenWrappers($string, $start, $end)
{
    $string = (string) $string;

    // Symbols that must survive the clean-up below. Each one is swapped for a
    // hex-only token (an md5 hash), which the cleaning regex treats as a word.
    $preserved = ['-', "'", '*', '!', '"', '&quot;', '&'];
    $placeholders = [];
    foreach ($preserved as $index => $symbol) {
        $placeholders[$symbol] = md5($string . $index . $symbol);
    }
    // strtr() tries the longest key first, so '&quot;' wins over a bare '&'.
    $string = strtr($string, $placeholders);

    // Replace everything that is not a letter/mark/digit/underscore with a
    // space. The `u` modifier keeps multi-byte UTF-8 characters intact
    // instead of blanking them byte by byte.
    $cleaned = preg_replace('/[^\p{L}\p{M}\p{N}_]/u', ' ', $string);
    if ($cleaned === null) {
        // Input is not valid UTF-8: fall back to byte-wise cleaning.
        $cleaned = (string) preg_replace('/\W/', ' ', $string);
    }

    // Collapse whitespace runs, then put the preserved symbols back.
    $cleaned = trim((string) preg_replace('/\s+/', ' ', $cleaned));
    $cleaned = strtr($cleaned, array_flip($placeholders));

    // Pad with spaces so the first and last token are delimited as well.
    $cleaned = ' ' . $cleaned . ' ';

    // The wrappers are literals, so they are quoted. The delimiting spaces
    // are asserted with lookarounds instead of being consumed; otherwise two
    // wrapped tokens separated by one space would compete for that space and
    // the second one would be skipped.
    $pattern = '/(?<= )' . preg_quote((string) $start, '/')
        . '(.*?)'
        . preg_quote((string) $end, '/') . '(?= )/ui';

    $matches = [];
    if (!preg_match_all($pattern, $cleaned, $matches)) {
        // No match, or the pattern could not be evaluated (for example a
        // wrapper that is not valid UTF-8): always return the same shape.
        return [[], []];
    }

    return [
        array_map('trim', $matches[0]),
        array_map('trim', $matches[1]),
    ];
}
// Demo: run the matcher over a few sample inputs that mark tokens with the
// 'N_' ... '_N' wrappers and dump the result of each call.
$samples = [
    'Some text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@',
    "في الصيف الماضي ، أنشأ N_Lego_N N_Lego_New_N مجموعة ذات سمة N_Friends_n.",
    "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N ",
    'Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_&quot;A&quot;_N.!@ N_"Actions Speak Louder Than Words"_N',
];

foreach ($samples as $string) {
    $matches = matchStringsBetweenWrappers($string, 'N_', '_N');
    var_dump($matches);
}
array(2) {
[0]=>
array(3) {
[0]=>
string(19) "N_abc_New_New_New_N"
[1]=>
string(11) "N_ghi_jkl_N"
[2]=>
string(11) "N_abc_New_N"
}
[1]=>
array(3) {
[0]=>
string(15) "abc_New_New_New"
[1]=>
string(7) "ghi_jkl"
[2]=>
string(7) "abc_New"
}
}
array(2) {
[0]=>
array(3) {
[0]=>
string(8) "N_Lego_N"
[1]=>
string(12) "N_Lego_New_N"
[2]=>
string(11) "N_Friends_n"
}
[1]=>
array(3) {
[0]=>
string(4) "Lego"
[1]=>
string(8) "Lego_New"
[2]=>
string(7) "Friends"
}
}
array(2) {
[0]=>
array(3) {
[0]=>
string(12) "N_Lego_New_N"
[1]=>
string(19) "N_New_Friends_New_N"
[2]=>
string(12) "N_Lego_New_N"
}
[1]=>
array(3) {
[0]=>
string(8) "Lego_New"
[1]=>
string(15) "New_Friends_New"
[2]=>
string(8) "Lego_New"
}
}
array(2) {
[0]=>
array(6) {
[0]=>
string(17) "N_Mercedes-Benz_N"
[1]=>
string(15) "N_Chick-fil-A_N"
[2]=>
string(19) "N_abc_New_New_New_N"
[3]=>
string(11) "N_ghi_jkl_N"
[4]=>
string(39) "N_McDonald's*!&HP_"hey"_&quot;A&quot;_N"
[5]=>
string(37) "N_"Actions Speak Louder Than Words"_N"
}
[1]=>
array(6) {
[0]=>
string(13) "Mercedes-Benz"
[1]=>
string(11) "Chick-fil-A"
[2]=>
string(15) "abc_New_New_New"
[3]=>
string(7) "ghi_jkl"
[4]=>
string(35) "McDonald's*!&HP_"hey"_&quot;A&quot;"
[5]=>
string(33) ""Actions Speak Louder Than Words""
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment