Last active
November 27, 2019 07:18
-
-
Save num8er/20ce54a99284478c4ccdc06377d3d64d to your computer and use it in GitHub Desktop.
Matching substrings between wrappers (with Unicode characters in the input)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Extracts every token of the form <start>...<end> that stands on its own
 * (delimited by separators on both sides) inside $string.
 *
 * The input is normalised first: every character that is not a word
 * character (letter, digit, underscore) and not one of the preserved symbols
 * - ' * ! " & or the literal entity &quot; is treated as a separator, and each
 * run of separators is collapsed into a single space. The clean-up pattern
 * runs without the "u" modifier, so the bytes of multi-byte characters are
 * normally treated as separators as well.
 *
 * Wrappers are matched literally and case-insensitively.
 *
 * @param string $string Text to scan.
 * @param string $start  Opening wrapper, e.g. "N_".
 * @param string $end    Closing wrapper, e.g. "_N".
 *
 * @return array Two lists of equal length: index 0 holds the complete tokens
 *               including their wrappers, index 1 holds the text between the
 *               wrappers. Both lists are empty when nothing matches or the
 *               regex engine reports an error.
 */
function matchStringsBetweenWrappers($string, $start, $end) {
    $noMatches = array(array(), array());

    // Turn each run of disallowed characters into one space. A ';' survives
    // only when it terminates the entity "&quot;" (checked by the lookbehind);
    // every other ';' is a separator like any other punctuation.
    $cleaned = preg_replace('/(?:[^\w\-\'*!"&;]|(?<!&quot);)+/', ' ', (string) $string);
    if ($cleaned === null) {
        return $noMatches;
    }

    // Pad with one space on each side so the first and last token are also
    // surrounded by separators.
    $cleaned = ' ' . trim($cleaned) . ' ';

    // Lookarounds assert the surrounding spaces without consuming them, so the
    // single space between two adjacent tokens can serve as the right boundary
    // of one and the left boundary of the next. preg_quote() makes sure the
    // wrappers are taken literally even if they contain regex metacharacters.
    $pattern = '/(?<= )' . preg_quote($start, '/') . '(.*?)' . preg_quote($end, '/') . '(?= )/ui';

    $matches = array();
    if (preg_match_all($pattern, $cleaned, $matches) === false) {
        return $noMatches;
    }

    return array(
        array_map('trim', $matches[0]),
        array_map('trim', $matches[1]),
    );
}
// Demo driver: feed a handful of sample inputs through the matcher, always
// with the wrappers "N_" and "_N", and dump each result structure.
$samples = array(
    'Some text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@',
    "في الصيف الماضي ، أنشأ N_Lego_N N_Lego_New_N مجموعة ذات سمة N_Friends_n.",
    "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N ",
    'Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_"A"_N.!@ N_"Actions Speak Louder Than Words"_N',
);

foreach ($samples as $string) {
    $matches = matchStringsBetweenWrappers($string, 'N_', '_N');
    var_dump($matches);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(19) "N_abc_New_New_New_N" | |
[1]=> | |
string(11) "N_ghi_jkl_N" | |
[2]=> | |
string(11) "N_abc_New_N" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(15) "abc_New_New_New" | |
[1]=> | |
string(7) "ghi_jkl" | |
[2]=> | |
string(7) "abc_New" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(8) "N_Lego_N" | |
[1]=> | |
string(12) "N_Lego_New_N" | |
[2]=> | |
string(11) "N_Friends_n" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(4) "Lego" | |
[1]=> | |
string(8) "Lego_New" | |
[2]=> | |
string(7) "Friends" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(3) { | |
[0]=> | |
string(12) "N_Lego_New_N" | |
[1]=> | |
string(19) "N_New_Friends_New_N" | |
[2]=> | |
string(12) "N_Lego_New_N" | |
} | |
[1]=> | |
array(3) { | |
[0]=> | |
string(8) "Lego_New" | |
[1]=> | |
string(15) "New_Friends_New" | |
[2]=> | |
string(8) "Lego_New" | |
} | |
} | |
array(2) { | |
[0]=> | |
array(6) { | |
[0]=> | |
string(17) "N_Mercedes-Benz_N" | |
[1]=> | |
string(15) "N_Chick-fil-A_N" | |
[2]=> | |
string(19) "N_abc_New_New_New_N" | |
[3]=> | |
string(11) "N_ghi_jkl_N" | |
[4]=> | |
string(39) "N_McDonald's*!&HP_"hey"_"A"_N" | |
[5]=> | |
string(37) "N_"Actions Speak Louder Than Words"_N" | |
} | |
[1]=> | |
array(6) { | |
[0]=> | |
string(13) "Mercedes-Benz" | |
[1]=> | |
string(11) "Chick-fil-A" | |
[2]=> | |
string(15) "abc_New_New_New" | |
[3]=> | |
string(7) "ghi_jkl" | |
[4]=> | |
string(35) "McDonald's*!&HP_"hey"_"A"" | |
[5]=> | |
string(33) ""Actions Speak Louder Than Words"" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment