Last active
September 14, 2015 08:03
-
-
Save nineinchnick/917e644df42ccd62db5c to your computer and use it in GitHub Desktop.
Tests whether searching for an ASCII character with strpos works correctly in UTF-8 strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Encodes a single Unicode code point as a UTF-8 string.
 *
 * The code point is packed as one 32-bit little-endian integer (a single
 * UCS-4LE unit) and then converted to UTF-8 by iconv.
 *
 * @link http://stackoverflow.com/questions/2748956/how-would-you-create-a-string-of-all-utf-8-characters
 *
 * @param int $i Unicode code point to encode.
 *
 * @return string|false Whatever iconv() returns: the UTF-8 bytes, or false
 *                      when the conversion fails.
 */
function unichr($i)
{
    $ucs4 = pack('V', $i);
    $utf8 = iconv('UCS-4LE', 'UTF-8', $ucs4);

    return $utf8;
}
// Build a corpus containing every code point that UTF-8 can encode and write
// it to all_utf8.txt, so the search test can be run against 1-, 2-, 3- and
// 4-byte sequences.
//
// The surrogate block U+D800..U+DFFF is skipped because surrogates are not
// valid scalar values and must not appear in UTF-8. Both range bounds are
// inclusive: the upper range runs to U+10FFFF, the last Unicode code point,
// so U+FFFF and the supplementary planes are part of the corpus as well.
$ranges = array(
    array(0x0000, 0xD7FF),
    array(0xE000, 0x10FFFF),
);

$all = '';
foreach ($ranges as $range) {
    for ($i = $range[0]; $i <= $range[1]; $i++) {
        $all .= unichr($i);
    }
}

// file_put_contents() returns false on failure; report it instead of
// silently finishing without a data file.
if (file_put_contents('all_utf8.txt', $all) === false) {
    fwrite(STDERR, "Could not write all_utf8.txt\n");
    exit(1);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Counts the occurrences of $char in $data twice - once with the
 * multibyte-aware mb_strpos() and once with the byte-oriented strpos() -
 * asserts that both counts agree, and prints a one-line summary.
 *
 * Each search resumes one position after the previous hit, so overlapping
 * occurrences of a multi-character needle are counted too.
 *
 * Note: the comparison uses assert(), so it is only enforced when assertions
 * are enabled in the PHP configuration (zend.assertions).
 *
 * @param string $data Haystack, expected to be UTF-8 encoded text.
 * @param string $char Needle to look for (typically one ASCII character).
 *
 * @return void
 */
function test($data, $char)
{
    $count = 0;
    $mbcount = 0;

    // Both scans begin at offset 0. Beginning at offset 1 would miss a match
    // on the very first character and would be out of range for an empty
    // haystack.
    $mbpos = mb_strpos($data, $char, 0, 'UTF-8');
    while ($mbpos !== false) {
        $mbcount++;
        // $mbpos + 1 is at most the character length, which is a valid offset.
        $mbpos = mb_strpos($data, $char, $mbpos + 1, 'UTF-8');
    }

    $pos = strpos($data, $char, 0);
    while ($pos !== false) {
        $count++;
        // $pos + 1 is at most the byte length, which is a valid offset.
        $pos = strpos($data, $char, $pos + 1);
    }

    assert($mbcount === $count);
    echo 'Tested '.mb_strlen($data, 'UTF-8').' chars and '.strlen($data).' bytes of data! Found: '.$mbcount.' occurences of '.$char.'.'."\n";
}
// Usage: php <script> <file-or-directory> <needle>
//
// Runs test() on the given file, or on every non-hidden regular file directly
// inside the given directory, searching for <needle>.
if (!isset($argv[1], $argv[2]) || $argv[2] === '') {
    $script = isset($argv[0]) ? $argv[0] : 'script.php';
    fwrite(STDERR, 'Usage: php ' . $script . " <file-or-directory> <needle>\n");
    exit(1);
}

$path = $argv[1];
$needle = $argv[2];

$files = [];
if (is_dir($path)) {
    $dh = opendir($path);
    // opendir() returns false on failure; passing that to readdir() would
    // never yield false on older PHP versions and the loop would not end.
    if ($dh === false) {
        fwrite(STDERR, 'Cannot open directory: ' . $path . "\n");
        exit(1);
    }
    while (($file = readdir($dh)) !== false) {
        // Skip ".", ".." and hidden entries.
        if (strpos($file, '.') === 0) {
            continue;
        }
        // Only regular files can be read; sub-directories are ignored.
        $candidate = $path . '/' . $file;
        if (is_file($candidate)) {
            $files[] = $candidate;
        }
    }
    closedir($dh);
} else {
    $files = [$path];
}

foreach ($files as $file) {
    $data = file_get_contents($file);
    // Do not feed a failed read (false) into the string functions.
    if ($data === false) {
        fwrite(STDERR, 'Cannot read file: ' . $file . "\n");
        continue;
    }
    test($data, $needle);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment