Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ctype_lower misdetects non-lowercase characters on Mac OS X 10.8.
<?php
// Reproduction script: checks whether ctype_lower() accepts, under the
// en_US.UTF-8 locale, any single byte that it rejects under the "C" locale.
// Dumps bool(true) when en_US.UTF-8 reports no extra bytes, bool(false) otherwise.
$expected = [];
$result = [];

// Baseline pass: byte values classified as lowercase in the "C" locale.
setlocale(LC_ALL, 'C');
for ($byte = 0; $byte <= 0xFF; ++$byte) {
    if (ctype_lower(chr($byte))) {
        $expected[] = $byte;
    }
}

// setlocale() returns false when the requested locale is not available; the
// previous locale then stays active and the comparison below would trivially
// succeed, so make that situation visible instead of silently ignoring it.
if (setlocale(LC_ALL, 'en_US.UTF-8') === false) {
    trigger_error(
        'Locale en_US.UTF-8 is not available; the comparison below is not meaningful.',
        E_USER_WARNING
    );
}

// Second pass: the same classification under en_US.UTF-8.
for ($byte = 0; $byte <= 0xFF; ++$byte) {
    if (ctype_lower(chr($byte))) {
        $result[] = $byte;
    }
}

// Any value left by array_diff() is a byte accepted only under en_US.UTF-8.
var_dump(
    [] === array_diff($result, $expected)
);
#include <stdio.h>
#include <locale.h>
#include <ctype.h>
/*
 * Prints, as hexadecimal, every byte value in 0x00..0xFF that islower()
 * accepts under the en_US.UTF-8 locale but rejects under the "C" locale.
 *
 * Returns 0 on success, 1 when the en_US.UTF-8 locale cannot be selected.
 */
int main() {
    int i;
    int lower_in_c;

    /* Without the target locale every check would run under the previous
     * locale and the program would report nothing, so fail loudly instead. */
    if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) {
        fprintf(stderr, "en_US.UTF-8 locale is not available\n");
        return 1;
    }

    for (i = 0; i <= 0xFF; ++i) {
        setlocale(LC_ALL, "C");
        /* Record the baseline answer for this byte on every iteration, so the
         * comparison below never reads a stale or uninitialized value. */
        lower_in_c = islower(i) != 0;

        setlocale(LC_ALL, "en_US.UTF-8");
        if (islower(i) && !lower_in_c) {
            printf("0x%X, ", i);
        }
    }
    printf("\n");
    return 0;
}
<?php
// Print every Unicode code point (U+0000 through U+10FFFF inclusive) whose
// UTF-8 encoding ctype_lower() accepts under the currently active locale.
for ($i = 0; $i <= 0x10FFFF; $i += 1) {
    $char = utf8_chr($i);

    // utf8_chr() returns false for surrogates (U+D800..U+DFFF); skip them
    // rather than handing a non-string value to ctype_lower().
    if ($char === false) {
        continue;
    }

    if (ctype_lower($char)) {
        echo "{$char}, ";
    }
}
/**
 * Encode a Unicode code point as a UTF-8 string.
 *
 * @param int $code_point Code point to encode.
 *
 * @return string|false The UTF-8 encoded character, or false when the value is
 *                      negative, a surrogate (U+D800..U+DFFF), or above U+10FFFF.
 */
function utf8_chr($code_point)
{
    $is_surrogate = 0xD800 <= $code_point && $code_point <= 0xDFFF;
    if ($code_point < 0 || $is_surrogate || 0x10FFFF < $code_point) {
        return false;
    }

    // Eight zero-padded hex digits are the four big-endian bytes of the
    // code point, i.e. its UTF-32BE representation once decoded to binary.
    $hex = str_pad(dechex($code_point), 8, '0', STR_PAD_LEFT);
    $utf32 = hex2bin($hex);

    return mb_convert_encoding($utf32, 'UTF-8', 'UTF-32BE');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.