Created
March 10, 2016 19:01
-
-
Save Rican7/b3d4f15dd429b6900fa2 to your computer and use it in GitHub Desktop.
Unicode rune handling in multiple languages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"unicode/utf8" | |
) | |
const strangeMessage = "TᕼIᔕ ᗰᕮᔕᔕᗩGᕮ Iᔕ ᑭᖇOTᕮᑕTᕮᗪ ᗷY ᗩ ᔕᑭᕮᑕIᗩᒪ ᖴOᑎT. IT'ᔕ IᗰᑭOᔕᔕIᗷᒪᕮ TO ᑕOᑭY IT. TᖇY ᗩᑎᗪ YOᑌ ᗯIᒪᒪ ᖴᗩIᒪ" | |
func main() { | |
fmt.Println(strangeMessage + "\n") | |
for _, rune := range strangeMessage { | |
fmt.Printf( | |
"Char: %1q\t\tCode-point: %4d\t\tSize: %1d\t\tValid: %t\n", | |
string(rune), | |
rune, | |
utf8.RuneLen(rune), | |
utf8.ValidRune(rune), | |
) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

// Sample text that mixes ASCII letters with multi-byte Unicode characters, so
// character-wise and byte-wise handling give different results.
const STRANGE_MESSAGE = "TᕼIᔕ ᗰᕮᔕᔕᗩGᕮ Iᔕ ᑭᖇOTᕮᑕTᕮᗪ ᗷY ᗩ ᔕᑭᕮᑕIᗩᒪ ᖴOᑎT. IT'ᔕ IᗰᑭOᔕᔕIᗷᒪᕮ TO ᑕOᑭY IT. TᖇY ᗩᑎᗪ YOᑌ ᗯIᒪᒪ ᖴᗩIᒪ";

echo STRANGE_MESSAGE . PHP_EOL;

// Measure the character count once instead of on every loop iteration.
$length = mb_strlen(STRANGE_MESSAGE, 'UTF-8');

// Print one line per character: the character itself, its code point, its
// size in bytes, and whether it is well-formed UTF-8.
for ($i = 0; $i < $length; $i++) {
    $char = mb_substr(STRANGE_MESSAGE, $i, 1, 'UTF-8');

    printf(
        "Char: %1s\t\tCode-point: %4d\t\tSize: %1d\t\tValid: %s\n",
        $char,
        IntlChar::ord($char),
        // strlen() counts bytes, so this is the encoded size of the character.
        strlen($char),
        // Both branches are strings so that %s prints "true"/"false" rather
        // than the "1" a boolean true would produce.
        mb_check_encoding($char, 'UTF-8') ? 'true' : 'false'
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment