Skip to content

Instantly share code, notes, and snippets.

@Rican7
Created March 10, 2016 19:01
Show Gist options
  • Save Rican7/b3d4f15dd429b6900fa2 to your computer and use it in GitHub Desktop.
Save Rican7/b3d4f15dd429b6900fa2 to your computer and use it in GitHub Desktop.
Unicode rune handling in multiple languages
package main
import (
"fmt"
"unicode/utf8"
)
// strangeMessage is the sample text whose runes are inspected. It mixes plain
// ASCII letters with non-ASCII look-alike characters.
const strangeMessage = "TᕼIᔕ ᗰᕮᔕᔕᗩGᕮ Iᔕ ᑭᖇOTᕮᑕTᕮᗪ ᗷY ᗩ ᔕᑭᕮᑕIᗩᒪ ᖴOᑎT. IT'ᔕ IᗰᑭOᔕᔕIᗷᒪᕮ TO ᑕOᑭY IT. TᖇY ᗩᑎᗪ YOᑌ ᗯIᒪᒪ ᖴᗩIᒪ"

// main prints strangeMessage followed by a blank line, then one line per rune
// showing the rune as a quoted string, its code point, the number of bytes in
// its UTF-8 encoding, and whether utf8.ValidRune accepts it.
func main() {
	// Println already terminates the line; the second call emits the blank
	// separator line. Appending "\n" to the argument instead is reported by
	// go vet as a redundant newline.
	fmt.Println(strangeMessage)
	fmt.Println()

	// Ranging over a string yields one rune per iteration. The loop variable
	// is named r so that it does not shadow the predeclared type rune.
	for _, r := range strangeMessage {
		fmt.Printf(
			"Char: %1q\t\tCode-point: %4d\t\tSize: %1d\t\tValid: %t\n",
			string(r),
			r,
			utf8.RuneLen(r),
			utf8.ValidRune(r),
		)
	}
}
<?php
// Sample text whose characters are inspected one by one. It mixes plain ASCII
// letters with non-ASCII look-alike characters.
const STRANGE_MESSAGE = "TᕼIᔕ ᗰᕮᔕᔕᗩGᕮ Iᔕ ᑭᖇOTᕮᑕTᕮᗪ ᗷY ᗩ ᔕᑭᕮᑕIᗩᒪ ᖴOᑎT. IT'ᔕ IᗰᑭOᔕᔕIᗷᒪᕮ TO ᑕOᑭY IT. TᖇY ᗩᑎᗪ YOᑌ ᗯIᒪᒪ ᖴᗩIᒪ";

echo STRANGE_MESSAGE . PHP_EOL;

// Count the characters once instead of re-evaluating mb_strlen() in the loop
// condition on every iteration.
$length = mb_strlen(STRANGE_MESSAGE);

// Print one line per character: the character itself, its code point, the
// number of bytes it occupies, and whether it is validly encoded UTF-8.
for ($i = 0; $i < $length; $i++) {
    $char = mb_substr(STRANGE_MESSAGE, $i, 1);

    printf(
        "Char: %1s\t\tCode-point: %4d\t\tSize: %1d\t\tValid: %s\n",
        $char,
        IntlChar::ord($char),
        // strlen() counts bytes, not characters.
        strlen($char),
        // Both branches are strings so that %s prints "true"/"false"; a
        // boolean true would be rendered as "1". The check validates the
        // character's encoding directly.
        // NOTE(review): the previous check queried IntlChar::PROPERTY_INVALID_CODE,
        // which appears to be a sentinel rather than a real property - confirm
        // that mb_check_encoding() matches the intended meaning of "Valid".
        mb_check_encoding($char, 'UTF-8') ? 'true' : 'false'
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment