Skip to content

Instantly share code, notes, and snippets.

@kamchy
Last active May 26, 2021 08:03
Show Gist options
  • Save kamchy/06fe4b4d1dd44d9647016406aa389f6a to your computer and use it in GitHub Desktop.
Save kamchy/06fe4b4d1dd44d9647016406aa389f6a to your computer and use it in GitHub Desktop.
import java.util.Arrays;
import java.util.List;
import java.util.stream.IntStream;
import java.util.stream.Collectors;
/**
 * Console demo that contrasts UTF-16 code units ({@code char}) with Unicode
 * code points for a handful of sample strings.
 *
 * <p>For every sample the program prints its length in code units, its code
 * point count, the code points in {@code U+XXXX} notation, a per-index table
 * and finally the raw code units in hexadecimal.
 */
public class UnicodePlay {

    /**
     * Writes a formatted message to standard output.
     *
     * @param fmt  a {@link java.util.Formatter} format string
     * @param rest the arguments referenced by the format string
     */
    static void fmt(String fmt, Object... rest) {
        System.out.printf(fmt, rest);
    }

    /**
     * Renders one code point in {@code U+XXXX} notation: upper-case hex,
     * zero-padded to at least four digits.
     *
     * @param codePoint the code point value to render
     * @return the textual form, e.g. {@code U+0041} or {@code U+1F468}
     */
    static String formattedCodePoint(int codePoint) {
        return "U+" + String.format("%04X", codePoint);
    }

    /**
     * Renders every code point of a string in {@code U+XXXX} notation,
     * separated by single spaces.
     *
     * @param str the string to decode
     * @return the space-separated list; empty when {@code str} is empty
     */
    static String formattedCodePoints(String str) {
        StringBuilder joined = new StringBuilder();
        int offset = 0;
        while (offset < str.length()) {
            int current = str.codePointAt(offset);
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(formattedCodePoint(current));
            // Step over both halves of a surrogate pair in one go.
            offset += Character.charCount(current);
        }
        return joined.toString();
    }

    /**
     * Prints a report about one string: its length, code point count, code
     * point list, a table with one row per UTF-16 index, and the hex dump of
     * its code units.
     *
     * @param str the string to inspect
     */
    static void checkUnicode(String str) {
        final int unitCount = str.length();
        final int codePointTotal = str.codePointCount(0, unitCount);
        // Same column layout twice: the title row prints text in the third
        // column, the data rows print the code unit there as upper-case hex.
        final String titleRow = "|%3s|%4s|%8s|%5s|%5s|%16s|%n";
        final String dataRow = "|%3s|%4s|%8X|%5s|%5s|%16s|%n";

        fmt("%n%nChecking string %s%n", str);
        fmt("String length is: %s%n", unitCount);
        fmt("Code point count is: %s%n", codePointTotal);
        fmt("List of code points: %s%n", formattedCodePoints(str));

        // "znak" is Polish for "character".
        fmt(titleRow, "idx", "znak", "int", "is hs", "is ls", "codepointAt(idx)");

        // Walk every UTF-16 index (not every code point), so both halves of a
        // surrogate pair get a row of their own.
        for (int idx = 0; idx < unitCount; idx++) {
            char unit = str.charAt(idx);
            int codePointHere = str.codePointAt(idx);
            fmt(dataRow,
                    idx,
                    Character.toString(codePointHere),
                    (int) unit,
                    Character.isHighSurrogate(unit),
                    Character.isLowSurrogate(unit),
                    formattedCodePoint(codePointHere));
        }

        fmt("Iterate over chars%n");
        for (char unit : str.toCharArray()) {
            // Widen to int: %X does not accept a Character argument.
            fmt("%04X ", (int) unit);
        }
    }

    /**
     * Runs {@link #checkUnicode(String)} on each built-in sample string.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        List<String> samples = List.of(
                "👩‍❤️‍💋‍👨",
                "道",
                "zażółć",
                "👨‍🍼");
        for (String sample : samples) {
            checkUnicode(sample);
        }
    }
}
@kamchy
Copy link
Author

kamchy commented May 26, 2021

Looking at emojis, code points, and the related Java API.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment