Skip to content

Instantly share code, notes, and snippets.

@kssd
Created May 16, 2023 11:14
Show Gist options
  • Save kssd/9e1d6b98d709f3dce235e1b8121f6fd4 to your computer and use it in GitHub Desktop.
Save kssd/9e1d6b98d709f3dce235e1b8121f6fd4 to your computer and use it in GitHub Desktop.
JAVA : Check UTF8 encoded characters in a string
import java.util.*;
import java.util.stream.*;
import java.nio.charset.StandardCharsets;
/**
 * Small demo that reports whether a string contains characters outside 7-bit ASCII,
 * i.e. characters that need more than one byte when encoded as UTF-8.
 *
 * <p>Naming note: every well-formed Java string can be encoded as UTF-8, so the
 * "utf8" / "non-utf8" wording used by the identifiers and by the printed label really
 * means "ASCII only" / "contains non-ASCII". The names are kept unchanged so existing
 * callers keep working.
 */
public class UTF8Check {

    /** Sample strings made only of ASCII characters (one byte each in UTF-8). */
    public static final String[] utf8Strings = new String[]{
        "UTF8 check",
        "@!#$%^&*()_+}{[]';:/.,?><`~1234567890-=\\|"
    };

    /**
     * Sample strings that contain at least one non-ASCII character. They are all valid
     * Unicode text and encode to UTF-8 without loss; they simply need multi-byte sequences.
     */
    public static final String[] nonUtf8Strings = new String[]{
        "Héllo", // contains the character 'é' (U+00E9)
        "नमस्ते", // contains the Devanagari character 'न' (U+0928)
        "Привет", // contains the Cyrillic character 'р' (U+0440)
        "مرحبا", // contains the Arabic character 'ح' (U+062D)
        "こんにちは", // contains the Hiragana character 'に' (U+306B)
        "안녕하세요", // contains the Hangul character '하' (U+D558)
        "שלום", // contains the Hebrew character 'ל' (U+05DC)
        "سلام", // contains the Arabic character 'م' (U+0645)
        "你好", // contains the Chinese character '好' (U+597D)
        "สวัสดี", // contains the Thai character 'ด' (U+0E14)
        "\uD83D\uDE00" // emoji outside the BMP (U+1F600), stored as a surrogate pair
    };

    /**
     * Tells whether every character of {@code str} is 7-bit ASCII (U+0000 to U+007F),
     * which is exactly the set of characters that occupy a single byte in UTF-8.
     *
     * <p>The UTF-16 code units are inspected directly instead of encoding the string first.
     * Encoding with {@code String.getBytes(StandardCharsets.UTF_8)} replaces an unpaired
     * surrogate with {@code '?'}, which made such malformed input look like plain ASCII.
     * Working on the code units reports it as non-ASCII and avoids allocating a byte array.
     *
     * @param str the text to inspect; must not be {@code null}
     * @return {@code true} if {@code str} is empty or contains only ASCII characters,
     *         {@code false} as soon as any code unit above U+007F is present
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static boolean hasOnlyUtf8Characters(String str) {
        Objects.requireNonNull(str, "str");
        // Any UTF-16 code unit above 0x7F (surrogates included) is a non-ASCII character.
        return str.chars().allMatch(codeUnit -> codeUnit <= 0x7F);
    }

    /**
     * Prints one line per sample string showing whether it contains non-ASCII characters.
     * The printed label still says "non-utf8" for output compatibility.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        Stream.concat(
                Arrays.stream(utf8Strings),
                Arrays.stream(nonUtf8Strings))
            .forEach((s) -> System.out.println(String.format("%-50s : has non-utf8 characters - %s", s, !hasOnlyUtf8Characters(s))));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment