Skip to content

Instantly share code, notes, and snippets.

@rdp
Created March 18, 2015 21:39
Show Gist options
  • Save rdp/0014de502f37abd64ffd to your computer and use it in GitHub Desktop.
Save rdp/0014de502f37abd64ffd to your computer and use it in GitHub Desktop.
/**
 * Checks that {@code DataImageWriter.stringToCharacterWithCombiningChars} splits a string
 * into a list in which each base character is kept together with the combining marks that
 * follow it, and in which marks that have no preceding base character form their own entry.
 *
 * <p>Every expectation is verified through {@link #assertSplitsInto(String, String...)},
 * which throws unconditionally on mismatch, so the test cannot pass vacuously when the JVM
 * is started without {@code -ea}.
 *
 * @throws UnsupportedEncodingException never thrown by this body any more; the clause is
 *         kept only so the method signature stays unchanged
 */
@Test
public void canSplitIntoCharacterWithCombiningCharacters() throws UnsupportedEncodingException {
// U+0308 COMBINING DIAERESIS (UTF-8 bytes CC 88).
String umlautMark = "\u0308";
// U+17C0 KHMER VOWEL SIGN IE (UTF-8 bytes E1 9F 80).
String khmerCombiningWithWidth = "\u17C0";

// Plain characters: one list entry per character.
assertSplitsInto("a", "a");
assertSplitsInto("ab", "a", "b");

// Marks that come before any base character are grouped into a single entry.
assertSplitsInto(umlautMark + umlautMark + "a", umlautMark + umlautMark, "a");

// A mark sticks to the character it follows.
assertSplitsInto("a" + umlautMark + "b", "a" + umlautMark, "b");
assertSplitsInto("a" + khmerCombiningWithWidth + "b", "a" + khmerCombiningWithWidth, "b");

// and lots of stuff together:
assertSplitsInto(
    umlautMark + "ab" + umlautMark + " c" + khmerCombiningWithWidth,
    umlautMark, "a", "b" + umlautMark, " ", "c" + khmerCombiningWithWidth);
}

/**
 * Fails with a descriptive {@link AssertionError} unless splitting {@code input} yields
 * exactly {@code expectedPieces}, in order.
 *
 * @param input          the string handed to the splitter
 * @param expectedPieces the entries the resulting list must equal
 */
private static void assertSplitsInto(String input, String... expectedPieces) {
// Held as Object because the splitter's declared return type is not visible from this method;
// equals() is still invoked on the splitter's result, exactly as the bare asserts did.
Object expected = Arrays.asList(expectedPieces);
Object actual = DataImageWriter.stringToCharacterWithCombiningChars(input);
if (!actual.equals(expected)) {
throw new AssertionError(
    "splitting \"" + input + "\" produced " + actual + " but expected " + expected);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment