Skip to content

Instantly share code, notes, and snippets.

@curtmack
Created January 11, 2024 23:19
Show Gist options
  • Save curtmack/22e5f95870aecf89a60a4ddb57bd91cf to your computer and use it in GitHub Desktop.
Save curtmack/22e5f95870aecf89a60a4ddb57bd91cf to your computer and use it in GitHub Desktop.
Correct title case conversion in Java
import java.util.Map;
import java.util.HashMap;
/**
 * Title-case conversion helpers that also handle code points whose title-case
 * form is longer than a single code point.
 *
 * <p>{@link Character#toTitleCase(int)} returns a single code point, so it
 * cannot express a mapping such as U+00DF (sharp s) to the two-character
 * string "Ss". This class keeps an explicit table of such multi-character
 * mappings and falls back to {@code Character.toTitleCase} for everything
 * else.
 *
 * <p>NOTE(review): the table appears to mirror the unconditional title-case
 * entries of the Unicode {@code SpecialCasing.txt} data file; re-check it
 * when upgrading to a newer Unicode version.
 */
public final class TitleCase {

    /** Not instantiable: this class only exposes static helpers. */
    private TitleCase() {}

    /**
     * Code points whose title-case form is a multi-character string, keyed by
     * code point. Nothing built into Java provides these expansions for title
     * case, so they are listed by hand. The map is immutable once the class
     * has been initialized.
     */
    private static final Map<Integer, String> SPECIAL_MAP;

    static {
        Map<Integer, String> table = new HashMap<>();
        table.put(  0xDF, "\u0053\u0073");       // ß -> Ss
        table.put( 0x149, "\u02BC\u004E");       // ŉ -> ʼN
        table.put( 0x1F0, "\u004A\u030C");       // ǰ -> J̌
        table.put( 0x390, "\u0399\u0308\u0301"); // ΐ -> Ϊ́
        table.put( 0x3B0, "\u03A5\u0308\u0301"); // ΰ -> Ϋ́
        table.put( 0x587, "\u0535\u0582");       // և -> Եւ
        table.put(0x1E96, "\u0048\u0331");       // ẖ -> H̱
        table.put(0x1E97, "\u0054\u0308");       // ẗ -> T̈
        table.put(0x1E98, "\u0057\u030A");       // ẘ -> W̊
        table.put(0x1E99, "\u0059\u030A");       // ẙ -> Y̊
        table.put(0x1E9A, "\u0041\u02BE");       // ẚ -> Aʾ
        table.put(0x1F50, "\u03A5\u0313");       // ὐ -> Υ̓
        table.put(0x1F52, "\u03A5\u0313\u0300"); // ὒ -> Υ̓̀
        table.put(0x1F54, "\u03A5\u0313\u0301"); // ὔ -> Υ̓́
        table.put(0x1F56, "\u03A5\u0313\u0342"); // ὖ -> Υ̓͂
        table.put(0x1FB2, "\u1FBA\u0345");       // ᾲ -> Ὰͅ
        table.put(0x1FB4, "\u0386\u0345");       // ᾴ -> Άͅ
        table.put(0x1FB6, "\u0391\u0342");       // ᾶ -> Α͂
        table.put(0x1FB7, "\u0391\u0342\u0345"); // ᾷ -> ᾼ͂
        table.put(0x1FC2, "\u1FCA\u0345");       // ῂ -> Ὴͅ
        table.put(0x1FC4, "\u0389\u0345");       // ῄ -> Ήͅ
        table.put(0x1FC6, "\u0397\u0342");       // ῆ -> Η͂
        table.put(0x1FC7, "\u0397\u0342\u0345"); // ῇ -> ῌ͂
        table.put(0x1FD2, "\u0399\u0308\u0300"); // ῒ -> Ϊ̀
        table.put(0x1FD3, "\u0399\u0308\u0301"); // ΐ -> Ϊ́
        table.put(0x1FD6, "\u0399\u0342");       // ῖ -> Ι͂
        table.put(0x1FD7, "\u0399\u0308\u0342"); // ῗ -> Ϊ͂
        table.put(0x1FE2, "\u03A5\u0308\u0300"); // ῢ -> Ϋ̀
        table.put(0x1FE3, "\u03A5\u0308\u0301"); // ΰ -> Ϋ́
        table.put(0x1FE4, "\u03A1\u0313");       // ῤ -> Ρ̓
        table.put(0x1FE6, "\u03A5\u0342");       // ῦ -> Υ͂
        table.put(0x1FE7, "\u03A5\u0308\u0342"); // ῧ -> Ϋ͂
        table.put(0x1FF2, "\u1FFA\u0345");       // ῲ -> Ὼͅ
        table.put(0x1FF4, "\u038F\u0345");       // ῴ -> Ώͅ
        table.put(0x1FF6, "\u03A9\u0342");       // ῶ -> Ω͂
        table.put(0x1FF7, "\u03A9\u0342\u0345"); // ῷ -> ῼ͂
        table.put(0xFB00, "\u0046\u0066");       // ﬀ -> Ff
        table.put(0xFB01, "\u0046\u0069");       // ﬁ -> Fi
        table.put(0xFB02, "\u0046\u006C");       // ﬂ -> Fl
        table.put(0xFB03, "\u0046\u0066\u0069"); // ﬃ -> Ffi
        table.put(0xFB04, "\u0046\u0066\u006C"); // ﬄ -> Ffl
        table.put(0xFB05, "\u0053\u0074");       // ﬅ -> St
        table.put(0xFB06, "\u0053\u0074");       // ﬆ -> St
        table.put(0xFB13, "\u0544\u0576");       // ﬓ -> Մն
        table.put(0xFB14, "\u0544\u0565");       // ﬔ -> Մե
        table.put(0xFB15, "\u0544\u056B");       // ﬕ -> Մի
        table.put(0xFB16, "\u054E\u0576");       // ﬖ -> Վն
        table.put(0xFB17, "\u0544\u056D");       // ﬗ -> Մխ
        // Freeze the table so the shared static state can never be modified.
        SPECIAL_MAP = Map.copyOf(table);
    }

    /**
     * Returns the title-case form of a single code point as a string.
     *
     * <p>If the code point has an entry in the special-case table, that
     * (multi-character) string is returned; otherwise the result is the
     * one-code-point string produced by {@link Character#toTitleCase(int)}.
     *
     * @param codepoint the Unicode code point to convert
     * @return the title-case form of {@code codepoint}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid
     *         Unicode code point (raised by {@link Character#toString(int)})
     */
    public static String convertCodePoint(int codepoint) {
        // Single lookup: the table never stores null values, so a null result
        // means "no special mapping".
        String special = SPECIAL_MAP.get(codepoint);
        if (special != null) {
            return special;
        }
        return Character.toString(Character.toTitleCase(codepoint));
    }

    /**
     * Returns {@code input} with its first code point converted to title case.
     *
     * <p>Only the first code point is converted; the remainder of the string
     * is copied through unchanged. A leading surrogate pair is treated as one
     * code point. An empty string is returned as-is.
     *
     * @param input the string to convert; must not be {@code null}
     * @return the converted string, or {@code input} itself when it is empty
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String convertString(String input) {
        // Guard: there is no first code point to read in an empty string, and
        // Character.codePointAt(input, 0) would throw for it.
        if (input.isEmpty()) {
            return input;
        }
        int first = Character.codePointAt(input, 0);
        // charCount(first) is the number of chars the first code point occupies
        // (2 for a surrogate pair, otherwise 1), i.e. where the tail begins.
        return convertCodePoint(first) + input.substring(Character.charCount(first));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment