Skip to content

Instantly share code, notes, and snippets.

@andjc
Last active December 22, 2022 00:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andjc/d9a90d2f89588a76bc1d5ed1f028858c to your computer and use it in GitHub Desktop.
Save andjc/d9a90d2f89588a76bc1d5ed1f028858c to your computer and use it in GitHub Desktop.
Titlecasing: using PyICU, or to_title(), a regex-based alternative to Python's built-in str.title() method.
from icu import Locale, UnicodeString
# Locale-aware title-casing of two sample strings via PyICU's
# UnicodeString.toTitle().
# NOTE: Locale.createCanonical("haw_US") is the alternative constructor that
# was previously tried for `loc`.
loc = Locale("haw_US")
s1 = "ʻōlelo hawaiʻi"
s2 = "oude ijssel "

hawaiian_title = UnicodeString(s1).toTitle(loc)
print(hawaiian_title)

# s2 ends with a space; trim() is applied to the title-cased result before
# it is printed.
dutch_title = UnicodeString(s2).toTitle(Locale("nl_NL")).trim()
print(dutch_title)
import regex as re
def to_title(s: str, hyphens: bool = False) -> str:
    """Return *s* with the first letter of every word upper-cased.

    A word is a run of alphabetic characters that may contain combining
    marks and single internal separators: apostrophe, middle dot, colon,
    U+A789 (modifier letter colon) and U+2019 (right single quotation mark).
    The remainder of each word is lower-cased.  When a word starts with the
    ʻokina (U+02BB), that character is kept as-is and the character after it
    is upper-cased instead.

    Args:
        s: Text to convert.
        hyphens: When true, a hyphen also counts as an internal separator,
            so a hyphenated compound is treated as one word and only its
            first letter is capitalised.

    Returns:
        The converted string; text outside matched words is left untouched.
    """
    # The pattern uses \p{...} Unicode property classes, so `re` must be the
    # third-party `regex` module (this file imports it as `re`).
    separators = r"'·:\uA789\u2019"
    if hyphens:
        separators = r"\-" + separators

    # The trailing group is `*` rather than `+` so that one-letter words
    # ("a", "i", ...) are matched and capitalised too.
    pattern = (
        r"[ʻ]?[\p{Alphabetic}]"
        r"(?:[" + separators + r"]?[\p{Alphabetic}\p{Mn}\p{Mc}])*"
    )

    def capitalise(match):
        word = match.group(0)
        # Skip a leading ʻokina so the first real letter gets the capital.
        start = 1 if word.startswith("ʻ") else 0
        # Slicing (not indexing) keeps this safe when the match is a lone
        # ʻokina with nothing after it.
        head = word[:start]
        initial = word[start:start + 1].upper()
        tail = word[start + 1:].lower()
        return head + initial + tail

    return re.compile(pattern, re.I).sub(capitalise, s)
# Demo: title-case the Hawaiian sample string with the regex-based helper.
s1 = "ʻōlelo hawaiʻi"
titled = to_title(s1)
print(titled)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment