Skip to content

Instantly share code, notes, and snippets.

@guyzmo
Last active December 26, 2015 18:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guyzmo/a7ce4fddba7744ddf335 to your computer and use it in GitHub Desktop.
Save guyzmo/a7ce4fddba7744ddf335 to your computer and use it in GitHub Desktop.
Unicode combining-character-aware string class for better character counting and indexing
#!/usr/bin/env python3
import unicodedata
# Sample inputs for the demo loop at the bottom of the file: a pointed Hebrew
# verse (base letters followed by combining vowel/cantillation marks) and a
# French word containing an accented letter.
test_strings = [
"בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ",
"bête",
]
class unicomb(str):
    """String wrapper that counts and indexes by user-visible characters.

    The text is split into clusters: one base character (canonical combining
    class 0) followed by every combining mark that modifies it. ``len()``,
    indexing, item assignment, iteration, ``str()`` and ``repr()`` all work on
    those clusters, so a letter and its diacritics count as one character and
    the diacritics are preserved.

    Only the methods defined here are cluster-aware. Every other method
    inherited from ``str`` (comparison, hashing, ``find`` ...) still operates
    on the immutable text passed at construction time and does not see
    changes made through item assignment.
    """

    def __init__(self, unistr):
        """Build the cluster list from *unistr*.

        Args:
            unistr: the text to split into base-character clusters.
        """
        super().__init__()
        clusters = []
        for char in unistr:
            if clusters and unicodedata.combining(char) != 0:
                # Keep the mark attached to the character it modifies rather
                # than dropping it.
                clusters[-1] += char
            else:
                # A base character starts a new cluster. A combining mark
                # with nothing before it also lands here and stands alone.
                clusters.append(char)
        self._real_rep = clusters

    def __len__(self):
        """Return the number of clusters."""
        return len(self._real_rep)

    def __getitem__(self, idx):
        """Return the cluster at *idx* (a slice yields a list of clusters)."""
        return self._real_rep[idx]

    def __setitem__(self, idx, val):
        """Replace the cluster at *idx* with *val*."""
        self._real_rep[idx] = val

    def __iter__(self):
        """Iterate over clusters, consistent with ``len()`` and indexing."""
        return iter(self._real_rep)

    def __str__(self):
        """Return the current clusters joined back into a plain string."""
        return ''.join(self._real_rep)

    def __repr__(self):
        """Return the ``repr`` of the joined clusters."""
        return repr(''.join(self._real_rep))
# Demo: compare raw code-point counting/indexing with the unicomb view of
# each sample, then overwrite the first and last "real" characters.
for text in test_strings:
    print("full string: '{}'".format(text))
    print("string length: {}".format(len(text)))
    # One wrapper serves every read below; nothing mutates it until the end.
    combined = unicomb(text)
    print("real length: {}".format(len(combined)))
    print("3rd character: '{}'".format(text[2]))
    print("3rd real char: '{}'".format(combined[2]))
    # Chained assignment stores left to right: index 0 first, then index -1.
    combined[0] = combined[-1] = '*'
    print("Change 1st and last chars: {}".format(combined))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment