Created
July 23, 2017 12:52
-
-
Save PM2Ring/ddcf20d6014184b8f600bb81ff68286b to your computer and use it in GitHub Desktop.
Case-insensitive string class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' Case-insensitive string class | |
A string class that uses lower-case hashing and comparisons. | |
It also uses case-insensitive comparison when compared with | |
normal `str` strings. | |
CIStr defines all 6 rich comparison methods, although only
`__eq__` (together with `__hash__`) is needed for dictionary and set use.
Written by PM 2Ring 2017.07.23 | |
''' | |
from collections import UserString | |
class CIStr(UserString):
    ''' Case-insensitive string

    Hashes and compares by the case-folded form of the wrapped text, so
    instances that differ only in case are equal and occupy a single slot
    in a set or dictionary. Comparison against a normal `str` (or any
    object that provides a `casefold()` method) is case-insensitive too.

    Operands without a `casefold()` method are not comparable: `==` is
    False, `!=` is True, and the ordering operators raise `TypeError`.

    Attributes:
        low: the case-folded text as a plain `str`, computed once in
            `__init__` from `self.data`.
    '''

    def __init__(self, seq=''):
        super().__init__(seq)
        self.low = self.data.casefold()

    def __hash__(self):
        # Must agree with `__eq__`: equal (case-folded) text, equal hash.
        return hash(self.low)

    # This method returns a normal string rather than
    # a CIStr in order to make comparisons faster.
    def casefold(self):
        return self.low

    @staticmethod
    def _folded(other):
        ''' Return `other.casefold()`, or NotImplemented if it has none '''
        fold = getattr(other, 'casefold', None)
        if fold is None:
            # Let Python try the reflected operation / its default
            # instead of failing with AttributeError.
            return NotImplemented
        return fold()

    def __eq__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low == key

    def __ne__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low != key

    def __lt__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low < key

    def __le__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low <= key

    def __gt__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low > key

    def __ge__(self, other):
        key = self._folded(other)
        if key is NotImplemented:
            return NotImplemented
        return self.low >= key
def test_all(s):
    ''' Check that all case variants of `s` compare equal as CIStr

    Builds every upper / lower case combination of the lower-cased `s`
    (`2**len` of them), wraps each in a CIStr, and returns True only if
    every ordered pair of variants compares equal.
    '''
    base = s.lower()
    width = len(base)

    variants = []
    for mask in range(1 << width):
        # Zero-padded binary form of `mask`: digit `k` decides whether
        # character `k` of `base` is upper-cased.
        pattern = format(mask, '0{}b'.format(width))
        chars = []
        for ch, flag in zip(base, pattern):
            chars.append(ch.upper() if flag == '1' else ch.lower())
        variants.append(CIStr(''.join(chars)))

    # Compare every variant with every variant: `4**width` comparisons.
    for left in variants:
        for right in variants:
            if not left == right:
                return False
    return True
def tests():
    ''' Demonstrate CIStr in comparisons, sets and dictionaries

    Prints the result of each check; nothing is returned.
    '''
    # Test all 4**8 == 65536 pairs of upper / lower case combinations of 'abcdefgh'
    word = 'abcdefgh'
    print('Pairwise comparison test of', word)
    print(test_all(word))

    # Two lists holding the same words in different letter cases
    a = [CIStr(w) for w in 'zero One TWo thrEE FoUr fiVE SIX'.split()]
    b = [CIStr(w) for w in 'Zero One Two Three Four Five Six'.split()]
    print('a', a)
    print('b', b)
    print(all(x == y for x, y in zip(a, b)))

    # A set of case-insensitive strings
    print('\nSet test')
    ciset = set(a)
    print(ciset)
    for words in (a, b):
        print(all(w in ciset for w in words))

    # A set built from `b` should equal the one built from `a`
    from_b = set(b)
    print(from_b == ciset)

    # Adding `b` to a copy of the `a` set shouldn't change it
    merged = set(a)
    merged.update(b)
    print(merged == ciset)

    # A dictionary with case-insensitive keys
    print('\nDictionary test')
    d = {}
    for index, key in enumerate(a):
        d[CIStr(key)] = index
    print(d)
    for key in b:
        print(key, d[key])
# Run the demonstration only when executed as a script, not when imported.
if __name__ == '__main__':
    tests()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment