Skip to content

Instantly share code, notes, and snippets.

@PM2Ring
Created July 23, 2017 12:52
Show Gist options
  • Save PM2Ring/ddcf20d6014184b8f600bb81ff68286b to your computer and use it in GitHub Desktop.
Save PM2Ring/ddcf20d6014184b8f600bb81ff68286b to your computer and use it in GitHub Desktop.
Case-insensitive string class
#!/usr/bin/env python3
''' Case-insensitive string class
A string class that hashes and compares using its case-folded form.
It also uses case-insensitive comparison when compared with
normal `str` strings.
CIStr defines all 6 rich comparison methods, although only
the `__eq__` method is needed for dictionary and set use.
Written by PM 2Ring 2017.07.23
'''
from collections import UserString
class CIStr(UserString):
    """Case-insensitive string.

    Wraps a string via ``UserString`` and uses its case-folded text for
    hashing and for all six rich comparisons, so instances that differ
    only in letter case are equal and hash alike.  The other operand of
    a comparison may be any object with a ``casefold()`` method (``str``,
    ``UserString``, ``CIStr``); for anything else the comparison methods
    return ``NotImplemented`` so Python applies its normal fallback
    (``==`` is False, ``!=`` is True, ordering raises ``TypeError``).
    """

    def __init__(self, seq=''):
        """Store `seq` as the string data and cache its case-folded form."""
        super().__init__(seq)
        # Folded once here; every hash and comparison reuses this value.
        self.low = self.data.casefold()

    def __hash__(self):
        """Hash the case-folded text so case variants hash alike."""
        return hash(self.low)

    # This method returns a normal string rather than
    # a CIStr in order to make comparisons faster.
    def casefold(self):
        """Return the cached case-folded text as a plain ``str``."""
        return self.low

    @staticmethod
    def _folded(other):
        """Return ``other.casefold()``, or ``NotImplemented`` if `other`
        has no ``casefold`` attribute."""
        fold = getattr(other, 'casefold', None)
        if fold is None:
            return NotImplemented
        return fold()

    def __eq__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low == folded

    def __ne__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low != folded

    def __lt__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low < folded

    def __le__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low <= folded

    def __gt__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low > folded

    def __ge__(self, other):
        folded = self._folded(other)
        if folded is NotImplemented:
            return NotImplemented
        return self.low >= folded
def test_all(s):
    """Check that all case variants of `s` compare equal to each other.

    Builds a ``CIStr`` for each of the ``2**len(s)`` upper / lower case
    combinations of the lower-cased `s`, then compares every ordered
    pair of them (``4**len(s)`` comparisons).  Returns True only when
    every comparison is truthy.
    """
    letters = s.lower()
    width = len(letters)

    variants = []
    for mask in range(1 << width):
        # Zero-padded binary digits of `mask`: a '1' upper-cases the
        # letter at the same position, anything else lower-cases it.
        pattern = '{0:0{1}b}'.format(mask, width)
        chars = []
        for ch, bit in zip(letters, pattern):
            chars.append(ch.upper() if bit == '1' else ch.lower())
        variants.append(CIStr(''.join(chars)))

    # Stop at the first pair that does not compare equal.
    for left in variants:
        for right in variants:
            if not left == right:
                return False
    return True
def tests():
    """Print the results of the pairwise, set and dictionary checks."""
    # Test all 4**8 == 65536 pairs of case variants of 'abcdefgh'.
    word = 'abcdefgh'
    print('Pairwise comparison test of', word)
    print(test_all(word))

    # Two lists spelling the same words with different capitalisation.
    a = [CIStr(w) for w in 'zero One TWo thrEE FoUr fiVE SIX'.split()]
    b = [CIStr(w) for w in 'Zero One Two Three Four Five Six'.split()]
    print('a', a)
    print('b', b)
    print(all(left == right for left, right in zip(a, b)))

    # Membership in a set of case-insensitive strings.
    print('\nSet test')
    ciset = set(a)
    print(ciset)
    print(all(item in ciset for item in a))
    print(all(item in ciset for item in b))

    # A set built from `b` should equal the one built from `a`.
    newset = set(b)
    print(newset == ciset)

    # Updating a copy of the `a` set with `b` shouldn't change it.
    newset = set(a)
    newset.update(b)
    print(newset == ciset)

    # Dictionary keyed by case-insensitive strings, looked up via `b`.
    print('\nDictionary test')
    d = {CIStr(key): index for index, key in enumerate(a)}
    print(d)
    for key in b:
        print(key, d[key])
# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    tests()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment