Created
December 7, 2016 01:25
-
-
Save victorono/d465d033e535e33aa836ab5ad3d67ef0 to your computer and use it in GitHub Desktop.
Converts a string into a regex pattern that matches it in a case- and accent-insensitive way.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from unicodedata import normalize
# Maps an unaccented ASCII letter to the characters treated as equivalent to
# it when a regex character class is built from the value. Each value ends
# with the base letter itself, so the class also matches the plain letter.
# NOTE: 'å' is listed twice under 'a'; a repeated character is harmless
# inside a character class, so the value is left unchanged.
DIACRITICS_REPLACEMENTS = {
    'a': 'àáâãäåæåāa',
    'e': 'èéêëẽėęe',
    'i': 'ìíîïĩīįi',
    'o': 'ðòóôõöøœōo',
    'u': 'ùúûüµūu',
    'c': 'çćčc',
    'n': 'ñńn',
    's': 'ßšśs',
    'y': 'ýÿ¥y',
    'z': 'žźżz',
}
def insensitive_case(string):
    """
    Build a regex pattern matching ``string`` regardless of case and accents.

    The input is NFKD-normalized, reduced to ASCII and lower-cased. Each
    resulting letter that is a key of ``DIACRITICS_REPLACEMENTS`` becomes a
    character class of its variants; every other character is emitted as an
    escaped literal, so regex metacharacters in the input match themselves.

    The pattern only contains lower-case letters, so compile it with
    ``re.IGNORECASE`` to get case-insensitive matching.

    NOTE: characters with no ASCII decomposition (for example 'ß' or 'ø')
    are dropped by the ASCII reduction before the lookup takes place.

    :param string: text to search for; converted to text if it is not already.
    :return: the regex pattern as a string (empty for empty input).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3 has no separate ``unicode`` type

    # Decompose accented characters, then drop everything that is not ASCII
    # (i.e. the combining marks), leaving the bare base letters.
    decomposed = normalize('NFKD', text_type(string))
    folded = decomposed.encode('ascii', 'ignore').lower()
    if not isinstance(folded, str):
        # Python 3: ``encode`` returned bytes; iterate over text, not ints.
        folded = folded.decode('ascii')

    pieces = []
    for char in folded:
        variants = DIACRITICS_REPLACEMENTS.get(char)
        if variants is None:
            # Escape so regex metacharacters in the input match literally.
            pieces.append(re.escape(char))
        else:
            pieces.append('[{}]'.format(variants))
    return ''.join(pieces)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment