Skip to content

Instantly share code, notes, and snippets.

@victorono
Created December 7, 2016 01:25
Show Gist options
  • Save victorono/d465d033e535e33aa836ab5ad3d67ef0 to your computer and use it in GitHub Desktop.
Save victorono/d465d033e535e33aa836ab5ad3d67ef0 to your computer and use it in GitHub Desktop.
Converts a string containing accents into a regex expression that matches any combination of accented characters in an accent-insensitive way
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from unicodedata import normalize
# Maps a plain lowercase letter to the characters that go inside its regex
# character class.  Every value ends with the plain letter itself, so the
# resulting class matches the unaccented spelling as well.
DIACRITICS_REPLACEMENTS = {
    # vowels
    "a": "àáâãäåæåāa",
    "e": "èéêëẽėęe",
    "i": "ìíîïĩīįi",
    "o": "ðòóôõöøœōo",
    "u": "ùúûüµūu",
    # consonants
    "c": "çćčc",
    "n": "ñńn",
    "s": "ßšśs",
    "y": "ýÿ¥y",
    "z": "žźżz",
}
def insensitive_case(string, replacements=None):
    """Build a regex pattern that matches ``string`` regardless of accents.

    The input is coerced to text, decomposed with NFKD, reduced to its ASCII
    characters and lower-cased.  Every remaining character that is a key of
    ``replacements`` is emitted as a character class ``[...]`` holding the
    mapped characters; every other character is emitted as an escaped
    literal, so regex metacharacters in the input match themselves.

    The pattern is returned as a string, not compiled, and is entirely
    lower-case: apply a case-insensitive flag (for example
    ``re.IGNORECASE``) when using it to get case-insensitive matching.

    Note: non-ASCII characters that NFKD cannot decompose into an ASCII base
    character (for example ``ß``) are dropped before the pattern is built.

    Args:
        string: Value to convert.  Byte strings are decoded as UTF-8; any
            other non-text value is converted with the text type first.
        replacements: Optional mapping from a plain lowercase character to
            the characters of its character class.  Defaults to
            ``DIACRITICS_REPLACEMENTS``.

    Returns:
        The regex pattern as a string (empty for empty input).

    Raises:
        UnicodeDecodeError: If ``string`` is a byte string that is not valid
            UTF-8.
    """
    if replacements is None:
        replacements = DIACRITICS_REPLACEMENTS

    # Python 2 spells the text type ``unicode``; Python 3 only has ``str``.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str

    # On Python 3, str(b'...') would produce the repr "b'...'", so byte
    # strings have to be decoded explicitly.
    if isinstance(string, bytes):
        string = string.decode('utf-8')

    folded = normalize('NFKD', text_type(string)).encode('ascii', 'ignore')
    # encode() returns bytes on Python 3; iterate over text so each item is a
    # one-character string.  On Python 2 the result already is ``str``.
    if not isinstance(folded, str):
        folded = folded.decode('ascii')
    folded = folded.lower()

    pieces = []
    for char in folded:
        # Only ASCII characters survive the folding above, and in the default
        # table each class contains exactly one ASCII character: its own key.
        # A direct key lookup therefore finds the same class as scanning
        # every value for the character.
        variants = replacements.get(char)
        if variants is None:
            pieces.append(re.escape(char))
        else:
            pieces.append('[{}]'.format(variants))
    return ''.join(pieces)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment