Skip to content

Instantly share code, notes, and snippets.

@fhardison
Last active January 11, 2023 01:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fhardison/da88c906c8c0b660c651511798aa2e8b to your computer and use it in GitHub Desktop.
Save fhardison/da88c906c8c0b660c651511798aa2e8b to your computer and use it in GitHub Desktop.
from pathlib import Path
from greek_normalisation.utils import nfc
from greek_accentuation.characters import add_breathing, Breathing,Accent, add_diacritic
import re
import sys
# Single vowels that may begin a word and therefore need a breathing mark.
# The last entry is the Greek iota (U+03B9); it was previously an ASCII "i",
# which skipped iota-initial Greek words and matched Latin words instead.
FIND = "α ε η υ ο ω ι".split()
# Two-letter diphthongs; for these the breathing is placed on the second letter.
DIPTH = "ει αι οι ου αυ ευ ηυ".split()
def create_replacement(ilist, ilist2, accent):
    """Build search/replace pairs for vowels and diphthongs carrying ``accent``.

    Args:
        ilist: single-vowel strings.
        ilist2: two-character diphthong strings.
        accent: the diacritic handed to ``add_diacritic``.

    Returns:
        A tuple ``(singles, diphthongs)``. Each is a list of
        ``(accented, with_breathing)`` pairs, where ``with_breathing`` is the
        accented form with ``Breathing.PSILI`` added. For diphthongs the
        accent and breathing are applied to the second character, and the
        replacement is prefixed with the literal marker ``DIPTHDIPTH``.
    """
    singles = []
    for vowel in ilist:
        accented = add_diacritic(vowel, accent)
        singles.append((accented, add_breathing(accented, Breathing.PSILI)))

    diphthongs = []
    for pair in ilist2:
        # Only the second letter of the diphthong carries the accent.
        accented = pair[0] + add_diacritic(pair[1], accent)
        marked = (
            "DIPTHDIPTH"
            + accented[0]
            + add_breathing(accented[1], Breathing.PSILI)
        )
        diphthongs.append((accented, marked))

    return singles, diphthongs
# Unaccented forms: each vowel / diphthong paired with its PSILI-breathing form.
# Diphthong replacements carry the temporary "DIPTHDIPTH" marker prefix.
no_accent = [(vowel, add_breathing(vowel, Breathing.PSILI)) for vowel in FIND]
di_no_accent = [
    (pair, "DIPTHDIPTH" + pair[0] + add_breathing(pair[1], Breathing.PSILI))
    for pair in DIPTH
]

# Accented forms, one pair of tables per accent.
accute, di_accute = create_replacement(FIND, DIPTH, Accent.ACUTE)
circumflex, di_circumflex = create_replacement(FIND, DIPTH, Accent.CIRCUMFLEX)

# Tables in the order they are applied by run_replacements.
DI_REPLACEMENTS = [di_no_accent, di_circumflex, di_accute]
REPLACEMENTS = [no_accent, accute, circumflex]
def run_replacements(text):
    """Return ``text`` with a breathing added to word-initial vowels.

    The text is first passed through ``nfc`` (presumably Unicode NFC
    normalisation -- confirm against greek_normalisation). Every
    ``(pattern, replacement)`` pair from ``DI_REPLACEMENTS`` and then
    ``REPLACEMENTS`` is applied with ``re.sub``, anchored at a word boundary.

    Args:
        text: the input string.

    Returns:
        The rewritten string with all ``DIPTHDIPTH`` markers removed.
    """
    result = nfc(text)
    # Diphthong tables run first. Their replacements start with the marker
    # "DIPTHDIPTH", which puts a word character in front of the diphthong's
    # first vowel, so the later single-vowel patterns (anchored with \b)
    # do not match it.
    for table in DI_REPLACEMENTS + REPLACEMENTS:
        for pattern, replacement in table:
            result = re.sub(r"\b" + pattern, replacement, result)
    # Strip the temporary markers now that all passes are done.
    return result.replace("DIPTHDIPTH", "")
# Command-line entry: argv[1] is the input file; an optional argv[2] is the
# output file. Without argv[2] the result is printed to stdout.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError on a missing argument.
    sys.exit(f"usage: {sys.argv[0]} INPUT [OUTPUT]")

# Read and write explicitly as UTF-8: the output contains polytonic Greek
# characters, which the locale default encoding (e.g. cp1252/cp1253 on
# Windows) cannot represent.
text = Path(sys.argv[1]).read_text(encoding="utf-8")
if len(sys.argv) > 2:
    Path(sys.argv[2]).write_text(run_replacements(text), encoding="utf-8")
    print("DONE")
else:
    print(run_replacements(text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment