Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
English to Tengwar transliterator
# This file converts English text to Tengwar, using my own personal
# preferences for transliterating Tengwar.
#
# Currently, the output that is created is intended for use with the Tengwar
# Annatar font and related font families.
#
# Example usage:
# >>> print convert("This was a triumph. I'm making a note here: huge success!")
#
# -- then paste the resulting text into a document rendered in Tengwar Annatar.
#
# Chelsea Voss, 2015
# Examples can either yield a single character, or a single character after a
# carrier
# In addition to this, some characters have multiple values, and those depend
# on the previous values
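# Special characters (single-letter codes): T for theta, D for eth,
# R for pre-vowel r, S and Z for vowel-less s and z,
# Q for rd, L for ld, W for wh, C for ch, K for kh, G for gh, X for sh, H for
# zh, N for ng.
# NOTE: the code below does not use most of these codes. It marks pre-vowel r
# as 'R', consonant y as 'Y', voiced th as 'TH' and ch as 'Ch'; the other
# digraphs are looked up directly in the `doubles` table.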
def dictzip(str1, str2):
    """Pair up two equal-length sequences into a dict.

    The item at position i of `str1` becomes a key whose value is the item
    at position i of `str2`. If `str1` repeats an item, the value paired
    with its last occurrence wins.

    Raises AssertionError if the two sequences differ in length.
    """
    assert len(str1) == len(str2)
    # zip() is available on both Python 2 and Python 3 (xrange() is not),
    # and dict() keeps the last value seen for a repeated key, exactly like
    # the index-by-index assignment it replaces.
    return dict(zip(str1, str2))
# English pronounces 'th' in two ways (voiced and unvoiced) and Tengwar writes
# them differently. TODO: use a library to determine which 'th' we're dealing
# with. Until then, the voiced 'th' is treated as the rare case and is
# recognised through the word lists below; everything else is unvoiced.

# Words matched as a prefix of the input; only the first 'th' is voiced.
voiced_th_prefices = [
    'their',
    'these',
    'those',
    'although',
    'them',
    'thine',
    'thy',
    'thou',
    'there',
]

# Words matched as a prefix of the input; only the second 'th' is voiced.
voiced_th_special_prefices = [
    'thither',
]

# Words that must match the whole input -- they are not usable as prefixes
# (consider 'than' vs. 'thank').
voiced_th_solo_prefices = [
    'that',
    'this',
    'than',
    'they',
    'thee',
    'though',
]

# Words matched anywhere inside the input; each contains exactly one 'th'.
voiced_th_always_safe = [
    'feather', 'together', 'bathing', 'bathe',
    'father', 'mother', 'clothing', 'clothe',
    'brother', 'weather', 'either', 'gather',
    'other', 'another', 'worthy', 'rather',
    'soothing', 'soothe', 'smooth', 'leather',
    'tether', 'breathe', 'breathing', 'lathe',
    'seethe', 'seething', 'scathe', 'scathing',
    'teethe', 'teething', 'loath', 'loathing',
    'neither', 'thence', 'rhythm', 'slither',
    'southern', 'bother', 'altogether', 'lather',
    'hither',
]
def replace_th(inp):
    """Return `inp` with every voiced 'th' rewritten as upper-case 'TH'.

    `inp` is a single word (the caller lower-cases it first). Four word
    lists are consulted in turn:

    * voiced_th_always_safe      -- matched anywhere inside the word
    * voiced_th_solo_prefices    -- matched only against the whole word
    * voiced_th_prefices         -- matched at the start of the word
    * voiced_th_special_prefices -- matched at the start of the word; only
                                    the second 'th' of the entry is marked

    Any 'th' not covered by these lists is left in lower case.
    """
    for word in voiced_th_always_safe:
        if word in inp:
            inp = inp.replace(word, word.replace('th', 'TH'))

    # Whole-word entries: the word itself must equal the entry.
    if inp in voiced_th_solo_prefices:
        inp = inp.replace('th', 'TH')

    for prefix in voiced_th_prefices:
        if inp.startswith(prefix):
            inp = inp.replace(prefix, prefix.replace('th', 'TH'))

    for prefix in voiced_th_special_prefices:
        if inp.startswith(prefix):
            # Upper-case the first two 'th's, then put the first one back,
            # which leaves only the second 'th' marked.
            marked = prefix.replace('th', 'TH', 2).replace('TH', 'th', 1)
            inp = inp.replace(prefix, marked)

    return inp
# Maps a single punctuation or whitespace character to the glyph code(s)
# emitted for it. Some values are several codes long -- presumably the
# symbol's English name spelled out in Tengwar (e.g. '&'); verify against
# the font before relying on that.
punctuation = {
    # Sentence punctuation and quotes
    '.': u'-',
    ',': u'\xb7',
    '!': u'\xc1',
    '?': u'\xc0',
    ';': u'\xc3',
    '"': u'\xbb',
    '\'': u'\xb2',
    '_': u'\xc2',
    '-': u'\\',
    '`': u'\xb1',
    ':': '-',

    # Slashes
    '/': u'\u203a',
    '\\': u'\u203a',

    # Brackets: every opener shares one code, every closer another
    '<': '*',
    '>': 'I',
    '[': '*',
    ']': 'I',
    '{': '*',
    '}': 'I',
    '(': '*',
    ')': 'I',

    # Symbols
    '@': '1E',
    '#': '9dE1x#',
    '$': u'k\xa1',
    '%': 'q6R85$1',
    '^': 'z7D1R',
    '&': '5#2',
    '*': u'\u02c6',
    '=': u'\xac',
    '+': u'` \xb0',
    '|': u'\xbd',

    # Whitespace
    ' ': ' ',
    '\n': '\n',
    '\t': u'\xb7-\xb7',
}
def elfify_start(inp):
    """Transliterate a passage of English text into Tengwar glyph codes.

    The text is split into tokens: either a run of word characters and
    apostrophes, or one single punctuation/whitespace character. Each token
    is converted with elfify_token() and the results are concatenated.
    Characters matched by neither alternative are dropped.

    Returns a unicode string.
    """
    import re
    # Two fixes relative to the previous pattern:
    #  * the hyphen is escaped -- unescaped, "'-_" formed an accidental
    #    range from "'" to "_"; every character in that range is either a
    #    word character or listed explicitly here, so the accepted tokens
    #    are unchanged;
    #  * "\t" is accepted -- the punctuation table has a tab entry, but tabs
    #    never matched and were silently discarded.
    tokens = re.findall(
        r"[\w']+|[.,!\?;\"'\-_`:<>/\\\[\]\(\){}@#$%^&\*=\+| \n\t]", inp)
    # u''.join() yields a unicode result on Python 2 and a str on Python 3;
    # the unicode() builtin exists only on Python 2.
    return u''.join(elfify_token(token) for token in tokens)
def elfify_token(item):
    """Convert one token to its glyph codes.

    The first matching rule applies:
    * a key of `punctuation` -> the table's value;
    * an all-digit token     -> elfify_number() of its integer value;
    * anything else          -> elfify_word() of the token with its
                                apostrophes removed.
    """
    if item in punctuation:
        return punctuation[item]

    if item.isdigit():
        return elfify_number(int(item))

    without_apostrophes = item.replace("'", "")
    return elfify_word(without_apostrophes)
def elfify_number(num):
    """Return the glyph codes for the integer `num`.

    Numerals are not implemented yet: the same five-character placeholder
    is returned whatever the value of `num`.
    """
    # TODO: implement fancy base-12 Elvish numerals
    placeholder = '`````'
    return placeholder
def _swap_before(text, target, followers, marker):
    """Return `text` with each `target` character that is immediately
    followed by a character from `followers` replaced by `marker`."""
    chars = list(text)
    for pos in range(len(chars) - 1):
        if chars[pos] == target and chars[pos + 1] in followers:
            chars[pos] = marker
    return ''.join(chars)


def elfify_word(inp):
    """Convert a single English word (no punctuation) to glyph codes.

    Steps, in order:
      1. lower-case; '' is returned unchanged, 'of' and 'the' have
         dedicated single codes;
      2. mark voiced 'th' as 'TH' via replace_th();
      3. respell c and g by the letter that follows, mark pre-vowel r as
         'R', turn q into k, mark pre-vowel y as 'Y';
      4. detach a final 's' (unless it follows a, i, o or u) and then a
         final 'e' (word of 3+ letters, not preceded by a vowel or y);
      5. convert what is left with elfify_postfix();
      6. swap the s/z codes 'i' and ',' for '8' and 'k' when the next code
         is not one of `vowels`;
      7. re-attach the detached 'e' and 's' as their own codes.

    NOTE(review): step 6 never examines the last code of the word, yet
    step 7 tests for a final '8' or 'k' -- possibly an off-by-one; confirm
    the intended glyphs before changing it.
    """
    inp = inp.lower()
    if not inp:
        return inp

    # Whole words with a dedicated code.
    whole_word_codes = {'of': 'W', 'the': '@'}
    if inp in whole_word_codes:
        return whole_word_codes[inp]

    # Voiced th becomes 'TH'.
    inp = replace_th(inp)

    # Hard and soft c and g, decided by the following letter.
    letters = list(inp)
    for pos in range(len(letters) - 1):
        here = letters[pos]
        following = letters[pos + 1]
        if here == 'g':
            if following in 'eiy':
                letters[pos] = 'j'
        elif here == 'c':
            if following in 'eiy':
                letters[pos] = 's'
            elif following == 'h':
                letters[pos] = 'C'  # Ch
            else:
                letters[pos] = 'k'
    # A word-final c has no following letter; it is always hard.
    if letters[-1] == 'c':
        letters[-1] = 'k'
    inp = ''.join(letters)

    # r directly before a vowel (or y) uses the pre-vowel form R.
    inp = _swap_before(inp, 'r', 'aeiouy', 'R')

    # q == k
    inp = inp.replace('q', 'k')

    # y directly before a vowel is the consonant Y; any other y stays a
    # vowel. Same idea as r above.
    inp = _swap_before(inp, 'y', 'aeiou', 'Y')

    # Detach a final s, except after a, i, o or u (and never from a
    # one-letter word).
    has_trailing_s = (len(inp) > 1 and inp[-1] == 's'
                      and inp[-2] not in 'aiou')
    if has_trailing_s:
        inp = inp[:-1]

    # Detach a final e when the word has at least three letters and the
    # letter before the e is not a vowel or y.
    has_trailing_e = (len(inp) >= 3 and inp[-1] == 'e'
                      and inp[-2] not in 'aeiouy')
    if has_trailing_e:
        inp = inp[:-1]

    # Elfification
    output = elfify_postfix(inp) if inp else carrier

    # Use the other s ('8') and z ('k') codes wherever the code that
    # follows is not one of `vowels`. The last code is never examined.
    glyphs = list(output)
    for pos in range(len(glyphs) - 1):
        if glyphs[pos + 1] in vowels:
            continue
        if glyphs[pos] == 'i':
            glyphs[pos] = '8'  # fancy S
        elif glyphs[pos] == ',':
            glyphs[pos] = 'k'  # fancy Z
    output = ''.join(glyphs)

    # Put back the ending e, if one was detached.
    if has_trailing_e:
        output += 'O'

    # Put back the ending s, if one was detached; the code used depends on
    # the last code written so far.
    if has_trailing_s:
        last = output[-1]
        if last in '7um8k':
            output += u'\xc5'
        elif last in 'qwertyo':
            output += u'\xc6'
        elif last in 'l9':
            output += u'\xa5'
        else:
            output += '_'

    return output
# Single-letter consonants -> glyph code, built by pairing the two strings
# position by position. 'R' and 'Y' are the pre-vowel r and consonant y
# markers. '-' stands in where a glyph code has no letter of its own; since
# dict keys are unique, only the last '-' pairing ('.') is kept.
consonants = dictzip(
    'tdnrRhpbfvmwsj--lYkg-z-',
    u'125679qwertyisghjlzxn,.',
)
# Two-letter combinations that are written with a single glyph code.
doubles = {
    # h-digraphs
    'sh': 'd',
    'zh': 'f',
    'ch': 'a',
    'Ch': 'a',  # same code as 'ch'; 'Ch' is the spelling elfify_word emits
    'ph': 'e',
    'kh': 'c',
    'gh': 'v',
    'wh': 'o',

    # other pairs
    'ng': 'b',
    'rd': 'u',
    'ld': 'm',

    # th: unvoiced, then voiced (upper-cased by replace_th)
    'th': '3',
    'TH': '4',
}
# For each vowel letter, the four alternative codes for its vowel mark.
# Which of the four is used is decided by the glyph the mark sits on, via
# the vowels_for_consonants index table.
vowel_series = {
    'a': '#EDC',
    'e': '$RFV',
    'i': '%TGB',
    'o': '^YHN',
    'u': '&UJM',
    'y': u'\xd8\xd9\xda\xdb',
}

# Every vowel-mark code for a, e, i, o and u in one string (the 'y' codes
# are not included).
vowels = '#EDC$RFV%TGB^YHN&UJM'
# For each glyph code that can carry a vowel mark: the position (0-3) to
# pick from that vowel's entry in vowel_series.
# For example, index 0 with the vowel 'a' selects '#'.
vowels_for_consonants = {
    '`': 3, '~': 3,
    '1': 1, 'q': 1, 'a': 2, 'z': 2,
    '2': 0, 'w': 0, 's': 0, 'x': 0,
    '3': 2, 'e': 2, 'd': 1, 'c': 1,
    '4': 0, 'r': 0, 'f': 0, 'v': 0,
    '5': 0, 't': 0, 'g': 0, 'b': 0,
    '6': 1, 'y': 1, 'h': 2, 'n': 2,
    '7': 2, 'u': 2, 'j': 0, 'm': 0,
    'i': 2, ',': 2, '9': 3, 'o': 0,
    'l': 2, '.': 2,
}

# Carrier codes: a vowel mark is placed on a carrier when there is no
# consonant glyph to put it on. The short carrier is the one in use.
short_carrier = '`'
long_carrier = '~'
carrier = short_carrier
def elfify_postfix(postfix):
    """Recursively convert the pre-processed letters of a word to glyph codes.

    Looks at the start of `postfix`, emits the code(s) for it, and recurses
    on the remainder:

    * a non-alphabetic character becomes '`';
    * a two-letter entry of `doubles` becomes its single code;
    * a vowel becomes a mark written *after* the code it sits on -- the
      code of the following consonant, or `carrier` when the vowel ends the
      word or is followed by another vowel;
    * a single consonant is looked up in `consonants`;
    * 'x' becomes a fixed two-code sequence.

    Raises NotImplementedError for an alphabetic character that none of
    the rules covers.
    """
    if not postfix:
        return ''

    head = postfix[0]

    # TODO: Actually add the appropriate character
    if not head.isalpha():
        return '`' + elfify_postfix(postfix[1:])

    # A double takes priority over single letters -- apply it and recurse.
    for digraph, code in doubles.items():
        if postfix.startswith(digraph):
            return code + elfify_postfix(postfix[len(digraph):])

    tail = postfix[1:]

    if head in vowel_series:
        if tail and tail[0] not in vowel_series:
            # The mark goes on whatever comes next, so that has to be
            # converted first; its leading code carries the mark.
            converted_tail = elfify_postfix(tail)
            bearer = converted_tail[0]
            remainder = converted_tail[1:]
        else:
            # End of the word, or another vowel follows: use a carrier.
            bearer = carrier
            remainder = elfify_postfix(tail)
        mark = vowel_series[head][vowels_for_consonants[bearer]]
        return bearer + mark + remainder

    # TODO: Maybe add a doubler ('") if the next consonant == the same thing!
    if head in consonants:
        return consonants[head] + elfify_postfix(tail)

    if head == 'x':
        return u'z\xe6' + elfify_postfix(tail)

    # TODO: Fancy n-bars and w-bars here.
    raise NotImplementedError("%s, %s" % (head, tail))
# Public entry point: `convert` is another name for elfify_start.
convert = elfify_start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment