Skip to content

Instantly share code, notes, and snippets.

@LinguList
Created September 12, 2019 16:41
Show Gist options
  • Save LinguList/ce6da48112018365bccb94bcbec891b1 to your computer and use it in GitHub Desktop.
Save LinguList/ce6da48112018365bccb94bcbec891b1 to your computer and use it in GitHub Desktop.

Feature-Based Alignment Analyses with LingPy and CLTS (2)

Requirements (both can be installed with pip)

  • lingpy
  • pyclts

Run code

python features2.py

More information

COVINGTON
yo/je
Spanish......... j o
French.......... ʒ ə
# 4.0
tu/tu
Spanish......... t u
French.......... t y
# 15.0
nosotros/nous
Spanish......... n o s o t r o s
French.......... n u - - - - - -
# 14.0
quién/qui
Spanish......... k j e n
French.......... k - i -
# 9.2
qué/quoi
Spanish......... k - e
French.......... k w a
# 9.2
todos/tous
Spanish......... t o d o s
French.......... t u - - -
# 14.0
una/une
Spanish......... u n a
French.......... y n -
# 15.0
dos/deux
Spanish......... d o s
French.......... d ø -
# 15.0
tres/troix
Spanish......... t r - e s
French.......... t r w a -
# 19.2
hombre/homme
Spanish......... o m b r e
French.......... o m - - -
# 15.0
árbol/arbre
Spanish......... a r b o l -
French.......... a r b - r ə
# 25.0
plume/plume
Spanish......... p l u m a
French.......... p l y m -
# 35.0
cabeza/cap
Spanish......... k a b e θ a
French.......... k a p - - -
# 25.0
boca/bouche
Spanish......... b o k a
French.......... b u ʃ -
# 16.0
pie/pied
Spanish......... p j e
French.......... p j e
# 25.0
corazón/coeur
Spanish......... k o r a θ o n
French.......... k ø r - - - -
# 25.0
ver/voir
Spanish......... b - e r
French.......... v w a r
# 16.0
venir/venir
Spanish......... b e n i r
French.......... v ə n i r
# 36.0
decir/dire
Spanish......... d e θ i r
French.......... d - - i r
# 21.0
pobre/pauvre
Spanish......... p o b r e
French.......... p o v r ə
# 36.0
this/dieses
English......... ð i s - -
German.......... d iː z e s
# 21.0
that/das
English......... ð æ t
German.......... d a s
# 12.0
what/was
English......... w a t
German.......... v a s
# 13.0
not/nicht
English......... n a - t
German.......... n i x t
# 19.6
long/lang
English......... l o ŋ
German.......... l a ŋ
# 24.0
man/Mann
English......... m æ n
German.......... m a n
# 24.0
flesh/Fleisch
English......... f l e ʃ
German.......... f l ai ʃ
# 34.0
blood/Blut
English......... b l ə d
German.......... b l uː t
# 34.0
feather/Feder
English......... f e ð ə r
German.......... f eː d ə r
# 36.0
hair/Haar
English......... h æ r
German.......... h aː r
# 24.0
ear/Ohr
English......... iː r
German.......... oː r
# 14.0
eye/Auge
English......... ai - -
German.......... au g ə
# 5.0
nose/Nase
English......... n ou z -
German.......... n aː z ə
# 24.0
mouth/Mund
English......... m au - θ
German.......... m u n t
# 15.6
tongue/Zunge
English......... t ə ŋ -
German.......... ʦ u ŋ ə
# 20.0
foot/Fuß
English......... f u t
German.......... f uː s
# 17.0
knee/Knie
English......... - n iː
German.......... k n iː
# 15.0
hand/Hand
English......... h æ n d
German.......... h a n t
# 34.0
hart/Herz
English......... h a r t
German.......... h e r ʦ
# 30.0
liver/Leber
English......... l i v ə r
German.......... l eː b ə r
# 35.0
and/ante
English......... æ n d -
Latin........... a n t e
# 24.0
at/at
English......... æ t
Latin........... a t
# 14.0
blow/flare
English......... b l ou - -
Latin........... f l aː r e
# 20.0
ear/auris
English......... iː - r - -
Latin........... a w r i s
# 9.2
eat/edere
English......... iː t - - -
Latin........... e d e r e
# 14.0
fish/piscis
English......... f i ʃ - - -
Latin........... p i s k i s
# 21.0
flow/flu.ere
English......... f l ou - - -
Latin........... f l u e r e
# 24.0
star/stella
English......... s t a r -
Latin........... s t eː lː a
# 28.0
full/plenus
English......... f u l - - - -
Latin........... p - l eː n u s
# 12.0
grass/gramen
English......... g r æ s - -
Latin........... g r aː m e n
# 24.0
heart/cordis
English......... h a r t - -
Latin........... k o r d i s
# 24.0
horn/cornu
English......... h o r n -
Latin........... k o r n uː
# 25.0
I/ego
English......... ai - -
Latin........... e g o
# 4.0
knee/genu
English......... - - n iː
Latin........... g e n uː
# 14.0
mother/mater
English......... m ə ð ə r
Latin........... m aː t e r
# 35.0
mountain/mons
English......... m au n t ə n
Latin........... m oː n s - -
# 26.0
new/novus
English......... n iu - - -
Latin........... n o w u s
# 19.2
name/nomen
English......... n a m e -
Latin........... n o m e n
# 0.0
one/unus
English......... w ə n - -
Latin........... - uː n u s
# 12.8
round/rotundus
English......... r au - - n d - -
Latin........... r o t u n d u s
# 27.12
sew/suere
English......... s ou - - -
Latin........... s u e r e
# 14.0
sit/sedere
English......... s i t - - -
Latin........... s eː d e r e
# 24.0
three/tres
English......... θ r iː -
Latin........... t r eː s
# 20.0
tooth/dentis
English......... t u - θ - -
Latin........... d e n t i s
# 20.0
thin/tenuis
English......... θ i n - - -
Latin........... t e n w i s
# 20.0
kiinwaawa/kenuaq
Fox............. k iː n w aː w a
Menomini........ k e n u a ʔ -
# 24.0
niina/nenah
Fox............. n iː n a -
Menomini........ n e n a h
# 29.0
naapeewa/naapeew
Fox............. n aː p eː w a
Menomini........ n aː p ɛː w -
# 40.0
waapimini/waapemen
Fox............. w aː p i m i n i
Menomini........ w aː p e m e n -
# 53.0
nameesa/nameeqs
Fox............. n a m eː - s a
Menomini........ n a m ɛː ʔ s -
# 35.2
okimaawa/okeemaaw
Fox............. o k i m aː w a
Menomini........ o k eː m aː w -
# 44.0
šišiipa/seeqsep
Fox............. ʃ iː - ʃ iː p a
Menomini........ s eː ʔ s e p -
# 33.2
ahkohkwa/ahkeeh
Fox............. a h k o h k w a
Menomini........ a h k ɛː h - - -
# 39.0
pemaatesiweni/pemaateswen
Fox............. p e m aː t e s i w e n i
Menomini........ p e m aː t e s e w e n -
# 84.0
asenya/aqsen
Fox............. a - s e n j a
Menomini........ a ʔ s ɛ n - -
# 25.2
δίδωμι/do
Old_Greek....... d i d oː m i
Latin........... - - d oː - -
# 15.0
θυγατηρ/Tochter
Old_Greek....... tʰ u g a t eː r
German.......... t o x - t ə r
# 39.2
daughter/θυγατηρ
English......... d ɔː - - t ə r
Old_Greek....... tʰ u g a t eː r
# 32.12
ager/ajras
Latin........... a g e r - -
Sanskrit........ a ʥ - r a s
# 17.0
bharāmi/φέρθ
Sanskrit........ bʱ a r aː m i
Old_Greek....... pʰ e r o - -
# 28.0
centum/ἕκατον
Latin........... - - k e n t u m
Old_Greek....... h e k a - t o n
# 24.6
centum/satəm
Latin........... k e n t u m
Iranian......... s a - t ə m
# 25.6
from pyclts.transcriptionsystem import TranscriptionSystem
from itertools import combinations
from lingpy.algorithm.cython import malign
from lingpy import *
def score_sounds(
        a,
        b,
        features=None,
        classes=None,
        bipa=None
        ):
    """
    Score two sounds by the share of features they have in common.

    Parameters
    ----------
    a, b : str
        The two sound symbols; they are joined with a space and parsed
        in a single call to ``bipa``.
    features : dict, optional
        Maps a sound type to the list of feature names compared for that
        type. Defaults to the ``featuredict`` keys of ``bipa['t']``
        (consonant), ``bipa['a']`` (vowel) and ``bipa['⁵⁵']`` (tone).
    classes : dict, optional
        Maps a sound type to the score of a perfect match. Defaults to 1
        for consonants, vowels and tones.
    bipa : optional
        Transcription system used to parse the symbols. Defaults to
        ``TranscriptionSystem('bipa')``.

    Returns
    -------
    -10 if the two sounds are of different types, otherwise
    ``classes[type] * matching_features / number_of_features``.
    """
    # fall back to the default transcription system
    if not bipa:
        bipa = TranscriptionSystem('bipa')

    # fall back to the feature names of one prototype sound per type
    if not features:
        features = {
            "consonant": list(bipa['t'].featuredict),
            "vowel": list(bipa['a'].featuredict),
            "tone": list(bipa['⁵⁵'].featuredict),
        }

    # fall back to a uniform base score per sound type
    if not classes:
        classes = {"consonant": 1, "vowel": 1, "tone": 1}

    # parse both symbols in one go
    first, second = bipa(a + ' ' + b)

    # sounds exposing `from_sound` (diphthongs or clusters) are compared
    # through that sound instead
    first = getattr(first, 'from_sound', first)
    second = getattr(second, 'from_sound', second)

    # sounds of different types get a fixed penalty
    if first.type != second.type:
        return -10

    relevant = features[first.type]
    total = len(relevant)
    # scale so that a full match yields the base score of the type
    weight = classes[first.type] / total

    # identical symbols need no feature comparison
    if a == b:
        return total * weight

    # every differing feature value lowers the similarity by one
    mismatches = sum(
        1 for name in relevant
        if first.featuredict[name] != second.featuredict[name]
    )
    return (total - mismatches) * weight
def get_scorer(
        letters,
        bipa=None,
        classes=None,
        features=None
        ):
    """
    Retrieve a scoring dictionary for alignment algorithms.

    Parameters
    ----------
    letters : iterable of str
        The sound symbols that should be scored against each other.
    bipa : optional
        Transcription system handed to ``score_sounds``. Defaults to
        ``TranscriptionSystem('bipa')``.
    classes : dict, optional
        Base score per sound type, handed to ``score_sounds``. Defaults
        to 1 for consonants, vowels and tones.
    features : dict, optional
        Feature names per sound type, handed to ``score_sounds``.
        Defaults to the ``featuredict`` keys of ``bipa['t']``,
        ``bipa['a']`` and ``bipa['⁵⁵']``.

    Returns
    -------
    dict
        Maps ``(a, b)`` tuples to the value of ``score_sounds`` for every
        pair of letters in both orders and for every letter paired with
        itself.
    """
    # load bipa object
    bipa = bipa or TranscriptionSystem('bipa')

    # define the features
    features = features or {
        "consonant": list(bipa['t'].featuredict),
        "vowel": list(bipa['a'].featuredict),
        "tone": list(bipa['⁵⁵'].featuredict),
    }

    # define base score for the classes
    classes = classes or {
        "consonant": 1,
        "vowel": 1,
        "tone": 1,
    }

    # materialize once, since the letters are traversed twice below
    letters = list(letters)
    scorer = {}

    # identity scores come first, so a single-letter input still gets its
    # (a, a) entry and no identity score is recomputed per pair
    for a in letters:
        if (a, a) not in scorer:
            scorer[a, a] = score_sounds(
                a, a, features=features, classes=classes, bipa=bipa)

    # the score is stored under both orderings of each pair; the resolved
    # features and classes are forwarded so caller-supplied values are
    # actually honoured
    for a, b in combinations(letters, r=2):
        scorer[a, b] = scorer[b, a] = score_sounds(
            a, b, features=features, classes=classes, bipa=bipa)

    return scorer
def feature_align(seqA, seqB, mode='global', gop=-1):
    """
    Align two sound sequences with a feature-based scoring dictionary.

    Parameters
    ----------
    seqA, seqB : list of str
        The segmented sound sequences to align.
    mode : {'global', 'local'}
        ``'global'`` uses ``malign.nw_align``, ``'local'`` uses
        ``malign.sw_align``.
    gop : int
        Passed as the fourth positional argument to the alignment
        function (presumably the gap penalty -- confirm against the
        LingPy documentation).

    Returns
    -------
    Whatever the selected alignment function returns; callers in this
    file unpack it as ``(almA, almB, score)``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'global'`` nor ``'local'``.
    """
    # reject unsupported modes up front, before the scorer is built
    if mode == 'global':
        align = malign.nw_align
    elif mode == 'local':
        align = malign.sw_align
    else:
        raise ValueError(
            "mode must be 'global' or 'local', got {0!r}".format(mode))

    # only the distinct sounds of both sequences need scores
    scorer = get_scorer(list(set(seqA).union(seqB)))
    return align(seqA, seqB, scorer, gop)
# test the alignment procedure on a single word pair
seqA = 'tʰ ɔ x t ɐ'.split()
seqB = 'd ɔː t ə r'.split()
almA, almB, score = feature_align(seqA, seqB)
for alm in (almA, almB):
    print('\t'.join(alm))
print('{0:.2f}'.format(score))

# compare the computed alignments with those stored in covington.psa:
# a pair counts as a hit (1) only if both aligned sequences are identical
psa = PSA('covington.psa')
scores = []
for i, (seqA, seqB) in enumerate(psa.tokens):
    almA, almB, score = feature_align(seqA, seqB)
    matches = (
        almA == psa.alignments[i][0]
        and almB == psa.alignments[i][1]
    )
    scores.append(1 if matches else 0)

# proportion of identical alignments, followed by their absolute number
print('{0:.2f}'.format(sum(scores) / len(scores)))
print(sum(scores))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment