LinguList/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Feature-Based Alignment Analyses with LingPy and CLTS (1)

Requirements (all can be installed with pip)


pyclts
tabulate

Run code

python features.py
More information


https://calc.hypotheses.org


## features.py
from pyclts.transcriptionsystem import TranscriptionSystem
from itertools import combinations

def score_sounds(
        a,
        b,
        features=None,
        classes=None,
        bipa=None
        ):
    """
    Rate the similarity of two sounds by counting shared feature values.

    Both sounds are resolved through ``bipa`` in a single call on the
    string ``a + ' ' + b``.  Sounds of different types score -10.
    Otherwise the result is the number of features listed in
    ``features[type]`` on which both sounds agree, multiplied by
    ``classes[type] / len(features[type])``.

    ``features`` maps a sound type to the feature names to compare; when
    omitted it is built from the feature names of 't', 'a' and '⁵⁵' for
    consonants, vowels and tones.  ``classes`` maps a sound type to its
    base score and defaults to 1 for those three types.  ``bipa``
    defaults to a freshly loaded ``TranscriptionSystem('bipa')``.
    """
    # fall back to the BIPA transcription system
    if not bipa:
        bipa = TranscriptionSystem('bipa')

    # default feature inventory, taken from one reference sound per type
    if not features:
        features = {
            kind: list(bipa[symbol].featuredict)
            for kind, symbol in (
                ("consonant", 't'),
                ("vowel", 'a'),
                ("tone", '⁵⁵'))
            }

    # default base score per sound type
    if not classes:
        classes = {"consonant": 1, "vowel": 1, "tone": 1}

    # resolve both sounds at once; diphthongs or clusters are scored by
    # the sound stored in their `from_sound` attribute
    first, second = (
        getattr(sound, 'from_sound', sound)
        for sound in bipa(a+' '+b))

    # sounds of different types are never comparable
    sound_type = first.type
    if sound_type != second.type:
        return -10

    relevant = features[sound_type]
    total = len(relevant)

    # scale so that the score is expressed relative to the class base score
    normalize = classes[sound_type] / total

    # identical symbols need no feature comparison
    if a == b:
        return total * normalize

    # every differing feature value lowers the similarity by one
    mismatches = sum(
        1 for feature in relevant
        if first.featuredict[feature] != second.featuredict[feature])
    return (total - mismatches) * normalize


def get_scorer(
        letters,
        bipa=None,
        classes=None,
        features=None
        ):
    """
    Retrieve a scoring dictionary for alignment algorithms.

    ``letters`` is an iterable of sound strings.  The returned dictionary
    maps every ordered pair ``(x, y)`` of these sounds, including
    ``(x, x)``, to the value of ``score_sounds`` for that pair; the two
    orders of a pair share one score.

    ``bipa``, ``classes`` and ``features`` have the same meaning and the
    same defaults as in ``score_sounds`` and are forwarded to it, so that
    custom feature sets and class weights actually shape the scores.
    """
    # load bipa object once and reuse it for every pair
    bipa = bipa or TranscriptionSystem('bipa')

    # define the features
    features = features or {
        "consonant": list(
            bipa['t'].featuredict),
        "vowel": list(
            bipa['a'].featuredict),
        "tone": list(
            bipa['⁵⁵'].featuredict)
        }
    # define base score for the classes
    classes = classes or {
        "consonant": 1,
        "vowel": 1,
        "tone": 1
        }

    # materialize so that one-shot iterables can be traversed twice
    letters = list(letters)

    def score(x, y):
        # forward the resolved settings instead of letting score_sounds
        # rebuild its own defaults on every call
        return score_sounds(
            x, y, features=features, classes=classes, bipa=bipa)

    scorer = {}
    # self-scores: computed once per sound, also for a single-sound input
    for letter in letters:
        scorer[letter, letter] = score(letter, letter)
    # scores between distinct positions are stored under both orders
    for a, b in combinations(letters, r=2):
        scorer[a, b] = scorer[b, a] = score(a, b)

    return scorer


from tabulate import tabulate
# demo: score a small consonant and vowel inventory against itself
cons = ['p', 't', 'b', 'd', 'pʰ', 'tʰ']
vows = ['a', 'e', 'i', 'o', 'u']
scorer = get_scorer(cons+vows)

# print one table per sound class, consonants first
for sounds in (cons, vows):
    matrix = []
    for i, row_sound in enumerate(sounds):
        # the diagonal is fixed at 1; other cells are rounded scores
        scores = [
            1 if i == j else round(scorer[row_sound, col_sound], 2)
            for j, col_sound in enumerate(sounds)]
        # each row starts with the sound it belongs to
        matrix.append([row_sound]+scores)
    print(tabulate(matrix, headers=sounds, tablefmt='pipe'))
	from pyclts.transcriptionsystem import TranscriptionSystem
	from itertools import combinations

	def score_sounds(
	        a,
	        b,
	        features=None,
	        classes=None,
	        bipa=None
	        ):
	    """
	    Rate the similarity of two sounds by counting shared feature values.

	    Both sounds are resolved through ``bipa`` in a single call on the
	    string ``a + ' ' + b``.  Sounds of different types score -10.
	    Otherwise the result is the number of features listed in
	    ``features[type]`` on which both sounds agree, multiplied by
	    ``classes[type] / len(features[type])``.

	    ``features`` maps a sound type to the feature names to compare; when
	    omitted it is built from the feature names of 't', 'a' and '⁵⁵' for
	    consonants, vowels and tones.  ``classes`` maps a sound type to its
	    base score and defaults to 1 for those three types.  ``bipa``
	    defaults to a freshly loaded ``TranscriptionSystem('bipa')``.
	    """
	    # fall back to the BIPA transcription system
	    if not bipa:
	        bipa = TranscriptionSystem('bipa')

	    # default feature inventory, taken from one reference sound per type
	    if not features:
	        features = {
	            kind: list(bipa[symbol].featuredict)
	            for kind, symbol in (
	                ("consonant", 't'),
	                ("vowel", 'a'),
	                ("tone", '⁵⁵'))
	            }

	    # default base score per sound type
	    if not classes:
	        classes = {"consonant": 1, "vowel": 1, "tone": 1}

	    # resolve both sounds at once; diphthongs or clusters are scored by
	    # the sound stored in their `from_sound` attribute
	    first, second = (
	        getattr(sound, 'from_sound', sound)
	        for sound in bipa(a+' '+b))

	    # sounds of different types are never comparable
	    sound_type = first.type
	    if sound_type != second.type:
	        return -10

	    relevant = features[sound_type]
	    total = len(relevant)

	    # scale so that the score is expressed relative to the class base score
	    normalize = classes[sound_type] / total

	    # identical symbols need no feature comparison
	    if a == b:
	        return total * normalize

	    # every differing feature value lowers the similarity by one
	    mismatches = sum(
	        1 for feature in relevant
	        if first.featuredict[feature] != second.featuredict[feature])
	    return (total - mismatches) * normalize


	def get_scorer(
	        letters,
	        bipa=None,
	        classes=None,
	        features=None
	        ):
	    """
	    Retrieve a scoring dictionary for alignment algorithms.

	    ``letters`` is an iterable of sound strings.  The returned dictionary
	    maps every ordered pair ``(x, y)`` of these sounds, including
	    ``(x, x)``, to the value of ``score_sounds`` for that pair; the two
	    orders of a pair share one score.

	    ``bipa``, ``classes`` and ``features`` have the same meaning and the
	    same defaults as in ``score_sounds`` and are forwarded to it, so that
	    custom feature sets and class weights actually shape the scores.
	    """
	    # load bipa object once and reuse it for every pair
	    bipa = bipa or TranscriptionSystem('bipa')

	    # define the features
	    features = features or {
	        "consonant": list(
	            bipa['t'].featuredict),
	        "vowel": list(
	            bipa['a'].featuredict),
	        "tone": list(
	            bipa['⁵⁵'].featuredict)
	        }
	    # define base score for the classes
	    classes = classes or {
	        "consonant": 1,
	        "vowel": 1,
	        "tone": 1
	        }

	    # materialize so that one-shot iterables can be traversed twice
	    letters = list(letters)

	    def score(x, y):
	        # forward the resolved settings instead of letting score_sounds
	        # rebuild its own defaults on every call
	        return score_sounds(
	            x, y, features=features, classes=classes, bipa=bipa)

	    scorer = {}
	    # self-scores: computed once per sound, also for a single-sound input
	    for letter in letters:
	        scorer[letter, letter] = score(letter, letter)
	    # scores between distinct positions are stored under both orders
	    for a, b in combinations(letters, r=2):
	        scorer[a, b] = scorer[b, a] = score(a, b)

	    return scorer


	from tabulate import tabulate
	# demo: score a small consonant and vowel inventory against itself
	cons = ['p', 't', 'b', 'd', 'pʰ', 'tʰ']
	vows = ['a', 'e', 'i', 'o', 'u']
	scorer = get_scorer(cons+vows)

	# print one table per sound class, consonants first
	for sounds in (cons, vows):
	    matrix = []
	    for i, row_sound in enumerate(sounds):
	        # the diagonal is fixed at 1; other cells are rounded scores
	        scores = [
	            1 if i == j else round(scorer[row_sound, col_sound], 2)
	            for j, col_sound in enumerate(sounds)]
	        # each row starts with the sound it belongs to
	        matrix.append([row_sound]+scores)
	    print(tabulate(matrix, headers=sounds, tablefmt='pipe'))