# Example of calculating the probability of a word from its character probabilities
 # A Python example of calculating the probability of the word 'hine'.
# See https://discourse.mozilla.org/t/quick-heads-up-on-some-metadata-confidence-estimate-work-were-doing/40618/2

import math


def p_not_word(n, probabilities):
    """Return the probability that a word is wrong within its first n characters.

    The characters are treated as independent draws, and
    ``probabilities[i]`` is the probability that character ``i`` is correct.
    With ``q(0) = 0`` the result follows the recurrence

        q(k) = p_k * q(k - 1) + (1 - p_k)

    i.e. either character ``k`` itself is wrong, or it is right but an
    earlier character was already wrong.

    Args:
        n: Number of leading characters to consider.
        probabilities: Sequence of per-character probabilities.

    Returns:
        The probability of at least one error among the first ``n``
        characters; ``0`` when ``n`` is 0.

    Raises:
        IndexError: If ``n`` is negative or larger than
            ``len(probabilities)``.
    """
    if n < 0:
        raise IndexError('n is out of range for probabilities')

    # Evaluated iteratively (instead of recursively) so that long sequences
    # cannot exceed the interpreter's recursion limit.  The arithmetic is
    # performed in the same order as the recursive definition.
    p_error = 0
    for index in range(n):
        p_char = probabilities[index]
        p_error = p_char * p_error + (1 - p_char)
    return p_error


def p_word(probabilities):
    """Return the probability of a word being emitted correctly.

    Args:
        probabilities: Sequence of per-character probabilities for the word.

    Returns:
        One minus the probability of an error anywhere in the word; ``1``
        for an empty sequence.
    """
    return 1 - p_not_word(len(probabilities), probabilities)


# Character probabilities for the word 'hine' (including the following space):
#       h         i         n         e         blank
hine = [0.994121, 0.998789, 0.999938, 0.998212, 0.841081]

# The probability of the word 'hine', assuming independent draws with those
# probabilities, written out by hand following the probability tree:
p_hine = 1 - (
    (1 - 0.994121) +                                            # not h
    0.994121 * (1 - 0.998789) +                                 # h * not i
    0.994121 * 0.998789 * (1 - 0.999938) +                      # h * i * not n
    0.994121 * 0.998789 * 0.999938 * (1 - 0.998212) +           # h * i * n * not e
    0.994121 * 0.998789 * 0.999938 * 0.998212 * (1 - 0.841081)  # h * i * n * e * not space
)

# Check the manual and the computed calculation are the same.  A tolerance
# comparison is used because comparing rounded values can disagree for two
# nearly equal numbers that straddle a rounding boundary.
assert math.isclose(p_hine, p_word(hine), abs_tol=1e-5), \
    'Error in the calculation of word probabilities'

# And here is the probability of the word 'hine' (approximately 0.83357883745)
print(p_word(hine))
