Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Tamil Spell Checker
import os
import traceback
from bloomfilter import BloomFilter
import jellyfish
# Distance threshold passed to tamil_correct_spelling. The comparison there is
# strict (`<`), so with 2 only words at Levenshtein distance 0 or 1 are suggested.
edit_distance = 2
# Path of the word-list file that read_tamil_words_listfile reads line by line.
tamil_unique_words_file_path = "tamilwordlist.txt"
# Path handed to BloomFilter as its third argument when the filter is created.
bloom_filter_file_path = "tamil_bloom_filter.txt"
def read_tamil_words_listfile(tamil_unique_words_file_path):
    """Read the Tamil word list, one word per line.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding, and it is opened with a context
    manager so the handle is closed even if reading fails part-way through.

    Args:
        tamil_unique_words_file_path: Path of the text file to read.

    Returns:
        A list with the whitespace-stripped, non-empty lines of the file, in
        file order. If the file cannot be opened or decoded, the traceback is
        printed and the words collected so far (possibly none) are returned.
    """
    ta_words_unique = []
    try:
        with open(tamil_unique_words_file_path, encoding="utf-8") as tamil_word_file:
            for line in tamil_word_file:
                stripped_word = line.strip()
                # A blank line is not a word; keeping "" would make it a
                # spurious suggestion for every single-character input.
                if stripped_word:
                    ta_words_unique.append(stripped_word)
    except Exception:
        # Deliberately best-effort: report the problem and return what we have
        # instead of propagating, as callers do not handle exceptions here.
        print(traceback.format_exc())
    return ta_words_unique
def read_bloom_filter_tamil_file(bloom_filter_path, tamil_word_count=2043478,
                                 falsepositive_probability=0.001):
    """Create the Bloom filter object for the Tamil vocabulary.

    Args:
        bloom_filter_path: Path passed to ``BloomFilter`` as its third
            argument. NOTE(review): presumably the file backing the filter;
            confirm against the ``bloomfilter`` package in use.
        tamil_word_count: First argument passed to ``BloomFilter``
            (defaults to the previously hard-coded 2043478).
        falsepositive_probability: Second argument passed to ``BloomFilter``
            (defaults to the previously hard-coded 0.001).

    Returns:
        The constructed ``BloomFilter``. If construction raises, the traceback
        is printed and the empty string ``""`` is returned as a failure
        sentinel (kept for backward compatibility); callers should check for
        it before calling methods on the result.
    """
    bloom_tamil = ""
    try:
        # Use the path given by the caller; the earlier code ignored this
        # parameter and always read the module-level bloom_filter_file_path.
        bloom_tamil = BloomFilter(
            tamil_word_count, falsepositive_probability, bloom_filter_path
        )
    except Exception:
        # Best-effort: report the failure and fall through to the sentinel.
        print(traceback.format_exc())
    return bloom_tamil
def tamil_correct_spelling(word, edit_distance, ta_words_unique, bloom_tamil):
    """Suggest dictionary words that are close to ``word``.

    Args:
        word: The word to check.
        edit_distance: Exclusive upper bound on the Levenshtein distance; a
            dictionary word is suggested only when its distance to ``word``
            is strictly less than this value.
        ta_words_unique: Iterable of dictionary words to compare against.
        bloom_tamil: Object exposing ``check(word)``; a truthy result means
            the word is treated as already known.

    Returns:
        An empty list when ``bloom_tamil.check(word)`` is truthy; otherwise
        the dictionary words within the distance bound, in dictionary order.
    """
    # The filter reports the word as present: nothing to suggest.
    if bloom_tamil.check(word):
        return []
    # Otherwise scan the whole dictionary for near matches.
    return [
        candidate
        for candidate in ta_words_unique
        if jellyfish.levenshtein_distance(candidate, word) < edit_distance
    ]
# Load the word list and create the Bloom filter once, at module level.
ta_words_unique = read_tamil_words_listfile(tamil_unique_words_file_path)
bloom_tamil = read_bloom_filter_tamil_file(bloom_filter_file_path)
# Sample input word for the demonstration call below.
word = "மெலாம்"
# NOTE(review): the returned suggestion list is discarded here, so nothing is
# shown when this runs as a plain script; wrap the call in print(...) if output
# is wanted outside an interactive session.
tamil_correct_spelling(word,edit_distance,ta_words_unique,bloom_tamil)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment