# maxme/lang_detection.py

## lang_detection.py
from collections import defaultdict

def detect_language(scores, input_str, default=None):
    """Return the language with the highest accumulated score for *input_str*.

    The input is split on whitespace. Every token found in ``scores`` votes
    for its top-ranked language only (the first ``(language, score)`` pair of
    its entry), adding that pair's score to the language's running total.

    Args:
        scores: Mapping of token -> sequence of ``(language, score)`` pairs.
            Only the first pair of each entry is read, so callers should
            list the best language first.
        input_str: Text to classify.
        default: Value returned when no token of ``input_str`` contributes a
            score (empty input, unknown tokens, or empty score lists).

    Returns:
        The language with the largest total; on ties, the language that was
        seen first wins. ``default`` if nothing matched.
    """
    totals = defaultdict(int)
    for token in input_str.split():
        ranked = scores.get(token)
        if not ranked:
            # Unknown token or empty score list: nothing to vote for.
            continue
        language, score = ranked[0]
        totals[language] += score
    if not totals:
        # max() raises ValueError on an empty iterable, so bail out first.
        return default
    return max(totals, key=totals.get)

# Token -> [(language, score), ...]; each list is kept sorted by score,
# best language first.
scores = {
    "hello": [("english", 0.01), ("french", 0)],
    "method": [("english", 0.001), ("french", 0.001)],
    "this": [("english", 0.1), ("french", 0)],
    "le": [("french", 0.2), ("english", 0)],
    "salut": [("french", 0.01), ("english", 0)],
}

# Smoke check: print each sample sentence next to its detected language.
for test in ("hello this is a me", "salut le monde"):
    print(f"{test}: {detect_language(scores, test)}")
	from collections import defaultdict

	def detect_language(scores, input_str, default=None):
		"""Return the language with the highest accumulated score for *input_str*.

		The input is split on whitespace. Every token found in ``scores`` votes
		for its top-ranked language only (the first ``(language, score)`` pair of
		its entry), adding that pair's score to the language's running total.

		Args:
			scores: Mapping of token -> sequence of ``(language, score)`` pairs.
				Only the first pair of each entry is read, so callers should
				list the best language first.
			input_str: Text to classify.
			default: Value returned when no token of ``input_str`` contributes a
				score (empty input, unknown tokens, or empty score lists).

		Returns:
			The language with the largest total; on ties, the language that was
			seen first wins. ``default`` if nothing matched.
		"""
		totals = defaultdict(int)
		for token in input_str.split():
			ranked = scores.get(token)
			if not ranked:
				# Unknown token or empty score list: nothing to vote for.
				continue
			language, score = ranked[0]
			totals[language] += score
		if not totals:
			# max() raises ValueError on an empty iterable, so bail out first.
			return default
		return max(totals, key=totals.get)

	# Token -> [(language, score), ...]; each list is kept sorted by score,
	# best language first.
	scores = {
		"hello": [("english", 0.01), ("french", 0)],
		"method": [("english", 0.001), ("french", 0.001)],
		"this": [("english", 0.1), ("french", 0)],
		"le": [("french", 0.2), ("english", 0)],
		"salut": [("french", 0.01), ("english", 0)],
	}

	# Smoke check: print each sample sentence next to its detected language.
	for test in ("hello this is a me", "salut le monde"):
		print(f"{test}: {detect_language(scores, test)}")