Skip to content

Instantly share code, notes, and snippets.

@ymoslem
Last active March 13, 2022 15:53
Show Gist options
  • Save ymoslem/3bbd96fee23d3691b9b227f27fa2ab3e to your computer and use it in GitHub Desktop.
Save ymoslem/3bbd96fee23d3691b9b227f27fa2ab3e to your computer and use it in GitHub Desktop.
Runtime test of language detection libraries.
# -*- coding: utf-8 -*-
# pip3 install gdown langdetect fasttext pycld2 py3langid
import gdown
from datetime import datetime
# Fetch both fastText language-identification model files into the working
# directory: the .ftz file first, then the .bin file.
for url, output in (
    (
        "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz",
        "lid.176.ftz",
    ),
    (
        "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin",
        "lid.176.bin",
    ),
):
    gdown.download(url, output, quiet=False)
# Sample sentence handed to every detector below. Swap in text from other
# languages to compare the libraries (the default is a Hindi sentence written
# in Devanagari script).
text = "वह जापान से होकर भारत गया"
# Test runtime of langdetect.
# The timed span covers the import as well as the detection call and the print.
start = datetime.now()
from langdetect import DetectorFactory, detect

# langdetect's algorithm is non-deterministic, so the same text can yield a
# different code from run to run; pin the seed before the first detection so
# the printed result is repeatable.
DetectorFactory.seed = 0
lang_code = detect(text)
print(lang_code)
end = datetime.now()
print(f"langdetect runtime is: {end - start} \n")
# Test runtime of fasttext bin model.
# The timed span covers importing fasttext, loading the model from disk,
# predicting, and printing the result.
start = datetime.now()
import fasttext
pretrained_lang_model = "lid.176.bin"
model = fasttext.load_model(pretrained_lang_model)
# predict() works on a single line, so newlines must go; turn them into spaces
# rather than deleting them, otherwise the words on either side get glued
# together into one bogus token.
prediction = model.predict(text.replace("\n", " "), k=1)  # top 1 matching language
# Labels come back as "__label__<code>". Strip the prefix instead of keeping
# the last two characters, because some language codes are three letters long
# and would otherwise be truncated.
lang_code = prediction[0][0].replace("__label__", "")
print(lang_code)
end = datetime.now()
print(f"fasttext bin runtime is: {end - start} \n")
# Test runtime of fasttext ftz model.
# The timed span covers loading the model from disk, predicting, and printing.
# NOTE: fasttext was already imported by the bin section above, so the import
# here is a cached no-op; unlike the bin timing, this one excludes import cost.
start = datetime.now()
import fasttext
pretrained_lang_model = "lid.176.ftz"
model = fasttext.load_model(pretrained_lang_model)
# predict() works on a single line, so newlines must go; turn them into spaces
# rather than deleting them, otherwise the words on either side get glued
# together into one bogus token.
prediction = model.predict(text.replace("\n", " "), k=1)  # top 1 matching language
# Labels come back as "__label__<code>". Strip the prefix instead of keeping
# the last two characters, because some language codes are three letters long
# and would otherwise be truncated.
lang_code = prediction[0][0].replace("__label__", "")
print(lang_code)
end = datetime.now()
print(f"fasttext ftz runtime is: {end - start} \n")
# Time pycld2: the measured span includes the import, the detection call and
# printing the detected code.
t_begin = datetime.now()
import pycld2 as cld2
is_reliable, bytes_found, matches = cld2.detect(text)
# The language code is the second field of the first entry in the match list.
best_match = matches[0]
lang_code = best_match[1]
print(lang_code)
t_finish = datetime.now()
print(f"pycld2 runtime is: {t_finish - t_begin} \n")
# Time py3langid: the import, the classification call and printing the
# detected code all fall inside the measured span.
t_begin = datetime.now()
import py3langid as langid
classification = langid.classify(text)
lang_code = classification[0]  # first element of the result is the language code
print(lang_code)
t_finish = datetime.now()
print(f"py3langid runtime is: {t_finish - t_begin} \n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment