Skip to content

Instantly share code, notes, and snippets.

@amadanmath
Last active August 28, 2018 03:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amadanmath/77d9bae747268f97eab2e22e7cd0a364 to your computer and use it in GitHub Desktop.
Save amadanmath/77d9bae747268f97eab2e22e7cd0a364 to your computer and use it in GitHub Desktop.
Using langdetect with a restricted set of languages
from langdetect.detector_factory import DetectorFactory, PROFILES_DIRECTORY
import os
def get_factory_for(langs):
    """Build a langdetect ``DetectorFactory`` restricted to ``langs``.

    Only the bundled profiles named in ``langs`` are loaded, so detection
    can only ever answer with one of those languages.

    Args:
        langs: Iterable of profile names (e.g. ``['en', 'ru', 'pl']``). Each
            name must match a file in langdetect's ``PROFILES_DIRECTORY``.

    Returns:
        The configured ``DetectorFactory``, with two convenience callables
        attached as instance attributes:

        * ``detect(text)`` -- result of ``Detector.detect()``.
        * ``detect_langs(text)`` -- result of ``Detector.get_probabilities()``.

    Raises:
        OSError: If a requested profile file cannot be opened.
        Whatever ``DetectorFactory.load_json_profile`` raises for profiles it
        rejects (NOTE(review): upstream appears to require at least two
        profiles -- confirm against the installed langdetect version).
    """
    df = DetectorFactory()

    # Load exactly the requested profiles; the factory expects the raw JSON
    # text of each profile, not parsed objects.
    profiles = []
    for lang in langs:
        with open(os.path.join(PROFILES_DIRECTORY, lang), 'r', encoding='utf-8') as f:
            profiles.append(f.read())
    df.load_json_profile(profiles)

    def _detector_for(text):
        # A fresh detector per call: text is appended to the detector, so
        # reusing one would mix the text of successive calls.
        d = df.create()
        d.append(text)
        return d

    def _detect_langs(text):
        return _detector_for(text).get_probabilities()

    def _detect(text):
        return _detector_for(text).detect()

    # Mirror the module-level langdetect.detect / detect_langs helpers on the
    # restricted factory itself.
    df.detect_langs = _detect_langs
    df.detect = _detect
    return df
from langdetect_restricted import get_factory_for
# Usage example: build a factory that knows only the English, Russian and
# Polish profiles.
df = get_factory_for(['en', 'ru', 'pl'])
# The trailing comments record the values the author observed for each call.
# NOTE(review): langdetect is documented as non-deterministic unless
# DetectorFactory.seed is set, so the exact probability may differ -- confirm.
df.detect('today') # 'en'
df.detect_langs('today') # [en:0.9999988994459187]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment