# nbertagnolli/predict_language.py

## predict_language.py
from typing import List
import os
import requests
import fasttext


def get_language(
    texts: List[str],
    fasttext_model_path: str = '/tmp/lid.176.bin',
) -> List[str]:
    """Predict the language code for each text in a list.

    The fastText ``lid.176.bin`` language-identification model is downloaded
    to ``fasttext_model_path`` if no file exists there yet, then loaded and
    used to label every text.

    Args:
        texts: A list of texts for which we want to predict the language.
        fasttext_model_path: Location of the model file on disk. It is both
            the download target and the path that gets loaded. Defaults to
            the location the download was previously hard-coded to.

    Returns:
        A list of two to three letter language codes, one per input text and
        in the same order. An empty input yields an empty list without
        downloading or loading the model.

    Raises:
        requests.HTTPError: If the model download responds with an error
            status.
    """
    # Nothing to classify: skip the (large) download and model load entirely.
    if not texts:
        return []

    # If the model doesn't exist download it
    if not os.path.isfile(fasttext_model_path):
        url = 'https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin'
        # Download to a side file and rename once complete, so an interrupted
        # transfer never leaves a truncated file that passes the isfile check.
        partial_path = fasttext_model_path + '.part'
        # stream=True avoids holding the whole model in memory; the timeout
        # bounds connection and per-read stalls.
        with requests.get(url, allow_redirects=True, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as model_file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    model_file.write(chunk)
        os.replace(partial_path, fasttext_model_path)

    # Load the downloaded model into fasttext
    lang_model = fasttext.load_model(fasttext_model_path)

    # Predict the language code for each text in texts
    # Remove newlines because fasttext doesn't like them
    labels, _ = lang_model.predict([text.replace("\n", " ") for text in texts])

    # Keep only the top label per text and strip everything up to the last
    # "__" (labels are expected to look like "__label__en").
    return [top_labels[0].split("__")[-1] for top_labels in labels]
	from typing import List
	import os
	import requests
	import fasttext


	def get_language(texts: List[str], fasttext_model_path: str = '/tmp/lid.176.bin') -> List[str]:
		"""Predict the language code for each text in a list.

		The fastText ``lid.176.bin`` language-identification model is downloaded
		to ``fasttext_model_path`` if no file exists there yet, then loaded and
		used to label every text.

		Args:
			texts: A list of texts for which we want to predict the language.
			fasttext_model_path: Location of the model file on disk. It is both
				the download target and the path that gets loaded. Defaults to
				the location the download was previously hard-coded to.

		Returns:
			A list of two to three letter language codes, one per input text and
			in the same order. An empty input yields an empty list without
			downloading or loading the model.

		Raises:
			requests.HTTPError: If the model download responds with an error
				status.
		"""
		# Nothing to classify: skip the (large) download and model load entirely.
		if not texts:
			return []

		# If the model doesn't exist download it
		if not os.path.isfile(fasttext_model_path):
			url = 'https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin'
			# Download to a side file and rename once complete, so an interrupted
			# transfer never leaves a truncated file that passes the isfile check.
			partial_path = fasttext_model_path + '.part'
			# stream=True avoids holding the whole model in memory; the timeout
			# bounds connection and per-read stalls.
			with requests.get(url, allow_redirects=True, stream=True, timeout=60) as response:
				response.raise_for_status()
				with open(partial_path, 'wb') as model_file:
					for chunk in response.iter_content(chunk_size=1 << 20):
						model_file.write(chunk)
			os.replace(partial_path, fasttext_model_path)

		# Load the downloaded model into fasttext
		lang_model = fasttext.load_model(fasttext_model_path)

		# Predict the language code for each text in texts
		# Remove newlines because fasttext doesn't like them
		labels, _ = lang_model.predict([text.replace("\n", " ") for text in texts])

		# Keep only the top label per text and strip everything up to the last
		# "__" (labels are expected to look like "__label__en").
		return [top_labels[0].split("__")[-1] for top_labels in labels]