Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Serverless fastText implementation. A post on the Code For Cash blog (blog.codefor.cash) discusses what else is needed to run fastText in a serverless AWS Lambda environment: compiling fastText on EC2 for Linux, including NLTK in the root directory of the zip, etc.
import string
import re
import nltk
def normalize(text):
    """Return *text* as a space-separated string of lower-cased content words.

    The steps run in this order: every ``string.punctuation`` character is
    deleted, the remainder is tokenized with NLTK's ``word_tokenize``, each
    token is lower-cased, tokens for which ``str.isalnum()`` is false are
    dropped, and NLTK's English stop words are removed.
    """
    from nltk.tokenize import word_tokenize
    from nltk.corpus import stopwords

    # Punctuation is deleted *before* tokenizing, so the tokenizer never
    # sees it.
    punctuation_class = '[%s]' % re.escape(string.punctuation)
    stripped = re.sub(punctuation_class, '', text)

    # NOTE: the original kept `nltk.data.path.append("/nltk_data")` commented
    # out at this point; presumably it is only needed when the NLTK data is
    # not on the default search path -- confirm against the deployment.
    lowered = [token.lower() for token in word_tokenize(stripped)]

    # Keep alphanumeric tokens only (letters and/or digits), tested on the
    # lower-cased form.
    alnum_tokens = [token for token in lowered if token.isalnum()]

    # A set gives constant-time membership checks for the stop-word filter.
    english_stop_words = set(stopwords.words('english'))
    kept = [token for token in alnum_tokens if token not in english_stop_words]

    return ' '.join(kept)
def find_tags(lambda_input):
    """Normalize a job-ad text and return fastText's predictions for it.

    The text is passed through ``normalize`` and written to the standard
    input of ``./fasttext predict-prob model_alnum_gom.bin - 4``. Per the
    fastText command-line usage, ``-`` reads the input from stdin and ``4``
    asks for the four most likely labels with their probabilities.

    Args:
        lambda_input: Raw job-ad text.

    Returns:
        The subprocess's standard output as text, with leading and trailing
        whitespace stripped. The process's stderr is captured and discarded,
        and its exit status is not checked, so a failed run yields whatever
        (possibly empty) output was produced.
    """
    from subprocess import Popen, PIPE

    job_ad_text = normalize(lambda_input)

    # The pipes are opened in binary mode, so on Python 3 passing text to
    # communicate() raises TypeError. Encode only when needed so that a
    # Python 2 byte string is passed through untouched.
    if not isinstance(job_ad_text, bytes):
        job_ad_text = job_ad_text.encode('utf-8')

    p = Popen(
        ['./fasttext', 'predict-prob', 'model_alnum_gom.bin', '-', '4'],
        stdout=PIPE,
        stdin=PIPE,
        stderr=PIPE,
    )
    stdout_data = p.communicate(input=job_ad_text)[0]

    # Return the native string type: on Python 3 the pipe yields bytes, which
    # a Lambda handler cannot serialise to JSON; on Python 2 it is already str.
    if not isinstance(stdout_data, str):
        stdout_data = stdout_data.decode('utf-8')
    return stdout_data.strip()
def handler(event, context):
    """Entry point taking ``(event, context)``: tag the job ad carried in *event*.

    Reads ``event['job_ad_text']`` and returns the result of ``find_tags`` for
    it. *context* is accepted but not used. A missing ``'job_ad_text'`` key
    propagates as ``KeyError``.
    """
    job_ad_text = event['job_ad_text']
    return find_tags(job_ad_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.