Last active
June 18, 2019 13:31
-
-
Save haranjackson/82b44d1e3a9ca9c695134b2ac7ed75ef to your computer and use it in GitHub Desktop.
A bash script for creating a layer on AWS Lambda (Python 3.7) that includes NLTK and the Punkt sentence tokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build and publish an AWS Lambda layer (Python 3.7) that bundles NLTK together
# with the Punkt sentence tokenizer data.
#
# Steps:
#   1. pip-install nltk and download the 'punkt' data inside the
#      lambci/lambda:build-python3.7 container, into build/nltk_punkt/python.
#   2. Patch nltk/__init__.py so NLTK_DATA points at /opt/python.
#   3. Zip the python/ directory and publish it as a layer version.
#   4. Remove the build directory and the zip created by this script.
#
# Requires: docker, zip, awk, and an AWS CLI that is allowed to publish layer
# versions. Run from a directory where ./build and ./nltk_punkt.zip may be
# created and deleted.

# Abort on the first failing step instead of publishing a half-built layer.
# On failure the build directory is left in place for inspection.
set -euo pipefail

LAYER_NAME=NltkPunkt
REGION=us-east-1
BUILD_ROOT=build
PKG_DIR=$BUILD_ROOT/nltk_punkt
ZIP_FILE=nltk_punkt.zip
INIT_FILE=$PKG_DIR/python/nltk/__init__.py

mkdir -p "$PKG_DIR/python"

# The current directory is mounted at /out, so everything the container writes
# under /out/$PKG_DIR shows up in ./$PKG_DIR on the host.
# rm -f: do not fail the build if the archive or the pickles are absent.
# NOTE(review): the top-level punkt/*.pickle files are presumably removed to
# shrink the layer - confirm the files left under tokenizers/punkt/ are the
# ones the Python 3 runtime actually loads.
docker run --rm -v "$PWD:/out" lambci/lambda:build-python3.7 /bin/bash -c \
  "set -e;
   pip install nltk -t /out/$PKG_DIR/python;
   cd /out/$PKG_DIR/python;
   python -c \"import nltk; nltk.download('punkt', '/out/$PKG_DIR/python')\";
   rm -f tokenizers/punkt.zip;
   rm -f tokenizers/punkt/*.pickle"

# Add the nltk data directory to the search path: insert a statement that sets
# NLTK_DATA right after the last 'from __future__' import in nltk/__init__.py
# (a __future__ import must precede other statements, so it cannot go earlier).
last_future_line=$(awk '/from __future__/ { n = NR } END { if (n) print n }' "$INIT_FILE")
if [[ -z "$last_future_line" ]]; then
  printf 'error: no "from __future__" line found in %s; cannot patch NLTK_DATA\n' \
    "$INIT_FILE" >&2
  exit 1
fi

# Build the patched content fully in memory before redirecting, so the file is
# never read and truncated at the same time. Writing through a redirect (rather
# than mv-ing a temp file over it) keeps the file's existing permissions.
patched_init=$(awk -v n="$last_future_line" '
  { print }
  NR == n { print "import os; os.environ[\"NLTK_DATA\"]=\"/opt/python\"" }
' "$INIT_FILE")
printf '%s\n' "$patched_init" > "$INIT_FILE"

# Start from a clean archive: zip -r would otherwise add to a stale one.
zip_path=$PWD/$ZIP_FILE
rm -f -- "$zip_path"
# Subshell keeps the cd local; 'python/' must be the top-level entry of the zip.
(cd "$PKG_DIR" && zip -r "$zip_path" python/)

echo "deploying $LAYER_NAME to $REGION"
aws lambda publish-layer-version --layer-name "$LAYER_NAME" \
  --region "$REGION" \
  --zip-file "fileb://$ZIP_FILE" \
  --compatible-runtimes python3.7

# Clean up only what this script created (not every *.zip in the directory).
rm -rf -- "$BUILD_ROOT"
rm -f -- "$ZIP_FILE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment