Last active
February 7, 2024 03:49
-
-
Save wesslen/d530dbef2ed9815f6c749d01e12880b4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prodigy NER walkthrough: annotate -> train -> correct -> retrain -> package
# -> publish to the Hugging Face Hub.
#
# Run these commands step by step rather than as one unattended script: the
# annotation recipes (ner.manual / ner.correct) start a local web app and keep
# running until you stop them.
#
# Placeholders to fill in before running:
#   PRODIGY_KEY      - your Prodigy license key
#   NGROK_AUTHTOKEN  - your ngrok auth token (only for the optional sharing step)

# vs code create folder

# check python versions
python --version  # make sure 3.9-3.12
pip --version

# create venv
python -m virtualenv venv
source venv/bin/activate

# install prodigy, using Prodigy Key as an environmental variable
# (no spaces around '=' -- with spaces the shell does not assign anything)
export PRODIGY_KEY="xxxx-xxxx-xxxx-xxxx"
python -m pip install prodigy -f "https://${PRODIGY_KEY}@download.prodi.gy"
python -m prodigy stats

# ngrok website install: https://ngrok.com/download
python -m pip install ngrok
# NOTE(review): `config add-authtoken` is a subcommand of the ngrok agent CLI
# (installed from the website above), not of the pip package -- confirm.
# ${VAR:?} aborts with a message if the token has not been exported.
ngrok config add-authtoken "${NGROK_AUTHTOKEN:?export NGROK_AUTHTOKEN first}"

# add canvas files into data/
python -m prodigy ner.manual cooking_manual blank:en data/unlabeled.jsonl --label DISH,INGREDIENT,EQUIPMENT

# optional step if you want to share local app to someone
# (run this in a NEW terminal while the annotation app above is still running)
ngrok http 8080

# train (output directory is the positional argument)
python -m prodigy train ./output --ner cooking_manual

# correct other unlabeled
python -m prodigy ner.correct cooking_correct ./output/model-best ./data/unlabeled.jsonl --label DISH,INGREDIENT,EQUIPMENT

# combine datasets
python -m prodigy db-merge cooking_manual,cooking_correct cooking_train

# check how label progressing
# NOTE(review): --label-stats is documented for `train`; confirm that
# `train-curve` accepts it in your Prodigy version.
python -m prodigy train-curve --ner cooking_train --label-stats

# retrain
python -m prodigy train ./output --ner cooking_train

# zip file (-r recurses into the model directory; first arg is the archive)
zip -r ./output/model-best.zip ./output/model-best

# install setuptools and wheel
python -m pip install -U pip setuptools wheel

# upload to HF, huggingface.co/settings/tokens
# the `spacy huggingface-hub` command is provided by this plugin
python -m pip install spacy-huggingface-hub
huggingface-cli login
# package the trained pipeline; --name/--version pin the wheel name used below
python -m spacy package ./output/model-best ./output --name ner_cooking --version 0.0.0 --build wheel
# only push if the dist directory actually exists
cd ./output/en_ner_cooking-0.0.0/dist &&
  python -m spacy huggingface-hub push en_ner_cooking-0.0.0-py3-none-any.whl
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment