This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try { | |
var tokens = JSON.parse(botContext.getBotVariable("UW_api.tokens")); | |
botContext.printDebugMessage("TOKEN LENGTH: " + tokens.length); | |
var token_confidence_threshold = 0.8; | |
var vulnerabilities = ["mental_health", "young_children"]; | |
botContext.printDebugMessage("VULN " + vulnerabilities); | |
for (let i = 0; i < tokens.length; i++) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy the model to SageMaker hosting service.
# You can provide the number of instances and the type of hosting instance.
# In this example we are creating a hosting endpoint with 1 instance of type ml.m5.large
# (note that this hosting instance does not have a GPU).
# NOTE(review): `update_endpoint` and `json_serializer` look like SageMaker
# Python SDK v1 names -- confirm the installed SDK version before running.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    endpoint_name='bert-toxic-comments',
    update_endpoint=True,
    serializer=json_serializer,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Construct the ECR image location from the caller's AWS account id and the
# region of the current session.
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-bert:1.0-gpu-py36".format(account, region)
# Construct the output path for storage of model
output_path = "s3://{}/{}".format(bucket, prefix_output)
# Create the estimator | |
estimator = sagemaker.estimator.Estimator(image, # ECR image arn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a helper feature to upload data
# from your local machine to S3 bucket.
# The first call uploads DATA_PATH itself and keeps the returned value as
# `s3_input`; the remaining calls upload the three CSV files individually.
s3_input = session.upload_data(DATA_PATH, bucket=bucket, key_prefix=prefix)
session.upload_data(str(DATA_PATH/'labels.csv'), bucket=bucket, key_prefix=prefix)
session.upload_data(str(DATA_PATH/'train.csv'), bucket=bucket, key_prefix=prefix)
session.upload_data(str(DATA_PATH/'val.csv'), bucket=bucket, key_prefix=prefix)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
training_config = { | |
"run_text": "toxic comments", | |
"finetuned_model": None, | |
"do_lower_case": "True", | |
"train_file": "train.csv", | |
"val_file": "val.csv", | |
"label_file": "labels.csv", | |
"text_col": "comment_text", | |
"label_col": '["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]', | |
"multi_label": "True", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training hyperparameters for the fine-tuning run.
hyperparameters = {
    "epochs": 10,
    "lr": 8e-5,
    "max_seq_length": 512,
    "train_batch_size": 16,
    "lr_schedule": "warmup_cosine",
    "warmup_steps": 1000,
    "optimizer_type": "adamw",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# location for train.csv, val.csv and labels.csv
DATA_PATH = Path("../data/")
# Location for storing training_config.json
CONFIG_PATH = DATA_PATH/'config'
# parents=True so the directory is created even when DATA_PATH itself does
# not exist yet; exist_ok=True keeps re-runs from raising.
CONFIG_PATH.mkdir(parents=True, exist_ok=True)
# S3 bucket name
bucket = 'sagemaker-deep-learning'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from pathlib import Path

import sagemaker
from sagemaker.predictor import json_serializer

# Get the sagemaker execution role and the session
role = sagemaker.get_execution_role()
session = sagemaker.Session()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fast_bert.prediction import BertClassificationPredictor

# Build a predictor from the model, pretrained-weights and label paths.
predictor = BertClassificationPredictor(
    model_path=MODEL_PATH,
    pretrained_path=BERT_PRETRAINED_PATH,
    label_path=LABEL_PATH,
    multi_label=False,
)
# Single prediction
single_prediction = predictor.predict("just get me result for this text")
# Batch predictions | |
texts = [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the learner over a small batch of example texts.
texts = [
    "this is the first text",
    "this is the second text",
]
predictions = learner.predict_batch(texts)
NewerOlder