Kaushal Trivedi kaushaltrivedi

## lp.js
try {
  var tokens = JSON.parse(botContext.getBotVariable("UW_api.tokens"));
  botContext.printDebugMessage("TOKEN LENGTH: " + tokens.length);

  var token_confidence_threshold = 0.8;
  var vulnerabilities = ["mental_health", "young_children"];

  botContext.printDebugMessage("VULN " + vulnerabilities);

  for (let i = 0; i < tokens.length; i++) {

## sagemaker_deploy.py
# Create a SageMaker hosting endpoint for the trained estimator.
# Both the instance count and the instance type are configurable; this
# example asks for a single ml.m5.large instance, which has no GPU.
# NOTE(review): `update_endpoint` and `json_serializer` look like SageMaker
# SDK v1 names - confirm the pinned SDK version before upgrading.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    endpoint_name='bert-toxic-comments',
    update_endpoint=True,
    serializer=json_serializer,
)

## estimator.py
# Build the ECR image location from the caller's AWS account id (looked up
# through STS) and the region of the current boto session.
sts_client = session.boto_session.client('sts')
account = sts_client.get_caller_identity()['Account']
region = session.boto_session.region_name
image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-bert:1.0-gpu-py36".format(
    account, region
)

# S3 location under which the model output is stored
output_path = "s3://{}/{}".format(bucket, prefix_output)

# Create the estimator
estimator = sagemaker.estimator.Estimator(image,                                # ECR image arn

## upload_data.py
# Upload the local data to the S3 bucket.
#
# First the whole data directory is uploaded and the call's return value is
# kept in `s3_input`. DATA_PATH is converted with str() so this call passes
# the same argument type as the per-file uploads below (the SDK documents
# `path` as a string).
s3_input = session.upload_data(str(DATA_PATH), bucket=bucket, key_prefix=prefix)

# Then the label, training and validation CSVs are uploaded one by one under
# the same key prefix.
for file_name in ('labels.csv', 'train.csv', 'val.csv'):
    session.upload_data(str(DATA_PATH/file_name), bucket=bucket, key_prefix=prefix)

## training_config.py
training_config = {
    "run_text": "toxic comments",
    "finetuned_model": None,
    "do_lower_case": "True",
    "train_file": "train.csv",
    "val_file": "val.csv",
    "label_file": "labels.csv",
    "text_col": "comment_text",
    "label_col": '["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]',
    "multi_label": "True",

## hyperparameters.py
# Hyperparameter values keyed by name.
# NOTE(review): presumably handed to the estimator for the training job -
# confirm against the caller, which is not visible here.
hyperparameters = dict(
    epochs=10,
    lr=8e-5,
    max_seq_length=512,
    train_batch_size=16,
    lr_schedule="warmup_cosine",
    warmup_steps=1000,
    optimizer_type="adamw",
)

## sagemaker_locations.py
# Location for train.csv, val.csv and labels.csv
DATA_PATH = Path("../data/")

# Location for storing training_config.json
CONFIG_PATH = DATA_PATH/'config'
# parents=True also creates a missing ../data directory; without it mkdir
# raises FileNotFoundError when the data directory does not exist yet.
# exist_ok=True keeps re-runs from failing once the directory is there.
CONFIG_PATH.mkdir(parents=True, exist_ok=True)

# S3 bucket name
bucket = 'sagemaker-deep-learning'

## sagemaker_imports.py
import sagemaker
from pathlib import Path
from sagemaker.predictor import json_serializer
import json

# Get the sagemaker session and the execution role.
# The session is created first and passed to get_execution_role() so the role
# lookup reuses it; called without a session, the SDK builds a second one
# just for that lookup.
session = sagemaker.Session()
role = sagemaker.get_execution_role(sagemaker_session=session)

## predictor.py
from fast_bert.prediction import BertClassificationPredictor

# Instantiate the predictor from MODEL_PATH, BERT_PRETRAINED_PATH and
# LABEL_PATH, with multi_label turned off.
predictor = BertClassificationPredictor(
    model_path=MODEL_PATH,
    pretrained_path=BERT_PRETRAINED_PATH,
    label_path=LABEL_PATH,
    multi_label=False,
)

# Single prediction: one text in, its result kept in `single_prediction`
single_prediction = predictor.predict("just get me result for this text")

# Batch predictions
texts = [

## predict_learner.py
# Example inputs for a batch prediction
texts = [
    "this is the first text",
    "this is the second text",
]

# Pass the whole list to the learner in a single predict_batch call
predictions = learner.predict_batch(texts)
	try {
	var tokens = JSON.parse(botContext.getBotVariable("UW_api.tokens"));
	botContext.printDebugMessage("TOKEN LENGTH: " + tokens.length);

	var token_confidence_threshold = 0.8;
	var vulnerabilities = ["mental_health", "young_children"];

	botContext.printDebugMessage("VULN " + vulnerabilities);

	for (let i = 0; i < tokens.length; i++) {
	# Deploy the model to SageMaker hosting service.
	# You can provide the number of instances and the type of hosting instance.
	# In this example we are creating a hosting endpoint with 1 instance of type ml.m5.large
	# (note that this hosting instance does not have a GPU).
	predictor = estimator.deploy(1,
	'ml.m5.large',
	endpoint_name='bert-toxic-comments',
	update_endpoint=True,
	serializer=json_serializer)
	# Construct the ECR image location
	account = session.boto_session.client('sts').get_caller_identity()['Account']
	region = session.boto_session.region_name
	image = "{}.dkr.ecr.{}.amazonaws.com/sagemaker-bert:1.0-gpu-py36".format(account, region)

	# Construct the output path for storage of model
	output_path = "s3://{}/{}".format(bucket, prefix_output)

	# Create the estimator
	estimator = sagemaker.estimator.Estimator(image, # ECR image arn
	# This is a helper feature to upload data
	# from your local machine to S3 bucket.

	s3_input = session.upload_data(DATA_PATH, bucket=bucket , key_prefix=prefix)

	session.upload_data(str(DATA_PATH/'labels.csv'), bucket=bucket , key_prefix=prefix)
	session.upload_data(str(DATA_PATH/'train.csv'), bucket=bucket , key_prefix=prefix)
	session.upload_data(str(DATA_PATH/'val.csv'), bucket=bucket , key_prefix=prefix)
	training_config = {
	"run_text": "toxic comments",
	"finetuned_model": None,
	"do_lower_case": "True",
	"train_file": "train.csv",
	"val_file": "val.csv",
	"label_file": "labels.csv",
	"text_col": "comment_text",
	"label_col": '["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]',
	"multi_label": "True",
	hyperparameters = {
	"epochs": 10,
	"lr": 8e-5,
	"max_seq_length": 512,
	"train_batch_size": 16,
	"lr_schedule": "warmup_cosine",
	"warmup_steps": 1000,
	"optimizer_type": "adamw"
	}
	# location for train.csv, val.csv and labels.csv
	DATA_PATH = Path("../data/")

	# Location for storing training_config.json
	CONFIG_PATH = DATA_PATH/'config'
	CONFIG_PATH.mkdir(exist_ok=True)

	# S3 bucket name
	bucket = 'sagemaker-deep-learning'
	import sagemaker
	from pathlib import Path
	from sagemaker.predictor import json_serializer
	import json

	# Get the sagemaker execution role and the session
	role = sagemaker.get_execution_role()
	session = sagemaker.Session()
	from fast_bert.prediction import BertClassificationPredictor

	predictor = BertClassificationPredictor(model_path=MODEL_PATH, pretrained_path=BERT_PRETRAINED_PATH,
	label_path=LABEL_PATH, multi_label=False)

	# Single prediction
	single_prediction = predictor.predict("just get me result for this text")

	# Batch predictions
	texts = [
	texts = [
	"this is the first text",
	"this is the second text"
	]

	predictions = learner.predict_batch(texts)