Skip to content

Instantly share code, notes, and snippets.

@kaushaltrivedi
kaushaltrivedi / lp.js
Created December 1, 2022 16:01
jo_kt_lp
// Bot-platform snippet (botContext API — presumably LivePerson; confirm): read
// token classifications produced by the "UW_api" integration and scan them.
try {
// "UW_api.tokens" is stored as a JSON string — decode it back into an array.
var tokens = JSON.parse(botContext.getBotVariable("UW_api.tokens"));
botContext.printDebugMessage("TOKEN LENGTH: " + tokens.length);
// Minimum confidence a token classification must reach to be acted on.
// NOTE(review): the threshold's use is not visible in this truncated snippet.
var token_confidence_threshold = 0.8;
// Vulnerability labels this bot looks for in the parsed tokens.
var vulnerabilities = ["mental_health", "young_children"];
botContext.printDebugMessage("VULN " + vulnerabilities);
// NOTE(review): snippet is truncated — the loop body and the catch/close of
// this try block are not visible here.
for (let i = 0; i < tokens.length; i++) {
@kaushaltrivedi
kaushaltrivedi / sagemaker_deploy.py
Last active September 12, 2019 20:54
sagemaker deploy
# Deploy the model to SageMaker hosting service.
# You can provide the number of instances and the type of hosting instance.
# In this example we are creating a hosting endpoint with 1 instance of type ml.m5.large
# (note that this hosting instance does not have a GPU).
# Positional args are (initial_instance_count, instance_type).
# NOTE(review): `update_endpoint=True` (reuse an existing endpoint name) was
# removed in SageMaker Python SDK v2 — this snippet assumes SDK v1; confirm.
# `serializer=json_serializer` makes the returned predictor send JSON payloads.
predictor = estimator.deploy(1,
'ml.m5.large',
endpoint_name='bert-toxic-comments',
update_endpoint=True,
serializer=json_serializer)
@kaushaltrivedi
kaushaltrivedi / estimator.py
Last active September 12, 2019 16:58
sagemaker estimator
# Resolve the account-specific ECR URI of the custom training image.
# The account id comes from STS so the snippet works in any AWS account.
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
image = f"{account}.dkr.ecr.{region}.amazonaws.com/sagemaker-bert:1.0-gpu-py36"
# S3 location where SageMaker will store the trained model artifacts.
output_path = f"s3://{bucket}/{prefix_output}"
# Create the estimator
estimator = sagemaker.estimator.Estimator(image, # ECR image arn
@kaushaltrivedi
kaushaltrivedi / upload_data.py
Last active September 26, 2019 09:11
sagemaker data
# Push the local dataset from this machine into the S3 bucket so the
# SageMaker training job can read it.
s3_input = session.upload_data(DATA_PATH, bucket=bucket, key_prefix=prefix)
# Upload the individual CSV files under the same key prefix.
for csv_name in ('labels.csv', 'train.csv', 'val.csv'):
    session.upload_data(str(DATA_PATH / csv_name), bucket=bucket, key_prefix=prefix)
training_config = {
"run_text": "toxic comments",
"finetuned_model": None,
"do_lower_case": "True",
"train_file": "train.csv",
"val_file": "val.csv",
"label_file": "labels.csv",
"text_col": "comment_text",
"label_col": '["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]',
"multi_label": "True",
# Hyperparameters handed to the SageMaker training job.
hyperparameters = dict(
    epochs=10,                      # passes over the training set
    lr=8e-5,                        # peak learning rate
    max_seq_length=512,             # maximum input sequence length (tokens)
    train_batch_size=16,
    lr_schedule="warmup_cosine",    # LR schedule name — TODO confirm consumer
    warmup_steps=1000,
    optimizer_type="adamw",
)
@kaushaltrivedi
kaushaltrivedi / sagemaker_locations.py
Created September 12, 2019 15:07
sagemaker_locations
# Local directory that holds train.csv, val.csv and labels.csv.
DATA_PATH = Path("../data/")
# Directory where training_config.json is written; created here if missing
# (the parent ../data must already exist — mkdir is not recursive).
CONFIG_PATH = DATA_PATH.joinpath('config')
CONFIG_PATH.mkdir(exist_ok=True)
# Target S3 bucket for dataset uploads and model artifacts.
bucket = 'sagemaker-deep-learning'
@kaushaltrivedi
kaushaltrivedi / sagemaker_imports.py
Last active September 12, 2019 14:33
sagemaker
import sagemaker
from pathlib import Path
from sagemaker.predictor import json_serializer
import json
# Get the sagemaker execution role and the session.
# NOTE(review): get_execution_role() assumes this runs inside a SageMaker
# notebook/Studio environment with an attached IAM role — confirm.
role = sagemaker.get_execution_role()
# Session wraps the default boto3 session; later snippets use it for S3
# uploads and to resolve the account/region for the ECR image.
session = sagemaker.Session()
@kaushaltrivedi
kaushaltrivedi / predictor.py
Last active October 16, 2023 00:19
fast-bert predictor
from fast_bert.prediction import BertClassificationPredictor
# Build a fast-bert inference wrapper around a fine-tuned model.
# MODEL_PATH / BERT_PRETRAINED_PATH / LABEL_PATH are defined elsewhere —
# presumably the fine-tuned model dir, pretrained BERT weights, and the
# labels file location; verify against the calling notebook.
# multi_label=False selects single-label classification mode.
predictor = BertClassificationPredictor(model_path=MODEL_PATH, pretrained_path=BERT_PRETRAINED_PATH,
label_path=LABEL_PATH, multi_label=False)
# Single prediction
single_prediction = predictor.predict("just get me result for this text")
texts = [
@kaushaltrivedi
kaushaltrivedi / predict_learner.py
Created May 14, 2019 20:54
fast-bert predict learner
# Run batch inference with the fitted fast-bert learner.
texts = ["this is the first text", "this is the second text"]
predictions = learner.predict_batch(texts)