Skip to content

Instantly share code, notes, and snippets.

View sengstacken's full-sized avatar

Aaron Sengstacken sengstacken

View GitHub Profile
@sengstacken
sengstacken / LLM-file-tree.py
Created April 24, 2024 13:23
Get file structure for LLM upload
import os
import shutil
import tarfile
from collections import defaultdict
import logging
# Setup basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def copy_and_compress(src_directory, temp_directory, max_types, max_size):
# from - https://gitlab.com/juliensimon/aim410/-/blob/master/aim410.ipynb
# Number of GPUs on this machine
%env SM_NUM_GPUS=0
# Where to save the model
%env SM_MODEL_DIR=/tmp/model
# Where the training data is
%env SM_CHANNEL_TRAINING=data
# Where the validation data is
%env SM_CHANNEL_VALIDATION=data
import qrcode
# Build a QR code that encodes a single URL.
# version fixes the symbol size, box_size is the pixel size of each module
# ("box"), and border is the width of the surrounding margin, in boxes.
qr = qrcode.QRCode(version=3, box_size=15, border=15)
# Payload to encode; must be a well-formed URL so scanners can open it.
data = "https://www.amazon.com"
qr.add_data(data)
# The foreground colour keyword is `fill_color`; a bare `fill` keyword is not
# read by the default image factory.
image = qr.make_image(fill_color='black', back_color='white')
@sengstacken
sengstacken / scalers.py
Created September 16, 2021 17:14
scale data with pandas without warnings
# Standardise the selected columns with statistics learned from the training
# frame only.
scaler = StandardScaler()
scale_cols = ['Time', 'Amount']

# Fit on a bare ndarray rather than on the DataFrame slice.
# NOTE(review): presumably this is what avoids the warnings mentioned in the
# gist description — confirm.
scaler.fit(train_df[scale_cols].to_numpy())

# Take an independent copy of each split; the trailing underscore marks the copy.
train_df_, val_df_, test_df_ = (
    frame.copy() for frame in (train_df, val_df, test_df)
)
@sengstacken
sengstacken / watch_gpu
Last active August 31, 2021 18:02
watch_gpu
# Re-run nvidia-smi every second (-n1) to monitor the GPU live.
watch -n1 nvidia-smi
import time
from tqdm.auto import tqdm
mins = 10
with tqdm(desc="Break Timer", total=mins*60, bar_format="{l_bar}{bar} {elapsed_s:.0f}/{total} seconds") as pbar:
start = time.time()
now = time.time()
prev_now = now
while (now - start) < mins*60:
pbar.update(now - prev_now)
time.sleep(1)
# Look up the default S3 bucket of a fresh SageMaker session.
bucket_name = sagemaker.Session().default_bucket()

# upload data to s3
# NOTE(review): no upload call is visible in this snippet; the objects under
# the prefixes below are assumed to exist already — confirm.

# CSV input channels for training and validation, both located under
# s3://<bucket_name>/<prefix>/data/.
s3_input_train = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket_name}/{prefix}/data/train',
    content_type='csv',
)
s3_input_validation = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket_name}/{prefix}/data/val',
    content_type='csv',
)
@sengstacken
sengstacken / logger.py
Created December 21, 2020 18:37
Example of how to use the logging module in Python
import logging
import sys, os
# Send every record at INFO or above to stdout, stamped with the time, the
# logger name and the level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Alternative configuration that logs to a file instead:
# logging.basicConfig(filename='example.log', encoding='utf-8', level=logging.DEBUG)

# NOTE(review): with the level set to INFO, the debug() record below is
# filtered out despite what its message says — lower the level to DEBUG if it
# is meant to be shown.
logging.debug('This message should appear on the console')
logging.info('So should this')
logging.warning('And this, too')
@sengstacken
sengstacken / data_upload.py
Last active November 24, 2020 14:23
How to upload and prepare training data for SageMaker
import sagemaker
# Session used for all uploads, and the destination bucket (placeholder value
# to be replaced with a real bucket name).
sagemaker_session = sagemaker.Session()
s3_bucket = 'ENTER BUCKET NAME'

# upload
# Each local directory <name> is sent to the key prefix data/<name>_annotation,
# in the order listed.
for split_name in ('val', 'test', 'train', 'trainaug'):
    sagemaker_session.upload_data(
        path=split_name,
        bucket=s3_bucket,
        key_prefix=f'data/{split_name}_annotation',
    )
@sengstacken
sengstacken / endpoint_connect.py
Last active January 20, 2021 11:32
Connect to deployed endpoint
# TensorFlow: attach a predictor to an already-deployed endpoint by name.
# https://sagemaker.readthedocs.io/en/stable/frameworks/tensorflow/deploying_tensorflow_serving.html
end_point_name = 'keras-tf-fmnist-2020-10-13-22-25-23'
# NOTE(review): `sess` is not defined in this snippet, while the PyTorch
# example below passes `sagemaker_session` — confirm which name is intended.
predictor = sagemaker.tensorflow.model.TensorFlowPredictor(
    end_point_name,
    sagemaker_session=sess,
)

# PyTorch: same idea, with explicit JSON (de)serialization.
# Note that this rebinds both end_point_name and predictor.
## OPTIONAL
end_point_name = 'pytorch-inference-2021-01-20-04-00-19-786'
predictor = sagemaker.pytorch.model.PyTorchPredictor(
    end_point_name,
    sagemaker_session=sagemaker_session,
    serializer=sagemaker.serializers.JSONSerializer(),
    deserializer=sagemaker.deserializers.JSONDeserializer(),
)