Skip to content

Instantly share code, notes, and snippets.

View shashankprasanna's full-sized avatar
🛠️

Shashank shashankprasanna

🛠️
View GitHub Profile
from tensor import Tensor
from math import trunc, mod
fn tensorprint[type: DType](t: Tensor[type])->None:
let rank = t.rank()
var dim0:Int=0
var dim1:Int=0
var dim2:Int=0
if rank==0 or rank>3:
print("Error: Tensor rank should be: 1,2, or 3. Tensor rank is ", rank)
return
def f(x):
    """Return sin(x)**2 + cos(x)**2, computed element-wise with torch.

    Args:
        x: Input accepted by ``torch.sin`` / ``torch.cos``.

    Returns:
        The sum of the squared sine and the squared cosine of ``x``.
    """
    sine_squared = torch.sin(x) ** 2
    cosine_squared = torch.cos(x) ** 2
    return sine_squared + cosine_squared
# Reset TorchDynamo before compiling (presumably discards previously cached
# compilations -- confirm against the torch._dynamo docs).
torch._dynamo.reset()
# Compile f with the 'inductor' backend, passing two trace-related options:
# 'trace.enabled' and 'trace.graph_diagram' are both switched on.
compiled_f = torch.compile(f, backend='inductor',
options={'trace.enabled':True,
'trace.graph_diagram':True})
# Device selection: 'cuda' is active; the CPU alternative is kept commented out.
# device = 'cpu'
device = 'cuda'
import torch._dynamo
from torch.fx.passes.graph_drawer import FxGraphDrawer
from functorch.compile import make_boxed_func
from torch._functorch.aot_autograd import aot_module_simplified
def f(x):
    """Evaluate sin(x)^2 + cos(x)^2 with torch operations.

    Args:
        x: Input accepted by ``torch.sin`` / ``torch.cos``.

    Returns:
        ``torch.sin(x)**2 + torch.cos(x)**2``.
    """
    sin_x = torch.sin(x)
    cos_x = torch.cos(x)
    return sin_x**2 + cos_x**2
def inspect_backend(gm, sample_inputs):
# Forward compiler capture
def inspect_backend(gm, sample_inputs):
    """Backend callable that dumps the captured graph and runs it unchanged.

    Args:
        gm: Graph module handed to the backend; must provide
            ``print_readable()`` and ``forward``.
        sample_inputs: Example inputs supplied with the graph (unused here).

    Returns:
        ``gm.forward``, i.e. the captured graph is executed without any
        further transformation.

    Side effects:
        Writes an SVG rendering of the graph to ``forward.svg`` in the
        current working directory.
    """
    # The returned text is kept in a local but not used afterwards.
    readable_source = gm.print_readable()
    # The file is opened before the graph is rendered, as in the original.
    with open("forward.svg", "wb") as svg_file:
        dot_graph = FxGraphDrawer(gm, 'f').get_dot_graph()
        svg_file.write(dot_graph.create_svg())
    return gm.forward
# Reset TorchDynamo, then compile f using the inspect_backend callable as the
# backend instead of a named built-in backend.
torch._dynamo.reset()
compiled_f = torch.compile(f, backend=inspect_backend)
# 1000-element random input created with requires_grad=True, then moved to
# `device`.
# NOTE(review): `device` must already be defined before this line runs -- confirm.
x = torch.rand(1000, requires_grad=True).to(device)
| Architecture | NVIDIA GPU | Instance type | Instance name | Number of GPUs | GPU Memory (per GPU) | GPU Interconnect (NVLink / PCIe) | Thermal Design Power (TDP) from nvidia-smi | Tensor Cores (mixed-precision) | Precision Support | CPU Type | Nitro based |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Ampere | A100 | P4 | p4d.24xlarge | 8 | 40 GB | NVLink gen 3 (600 GB/s) | 400W | Tensor Cores (Gen 3) | FP64, FP32, FP16, INT8, BF16, TF32 | Intel Xeon Scalable (Cascade Lake) | Yes |
| Ampere | A10G | G5 | g5.xlarge | 1 | 24 GB | NA (
def image_preprocess(img, reps=1):
    """Resize an image to 224x224, replicate it and apply ResNet50 preprocessing.

    Args:
        img: Object exposing ``resize((width, height))`` whose result can be
            converted with ``np.asarray`` (presumably a PIL image -- confirm
            against the caller).
        reps: Number of copies of the image stacked along a new leading axis.

    Returns:
        The output of ``tf.keras.applications.resnet50.preprocess_input``
        applied to the stacked array.
    """
    resized_pixels = np.asarray(img.resize((224, 224)))
    # Stack `reps` references to the same array along a new first axis.
    batch = np.stack([resized_pixels for _ in range(reps)])
    return tf.keras.applications.resnet50.preprocess_input(batch)
from PIL import Image
import numpy as np
import json
from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor
# Key prefix passed to the upload call below.
prefix = 'keras_models'
# Upload the local model.tar.gz archive through `sess`; the returned value is
# passed on as model_data.
# NOTE(review): `sess` and `role` are not defined in this snippet -- presumably
# a SageMaker session and an IAM role created earlier; confirm.
s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=prefix)
# Describe the uploaded model for framework version '1.15', with
# TensorFlowPredictor as the predictor class.
model = TensorFlowModel(model_data=s3_model_path,
framework_version='1.15',
role=role,
predictor_cls = TensorFlowPredictor,
sagemaker_session=sess)
from smdebug.trials import create_trial
def tensor_df(tname):
    """Build a two-column DataFrame (``steps`` and ``tname``) for one tensor.

    Reads the module-level ``trial`` object, so that name must be bound
    before this function is called.

    Args:
        tname: Name of the tensor to look up; also used as the value
            column's label.

    Returns:
        A DataFrame whose ``steps`` column holds the keys of
        ``trial.tensor(tname).values()`` and whose ``tname`` column holds the
        corresponding values.
    """
    values_by_key = trial.tensor(tname).values()
    # orient='index' turns the mapping's keys into the row index.
    frame = pd.DataFrame.from_dict(values_by_key, orient='index', columns=[tname])
    # Move that index into a regular column and label it 'steps'.
    return frame.reset_index().rename(columns={'index': 'steps'})
def trial_perf_curves(job_name, tname, experiment_name):
debug_data = f's3://{bucket_name}/{experiment_name}/{job_name}/debug-output'
from sagemaker.analytics import ExperimentAnalytics
# Name of the experiment to analyse, read from the training_experiment object.
# NOTE(review): `training_experiment` and `sagemaker_session` are defined
# outside this snippet -- confirm they exist before this runs.
experiment_name = training_experiment.experiment_name
# Build an ExperimentAnalytics query scoped to that experiment name.
trial_component_analytics = ExperimentAnalytics(
sagemaker_session=sagemaker_session,
experiment_name=experiment_name,
)
# Fetch the analytics result (presumably a pandas DataFrame -- confirm).
trial_comp_ds = trial_component_analytics.dataframe()
for trial_hyp in trial_hyperparameter_set:
# Combine static hyperparameters and trial specific hyperparameters
hyperparams = {**static_hyperparams, **trial_hyp}
# Create unique job name with hyperparameter and time
time_append = int(time.time())
hyp_append = "-".join([str(elm) for elm in trial_hyp.values()])
job_name = f'cifar10-training-{hyp_append}-{time_append}'
# Create a Tracker to track Trial specific hyperparameters