Shashank shashankprasanna

## tensorprint.mojo
from tensor import Tensor
from math import trunc, mod
# Presumably prints a Tensor (per the name); only the rank validation is
# visible in this excerpt.
fn tensorprint[type: DType](t: Tensor[type])->None:
    # Rank of the input tensor, as reported by t.rank().
    let rank = t.rank()
    # Three Int slots initialised to 0; presumably one per dimension.
    # NOTE(review): they are not assigned within the visible lines.
    var dim0:Int=0
    var dim1:Int=0
    var dim2:Int=0
    # Only ranks 1, 2 and 3 are accepted: report any other rank and return early.
    if rank==0 or rank>3:
        print("Error: Tensor rank should be: 1,2, or 3. Tensor rank is ", rank)
        return

## torch_compile3.py
def f(x):
    """Return ``sin(x)**2 + cos(x)**2`` computed with torch ops."""
    sin_sq = torch.sin(x)**2
    cos_sq = torch.cos(x)**2
    return sin_sq + cos_sq

# Reset TorchDynamo before compiling f.
torch._dynamo.reset()
# Compile f with the inductor backend and both trace options switched on.
compiled_f = torch.compile(
    f,
    backend='inductor',
    options={
        'trace.enabled': True,
        'trace.graph_diagram': True,
    },
)

# device = 'cpu'
device = 'cuda'

## torch_compile2.py
import torch._dynamo
from torch.fx.passes.graph_drawer import FxGraphDrawer
from functorch.compile import make_boxed_func
from torch._functorch.aot_autograd import aot_module_simplified

def f(x):
    """Sum the squared sine and squared cosine of ``x`` using torch ops."""
    squared_sine = torch.sin(x)**2
    squared_cosine = torch.cos(x)**2
    return squared_sine + squared_cosine

def inspect_backend(gm, sample_inputs):
    # Forward compiler capture

## torch_compile1.py
def inspect_backend(gm, sample_inputs):
    """Backend callable that dumps the graph it receives and runs it as-is.

    Calls ``gm.print_readable()``, renders the graph with ``FxGraphDrawer``
    and writes the SVG bytes to ``forward.svg`` in the current working
    directory.

    Args:
        gm: Graph module passed to the backend; must provide
            ``print_readable()`` and ``forward``.
        sample_inputs: Example inputs passed alongside the graph; unused.

    Returns:
        ``gm.forward``, unchanged.
    """
    # Called for its output only; the return value is not needed.
    gm.print_readable()
    # Render before opening the file so a drawing failure does not leave
    # an empty forward.svg behind.
    svg_bytes = FxGraphDrawer(gm, 'f').get_dot_graph().create_svg()
    with open("forward.svg", "wb") as svg_file:
        svg_file.write(svg_bytes)
    return gm.forward

# Reset TorchDynamo before compiling f with the inspecting backend.
torch._dynamo.reset()
compiled_f = torch.compile(f, backend=inspect_backend)

# Create the tensor directly on the target device. Calling .to(device) on a
# requires_grad tensor returns a non-leaf copy when the device differs, and
# backward() does not populate .grad on non-leaf tensors.
x = torch.rand(1000, requires_grad=True, device=device)

## aws_gpu_list.md

      
              1 file
            
          
              3 forks
            
          
              0 comments
            
          
              3 stars
            
          
                shashankprasanna
                / aws_gpu_list.md
            
            
              Last active
              May 7, 2024 04:42
            
          
Architecture
NVIDIA GPU
Instance type
Instance name
Number of GPUs
GPU Memory (per GPU)
GPU Interconnect (NVLink / PCIe)
Thermal Design Power (TDP) from nvidia-smi
Tensor Cores (mixed-precision)
Precision Support
CPU Type
Nitro based


Ampere
A100
P4
p4d.24xlarge
8
40 GB
NVLink gen 3 (600 GB/s)
400W
Tensor Cores (Gen 3)
FP64, FP32, FP16, INT8, BF16, TF32
Intel Xeon Scalable (Cascade Lake)
Yes


Ampere
A10G
G5
g5.xlarge
1
24 GB
NA (


## ai-accelerator-2.py
def image_preprocess(img, reps=1):
    """Resize an image to 224x224 and build a ResNet50 input batch from it.

    Args:
        img: Image object exposing ``resize((width, height))``
            (presumably a PIL image).
        reps: How many copies of the resized image to stack.

    Returns:
        The output of ``tf.keras.applications.resnet50.preprocess_input``
        applied to the stacked array.
    """
    pixels = np.asarray(img.resize((224, 224)))
    batch = np.stack([pixels] * reps)
    return tf.keras.applications.resnet50.preprocess_input(batch)

from PIL import Image
import numpy as np
import json

## ai-accelerator-1.py
from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor

# Key prefix passed to the upload call for the model archive.
prefix = 'keras_models'
s3_model_path = sess.upload_data(key_prefix=prefix, path='model.tar.gz')

# Wrap the uploaded archive in a TensorFlowModel.
model = TensorFlowModel(
    sagemaker_session=sess,
    predictor_cls=TensorFlowPredictor,
    role=role,
    framework_version='1.15',
    model_data=s3_model_path,
)

## sm-experiments-7.py
from smdebug.trials import create_trial

def tensor_df(tname):
    """Return the values of tensor ``tname`` from ``trial`` as a DataFrame.

    The mapping returned by ``trial.tensor(tname).values()`` becomes one row
    per key; the keys end up in a ``steps`` column and the values in a column
    named after ``tname``.
    """
    step_values = trial.tensor(tname).values()
    frame = pd.DataFrame.from_dict(step_values, orient='index', columns=[tname])
    return frame.reset_index().rename(columns={'index': 'steps'})

def trial_perf_curves(job_name, tname, experiment_name):
    # Build the S3 URI of the job's debug output from bucket_name (defined
    # outside this function), the experiment name and the job name.
    # NOTE(review): only this first statement is visible in the excerpt;
    # tname is not used within it.
    debug_data = f's3://{bucket_name}/{experiment_name}/{job_name}/debug-output'

## sm-experiments-6.py
from sagemaker.analytics import ExperimentAnalytics

# Name of the experiment whose trial components are analysed.
experiment_name = training_experiment.experiment_name

# Build the analytics object for that experiment and fetch its dataframe.
trial_component_analytics = ExperimentAnalytics(
    experiment_name=experiment_name,
    sagemaker_session=sagemaker_session,
)
trial_comp_ds = trial_component_analytics.dataframe()

## sm-experiments-5.py
for trial_hyp in trial_hyperparameter_set:
    # Merge the two dicts; trial-specific values win on duplicate keys.
    hyperparams = {**static_hyperparams, **trial_hyp}

    # Job name = fixed prefix + hyperparameter values + current Unix time.
    hyp_append = "-".join(str(elm) for elm in trial_hyp.values())
    time_append = int(time.time())
    job_name = f'cifar10-training-{hyp_append}-{time_append}'

    # Create a Tracker to track Trial specific hyperparameters
	from tensor import Tensor
	from math import trunc, mod
	# Presumably prints a Tensor (per the name); only the rank validation is
	# visible in this excerpt.
	fn tensorprint[type: DType](t: Tensor[type])->None:
	    # Rank of the input tensor, as reported by t.rank().
	    let rank = t.rank()
	    # Three Int slots initialised to 0; presumably one per dimension.
	    var dim0:Int=0
	    var dim1:Int=0
	    var dim2:Int=0
	    # Only ranks 1, 2 and 3 are accepted: report any other rank and return.
	    if rank==0 or rank>3:
	        print("Error: Tensor rank should be: 1,2, or 3. Tensor rank is ", rank)
	        return
	def f(x):
	    """Return ``sin(x)**2 + cos(x)**2`` computed with torch ops."""
	    # The exponent operators were missing here ("torch.sin(x)2").
	    return torch.sin(x)**2 + torch.cos(x)**2

	# Reset TorchDynamo before compiling f.
	torch._dynamo.reset()
	# Compile f with the inductor backend and both trace options switched on.
	compiled_f = torch.compile(
	    f,
	    backend='inductor',
	    options={
	        'trace.enabled': True,
	        'trace.graph_diagram': True,
	    },
	)

	# device = 'cpu'
	device = 'cuda'
	import torch._dynamo
	from torch.fx.passes.graph_drawer import FxGraphDrawer
	from functorch.compile import make_boxed_func
	from torch._functorch.aot_autograd import aot_module_simplified

	def f(x):
	    """Sum the squared sine and squared cosine of ``x`` using torch ops."""
	    # The exponent operators were missing here ("torch.cos(x)2").
	    return torch.sin(x)**2 + torch.cos(x)**2

	def inspect_backend(gm, sample_inputs):
	# Forward compiler capture
	def inspect_backend(gm, sample_inputs):
	    """Dump the received graph to stdout and ``forward.svg``; return ``gm.forward``."""
	    # Called for its output only; the return value is not needed.
	    gm.print_readable()
	    # Render before opening the file so a drawing failure does not leave
	    # an empty forward.svg behind.
	    svg_bytes = FxGraphDrawer(gm, 'f').get_dot_graph().create_svg()
	    with open("forward.svg", "wb") as svg_file:
	        svg_file.write(svg_bytes)
	    return gm.forward

	# Reset TorchDynamo before compiling f with the inspecting backend.
	torch._dynamo.reset()
	compiled_f = torch.compile(f, backend=inspect_backend)

	# Create the tensor directly on the target device: .to(device) on a
	# requires_grad tensor returns a non-leaf copy when the device differs,
	# and backward() does not populate .grad on non-leaf tensors.
	x = torch.rand(1000, requires_grad=True, device=device)
Architecture	NVIDIA GPU	Instance type	Instance name	Number of GPUs	GPU Memory (per GPU)	GPU Interconnect (NVLink / PCIe)	Thermal Design Power (TDP) from nvidia-smi	Tensor Cores (mixed-precision)	Precision Support	CPU Type	Nitro based
Ampere	A100	P4	p4d.24xlarge	8	40 GB	NVLink gen 3 (600 GB/s)	400W	Tensor Cores (Gen 3)	FP64, FP32, FP16, INT8, BF16, TF32	Intel Xeon Scalable (Cascade Lake)	Yes
Ampere	A10G	G5	g5.xlarge	1	24 GB	NA (
	def image_preprocess(img, reps=1):
	    """Resize ``img`` to 224x224, stack ``reps`` copies, apply ResNet50 preprocessing."""
	    img = np.asarray(img.resize((224, 224)))
	    img = np.stack([img]*reps)
	    img = tf.keras.applications.resnet50.preprocess_input(img)
	    return img

	from PIL import Image
	import numpy as np
	import json
	from sagemaker.tensorflow.model import TensorFlowModel, TensorFlowPredictor

	# Key prefix passed to the upload call for the model archive.
	prefix = 'keras_models'
	s3_model_path = sess.upload_data(key_prefix=prefix, path='model.tar.gz')

	# Wrap the uploaded archive in a TensorFlowModel.
	model = TensorFlowModel(
	    sagemaker_session=sess,
	    predictor_cls=TensorFlowPredictor,
	    role=role,
	    framework_version='1.15',
	    model_data=s3_model_path,
	)
	from smdebug.trials import create_trial

	def tensor_df(tname):
	    """Return the values of tensor ``tname`` from ``trial`` as a DataFrame with a ``steps`` column."""
	    tval = trial.tensor(tname).values()
	    df = pd.DataFrame.from_dict(tval, orient='index', columns=[tname])
	    df_tval = df.reset_index().rename(columns={'index': 'steps'})
	    return df_tval

	def trial_perf_curves(job_name, tname, experiment_name):
	    # Build the S3 URI of the job's debug output from bucket_name (defined
	    # outside this function), the experiment name and the job name.
	    # NOTE(review): only this first statement is visible in the excerpt.
	    debug_data = f's3://{bucket_name}/{experiment_name}/{job_name}/debug-output'
	from sagemaker.analytics import ExperimentAnalytics

	# Name of the experiment whose trial components are analysed.
	experiment_name = training_experiment.experiment_name

	# Build the analytics object for that experiment and fetch its dataframe.
	trial_component_analytics = ExperimentAnalytics(
	    experiment_name=experiment_name,
	    sagemaker_session=sagemaker_session,
	)
	trial_comp_ds = trial_component_analytics.dataframe()
	for trial_hyp in trial_hyperparameter_set:
	    # Combine static hyperparameters and trial specific hyperparameters.
	    # The ** unpacking is required: without it the braces build a set of
	    # dicts, which raises TypeError because dicts are unhashable.
	    hyperparams = {**static_hyperparams, **trial_hyp}

	    # Create unique job name with hyperparameter and time
	    time_append = int(time.time())
	    hyp_append = "-".join([str(elm) for elm in trial_hyp.values()])
	    job_name = f'cifar10-training-{hyp_append}-{time_append}'

	    # Create a Tracker to track Trial specific hyperparameters