This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Numerator and denominator of the quotient being differentiated.
a = 5
b = 6
# Baseline value of the quotient at the unperturbed inputs.
y = a / b
# Step added to `a` for the finite-difference estimate in dy_da_f.
h = 0.00001
def dy_da_f(): | |
a2 = a + h | |
y2 = a2 / b | |
dy_da = (y2 - y) / h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import jax | |
from jax import Array | |
import jax.numpy as jnp | |
def init_params(key: Array, shape) -> Array:
    """Draw normally distributed parameters and cast them to bfloat16.

    Args:
        key: JAX PRNG key forwarded to ``jax.random.normal``.
        shape: Shape of the array to sample.

    Returns:
        An array of the requested shape with dtype bfloat16.
    """
    # Sample with jax.random.normal's default dtype, then narrow to bfloat16.
    samples = jax.random.normal(key, shape)
    return samples.astype(jax.dtypes.bfloat16)
def softmax(x): | |
mx = x.max(axis=-1, keepdims=True) | |
mx = jax.lax.stop_gradient(mx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HloModule xla_computation_ff, entry_computation_layout={(f32[1,224,224,3]{3,2,1,0})->(f32[1,224,224,3]{3,2,1,0})} | |
ENTRY main.20 { | |
Arg_0.1 = f32[1,224,224,3]{3,2,1,0} parameter(0) | |
multiply.10 = f32[1,224,224,3]{3,2,1,0} multiply(Arg_0.1, Arg_0.1) | |
multiply.11 = f32[1,224,224,3]{3,2,1,0} multiply(Arg_0.1, multiply.10) | |
constant.8 = f32[] constant(0.044715) | |
broadcast.9 = f32[1,224,224,3]{3,2,1,0} broadcast(constant.8), dimensions={} | |
multiply.12 = f32[1,224,224,3]{3,2,1,0} multiply(multiply.11, broadcast.9) | |
add.13 = f32[1,224,224,3]{3,2,1,0} add(Arg_0.1, multiply.12) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HloModule xla_computation_ff, entry_computation_layout={(f32[4,1000]{1,0})->(f32[4,1000]{1,0})} | |
region_0.4 { | |
Arg_0.5 = f32[] parameter(0) | |
Arg_1.6 = f32[] parameter(1) | |
ROOT maximum.7 = f32[] maximum(Arg_0.5, Arg_1.6) | |
} | |
region_1.15 { | |
Arg_0.16 = f32[] parameter(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
std::vector<int> testNRVO(int value, size_t size, const std::vector<int> **localVec) | |
{ | |
std::vector<int> vec(size, value); | |
*localVec = &vec; | |
/* Do something here.. */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import RobertaTokenizer, RobertaModel | |
torch.set_grad_enabled(False) | |
class RobertaTraceWrapper(torch.nn.Module): | |
def __init__(self, model): | |
super().__init__() | |
self.model = model | |
def forward(self, x): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// To compile - nvcc cuda_check.cu -o cuda_check -lcuda | |
// To run ./cuda_check | |
// set g++ path to older g++ if needed - export NVCC_PREPEND_FLAGS='-ccbin | |
// /usr/local/gcc-11/bin/g++-11' | |
#include <cuda.h> | |
#include <cuda_runtime_api.h> | |
#include <stdio.h> | |
/* Outputs some information on CUDA-enabled devices on your computer, | |
* including compute capability and current memory usage. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import torch.optim as optim | |
import torchvision | |
from torchinfo import summary | |
in_sz = 28*28 | |
n_epochs = 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
VV=8.5.3-1+cuda11.8 | |
sudo apt install \ | |
libcudnn8=8.9.0.131-1+cuda11.8 \ | |
libcudnn8-dev=8.9.0.131-1+cuda11.8 \ | |
libnvinfer-bin=$VV \ | |
libnvinfer-dev=$VV \ | |
libnvinfer-plugin-dev=$VV \ | |
libnvinfer-plugin8=$VV \ | |
libnvinfer8=$VV \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import numpy as np | |
import aitemplate | |
from aitemplate.frontend import nn, Tensor | |
from aitemplate.compiler.model import AITData | |
from aitemplate.compiler.base import IntVar, IntImm | |
class M1AIT(nn.Module): |
NewerOlder