Skip to content

Instantly share code, notes, and snippets.

View apivovarov's full-sized avatar

Alexander Pivovarov apivovarov

  • Amazon Web Services
  • Santa Clara, CA
  • 16:31 (UTC -07:00)
  • LinkedIn in/pivovaal
View GitHub Profile
@apivovarov
apivovarov / grad.py
Last active November 15, 2023 23:39
# Operands of the quotient whose derivative is estimated numerically below.
a = 5
b = 6

# Baseline value of the quotient at the unperturbed operands.
y = a / b

# Step size added to an operand when forming the finite difference.
h = 1e-05
def dy_da_f():
a2 = a + h
y2 = a2 / b
dy_da = (y2 - y) / h
import jax
from jax import Array
import jax.numpy as jnp
def init_params(key: Array, shape) -> Array:
    """Sample an array of the given shape and cast it to bfloat16.

    Args:
        key: PRNG key passed straight through to ``jax.random.normal``.
        shape: Shape of the array to sample.

    Returns:
        The sampled values converted to ``jax.dtypes.bfloat16``.
    """
    samples = jax.random.normal(key, shape)
    return samples.astype(jax.dtypes.bfloat16)
def softmax(x):
mx = x.max(axis=-1, keepdims=True)
mx = jax.lax.stop_gradient(mx)
HloModule xla_computation_ff, entry_computation_layout={(f32[1,224,224,3]{3,2,1,0})->(f32[1,224,224,3]{3,2,1,0})}
ENTRY main.20 {
Arg_0.1 = f32[1,224,224,3]{3,2,1,0} parameter(0)
multiply.10 = f32[1,224,224,3]{3,2,1,0} multiply(Arg_0.1, Arg_0.1)
multiply.11 = f32[1,224,224,3]{3,2,1,0} multiply(Arg_0.1, multiply.10)
constant.8 = f32[] constant(0.044715)
broadcast.9 = f32[1,224,224,3]{3,2,1,0} broadcast(constant.8), dimensions={}
multiply.12 = f32[1,224,224,3]{3,2,1,0} multiply(multiply.11, broadcast.9)
add.13 = f32[1,224,224,3]{3,2,1,0} add(Arg_0.1, multiply.12)
HloModule xla_computation_ff, entry_computation_layout={(f32[4,1000]{1,0})->(f32[4,1000]{1,0})}
region_0.4 {
Arg_0.5 = f32[] parameter(0)
Arg_1.6 = f32[] parameter(1)
ROOT maximum.7 = f32[] maximum(Arg_0.5, Arg_1.6)
}
region_1.15 {
Arg_0.16 = f32[] parameter(0)
@apivovarov
apivovarov / nrvo.cc
Created September 19, 2023 23:38
NRVO Test
#include <iostream>
#include <vector>
std::vector<int> testNRVO(int value, size_t size, const std::vector<int> **localVec)
{
std::vector<int> vec(size, value);
*localVec = &vec;
/* Do something here.. */
@apivovarov
apivovarov / test-roberta-pt.py
Created July 28, 2023 00:59
test-roberta-pt.py
import torch
from transformers import RobertaTokenizer, RobertaModel
torch.set_grad_enabled(False)
class RobertaTraceWrapper(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
def forward(self, x):
@apivovarov
apivovarov / cuda_check.cu
Last active June 7, 2023 23:11
Simple program to test whether nvcc/CUDA work
// To compile - nvcc cuda_check.cu -o cuda_check -lcuda
// To run ./cuda_check
// set g++ path to older g++ if needed - export NVCC_PREPEND_FLAGS='-ccbin
// /usr/local/gcc-11/bin/g++-11'
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <stdio.h>
/* Outputs some information on CUDA-enabled devices on your computer,
* including compute capability and current memory usage.
@apivovarov
apivovarov / fcn.py
Created May 1, 2023 22:36
Fully Connected Network model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchinfo import summary
in_sz = 28*28
n_epochs = 1
VV=8.5.3-1+cuda11.8
sudo apt install \
libcudnn8=8.9.0.131-1+cuda11.8 \
libcudnn8-dev=8.9.0.131-1+cuda11.8 \
libnvinfer-bin=$VV \
libnvinfer-dev=$VV \
libnvinfer-plugin-dev=$VV \
libnvinfer-plugin8=$VV \
libnvinfer8=$VV \
@apivovarov
apivovarov / ait_one_op_compile.py
Last active March 21, 2023 07:15
Compile test model with AITemplate AIT
import math
import numpy as np
import aitemplate
from aitemplate.frontend import nn, Tensor
from aitemplate.compiler.model import AITData
from aitemplate.compiler.base import IntVar, IntImm
class M1AIT(nn.Module):