This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cpp_fused_mul_sum_0 = async_compile.cpp_pybinding(['const float*', 'const float*', 'float*'], ''' | |
#include "/tmp/torchinductor_root/lg/clghje745biezhrbrw5fghxqjaj76ck5jms7466s4ax63eruswf5.h" | |
extern "C" void kernel(const float* in_ptr0, | |
const float* in_ptr1, | |
float* out_ptr0) | |
{ | |
{ | |
{ | |
float tmp_acc0 = 0; | |
at::vec::Vectorized<float> tmp_acc0_vec = at::vec::Vectorized<float>(0); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Optional, Union | |
import torch | |
from torch import Tensor | |
from torch.optim.optimizer import _get_value, _dispatch_sqrt | |
# Module-level benchmark constants.
# NOTE(review): presumably NPARAM is the number of parameter tensors and
# TENSOR_SIZE the element count of each (1024 * 1024 = 1,048,576) -- the code
# that consumes them is not visible here; confirm against the rest of the file.
# NOTE(review): the trailing " | |" on these lines looks like a
# table-extraction artifact rather than Python syntax -- confirm against the
# original file.
NPARAM = 10 | |
TENSOR_SIZE = 1024 * 1024 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ctypes import c_void_p, c_long | |
import torch | |
import math | |
import random | |
import os | |
import tempfile | |
from math import inf, nan | |
from torch._inductor.hooks import run_intermediate_hooks | |
from torch._inductor.utils import maybe_profile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ctypes import c_void_p, c_long | |
import torch | |
import math | |
import random | |
import os | |
import tempfile | |
from math import inf, nan | |
from torch._inductor.hooks import run_intermediate_hooks | |
from torch._inductor.utils import maybe_profile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.optim.sgd import _single_tensor_sgd, _fused_sgd | |
import copy | |
# Benchmark configuration for the SGD comparison script.
# NOTE(review): the trailing " | |" / " |" on these lines looks like a
# table-extraction artifact rather than Python syntax -- confirm against the
# original file.
# Device string and dtype; presumably used when creating the benchmark
# tensors -- the consuming code is not visible here.
device='cpu' | |
dtype=torch.float | |
import os | |
# One-element tuple whose value is read from the TENSOR_SIZE environment
# variable, falling back to 512 * 512 (262144) when it is unset.
TENSOR_SIZE = (int(os.getenv('TENSOR_SIZE', 512 * 512)), ) | |
# Read from the NPARAM environment variable, falling back to 4 when unset.
# Presumably the number of parameter tensors -- TODO confirm against usage.
NPARAM = int(os.getenv("NPARAM", 4)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Launcher for sgd.py: sets allocator / OpenMP environment variables, exports
# the benchmark size parameters, then runs the script under numactl.
# NOTE(review): the trailing " | |" / " |" on each line looks like a
# table-extraction artifact rather than shell syntax -- confirm against the
# original script before running.

# Preload libiomp5.so and libjemalloc.so from <prefix>/lib, where <prefix> is
# $CONDA_PREFIX or, when that is unset or empty, the parent of the directory
# containing the `conda` executable found on PATH.
export LD_PRELOAD=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libiomp5.so:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libjemalloc.so | |
# Allocator tuning options (MALLOC_CONF is the option string read by jemalloc).
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1" | |
# OpenMP runtime settings (KMP_* variables are read by the Intel OpenMP runtime).
export KMP_AFFINITY=granularity=fine,compact,1,0 | |
export KMP_BLOCKTIME=1 | |
# Benchmark size parameters; presumably picked up by sgd.py through the
# environment -- verify against the script.
export TENSOR_SIZE=262144 | |
export NPARAM=4 | |
# Limit OpenMP to a single thread.
export OMP_NUM_THREADS=1 | |
echo "Tensor Size: $TENSOR_SIZE, Num Tensor $NPARAM, Num Threads: $OMP_NUM_THREADS" | |
# Run bound to CPU 1 (-C 1) with memory allocated from NUMA node 0 (-m 0).
numactl -C 1 -m 0 python sgd.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.optim.adagrad import _single_tensor_adagrad, _fused_adagrad | |
import copy | |
# Benchmark configuration for the Adagrad comparison script.
# NOTE(review): the trailing " | |" / " |" on these lines looks like a
# table-extraction artifact rather than Python syntax -- confirm against the
# original file.
# Device string and dtype; presumably used when creating the benchmark
# tensors -- the consuming code is not visible here.
device='cpu' | |
dtype=torch.float | |
import os | |
# One-element tuple whose value is read from the TENSOR_SIZE environment
# variable, falling back to 512 * 512 (262144) when it is unset.
TENSOR_SIZE = (int(os.getenv('TENSOR_SIZE', 512 * 512)), ) | |
# Read from the NPARAM environment variable, falling back to 4 when unset.
# Presumably the number of parameter tensors -- TODO confirm against usage.
NPARAM = int(os.getenv("NPARAM", 4)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Launcher for adagrad.py: sets allocator / OpenMP environment variables,
# exports the benchmark size parameters, then runs the script under numactl.
# NOTE(review): the trailing " | |" / " |" on each line looks like a
# table-extraction artifact rather than shell syntax -- confirm against the
# original script before running.

# Preload libiomp5.so and libjemalloc.so from <prefix>/lib, where <prefix> is
# $CONDA_PREFIX or, when that is unset or empty, the parent of the directory
# containing the `conda` executable found on PATH.
export LD_PRELOAD=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libiomp5.so:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}/lib/libjemalloc.so | |
# Allocator tuning options (MALLOC_CONF is the option string read by jemalloc).
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1" | |
# OpenMP runtime settings (KMP_* variables are read by the Intel OpenMP runtime).
export KMP_AFFINITY=granularity=fine,compact,1,0 | |
export KMP_BLOCKTIME=1 | |
# Benchmark size parameters; presumably picked up by adagrad.py through the
# environment -- verify against the script.
export TENSOR_SIZE=262144 | |
export NPARAM=4 | |
# Limit OpenMP to a single thread.
export OMP_NUM_THREADS=1 | |
echo "Tensor Size: $TENSOR_SIZE, Num Tensor $NPARAM, Num Threads: $OMP_NUM_THREADS" | |
# Run bound to CPU 1 (-C 1) with memory allocated from NUMA node 0 (-m 0).
numactl -C 1 -m 0 python adagrad.py |