Skip to content

Instantly share code, notes, and snippets.

Avatar

Rohan Varma rohan-varma

View GitHub Profile
View ddp_custom_func.py
import torch
import torch.nn as nn
from torch.autograd import Function
class PassThrough(Function):
    """Autograd ``Function`` whose forward pass hands its inputs back unchanged.

    NOTE(review): only ``forward`` is visible in this excerpt. No ``backward``
    is defined in the code shown, so differentiating through this function is
    presumably handled in the part of the file not visible here -- confirm.
    """

    @staticmethod
    def forward(ctx, *inputs):
        """Return the positional inputs exactly as they were received.

        Args:
            ctx: Autograd context object; not used by this method.
            *inputs: Arbitrary positional arguments.

        Returns:
            The tuple of ``inputs``, untouched.
        """
        return inputs
View checkpoint.py
import argparse
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
import torch
#print(torch.__file__) ; exit()
import torch.nn as nn
import torch.nn.parallel
import torch.distributed as dist
import torch.multiprocessing as mp
View named_modules.py
# Print the qualified name of every parameter, visiting each module of ``net``
# once and listing only the parameters that module owns directly.
for mn, m in net.named_modules():
    # recurse=False keeps each parameter from being reported again by every
    # ancestor module that contains it.
    for param_name, param in m.named_parameters(recurse=False):
        # The root module is yielded with an empty name; joining with "." in
        # that case would produce a bogus leading dot (".weight").
        qualname = f"{mn}.{param_name}" if mn else param_name
        print(f"Got qualname {qualname}")
@rohan-varma
rohan-varma / sed.sh
Created Feb 12, 2021
Example sed command to sed for all cpp files
View sed.sh
# Find every regular file named *.cpp under the current directory and rewrite
# it in place, replacing each occurrence of
#   c10::static_intrusive_pointer_cast<c10::RRefInterface>
# with
#   fromOwnerRRef
# -print0 / xargs -0 pass file names NUL-delimited so names containing
# whitespace survive the pipe.
# NOTE(review): `sed -i` with no suffix argument is GNU sed syntax; BSD/macOS
# sed presumably needs `-i ''` -- confirm before running there.
find . -name '*.cpp' -type f -print0 | xargs -0 sed -i 's/c10::static_intrusive_pointer_cast<c10::RRefInterface>/fromOwnerRRef/g'
View grad_accs.py
import torch
import torch.nn as nn
def get_param_to_grad_accs(model):
    """Map each parameter of ``model`` to its gradient-accumulator node.

    Args:
        model: Object exposing ``parameters(recurse=True)`` that yields
            tensors (e.g. an ``nn.Module``).

    Returns:
        dict: ``{parameter: grad_accumulator}``. Parameters whose expanded
        view has no ``grad_fn`` (for example frozen parameters, or when
        gradient tracking is disabled) have no accumulator and are omitted.
    """
    param_to_grad_accs = {}
    for param in model.parameters(recurse=True):
        # A leaf tensor has no grad_fn of its own; expanding it to its own
        # shape yields a view whose grad_fn links back to the leaf.
        grad_fn = param.expand_as(param).grad_fn
        if grad_fn is None:
            # Nothing is tracked for this parameter, so there is no
            # accumulator node to look up.
            continue
        # The first input edge of the view's grad_fn is the node that
        # accumulates into the parameter.
        param_to_grad_accs[param] = grad_fn.next_functions[0][0]
    return param_to_grad_accs
View rpc_parameter_server.py
import argparse
import os
from threading import Lock
import time
import logging
import sys
import torch
import torch.distributed.autograd as dist_autograd
import torch.distributed.rpc as rpc
View join_test.py
import torch
import torch.distributed as dist
import os
import torch.multiprocessing as mp
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.core.lightning import LightningModule
from contextlib import nullcontext
View nccl deadlock
--- Process 0 -----
#0 0x00007fff70b5269e in clock_gettime ()
#1 0x00007fbe97a5a7fd in clock_gettime () from /lib64/libc.so.6
#2 0x00007fbe3e3f99ae in ?? () from /lib64/libcuda.so.1
#3 0x00007fbe3e4c12c7 in ?? () from /lib64/libcuda.so.1
#4 0x00007fbe3e3a1cac in ?? () from /lib64/libcuda.so.1
#5 0x00007fbe3e3d9502 in ?? () from /lib64/libcuda.so.1
#6 0x00007fbe3e3165e8 in ?? () from /lib64/libcuda.so.1
#7 0x00007fbe3e316cd4 in ?? () from /lib64/libcuda.so.1
#8 0x00007fbe8308b1e7 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2
View pytorch command
# Drop the cached CMake configuration, clean the previous build, then rebuild
# in develop mode with the feature flags below passed as environment variables
# to setup.py. Written with explicit line continuations so the variable
# assignments stay attached to the final `python setup.py develop` command.
rm build/CMakeCache.txt ; python setup.py clean ; \
BUILD_CAFFE2_OPS=0 USE_NCCL=1 DEBUG=1 USE_DISTRIBUTED=1 USE_MKLDNN=0 \
USE_CUDA=1 USE_FBGEMM=0 USE_NNPACK=0 USE_QNNPACK=0 USE_XNNPACK=0 \
python setup.py develop
@rohan-varma
rohan-varma / enable key repeat in sublime vintage mode
Last active Nov 28, 2020
enable key repeat in sublime vintage mode
View enable key repeat in sublime vintage mode
Enter this in the terminal:
defaults write com.sublimetext.3 ApplePressAndHoldEnabled -bool false
For VSCodeVim:
defaults write com.microsoft.VSCode ApplePressAndHoldEnabled -bool false