This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
from threading import Lock | |
import time | |
import logging | |
import sys | |
import torch | |
import torch.distributed.autograd as dist_autograd | |
import torch.distributed.rpc as rpc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.distributed as dist | |
import os | |
import torch.multiprocessing as mp | |
import torch.nn as nn | |
import pytorch_lightning as pl | |
from pytorch_lightning.core.lightning import LightningModule | |
from contextlib import nullcontext | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Process 0 ----- | |
#0 0x00007fff70b5269e in clock_gettime () | |
#1 0x00007fbe97a5a7fd in clock_gettime () from /lib64/libc.so.6 | |
#2 0x00007fbe3e3f99ae in ?? () from /lib64/libcuda.so.1 | |
#3 0x00007fbe3e4c12c7 in ?? () from /lib64/libcuda.so.1 | |
#4 0x00007fbe3e3a1cac in ?? () from /lib64/libcuda.so.1 | |
#5 0x00007fbe3e3d9502 in ?? () from /lib64/libcuda.so.1 | |
#6 0x00007fbe3e3165e8 in ?? () from /lib64/libcuda.so.1 | |
#7 0x00007fbe3e316cd4 in ?? () from /lib64/libcuda.so.1 | |
#8 0x00007fbe8308b1e7 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm build/CMakeCache.txt ; python setup.py clean ; BUILD_CAFFE2_OPS=0 USE_NCCL=1 DEBUG=1 USE_DISTRIBUTED=1 USE_MKLDNN=0 USE_CUDA=1 USE_FBGEMM=0 USE_NNPACK=0 USE_QNNPACK=0 USE_XNNPACK=0 python setup.py develop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Enter this in the terminal (for Sublime Text 3):
defaults write com.sublimetext.3 ApplePressAndHoldEnabled -bool false
For VSCodeVim: | |
defaults write com.microsoft.VSCode ApplePressAndHoldEnabled -bool false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MyModule(torch.jit.ScriptModule): | |
def __init__(self): | |
super().__init__() | |
@torch.jit.script_method | |
def method(self, x): | |
return x | |
def forward(self) -> Tensor: | |
res_tensor = torch.ones(2, 2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Run this script: | |
#!/bin/bash | |
if ! ls ~/ccache/bin/ccache | |
then | |
set -ex | |
sudo apt-get update | |
sudo apt-get install -y cmake | |
mkdir -p ~/ccache | |
pushd ~/ccache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.distributed as dist | |
import os | |
import torch.multiprocessing as mp | |
import torch.nn as nn | |
import contextlib | |
class enc(nn.Module): | |
def __init__(self): | |
super().__init__() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.distributed as dist | |
import os | |
import torch.multiprocessing as mp | |
import torch.nn as nn | |
import contextlib | |
class enc(nn.Module): | |
def __init__(self): | |
super().__init__() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--local_rank", type=int) | |
args = parser.parse_args() | |
print(f"trainer got local_rank {args.local_rank}") | |
import torch | |
import torch.distributed as dist | |
torch.cuda.set_device(args.local_rank) | |
dist.init_process_group(backend="nccl", init_method="env://") |