python3 test_diff_stages.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting. | |
# https://developer.nvidia.com/nsight-systems | |
# https://docs.nvidia.com/nsight-systems/profiling/index.html | |
# My preferred nsys (command line executable used to create profiles) commands | |
# | |
# In your script, write | |
# torch.cuda.nvtx.range_push("region name") | |
# ... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set default-terminal to a 256-color terminal type so colors display correctly | |
set -g default-terminal "xterm-256color" | |
# set-option -ga terminal-overrides ',xterm-256color:Tc' | |
# Undercurl | |
set-option -g default-terminal "tmux-256color" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Install packer | |
local install_path = vim.fn.stdpath 'data' .. '/site/pack/packer/start/packer.nvim' | |
local is_bootstrap = false | |
if vim.fn.empty(vim.fn.glob(install_path)) > 0 then | |
is_bootstrap = true | |
vim.fn.system { 'git', 'clone', '--depth', '1', 'https://github.com/wbthomason/packer.nvim', install_path } | |
vim.cmd [[packadd packer.nvim]] | |
end | |
require('packer').startup(function(use) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
call to _all_gather_base with c10d._coalescing_manager | |
Test command: | |
mpirun -np $1 -N ${ndev_per_node} --hostfile ${HOST_FILE} \ | |
--mca plm_rsh_no_tree_spawn 1 \ | |
-mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 \ | |
--mca pml ^cm \ | |
-bind-to none \ | |
--tag-output \ | |
-x LD_LIBRARY_PATH=$LD_LIBRARY_PATH \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/csrc/lamb/fused_lamb_cuda_kernel.cu b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
index e934b69c..207faa39 100644 | |
--- a/csrc/lamb/fused_lamb_cuda_kernel.cu | |
+++ b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
@@ -8,7 +8,7 @@ | |
#include "ATen/cuda/CUDAContext.h" | |
#include "ATen/cuda/detail/IndexUtils.cuh" | |
//#include "ATen/Type.h" | |
-#include <THC/THCGeneral.h> | |
+// #include <THC/THCGeneral.h> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/csrc/lamb/fused_lamb_cuda_kernel.cu b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
index 0448a45..ff87993 100644 | |
--- a/csrc/lamb/fused_lamb_cuda_kernel.cu | |
+++ b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
@@ -464,7 +464,7 @@ void fused_lamb_cuda(at::Tensor& p, | |
lamb_coeff.data<scalar_t>()); | |
})); | |
} | |
- THCudaCheck(cudaGetLastError()); | |
+ AT_CUDA_CHECK(cudaGetLastError()); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
21: M9 P[5, 6] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 5.1e+03, inflight [9] | |
-gather param for module 3: {'id': 0, 'status': 'AVAILABLE', 'numel': 78151680, 'persist': False, 'active_sub_modules': {3}} | |
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 9 | |
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] module id 9 handle is None | |
22: M23 P[] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 7.8e+07, inflight [0, 23, 2, 1, 3] | |
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 23 | |
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] module id 23 handle is None | |
-gather param for module 24: {'id': 151, 'status': 'NOT_AVAILABLE', 'numel': 6553600, 'persist': False, 'active_sub_modules': {24}} | |
-gather param for module 24: {'id': 152, 'status': 'AVAILABLE', 'numel': 2560, 'persist': True, 'active_sub_modules': {24}} | |
[2021-07-07 21:16:52,636] [INFO] [utils.py:629:info_rank_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"train_batch_size": 512, | |
"train_micro_batch_size_per_gpu": 8, | |
"steps_per_print": 100, | |
"prescale_gradients": false, | |
"bert_token_file": "bert-large-uncased", | |
"bert_model_config": { | |
"vocab_size_or_config_json_file": 32003, | |
"hidden_size": 2560, | |
"num_hidden_layers": 64, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import argparse | |
def get_args(argv=None):
    """Parse the command-line options for this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse falls back to ``sys.argv[1:]``, so a
            plain ``get_args()`` call behaves as before.

    Returns:
        An ``argparse.Namespace`` whose ``file`` attribute holds the value
        passed to ``--file``, or ``None`` when the option is omitted.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--file')
    # Passing argv through lets callers drive the parser without
    # touching the process-wide sys.argv.
    return arg_parser.parse_args(argv)
NewerOlder