Skip to content

Instantly share code, notes, and snippets.

View checkpointing_true_bug.log
21: M9 P[5, 6] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 5.1e+03, inflight [9]
-gather param for module 3: {'id': 0, 'status': 'AVAILABLE', 'numel': 78151680, 'persist': False, 'active_sub_modules': {3}}
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 9
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] module id 9 handle is None
22: M23 P[] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 7.8e+07, inflight [0, 23, 2, 1, 3]
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 23
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] module id 23 handle is None
-gather param for module 24: {'id': 151, 'status': 'NOT_AVAILABLE', 'numel': 6553600, 'persist': False, 'active_sub_modules': {24}}
-gather param for module 24: {'id': 152, 'status': 'AVAILABLE', 'numel': 2560, 'persist': True, 'active_sub_modules': {24}}
[2021-07-07 21:16:52,636] [INFO] [utils.py:629:info_rank_
@zarzen
zarzen / model_config.json
Created Jun 19, 2021
bert 5.1B model config
View model_config.json
{
"train_batch_size": 512,
"train_micro_batch_size_per_gpu": 8,
"steps_per_print": 100,
"prescale_gradients": false,
"bert_token_file": "bert-large-uncased",
"bert_model_config": {
"vocab_size_or_config_json_file": 32003,
"hidden_size": 2560,
"num_hidden_layers": 64,
@zarzen
zarzen / strip_latex.py
Created May 7, 2021
strip latex code for grammarly check
View strip_latex.py
import re
import argparse
def get_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with a single ``file`` attribute: the path
        passed via ``--file``, or None when the flag is omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--file')
    return parser.parse_args()
View etcd_rendz.py
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import datetime
import json
@zarzen
zarzen / launch.sh
Created Apr 16, 2021
nccl-tests mpirun launch
View launch.sh
#!/bin/bash
# Launch configuration for the nccl-tests all_reduce_perf benchmark via
# Open MPI (the mpirun invocation itself appears later in the script).
NP=8                 # number of MPI ranks
HOSTS="127.0.0.1:8"  # single node, 8 slots on localhost
MPI_HOME="/opt/amazon/openmpi"
TEST_BIN="/home/ubuntu/nccl-tests/build/all_reduce_perf"
MPI_BIN="${MPI_HOME}/bin/mpirun"
# Make Open MPI's shared libraries visible to the launched processes.
LD_LIBRARY_PATH="${MPI_HOME}/lib":$LD_LIBRARY_PATH
@zarzen
zarzen / limit_bandwidth.sh
Created Jan 4, 2021
Limit bandwidth by IP (根据IP限制带宽)
View limit_bandwidth.sh
#! /bin/bash
# NOTE: this script only shapes traffic originating from the local host.
# E.g. if the limit is configured on node0 while `iperf -s` runs on node0
# and node1 connects with `iperf -c node0-ip -P5`, that inbound bandwidth
# is NOT limited. The limit only takes effect when node0 itself initiates
# connections to other nodes.
# Original code: https://serverfault.com/questions/191560/how-can-i-do-traffic-shaping-in-linux-by-ip
NETCARD=ens5 # change this to your network interface
MAXBANDWIDTH=40000 # just pick a sufficiently large value
# reinit: remove any existing root qdisc on the interface before reconfiguring
tc qdisc del dev $NETCARD root handle 1
@zarzen
zarzen / vgg16.py
Last active Oct 19, 2020
singleNodeTraining
View vgg16.py
from torchvision import datasets, transforms, models
import torch
import torchvision
from torch import optim
import os
import torch.nn.functional as F
# Cap the number of intra-op CPU threads PyTorch uses for this run.
__n_threads = 4
print('torch num threads:', __n_threads)
torch.set_num_threads(__n_threads)
@zarzen
zarzen / bandwidthTest.cu
Created Oct 7, 2020
PCIe bandwidth test
View bandwidthTest.cu
// CUDA runtime
#include <cuda_runtime.h>
#include <cuda.h>
#include <cassert>
#include <iostream>
#include <memory>
#include <thread>
#include <atomic>
View shm_model.py
import posix_ipc
from torchvision import models
import pickle
import mmap
from model.index import get_model_module
def main():
""""""
m = models.resnet152(pretrained=True)
model_b = pickle.dumps(m)
@zarzen
zarzen / memcpy_bw.cpp
Created Apr 1, 2020
multi-thread-memcpy
View memcpy_bw.cpp
#include <iostream>
#include <chrono>
#include <sys/mman.h>
#include <cstring>
#include <thread>
#include <vector>
static const char integ_alphabet[] =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
static const int integ_alphabet_length =