Zhen Zhang zarzen

## send-packet.py
from scapy.all import *

def main():
    """Build a single IP/TCP packet with a short text payload and send it.

    The destination address is hard-coded, and the TCP layer is created
    without arguments. The finished packet is handed to ``send``.
    """
    ip_layer = IP(dst="192.168.100.123")
    tcp_layer = TCP()
    payload = "from scapy packet"
    # Stack the layers in the same left-to-right order: IP / TCP / raw payload.
    send(ip_layer / tcp_layer / payload)


def packet_with_seq_n():

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zarzen
                / README.md
            
            
              Last active
              November 8, 2021 19:25
            
              
                deepspeed_loss_test
              
          
    Usage

python3 test_diff_stages.py


## checkpointing_true_bug.log
21: M9 P[5, 6] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 5.1e+03, inflight [9]
-gather param for module 3: {'id': 0, 'status': 'AVAILABLE', 'numel': 78151680, 'persist': False, 'active_sub_modules': {3}}
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 9
[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] module id 9 handle is None
22: M23 P[] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 7.8e+07, inflight [0, 23, 2, 1, 3]
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 23
[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] module id 23 handle is None
-gather param for module 24: {'id': 151, 'status': 'NOT_AVAILABLE', 'numel': 6553600, 'persist': False, 'active_sub_modules': {24}}
-gather param for module 24: {'id': 152, 'status': 'AVAILABLE', 'numel': 2560, 'persist': True, 'active_sub_modules': {24}}
[2021-07-07 21:16:52,636] [INFO] [utils.py:629:info_rank_

## model_config.json
{
    "train_batch_size": 512,
    "train_micro_batch_size_per_gpu": 8,
    "steps_per_print": 100,
    "prescale_gradients": false,
    "bert_token_file": "bert-large-uncased",
    "bert_model_config": {
        "vocab_size_or_config_json_file": 32003,
        "hidden_size": 2560,
        "num_hidden_layers": 64,

## strip_latex.py
import re
import argparse

def get_args():
    """Parse the command line and return the resulting namespace.

    Only one option is defined, ``--file``. It is optional; its value is
    available as ``file`` on the returned namespace and is ``None`` when the
    option is not given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--file')
    return cli.parse_args()

## etcd_rendz.py
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import datetime
import json

## launch.sh
#!/bin/bash

NP=8
HOSTS="127.0.0.1:8"
MPI_HOME="/opt/amazon/openmpi"
TEST_BIN="/home/ubuntu/nccl-tests/build/all_reduce_perf"

MPI_BIN="${MPI_HOME}/bin/mpirun"
LD_LIBRARY_PATH="${MPI_HOME}/lib":$LD_LIBRARY_PATH

## limit_bandwidth.sh
#! /bin/bash
# Note: this script only takes effect locally. For example, if the limit is set on node0 but `iperf -s` runs on node0
# and node1 connects to node0 via `iperf -c node0-ip -P5`, the bandwidth is not limited in that case.
# The limit only applies when node0 is the one connecting to other nodes.
# Original code: https://serverfault.com/questions/191560/how-can-i-do-traffic-shaping-in-linux-by-ip
NETCARD=ens5 # change this
MAXBANDWIDTH=40000 # just pick a large value

# reinit: delete the root qdisc (handle 1) on $NETCARD
tc qdisc del dev $NETCARD root handle 1

## vgg16.py
from torchvision import datasets, transforms, models
import torch
import torchvision
from torch import optim
import os
import torch.nn.functional as F

__n_threads = 4
print('torch num threads:', __n_threads)
torch.set_num_threads(__n_threads)

## shm_model.py
import posix_ipc
from torchvision import models
import pickle
import mmap
from model.index import get_model_module

def main():
    """"""
    m = models.resnet152(pretrained=True)
    model_b = pickle.dumps(m)
	from scapy.all import *

	def main():
	"""
	"""
	packet = IP(dst="192.168.100.123")/TCP()/"from scapy packet"
	send(packet)


	def packet_with_seq_n():
	21: M9 P[5, 6] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 5.1e+03, inflight [9]
	-gather param for module 3: {'id': 0, 'status': 'AVAILABLE', 'numel': 78151680, 'persist': False, 'active_sub_modules': {3}}
	[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 9
	[2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] module id 9 handle is None
	22: M23 P[] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 7.8e+07, inflight [0, 23, 2, 1, 3]
	[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 23
	[2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] module id 23 handle is None
	-gather param for module 24: {'id': 151, 'status': 'NOT_AVAILABLE', 'numel': 6553600, 'persist': False, 'active_sub_modules': {24}}
	-gather param for module 24: {'id': 152, 'status': 'AVAILABLE', 'numel': 2560, 'persist': True, 'active_sub_modules': {24}}
	[2021-07-07 21:16:52,636] [INFO] [utils.py:629:info_rank_
	{
	"train_batch_size": 512,
	"train_micro_batch_size_per_gpu": 8,
	"steps_per_print": 100,
	"prescale_gradients": false,
	"bert_token_file": "bert-large-uncased",
	"bert_model_config": {
	"vocab_size_or_config_json_file": 32003,
	"hidden_size": 2560,
	"num_hidden_layers": 64,
	import re
	import argparse

	def get_args():
	arg_parser = argparse.ArgumentParser()
	arg_parser.add_argument('--file')

	args = arg_parser.parse_args()
	return args
	#!/usr/bin/env python3

	# Copyright (c) Facebook, Inc. and its affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	import datetime
	import json
	#!/bin/bash

	NP=8
	HOSTS="127.0.0.1:8"
	MPI_HOME="/opt/amazon/openmpi"
	TEST_BIN="/home/ubuntu/nccl-tests/build/all_reduce_perf"

	MPI_BIN="${MPI_HOME}/bin/mpirun"
	LD_LIBRARY_PATH="${MPI_HOME}/lib":$LD_LIBRARY_PATH
	#! /bin/bash
	# Note: this script only takes effect locally. For example, if the limit is set on node0 but `iperf -s` runs on node0
	# and node1 connects to node0 via `iperf -c node0-ip -P5`, the bandwidth is not limited in that case.
	# The limit only applies when node0 is the one connecting to other nodes.
	# Original code: https://serverfault.com/questions/191560/how-can-i-do-traffic-shaping-in-linux-by-ip
	NETCARD=ens5 # change this
	MAXBANDWIDTH=40000 # just pick a large value

	# reinit: delete the root qdisc (handle 1) on $NETCARD
	tc qdisc del dev $NETCARD root handle 1
	from torchvision import datasets, transforms, models
	import torch
	import torchvision
	from torch import optim
	import os
	import torch.nn.functional as F

	__n_threads = 4
	print('torch num threads:', __n_threads)
	torch.set_num_threads(__n_threads)
	import posix_ipc
	from torchvision import models
	import pickle
	import mmap
	from model.index import get_model_module

	def main():
	""""""
	m = models.resnet152(pretrained=True)
	model_b = pickle.dumps(m)