Skip to content

Instantly share code, notes, and snippets.

View zarzen's full-sized avatar

Zhen Zhang zarzen

View GitHub Profile
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import datetime
import json
@zarzen
zarzen / launch.sh
Created April 16, 2021 18:04
nccl-tests mpirun launch
#!/bin/bash
NP=8
HOSTS="127.0.0.1:8"
MPI_HOME="/opt/amazon/openmpi"
TEST_BIN="/home/ubuntu/nccl-tests/build/all_reduce_perf"
MPI_BIN="${MPI_HOME}/bin/mpirun"
LD_LIBRARY_PATH="${MPI_HOME}/lib":$LD_LIBRARY_PATH
@zarzen
zarzen / limit_bandwidth.sh
Created January 4, 2021 22:51
根据IP限制带宽
#! /bin/bash
# 注意:这个脚本只有对本地有用,比如node0 上做了限制,但是iperf -s 在node0上运行
# node1 连接到 node0 通过iperf -c node0-ip -P5 这样的情况带宽无法得到限制
# 只能是node0 连接其他node时候这个限制有作用
# 原代码链接: https://serverfault.com/questions/191560/how-can-i-do-traffic-shaping-in-linux-by-ip
NETCARD=ens5 # 改这边
MAXBANDWIDTH=40000 # 选个大点的就行
# reinit
tc qdisc del dev $NETCARD root handle 1
@zarzen
zarzen / vgg16.py
Last active October 19, 2020 02:05
singleNodeTraining
from torchvision import datasets, transforms, models
import torch
import torchvision
from torch import optim
import os
import torch.nn.functional as F
__n_threads = 4
print('torch num threads:', __n_threads)
torch.set_num_threads(__n_threads)
@zarzen
zarzen / bandwidthTest.cu
Created October 7, 2020 17:13
PCIe bandwidth test
// CUDA runtime
#include <cuda_runtime.h>
#include <cuda.h>
#include <cassert>
#include <iostream>
#include <memory>
#include <thread>
#include <atomic>
import posix_ipc
from torchvision import models
import pickle
import mmap
from model.index import get_model_module
def main():
""""""
m = models.resnet152(pretrained=True)
model_b = pickle.dumps(m)
@zarzen
zarzen / memcpy_bw.cpp
Created April 1, 2020 20:43
multi-thread-memcpy
#include <iostream>
#include <chrono>
#include <sys/mman.h>
#include <cstring>
#include <thread>
#include <vector>
static const char integ_alphabet[] =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
static const int integ_alphabet_length =
@zarzen
zarzen / scheduler.log
Created April 1, 2020 01:35
ps-lite-efa-log
DMLC_ENABLE_RDMA=fabric PS_VERBOSE=2 NUM_KEY_PER_SERVER=40 DMLC_ROLE=scheduler bash tests/local_multi_workers.sh 1 1 tests/test_benchmark 1024000 100 1
[01:34:16] src/postoffice.cc:19: Creating Van: fabric
[01:34:16] src/./fabric_van.h:653: This is a scheduler
[01:34:16] src/./zmq_van.h:66: BYTEPS_ZMQ_MAX_SOCKET set to 1024
[01:34:16] src/./zmq_van.h:71: BYTEPS_ZMQ_NTHREADS set to 4
[01:34:16] src/./fabric_van.h:336: ~FabricContext
[01:34:16] src/./fabric_van.h:330: Endpoint created: [-2,-128,0,0,0,0,0,0,0,83,63,-1,-2,-128,-40,-43,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,] readable endpoint = fi_addr_efa://[fe80::53:3fff:fe80:d8d5]:1
[01:34:16] src/./fabric_van.h:189: aligned to pagesize 4096
[01:34:16] src/./zmq_van.h:294: Start ZMQ recv thread
[01:34:16] src/van.cc:397: Bind to [role=scheduler, id=1, ip=127.0.0.1, port=8000, is_recovery=0, aux_id=-1]
@zarzen
zarzen / log1
Created March 23, 2020 00:16
fabric-efa.log
ec2-user@ip-172-31-11-29:~/efa-bad-practice/build$ FI_EFA_RECVWIN_SIZE=500000000 FI_LOG_LEVEL=Debug ./shm_worker efa-client 1 0 1000000000
libfabric:10452:core:core:fi_param_get_():280<info> variable perf_cntr=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable hook=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_size=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_count=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_monitor=<not set>
libfabric:10452:core:mr:ofi_default_cache_size():56<info> default cache size=2754853148
libfabric:10452:core:core:fi_param_get_():280<info> variable provider=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable provider_path=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable perf_cntr=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable hook=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_max_size=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_max_count=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_monitor=<not set>
libfabric:20881:core:mr:ofi_default_cache_size():56<info> default cache size=2754818048
libfabric:20881:core:core:fi_param_get_():280<info> variable provider=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable provider_path=<not set>
libfabric:20881:core:core:ofi_register_provider():404<info> registering provider: shm (1.1)