Skip to content

Instantly share code, notes, and snippets.

View scheduler.log
DMLC_ENABLE_RDMA=fabric PS_VERBOSE=2 NUM_KEY_PER_SERVER=40 DMLC_ROLE=scheduler bash tests/local_multi_workers.sh 1 1 tests/test_benchmark 1024000 100 1
[01:34:16] src/postoffice.cc:19: Creating Van: fabric
[01:34:16] src/./fabric_van.h:653: This is a scheduler
[01:34:16] src/./zmq_van.h:66: BYTEPS_ZMQ_MAX_SOCKET set to 1024
[01:34:16] src/./zmq_van.h:71: BYTEPS_ZMQ_NTHREADS set to 4
[01:34:16] src/./fabric_van.h:336: ~FabricContext
[01:34:16] src/./fabric_van.h:330: Endpoint created: [-2,-128,0,0,0,0,0,0,0,83,63,-1,-2,-128,-40,-43,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,] readable endpoint = fi_addr_efa://[fe80::53:3fff:fe80:d8d5]:1
[01:34:16] src/./fabric_van.h:189: aligned to pagesize 4096
[01:34:16] src/./zmq_van.h:294: Start ZMQ recv thread
[01:34:16] src/van.cc:397: Bind to [role=scheduler, id=1, ip=127.0.0.1, port=8000, is_recovery=0, aux_id=-1]
@zarzen
zarzen / log1
Created Mar 23, 2020
fabric-efa.log
View log1
ec2-user@ip-172-31-11-29:~/efa-bad-practice/build$ FI_EFA_RECVWIN_SIZE=500000000 FI_LOG_LEVEL=Debug ./shm_worker efa-client 1 0 1000000000
libfabric:10452:core:core:fi_param_get_():280<info> variable perf_cntr=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable hook=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_size=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_max_count=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable mr_cache_monitor=<not set>
libfabric:10452:core:mr:ofi_default_cache_size():56<info> default cache size=2754853148
libfabric:10452:core:core:fi_param_get_():280<info> variable provider=<not set>
libfabric:10452:core:core:fi_param_get_():280<info> variable provider_path=<not set>
View cli-fabric.log
libfabric:20881:core:core:fi_param_get_():280<info> variable perf_cntr=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable hook=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_max_size=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_max_count=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_merge_regions=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable mr_cache_monitor=<not set>
libfabric:20881:core:mr:ofi_default_cache_size():56<info> default cache size=2754818048
libfabric:20881:core:core:fi_param_get_():280<info> variable provider=<not set>
libfabric:20881:core:core:fi_param_get_():280<info> variable provider_path=<not set>
libfabric:20881:core:core:ofi_register_provider():404<info> registering provider: shm (1.1)
@zarzen
zarzen / hello.cpp
Created Mar 5, 2020
libfabric-hello
View hello.cpp
/**********************************************************************
* Simple Hello Test
* for
* Open Fabric Interface 1.x
*
* Jianxin Xiong
* (jianxin.xiong@intel.com)
* 2013-2017
* ********************************************************************/
#include <iostream>
@zarzen
zarzen / mimic_training_log_analysis_results.txt
Created Jan 11, 2020
mimic distributed training results
View mimic_training_log_analysis_results.txt
{
"folder": "20191220-013118-40Gbit-100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178054802.176,
"actual_model_size": 178618432,
"missed_param_percent": 0.003155496427154847,
"mimic_coll_ops_time": 129155.102,
"horovod_coll_ops_time": 142147.65263157897,
"mimic_dur * (model_size/reduced_size)": 129563.94055149835,
"horovod_overhead": 0.09712356714775058
}
@zarzen
zarzen / mimic_env_setup.sh
Last active Dec 24, 2019
mimic env setup script
View mimic_env_setup.sh
#!/bin/bash
AUTO_DIR="$HOME/autorun"
LOG_DIR="$HOME/autorun/chaokun_logs"
if [ ! -d "$LOG_DIR" ]; then
cd $AUTO_DIR && \
echo "downloading chaokun_logs.tar.gz" && \
wget -q https://dt-training.s3.amazonaws.com/chaokun_logs.tar.gz && \
echo "starting untar logs" && \
tar -zxf chaokun_logs.tar.gz && \
@zarzen
zarzen / eRPC-log.txt
Created Dec 17, 2019
eRPC compiling log
View eRPC-log.txt
Scanning dependencies of target erpc
[ 1%] Building CXX object CMakeFiles/erpc.dir/src/rpc_impl/rpc_ev_loop.cc.o
[ 3%] Building CXX object CMakeFiles/erpc.dir/src/nexus_impl/nexus.cc.o
[ 2%] Building CXX object CMakeFiles/erpc.dir/src/nexus_impl/nexus_sm_thread.cc.o
[ 5%] Building CXX object CMakeFiles/erpc.dir/src/rpc_impl/rpc_queues.cc.o
[ 6%] Building CXX object CMakeFiles/erpc.dir/src/rpc_impl/rpc_rfr.cc.o
[ 10%] Building CXX object CMakeFiles/erpc.dir/src/transport_impl/dpdk/dpdk_transport.cc.o
[ 11%] Building CXX object CMakeFiles/erpc.dir/src/transport_impl/transport.cc.o
[ 12%] Building CXX object CMakeFiles/erpc.dir/src/nexus_impl/nexus_bg_thread.cc.o
[ 21%] Building CXX object CMakeFiles/erpc.dir/src/rpc_impl/rpc_req.cc.o
@zarzen
zarzen / conf.txt
Created Dec 7, 2019
vscode vim plugin
View conf.txt
"vim.easymotion": true,
"vim.sneak": true,
"vim.incsearch": true,
"vim.useSystemClipboard": true,
"vim.useCtrlKeys": true,
"vim.hlsearch": true,
"vim.insertModeKeyBindings": [{
"before": ["f", "d"],
"after": ["<Esc>"]
}],
View dataset_links.txt
View bug_solv_soduku.py
class Solution:
"""
@param board: the sudoku puzzle
@return: nothing
"""
def solveSudoku(self, board):
# write your code here
# if not board: return
# print('start')
empty_cells = self.getEmptyXY(board)