Kellen Sunderland (KellenSunderland): public gists
    FILE SIZE        VM SIZE
 --------------  --------------
  73.8%   279Mi   75.2%   279Mi    [section .nv_fatbin]
   7.5%  28.5Mi    4.9%  18.4Mi    [46244 Others]
   1.1%  4.03Mi    1.0%  3.58Mi    mshadow::MapExp<>()
   0.9%  3.56Mi    1.0%  3.56Mi    __sti____cudaRegisterAll()
   0.9%  3.37Mi    0.9%  3.36Mi    mxnet::op::ElemwiseBinaryOp::RspRspOp<>()
   0.0%  2.70Ki    0.7%  2.78Mi    precalc_xorwow_matrix
   0.0%  2.71Ki    0.7%  2.78Mi    precalc_xorwow_offset_matrix
   0.7%  2.78Mi    0.7%  2.78Mi    [section .gcc_except_table]
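This breakdown appears to be Bloaty (github.com/google/bloaty) output for an MXNet shared library, with the embedded CUDA fatbin (.nv_fatbin) dominating the file size. A minimal sketch of how such a table could be produced, assuming Bloaty is installed and that the library path is /path/to/libmxnet.so (an assumption, not taken from the gist):

# Symbol-level size breakdown; bytes not attributable to a symbol fall back to
# their section, which is where rows like "[section .nv_fatbin]" come from.
bloaty -d symbols /path/to/libmxnet.so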
KellenSunderland / TensorRTCompilation.MD
Last active November 16, 2019 00:30
MXNet with TensorRT support compilation guide

Starting from Ubuntu 18.04:

sudo apt-get install tmux htop libssl-dev
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && sudo apt update
sudo dpkg -i cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
sudo apt update
sudo apt install -y cuda
wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
sudo dpkg -i nvidia-machine-learning-repo-*.deb
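The preview stops after adding NVIDIA's machine-learning repository. A hedged sketch of the likely next steps, not taken from the gist; the package names (libcudnn7, libnvinfer5 and their -dev counterparts) are assumptions that depend on the CUDA, cuDNN and TensorRT versions being targeted:

# Install cuDNN and TensorRT development packages from the machine-learning repo,
# then confirm the driver and CUDA toolkit are usable.
sudo apt update
sudo apt install -y libcudnn7 libcudnn7-dev libnvinfer5 libnvinfer-dev
nvidia-smi
/usr/local/cuda/bin/nvcc --version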
KellenSunderland / reproduce.sh
Last active May 2, 2019 01:24
Reproduce Regression
#!/bin/bash
# Build the regression Docker image and run the same model under several MXNet builds.
wget https://gist.githubusercontent.com/KellenSunderland/219fc25d7f796ae5442cfe162d27cc9c/raw/d9015ebd878391930b5ba186ab4ba77042eee943/Dockerfile
nvidia-docker build -f Dockerfile . -t mxnet/regression
nvidia-docker run -ti -e "PYTHONPATH=/work/mxnet-v1.4.0-cmake/python" mxnet/regression python2 run_model.py
nvidia-docker run -ti -e "PYTHONPATH=/work/mxnet-v1.3.0-cmake/python" mxnet/regression python2 run_model.py
nvidia-docker run -ti -e "PYTHONPATH=/work/mxnet-v1.2.0-cmake/python" mxnet/regression python2 run_model.py
nvidia-docker run -ti -e "PYTHONPATH=/work/mxnet-v1.4.0-cmake-profiler/python" mxnet/regression python2 run_model.py
nvidia-docker run -ti -e "PYTHONPATH=/work/mxnet-v1.4.0-make/python" mxnet/regression python2 run_model.py
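The same runs can be expressed as a loop so each build's output is captured in its own log for comparison; a sketch using the build directories from the commands above (the log file names are an assumption):

# Run the benchmark once per MXNet build and keep one log per build.
for build in mxnet-v1.4.0-cmake mxnet-v1.3.0-cmake mxnet-v1.2.0-cmake mxnet-v1.4.0-cmake-profiler mxnet-v1.4.0-make; do
  nvidia-docker run -ti -e "PYTHONPATH=/work/${build}/python" mxnet/regression python2 run_model.py | tee "${build}.log"
done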
KellenSunderland / Dockerfile
Last active May 2, 2019 00:32
Reproduce Regression
FROM kellens/build.ubuntu_gpu_tensorrt:latest
WORKDIR /work
# Fetch the benchmark script and the fcn-xs symbol definition used by the benchmark.
RUN wget https://gist.githubusercontent.com/KellenSunderland/686522830475dfc7073b5d7a97e89d24/raw/a0b12e63b5fbf51f4c2794a9a8dae22a2ac8cab1/run_model.py && \
    wget https://raw.githubusercontent.com/apache/incubator-mxnet/master/example/fcn-xs/symbol_fcnxs.py
# Check out each MXNet version under test.
RUN git clone --recursive https://github.com/apache/incubator-mxnet.git --branch v1.4.x mxnet-v1.4.0-cmake && \
    mkdir -p /work/mxnet-v1.4.0-cmake/lib/ && \
    git clone --recursive https://github.com/apache/incubator-mxnet.git --branch v1.4.x mxnet-v1.4.0-make && \
KellenSunderland / config.mk
Created April 29, 2019 21:25
Regression config
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
KellenSunderland / run_model.py
Created April 29, 2019 21:16
MXNet Benchmarking Script
import mxnet as mx
import numpy as np
import importlib
from collections import namedtuple
import time

# Benchmark helper: runs the module over num_batches batches and reports how long it takes.
def runMx(ctx, mod, data, num_batches, runType):
    print('%s MXNet' % (runType))
    Batch = namedtuple('Batch', ['data'])
    t = 0
KellenSunderland / main.cpp
Last active January 13, 2019 17:11
rvalue test
#include <iostream>
#include <utility>
#include <vector>
#include <cstring>
using namespace std;
class MyType {
public:
KellenSunderland / bt all from hung test
Last active March 29, 2018 17:15
Full trace of hang
Thread 20 (Thread 0x7f43ab149700 (LWP 95971)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4450b1a91c in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from target:/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x00007f43f7a2463f in std::condition_variable::wait<mxnet::engine::ThreadedEngine::WaitForVar(mxnet::Engine::VarHandle)::<lambda()> > (__p=..., __lock=..., this=0x30d6238)
at /usr/include/c++/5/condition_variable:98
#3 mxnet::engine::ThreadedEngine::WaitForVar (this=<optimized out>, var=0x3d6c318) at src/engine/threaded_engine.cc:387
#4 0x00007f43f922f411 in mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo (this=this@entry=0x7f4328b6bdd0, ctx=..., in_shape=..., out_shape=...,
cudnn_forward_compute_type=cudnn_forward_compute_type@entry=CUDNN_DATA_FLOAT, cudnn_backward_compute_type=cudnn_backward_compute_type@entry=CUDNN_DATA_FLOAT)
at src/operator/nn/./cudnn/cudnn_convolution-inl.h:718
#5 0x00007f43f91efa10

warning: Target and debugger are in different PID namespaces; thread lists and other data are likely unreliable
0x00007f44580af98d in pthread_join (threadid=139928609789696, thread_return=0x0) at pthread_join.c:90
90 pthread_join.c: No such file or directory.
(gdb) bt
#0 0x00007f44580af98d in pthread_join (threadid=139928609789696, thread_return=0x0) at pthread_join.c:90
#1 0x00007f4450b1fb97 in std::thread::join() () from target:/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x00007f43f7a2f997 in mxnet::engine::ThreadPool::~ThreadPool (this=0x1af15540, __in_chrg=<optimized out>) at src/engine/./thread_pool.h:84
#3 std::default_delete<mxnet::engine::ThreadPool>::operator() (this=<optimized out>, __ptr=0x1af15540) at /usr/include/c++/5/bits/unique_ptr.h:76
#4 std::unique_ptr<mxnet::engine::ThreadPool, std::default_delete<mxnet::engine::ThreadPool> >::~unique_ptr (this=0x19516fe8, __in_chrg=<optimized out>)
at /usr/include/c++/5/bits/unique_ptr.h:236
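A full per-thread trace like the one above can be captured non-interactively once gdb is attached to the hung process. A minimal sketch, assuming the debugger is attached from the host to a process running inside a container (which is also what triggers the PID-namespace warning shown above):

# PID is the process id of the hung test as seen from the debugger's namespace.
PID=12345   # placeholder value, not taken from the gist
gdb --batch -p "$PID" -ex "thread apply all bt" > bt_all.txt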
KellenSunderland / device_props.cu
Last active March 27, 2018 09:22
Cuda Device Check
#include <stdio.h>

// List every visible CUDA device and print its basic properties.
int main() {
  int nDevices;
  cudaGetDeviceCount(&nDevices);
  for (int i = 0; i < nDevices; i++) {
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, i);
    printf("Device Number: %d\n", i);