Skip to content

Instantly share code, notes, and snippets.

View KellenSunderland's full-sized avatar

Kellen Sunderland KellenSunderland

View GitHub Profile
@KellenSunderland
KellenSunderland / Dockerfile
Last active September 25, 2017 19:17
Dockerfile to build a relatively portable armv6 linux binary.
# -*- mode: dockerfile -*-
# Dockerfile to build openblas for armv6
FROM dockcross/linux-armv6
ENV ARCH armv6l
ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 USE_OPENCV=0"
ENV CC /usr/bin/arm-linux-gnueabihf-gcc
ENV CXX /usr/bin/arm-linux-gnueabihf-g++
ENV FC /usr/bin/arm-linux-gnueabihf-gfortran
ENV HOSTCC gcc
@KellenSunderland
KellenSunderland / Dockerfile
Created September 25, 2017 16:38
MXNet Arm Cross Compilation Config and Dockerfile to build a relatively portable armv6 linux binary.
# -*- mode: dockerfile -*-
# Dockerfile to build libmxnet.so for armv6
FROM dockcross/linux-armv6
ENV ARCH armv6l
ENV BUILD_OPTS "USE_BLAS=openblas USE_SSE=0 USE_OPENCV=0"
ENV CC /usr/bin/arm-linux-gnueabihf-gcc
ENV CXX /usr/bin/arm-linux-gnueabihf-g++
ENV FC /usr/bin/arm-linux-gnueabihf-gfortran
ENV HOSTCC gcc
@KellenSunderland
KellenSunderland / Dockerfile.build.master.jetson
Last active August 19, 2023 16:12
Jetson MXNet build recipe
# -*- mode: dockerfile -*-
# Work in progress; some of the manual steps below will be fixed in a subsequent release.
# Dockerfile to build libmxnet.so, and a python wheel for the Jetson TX1 and TX2
# Builds from Github MXNet master branch
# Once complete, copy the artifacts from /work/build to the target device.
# Install by running 'pip install name_of_wheel.whl' and copying the .so to a folder on your LD_LIBRARY_PATH
FROM nvidia/cuda:8.0-cudnn5-devel as cudabuilder
FROM dockcross/linux-arm64
@KellenSunderland
KellenSunderland / lighthead.py
Created February 2, 2018 14:19
Autotune repro.
import mxnet as mx
from collections import namedtuple
import numpy as np
import cv2
Batch = namedtuple('Batch', ['data'])
from scipy.misc import imread, imresize
import time
import os
from mxnet.gluon.model_zoo import vision
@KellenSunderland
KellenSunderland / main.cpp
Created February 14, 2018 08:47
Reduce Test
#include <iostream>
#include <cuda_runtime.h>
#include <cstring>
#include <chrono>
int gpu_reduce(int size, const dim3 &block, const dim3 &grid, size_t bytes, int *h_idata, int *h_odata,
int *d_idata, int *d_odata);
void cpu_reduce(int size, int *h_idata, int &cpu_sum) {
cpu_sum= 0;
@KellenSunderland
KellenSunderland / Dockerfile.cpu_clang
Created March 5, 2018 10:05
MKL docker container
FROM ubuntu:16.04
COPY install/ubuntu_install_core.sh /install/
RUN /install/ubuntu_install_core.sh
COPY install/ubuntu_install_python.sh /install/
RUN /install/ubuntu_install_python.sh
COPY install/ubuntu_install_scala.sh /install/
RUN /install/ubuntu_install_scala.sh
COPY install/ubuntu_install_r.sh /install/
RUN /install/ubuntu_install_r.sh
@KellenSunderland
KellenSunderland / Optimized
Created March 5, 2018 10:38
Docker layer optimizations
FROM ubuntu:16.04
COPY install/ubuntu_install_core.sh /install/
RUN /install/ubuntu_install_core.sh
COPY install/ubuntu_install_python.sh /install/
RUN /install/ubuntu_install_python.sh
COPY install/ubuntu_install_scala.sh /install/
RUN /install/ubuntu_install_scala.sh
COPY install/ubuntu_install_r.sh /install/
RUN /install/ubuntu_install_r.sh
@KellenSunderland
KellenSunderland / device_props.cu
Last active March 27, 2018 09:22
Cuda Device Check
#include <stdio.h>
int main() {
int nDevices;
cudaGetDeviceCount(&nDevices);
for (int i = 0; i < nDevices; i++) {
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, i);
printf("Device Number: %d\n", i);
warning: Target and debugger are in different PID namespaces; thread lists and other data are likely unreliable
0x00007f44580af98d in pthread_join (threadid=139928609789696, thread_return=0x0) at pthread_join.c:90
90 pthread_join.c: No such file or directory.
(gdb) bt
#0 0x00007f44580af98d in pthread_join (threadid=139928609789696, thread_return=0x0) at pthread_join.c:90
#1 0x00007f4450b1fb97 in std::thread::join() () from target:/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x00007f43f7a2f997 in mxnet::engine::ThreadPool::~ThreadPool (this=0x1af15540, __in_chrg=<optimized out>) at src/engine/./thread_pool.h:84
#3 std::default_delete<mxnet::engine::ThreadPool>::operator() (this=<optimized out>, __ptr=0x1af15540) at /usr/include/c++/5/bits/unique_ptr.h:76
#4 std::unique_ptr<mxnet::engine::ThreadPool, std::default_delete<mxnet::engine::ThreadPool> >::~unique_ptr (this=0x19516fe8, __in_chrg=<optimized out>)
at /usr/include/c++/5/bits/unique_ptr.h:236
@KellenSunderland
KellenSunderland / bt all from hung test
Last active March 29, 2018 17:15
Full trace of hang
Thread 20 (Thread 0x7f43ab149700 (LWP 95971)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x00007f4450b1a91c in std::condition_variable::wait(std::unique_lock<std::mutex>&) () from target:/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2 0x00007f43f7a2463f in std::condition_variable::wait<mxnet::engine::ThreadedEngine::WaitForVar(mxnet::Engine::VarHandle)::<lambda()> > (__p=..., __lock=..., this=0x30d6238)
at /usr/include/c++/5/condition_variable:98
#3 mxnet::engine::ThreadedEngine::WaitForVar (this=<optimized out>, var=0x3d6c318) at src/engine/threaded_engine.cc:387
#4 0x00007f43f922f411 in mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo (this=this@entry=0x7f4328b6bdd0, ctx=..., in_shape=..., out_shape=...,
cudnn_forward_compute_type=cudnn_forward_compute_type@entry=CUDNN_DATA_FLOAT, cudnn_backward_compute_type=cudnn_backward_compute_type@entry=CUDNN_DATA_FLOAT)
at src/operator/nn/./cudnn/cudnn_convolution-inl.h:718
#5 0x00007f43f91efa10