Skip to content

Instantly share code, notes, and snippets.

View sandeepkumar-skb's full-sized avatar
:octocat:
Get comfortable being uncomfortable

Sandeep Kumar Behera sandeepkumar-skb

:octocat:
Get comfortable being uncomfortable
View GitHub Profile
/* Floating Point 4x4 Matrix Multiplication */
.global _start
_start:
LDR R0, =matrix0
LDR R1, =matrix1
LDR R2, =matrix2
#include <chrono>
#include <iostream>
#include <vector>
#include <thread>
__global__ void do_nothing(int time_us, int clock_rate) {
clock_t start = clock64();
clock_t end;
for (;;) {
end = clock64();
#include <limits.h>
#include <unistd.h>
#include <csignal>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iostream>
#include <sstream>
#include <stdexcept>
@sandeepkumar-skb
sandeepkumar-skb / device-prop-test.cu
Created September 16, 2020 18:34 — forked from teju85/device-prop-test.cu
Sample example to compare perf of cudaGetDeviceProperties and cudaDeviceGetAttribute
// Compiling and running this program:
// nvcc -std=c++11 device-prop-test.cu && ./a.out
#include <chrono>
#include <iostream>
using namespace std;
#define CUDA_CHECK(call) \
do { \
cudaError_t status = call; \
if(status != cudaSuccess) { \
@sandeepkumar-skb
sandeepkumar-skb / profile.py
Last active July 17, 2020 19:44 — forked from mkolod/profile.py
A CUDA memory profiler for pytorch
'''
Memory profiling utilities
'''
import gc
import inspect
import linecache
import os.path
import sys
import time
import threading
@sandeepkumar-skb
sandeepkumar-skb / profile.py
Created July 17, 2020 19:44 — forked from mkolod/profile.py
A CUDA memory profiler for pytorch
'''
Memory profiling utilities
'''
import gc
import inspect
import linecache
import os.path
import sys
import time
import threading
@sandeepkumar-skb
sandeepkumar-skb / onnx_tensorrt_inference.py
Created June 26, 2020 03:25 — forked from CasiaFan/onnx_tensorrt_inference.py
Acceleration inference of onnx model with TensorRT
import tensorrt as trt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import time
model_path = "model.onnx"
input_size = 32
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
import onnx
import sys
name = sys.argv[1]
model = onnx.load(name)
onnx.checker.check_model(model)
print(onnx.helper.printable_graph(model.graph))