Skip to content

Instantly share code, notes, and snippets.

Avatar

Roman Sokolkov r7vme

View GitHub Profile
View gist:c103dd227bd512a1be9302c2dafc7c82
info: reading kernel config from /proc/config.gz ...
Generally Necessary:
- cgroup hierarchy: properly mounted [/sys/fs/cgroup]
- CONFIG_NAMESPACES: enabled
- CONFIG_NET_NS: enabled
- CONFIG_PID_NS: enabled
- CONFIG_IPC_NS: enabled
- CONFIG_UTS_NS: enabled
- CONFIG_CGROUPS: enabled
View gist:4f738d03f64f2657d6f7b397df6dec08
FROM ubuntu:18.04
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENV ROS_DISTRO dashing
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -q -y \
curl \
gnupg2 \
lsb-release \
&& rm -rf /var/lib/apt/lists/*
View gist:45ad34e9c2e8d8cd1c01d94975ee6bee
Device 0: "NVIDIA Tegra X1"
CUDA Driver Version / Runtime Version 10.0 / 10.0
CUDA Capability Major/Minor version number: 5.3
Total amount of global memory: 3965 MBytes (4157145088 bytes)
( 1) Multiprocessors, (128) CUDA Cores/MP: 128 CUDA Cores
GPU Max Clock rate: 922 MHz (0.92 GHz)
Memory Clock rate: 13 Mhz
Memory Bus Width: 64-bit
L2 Cache Size: 262144 bytes
Maximum Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536, 65536), 3D=(4096, 4096, 4096)
View register_in_header.hpp
// Invokes TensorRT's REGISTER_TENSORRT_PLUGIN macro for
// L2NormHelperPluginCreator so the creator is made known to the plugin
// registry. NOTE(review): presumably this relies on static initialization
// and requires L2NormHelperPluginCreator to be declared above this line —
// confirm against the TensorRT headers in use.
REGISTER_TENSORRT_PLUGIN(L2NormHelperPluginCreator);
View createPlugin.cpp
IPluginV2* L2NormHelperPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
const PluginField* fields = fc->fields;
for (int i = 0; i < fc->nbFields; ++i)
{
const char* attrName = fields[i].name;
if (!strcmp(attrName, "op_type"))
{
ASSERT(fields[i].type == PluginFieldType::kINT32);
mOpType = static_cast<int>(*(static_cast<const int*>(fields[i].data)));
View enqueue.cpp
/**
 * Executes the plugin for one batch by delegating to executeInference().
 *
 * Only the first entry of `inputs` and the first entry of `outputs` are
 * used; `workspace` is not referenced by this function.
 *
 * @param batchSize forwarded unchanged to executeInference().
 * @param inputs    array of input pointers; only inputs[0] is read.
 * @param outputs   array of output pointers; only outputs[0] is passed on.
 * @param workspace unused here.
 * @param stream    forwarded unchanged to executeInference().
 * @return 0 after the assertion on the helper's result.
 */
int L2NormHelper::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
{
    // Single-input / single-output: pick up binding 0 on each side.
    const void* src = inputs[0];
    void* dst = outputs[0];

    // NOTE(review): `status` is a bool compared against 0, so the assertion
    // holds only when executeInference() yields false — confirm that this
    // matches the helper's success convention.
    const bool status = executeInference(stream, op_type, eps, batchSize, C, H, W, src, dst);
    ASSERT(status == 0);

    return 0;
}
View constructor_deserialize.cpp
/**
 * Deserializing constructor: rebuilds the plugin state from a raw byte
 * buffer.
 *
 * Fields are pulled from the buffer with read<T>() in this fixed order:
 * op_type (int), eps (float), C (int), H (int), W (int).
 *
 * @param buffer start of the serialized plugin data.
 * @param length expected size of the serialized data in bytes; checked by
 *               the final assertion against the cursor position.
 */
L2NormHelper::L2NormHelper(const void* buffer, size_t length)
{
    // `a` marks the start of the buffer; `d` is the moving read cursor.
    // NOTE(review): read<T>() is presumably advancing `d` by sizeof(T) on
    // each call — its definition is not visible here.
    const char* const a = static_cast<const char*>(buffer);
    const char* d = a;

    // Order matters: it must mirror the order the fields were written in.
    op_type = read<int>(d);
    eps = read<float>(d);
    C = read<int>(d);
    H = read<int>(d);
    W = read<int>(d);

    // The cursor must land exactly `length` bytes past the start.
    ASSERT(d == a + length);
}
View max_kernel.cu
__global__ void maxKernel(
const int n,
const float eps,
const float* x,
float* y)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < n; i += gridDim.x * blockDim.x)
{
y[i] = fmaxf(x[i], eps);
View rsqrt_kernel.cu
__global__ void rsqrtKernel(
const int n,
const float* x,
float* y)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < n; i += gridDim.x * blockDim.x)
{
y[i] = rsqrtf(x[i]);
}
View sqrt_kernel.cu
__global__ void sqrtKernel(
const int n,
const float* x,
float* y)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < n; i += gridDim.x * blockDim.x)
{
y[i] = sqrtf(x[i]);
}
You can’t perform that action at this time.