Skip to content

Instantly share code, notes, and snippets.

View csullivan's full-sized avatar

Chris Sullivan csullivan

  • OctoML
  • Portland
View GitHub Profile
import tvm
from tvm import te
def intrin_vadd(xo, m, n):
x = te.placeholder((n,), name="vx")
y = te.placeholder((n,), name="vy")
if m % n == 0:
body = lambda i: x[i] + y[i]
else:
@csullivan
csullivan / kona_opencl_info.txt
Created July 31, 2020 22:56
Snapdragon 865 HDK OpenCL Info.
1 OpenCL Platforms found
CL_PLATFORM_NAME: QUALCOMM Snapdragon(TM)
CL_PLATFORM_VERSION: OpenCL 2.0 QUALCOMM build: commit #ffa098c changeid #I3cb6626e76 Date: 01/16/20 Thu Local Branch: Remote Branch:
OpenCL Device Info:
1 devices found supporting OpenCL on: QUALCOMM Snapdragon(TM)
----------------------------------
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import argparse
import os
def get_files(directory):
    """Return the names of entries in ``directory`` that contain "pbtxt".

    Args:
        directory: Path of the directory to list; passed straight to
            ``os.listdir``.

    Returns:
        A list of entry names (bare names, not joined with ``directory``)
        whose name contains the substring ``"pbtxt"``, in the order
        ``os.listdir`` reports them. Empty when nothing matches.
    """
    # Deliberately a substring test rather than a suffix test, so names such
    # as "model.pbtxt.bak" are still returned, as in the original loop.
    return [name for name in os.listdir(path=directory) if "pbtxt" in name]
diff --git a/src/ngraph/runtime/gpu/gpu_backend.cpp b/src/ngraph/runtime/gpu/gpu_backend.cpp
index 2f1472e4..f54d4ce0 100644
--- a/src/ngraph/runtime/gpu/gpu_backend.cpp
+++ b/src/ngraph/runtime/gpu/gpu_backend.cpp
@@ -162,6 +162,7 @@ bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
+ std::cout << "call " << func->get_name() << std::endl;
bool rc = true;
diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
index 71cdd614..e836f16b 100644
--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
m_shared_context->m_primitive_emitter->get_memory_allocator());
ngraph::pass::Manager pass_manager;
-#if CUDNN_VERSION >= 7200
+#if CUDNN_VERSION >= 9200
diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
index a9ef0e00..09fe458b 100644
--- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
@@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
{
if (reduce_op == "max")
{
- return m_host_parameters->min_by_datatype(data_type);
+ return TypeInfo::Get(data_type)->max_ptr();
INFO:root:start with arguments Namespace(batch_size=64, benchmark=0, brightness=0, contrast=0, data_nthreads=4, data_train='/dataset/mxnet_imagenet/train.rec', data_train_idx='', data_val='/dataset/mxnet_imagenet/val.rec', data_val_idx='', disp_batches=20, dtype='float32', fill_value=127, gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', is_nnp=False, kv_store='device', load_epoch=None, loss='', lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_crop_size=-1, max_random_area=1, max_random_aspect_ratio=0, max_random_h=0, max_random_l=0, max_random_rotate_angle=0, max_random_s=0, max_random_scale=1, max_random_shear_ratio=0, min_crop_size=-1, min_random_area=1, min_random_aspect_ratio=None, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=80, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, pca_noise=0, profile_server_suffix='', profile_worker_suffix='', random_crop=0, random_mi
diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
index bc30f4d1..4fbd85aa 100644
--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -22,6 +22,7 @@
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
+#include <algorithm>
diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
index 04d96608..aaad210c 100644
--- a/src/ngraph/runtime/gpu/CMakeLists.txt
+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
@@ -42,7 +42,6 @@ set(SRC
pass/tensor_memory_reservation.cpp
gpu_kernel_args.cpp
pass/gpu_rnn_fusion.cpp
- op/lstm.cpp
op/rnn.cpp