Chris Sullivan csullivan

## uneven.py
import tvm
from tvm import te


def intrin_vadd(xo, m, n):
    x = te.placeholder((n,), name="vx")
    y = te.placeholder((n,), name="vy")
    if m % n == 0:
        body = lambda i: x[i] + y[i]
    else:

## kona_opencl_info.txt

1 OpenCL Platforms found

 CL_PLATFORM_NAME:      QUALCOMM Snapdragon(TM)
 CL_PLATFORM_VERSION:   OpenCL 2.0 QUALCOMM build: commit #ffa098c changeid #I3cb6626e76 Date: 01/16/20 Thu Local Branch:  Remote Branch:
OpenCL Device Info:

 1 devices found supporting OpenCL on: QUALCOMM Snapdragon(TM)

 ----------------------------------

## mace_conv2d_1x1_buffer_analysis.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                csullivan
                / mace_conv2d_1x1_buffer_analysis.ipynb
            
            
              Last active
              July 21, 2020 16:40
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## remove_class_attr_from_pbtxt.py
import argparse
import os

def get_files(directory):
    """Return the directory entries whose name contains ``"pbtxt"``.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list of entry names (not full paths), in the order produced by
        ``os.listdir``. The match is a plain substring test, so any name
        containing ``"pbtxt"`` anywhere qualifies, not only names that
        end with a ``.pbtxt`` extension.
    """
    return [entry for entry in os.listdir(path=directory) if "pbtxt" in entry]

## hetero_layer.diff
diff --git a/src/ngraph/runtime/gpu/gpu_backend.cpp b/src/ngraph/runtime/gpu/gpu_backend.cpp
index 2f1472e4..f54d4ce0 100644
--- a/src/ngraph/runtime/gpu/gpu_backend.cpp
+++ b/src/ngraph/runtime/gpu/gpu_backend.cpp
@@ -162,6 +162,7 @@ bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
                                      const vector<shared_ptr<runtime::Tensor>>& outputs,
                                      const vector<shared_ptr<runtime::Tensor>>& inputs)
 {
+    std::cout << "call " << func->get_name() << std::endl;
     bool rc = true;

## gnmt_ngraph.diff
diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
index 71cdd614..e836f16b 100644
--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
         m_shared_context->m_primitive_emitter->get_memory_allocator());

     ngraph::pass::Manager pass_manager;
-#if CUDNN_VERSION >= 7200
+#if CUDNN_VERSION >= 9200

## use_type_info.diff
diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
index a9ef0e00..09fe458b 100644
--- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
+++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
@@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
 {
     if (reduce_op == "max")
     {
-        return m_host_parameters->min_by_datatype(data_type);
+        return TypeInfo::Get(data_type)->max_ptr();

## profile_small.txt
INFO:root:start with arguments Namespace(batch_size=64, benchmark=0, brightness=0, contrast=0, data_nthreads=4, data_train='/dataset/mxnet_imagenet/train.rec', data_train_idx='', data_val='/dataset/mxnet_imagenet/val.rec', data_val_idx='', disp_batches=20, dtype='float32', fill_value=127, gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', is_nnp=False, kv_store='device', load_epoch=None, loss='', lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_crop_size=-1, max_random_area=1, max_random_aspect_ratio=0, max_random_h=0, max_random_l=0, max_random_rotate_angle=0, max_random_s=0, max_random_scale=1, max_random_shear_ratio=0, min_crop_size=-1, min_random_area=1, min_random_aspect_ratio=None, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=80, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, pca_noise=0, profile_server_suffix='', profile_worker_suffix='', random_crop=0, random_mi

## print_tensor_vals.diff
diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
index bc30f4d1..4fbd85aa 100644
--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
@@ -22,6 +22,7 @@
 #include <typeindex>
 #include <typeinfo>
 #include <unordered_map>
+#include <algorithm>


## rnn_updates.diff
diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
index 04d96608..aaad210c 100644
--- a/src/ngraph/runtime/gpu/CMakeLists.txt
+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
@@ -42,7 +42,6 @@ set(SRC
     pass/tensor_memory_reservation.cpp
     gpu_kernel_args.cpp
     pass/gpu_rnn_fusion.cpp
-    op/lstm.cpp
     op/rnn.cpp
	import tvm
	from tvm import te


	def intrin_vadd(xo, m, n):
	x = te.placeholder((n,), name="vx")
	y = te.placeholder((n,), name="vy")
	if m % n == 0:
	body = lambda i: x[i] + y[i]
	else:

	1 OpenCL Platforms found

	CL_PLATFORM_NAME: QUALCOMM Snapdragon(TM)
	CL_PLATFORM_VERSION: OpenCL 2.0 QUALCOMM build: commit #ffa098c changeid #I3cb6626e76 Date: 01/16/20 Thu Local Branch: Remote Branch:
	OpenCL Device Info:

	1 devices found supporting OpenCL on: QUALCOMM Snapdragon(TM)

	----------------------------------
	import argparse
	import os

	def get_files(directory):
	files = os.listdir(path=directory)
	pbtxts = []
	for f in files:
	if "pbtxt" in f:
	pbtxts.append(f)
	return pbtxts
	diff --git a/src/ngraph/runtime/gpu/gpu_backend.cpp b/src/ngraph/runtime/gpu/gpu_backend.cpp
	index 2f1472e4..f54d4ce0 100644
	--- a/src/ngraph/runtime/gpu/gpu_backend.cpp
	+++ b/src/ngraph/runtime/gpu/gpu_backend.cpp
	@@ -162,6 +162,7 @@ bool runtime::gpu::GPU_Backend::call(shared_ptr<Function> func,
	const vector<shared_ptr<runtime::Tensor>>& outputs,
	const vector<shared_ptr<runtime::Tensor>>& inputs)
	{
	+ std::cout << "call " << func->get_name() << std::endl;
	bool rc = true;
	diff --git a/src/ngraph/runtime/gpu/gpu_external_function.cpp b/src/ngraph/runtime/gpu/gpu_external_function.cpp
	index 71cdd614..e836f16b 100644
	--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
	+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
	@@ -561,7 +561,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
	m_shared_context->m_primitive_emitter->get_memory_allocator());

	ngraph::pass::Manager pass_manager;
	-#if CUDNN_VERSION >= 7200
	+#if CUDNN_VERSION >= 9200
	diff --git a/src/ngraph/runtime/gpu/cuda_emitter.cpp b/src/ngraph/runtime/gpu/cuda_emitter.cpp
	index a9ef0e00..09fe458b 100644
	--- a/src/ngraph/runtime/gpu/cuda_emitter.cpp
	+++ b/src/ngraph/runtime/gpu/cuda_emitter.cpp
	@@ -3096,11 +3096,11 @@ void* runtime::gpu::CUDAEmitter::get_init_reduce_val(std::string reduce_op, std:
	{
	if (reduce_op == "max")
	{
	- return m_host_parameters->min_by_datatype(data_type);
	+ return TypeInfo::Get(data_type)->max_ptr();
	diff --git a/src/ngraph/runtime/cpu/cpu_external_function.cpp b/src/ngraph/runtime/cpu/cpu_external_function.cpp
	index bc30f4d1..4fbd85aa 100644
	--- a/src/ngraph/runtime/cpu/cpu_external_function.cpp
	+++ b/src/ngraph/runtime/cpu/cpu_external_function.cpp
	@@ -22,6 +22,7 @@
	#include <typeindex>
	#include <typeinfo>
	#include <unordered_map>
	+#include <algorithm>
	diff --git a/src/ngraph/runtime/gpu/CMakeLists.txt b/src/ngraph/runtime/gpu/CMakeLists.txt
	index 04d96608..aaad210c 100644
	--- a/src/ngraph/runtime/gpu/CMakeLists.txt
	+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
	@@ -42,7 +42,6 @@ set(SRC
	pass/tensor_memory_reservation.cpp
	gpu_kernel_args.cpp
	pass/gpu_rnn_fusion.cpp
	- op/lstm.cpp
	op/rnn.cpp