eqy

## conv.py
import torch
import time

# Enable cuDNN benchmark mode so cuDNN may autotune and select a convolution
# algorithm for the shapes used below.
torch.backends.cudnn.benchmark = True

# Iteration count; it is not consumed within the visible portion of this script.
iters = 10

# Two convolution layers that differ only in dtype (float16 vs. bfloat16).
# Positional arguments are in_channels=64, out_channels=64, kernel_size=3,
# stride=3; groups=64 equals the channel count, so every channel is convolved
# with its own filter (a depthwise convolution).
conv = torch.nn.Conv2d(64, 64, 3, 3, groups=64, dtype=torch.half, device='cuda')
convb = torch.nn.Conv2d(64, 64, 3, 3, groups=64, dtype=torch.bfloat16, device='cuda')
# Random float16 input of shape (16, 64, 1024, 1024) on the GPU; its dtype
# matches `conv`. NOTE(review): presumably it is cast to bfloat16 before being
# fed to `convb` -- confirm in the rest of the script.
data = torch.randn(16, 64, 1024, 1024, dtype=torch.half, device='cuda')

## nsight.sh
# This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting.

# https://developer.nvidia.com/nsight-systems
# https://docs.nvidia.com/nsight-systems/profiling/index.html

# My preferred nsys (command line executable used to create profiles) commands
#
# In your script, write
# torch.cuda.nvtx.range_push("region name")
# ...

## temp.sh
#!/bin/bash
PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.rpc_tracker --host 0.0.0.0 --port 9190 &
while true; do
  res=$(PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.query_rpc_tracker --host 0.0.0.0 --port 9190 2>&1 | grep 'Cannot connect to tracker')
  if [ "$res" == "" ]; then
    echo "OK..."
  else
    echo "RESTARTING @ " $(date)
    PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.rpc_tracker --host 0.0.0.0 --port 9190 &
  fi

## prepare_model.py
import logging

import mxnet as mx
import tvm
import nnvm.frontend
import nnvm.compiler
from mxnet import gluon
from mxnet.gluon.model_zoo import vision
from tvm import relay
from tvm.contrib import ndk

## gist:2a07911ea9ceb1276c9379a40af15d7c
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#

## tune_nnvm_cuda.py
"""
Auto-tuning a convolutional network for NVIDIA GPU
====================================================
**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_

Auto-tuning for specific devices and workloads is critical for getting the
best performance. This is a tutorial on how to tune a whole convolutional
network for NVIDIA GPU.

The operator implementation for NVIDIA GPU in TVM is written in template form.

## load_slice_outline.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                eqy
                / load_slice_outline.md
            
            
              Last active
              November 7, 2015 05:08
            
          
    Motivation

Instruction Slice Table


What is this used for?
Instructions in the IST get sent to another queue
Define address generating instructions
128-entry, 2-way set-associative table with a least-recently-used replacement policy
	import torch
	import time

	torch.backends.cudnn.benchmark = True

	iters = 10

	conv = torch.nn.Conv2d(64, 64, 3, 3, groups=64, dtype=torch.half, device='cuda')
	convb = torch.nn.Conv2d(64, 64, 3, 3, groups=64, dtype=torch.bfloat16, device='cuda')
	data = torch.randn(16, 64, 1024, 1024, dtype=torch.half, device='cuda')
	# This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting.

	# https://developer.nvidia.com/nsight-systems
	# https://docs.nvidia.com/nsight-systems/profiling/index.html

	# My preferred nsys (command line executable used to create profiles) commands
	#
	# In your script, write
	# torch.cuda.nvtx.range_push("region name")
	# ...
	#!/bin/bash
	PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.rpc_tracker --host 0.0.0.0 --port 9190 &
	while true; do
	res=$(PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.query_rpc_tracker --host 0.0.0.0 --port 9190 2>&1 \| grep 'Cannot connect to tracker')
	if [ "$res" == "" ]; then
	echo "OK..."
	else
	echo "RESTARTING @ " $(date)
	PYTHONPATH=/tvm/python:$PYTHONPATH && python3 -m tvm.exec.rpc_tracker --host 0.0.0.0 --port 9190 &
	fi
	import logging

	import mxnet as mx
	import tvm
	import nnvm.frontend
	import nnvm.compiler
	from mxnet import gluon
	from mxnet.gluon.model_zoo import vision
	from tvm import relay
	from tvm.contrib import ndk
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	"""
	Auto-tuning a convolutional network for NVIDIA GPU
	====================================================
	Author: `Lianmin Zheng <https://github.com/merrymercy>`_

	Auto-tuning for specific devices and workloads is critical for getting the
	best performance. This is a tutorial on how to tune a whole convolutional
	network for NVIDIA GPU.

	The operator implementation for NVIDIA GPU in TVM is written in template form.