Skip to content

Instantly share code, notes, and snippets.

@gvtulder
Created November 13, 2016 23:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gvtulder/e78b4e71e58826350e7afa19845db935 to your computer and use it in GitHub Desktop.
Save gvtulder/e78b4e71e58826350e7afa19845db935 to your computer and use it in GitHub Desktop.
Multiple in-place optimizations
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as T
import theano.gpuarray
from theano.gpuarray.basic_ops import as_gpuarray_variable, gpu_contiguous
import theano.gpuarray.tests.config
class DummyOp(theano.Op):
    """Two-input, two-output GPU Op used to exercise in-place optimizations.

    Output 0 is ``x`` with its first element set to 10; output 1 is a copy
    of ``y``.

    Parameters
    ----------
    inplace_harmful : bool
        When true, output 0 is declared to destroy input 0, and ``perform``
        really does write into the buffer of ``x``.
    inplace_harmless : bool
        When true, output 1 is declared to destroy input 1. ``perform``
        nevertheless always copies ``y``, so the input is left untouched.
    """

    __props__ = ('inplace_harmful', 'inplace_harmless')

    def __init__(self, inplace_harmful=False, inplace_harmless=False):
        self.inplace_harmful = inplace_harmful
        self.inplace_harmless = inplace_harmless
        # Map each in-place output index to the list of input indices
        # whose storage it overwrites.
        destroyed = {}
        if inplace_harmful:
            destroyed[0] = [0]
        if inplace_harmless:
            destroyed[1] = [1]
        self.destroy_map = destroyed

    def make_node(self, x, y):
        """Build the Apply node; both outputs share the type of ``x``."""
        gpu_x = as_gpuarray_variable(x, None)
        gpu_y = as_gpuarray_variable(y, None)
        output_vars = [gpu_x.type(), gpu_x.type()]
        return theano.Apply(self, [gpu_x, gpu_y], output_vars)

    def connection_pattern(self, node):
        """Report every output as connected to every input."""
        return [[True] * 2 for _ in range(2)]

    def grad(self, inputs, output_grads):
        """Return the outputs of ``DummyOpGrad`` applied to ``(x, z, dz)``.

        ``z`` is obtained from a freshly built node of this Op on the same
        inputs; the gradient flowing into the second output is ignored.
        """
        x, _y = inputs
        dz, _ignored_grad = output_grads
        z = self.make_node(*inputs).outputs[0]
        return DummyOpGrad()(x, z, dz)

    def perform(self, node, inputs, outputs):
        """Compute both outputs, reusing the buffer of ``x`` if in-place."""
        z_storage, second_storage = outputs
        # In the harmful in-place variant the input buffer itself becomes
        # the output, so the write below also changes the value of ``x``.
        z_storage[0] = inputs[0] if node.op.inplace_harmful else inputs[0].copy()
        z_storage[0][0] = 10
        second_storage[0] = inputs[1].copy()
class DummyOpGrad(theano.Op):
    """Gradient companion of ``DummyOp`` with an easily checked result.

    Takes ``(x, z, dz)`` and produces two outputs:

    * output 0: a copy of ``x`` whose first element is replaced by
      ``100 + x[0] + z[0] + dz[0]``;
    * output 1: a copy of ``z``.

    Because the result depends on the value of ``x``, it reveals whether
    ``x`` was overwritten before this Op ran.
    """

    # This Op has no parameters. Declaring an empty ``__props__`` (as
    # ``DummyOp`` does with its own properties) makes Theano derive
    # ``__eq__``, ``__hash__`` and ``__str__`` from it, so instances compare
    # equal and print as ``DummyOpGrad`` rather than as an object address.
    __props__ = ()

    def make_node(self, x, z, dz):
        """Build the Apply node; both outputs share the type of ``x``."""
        x = as_gpuarray_variable(x, None)
        z = as_gpuarray_variable(z, None)
        dz = as_gpuarray_variable(dz, None)
        return theano.Apply(self, [x, z, dz], [x.type(), x.type()])

    def perform(self, node, inputs, outputs):
        """Fill both outputs from copies of the first two inputs."""
        # Convert to NumPy arrays for the scalar arithmetic below.
        x, z, dz = [np.asarray(a) for a in inputs]
        outputs[0][0] = inputs[0].copy()
        outputs[1][0] = inputs[1].copy()
        outputs[0][0][0] = 100 + x[0] + z[0] + dz[0]
@theano.gpuarray.opt.register_inplace('dummy_inplace_opt')
@theano.gof.local_optimizer([DummyOp], inplace=True)
def local_dummy_inplace_2(node):
    """Propose the ``inplace_harmful=True`` variant of a ``DummyOp`` node.

    Returns the outputs of the replacement Op applied to the same inputs,
    keeping the node's ``inplace_harmless`` setting, or ``False`` when the
    node is not a ``DummyOp`` or already has ``inplace_harmful`` set.
    """
    op = node.op
    if not isinstance(op, DummyOp) or op.inplace_harmful:
        return False
    print('Attempting to replace with DummyOp inplace_harmful=True')
    harmful_variant = DummyOp(inplace_harmful=True,
                              inplace_harmless=op.inplace_harmless)
    return harmful_variant(*node.inputs)
@theano.gpuarray.opt.register_inplace('dummy_inplace_opt')
@theano.gof.local_optimizer([DummyOp], inplace=True)
def local_dummy_inplace_1(node):
    """Propose the ``inplace_harmless=True`` variant of a ``DummyOp`` node.

    Returns the outputs of the replacement Op applied to the same inputs,
    keeping the node's ``inplace_harmful`` setting, or ``False`` when the
    node is not a ``DummyOp`` or already has ``inplace_harmless`` set.
    """
    op = node.op
    if not isinstance(op, DummyOp) or op.inplace_harmless:
        return False
    print('Attempting to replace with DummyOp inplace_harmless=True')
    harmless_variant = DummyOp(inplace_harmful=op.inplace_harmful,
                               inplace_harmless=True)
    return harmless_variant(*node.inputs)
def dummy_op(x, y):
    """Apply a non-in-place ``DummyOp`` to GPU-contiguous ``x`` and ``y``.

    Returns the two output variables of the resulting node.
    """
    contiguous_inputs = [gpu_contiguous(value) for value in (x, y)]
    return DummyOp()(*contiguous_inputs)
# Symbolic graph: z is the first output of DummyOp, and the gradient with
# respect to x is seeded with the known gradient dz on z.
x = T.vector(name='x')
y = T.vector(name='y')
dz = T.vector(name='dz')
z, _second_output = dummy_op(x, y)
grad = T.grad(None, wrt=x, known_grads={z: dz})

print('Before optimization and compilation')
theano.printing.debugprint([z, grad])
print('')

# Two compilation modes that differ only in whether the dummy in-place
# optimizers registered in this file are applied.
mode_with_gpu = theano.gpuarray.tests.config.mode_with_gpu
mode_with_inplace = mode_with_gpu.including('dummy_inplace_opt')
mode_without_inplace = mode_with_gpu.excluding('dummy_inplace_opt')

for enable_inplace in (True, False):
    print('Optimizations enabled?', enable_inplace)
    selected_mode = mode_with_inplace if enable_inplace else mode_without_inplace
    f = theano.function([x, y, dz], [z, grad], mode=selected_mode)

    print('After optimization and compilation')
    theano.printing.debugprint(f)
    print('')

    print('Toposort order')
    for position, apply_node in enumerate(f.maker.fgraph.toposort()):
        print('%2d: %s' % (position, apply_node))
    print('')

    # Run with x = [1], y = [2], dz = [3] and compare against the values
    # expected when x is not overwritten before the gradient is computed.
    res_z, res_grad = f([1], [2], [3])
    z_host = np.array(res_z)
    grad_host = np.array(res_grad)
    print('input x = 1, dz = 3')
    print('expected z = 10')
    print('expected grad = 100 + x + z + dz = 100 + 1 + 10 + 3 = 114')
    print('output z = %d' % z_host[0])
    print('output grad = %d' % grad_host[0])
    print('')
Before optimization and compilation
DummyOp{inplace_harmful=False, inplace_harmless=False}.0 [id A] ''
|GpuContiguous [id B] ''
| |GpuFromHost<None> [id C] ''
| |x [id D]
|GpuContiguous [id E] ''
|GpuFromHost<None> [id F] ''
|y [id G]
HostFromGpu(gpuarray) [id H] ''
|<__main__.DummyOpGrad object at 0x7fd6669252d0>.0 [id I] ''
|GpuContiguous [id B] ''
|DummyOp{inplace_harmful=False, inplace_harmless=False}.0 [id J] ''
| |GpuContiguous [id B] ''
| |GpuContiguous [id E] ''
|GpuFromHost<None> [id K] ''
|dz [id L]
Optimizations enabled? True
Attempting to replace with DummyOp inplace_harmless=True
Attempting to replace with DummyOp inplace_harmful=True
After optimization and compilation
DummyOp{inplace_harmful=True, inplace_harmless=True}.0 [id A] '' 5
|GpuContiguous [id B] '' 4
| |GpuFromHost<None> [id C] '' 2
| |x [id D]
|GpuContiguous [id E] '' 3
|GpuFromHost<None> [id F] '' 1
|y [id G]
HostFromGpu(gpuarray) [id H] '' 7
|<__main__.DummyOpGrad object at 0x7fd6669252d0>.0 [id I] '' 6
|GpuContiguous [id B] '' 4
|DummyOp{inplace_harmful=True, inplace_harmless=True}.0 [id A] '' 5
|GpuFromHost<None> [id J] '' 0
|dz [id K]
Toposort order
0: GpuFromHost<None>(dz)
1: GpuFromHost<None>(y)
2: GpuFromHost<None>(x)
3: GpuContiguous(GpuFromHost<None>.0)
4: GpuContiguous(GpuFromHost<None>.0)
5: DummyOp{inplace_harmful=True, inplace_harmless=True}(GpuContiguous.0, GpuContiguous.0)
6: <__main__.DummyOpGrad object at 0x7fd6669252d0>(GpuContiguous.0, DummyOp{inplace_harmful=True, inplace_harmless=True}.0, GpuFromHost<None>.0)
7: HostFromGpu(gpuarray)(<__main__.DummyOpGrad object at 0x7fd6669252d0>.0)
input x = 1, dz = 3
expected z = 10
expected grad = 100 + x + z + dz = 100 + 1 + 10 + 3 = 114
output z = 10
output grad = 123
Optimizations enabled? False
After optimization and compilation
DummyOp{inplace_harmful=False, inplace_harmless=False}.0 [id A] '' 5
|GpuContiguous [id B] '' 4
| |GpuFromHost<None> [id C] '' 2
| |x [id D]
|GpuContiguous [id E] '' 3
|GpuFromHost<None> [id F] '' 1
|y [id G]
HostFromGpu(gpuarray) [id H] '' 7
|<__main__.DummyOpGrad object at 0x7fd6669252d0>.0 [id I] '' 6
|GpuContiguous [id B] '' 4
|DummyOp{inplace_harmful=False, inplace_harmless=False}.0 [id A] '' 5
|GpuFromHost<None> [id J] '' 0
|dz [id K]
Toposort order
0: GpuFromHost<None>(dz)
1: GpuFromHost<None>(y)
2: GpuFromHost<None>(x)
3: GpuContiguous(GpuFromHost<None>.0)
4: GpuContiguous(GpuFromHost<None>.0)
5: DummyOp{inplace_harmful=False, inplace_harmless=False}(GpuContiguous.0, GpuContiguous.0)
6: <__main__.DummyOpGrad object at 0x7fd6669252d0>(GpuContiguous.0, DummyOp{inplace_harmful=False, inplace_harmless=False}.0, GpuFromHost<None>.0)
7: HostFromGpu(gpuarray)(<__main__.DummyOpGrad object at 0x7fd6669252d0>.0)
input x = 1, dz = 3
expected z = 10
expected grad = 100 + x + z + dz = 100 + 1 + 10 + 3 = 114
output z = 10
output grad = 114
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment