inducer/cache-collision.py Secret

## cache-collision.py
# Shared setup for every kernel in this script: imports, global loopy
# switches, an OpenCL context/queue, and the random test data.
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp
# Switch loopy caching off for the whole run.
lp.set_caching_enabled(False)
# NOTE(review): this name is not referenced again in the visible script;
# presumably it is imported only so loopy can detect the language version
# the script targets -- confirm against the loopy documentation.
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
from warnings import filterwarnings, catch_warnings
# Escalate every LoopyWarning to an exception so that a warning stops the
# script instead of scrolling past.
filterwarnings('error', category=lp.LoopyWarning)
# interactive=False: pick a context without prompting the user.
ctx = cl.create_some_context(interactive=False)
queue = cl.CommandQueue(ctx)
# Problem size: 256 entries per vector, 256 x 256 per matrix.
n = 16*16
# Device-side float32 test data. Not every array is used by the kernels in
# the visible part of the script.
x_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
y_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
z_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
a_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=np.float32)
b_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=np.float32)
# Host-side (numpy) float32 test vectors.
x_vec_host = np.random.randn(n).astype(np.float32)
y_vec_host = np.random.randn(n).astype(np.float32)
# NOTE: ALLOW_TERMINAL_COLORS is not a documented loopy interface; it is set
# to False here directly on the loopy.options module.
import loopy.options
loopy.options.ALLOW_TERMINAL_COLORS = False
# Kernel 1: out[i] = 2*a[i] for 0 <= i < n. It is launched several times
# below with different option sets and with both device and host input.
knl = lp.make_kernel(
    "{ [i]: 0<=i<n }",
    "out[i] = 2*a[i]")
knl = lp.set_options(knl, allow_terminal_colors=False)
print(knl)
# Device input: the result is fetched with .get() before comparing.
evt, (out,) = knl(queue, a=x_vec_dev)
assert (out.get() == (2*x_vec_dev).get()).all()
# Same kernel with write_code=True, launched once with device input and
# once with host (numpy) input.
knl = lp.set_options(knl, write_code=True)
evt, (out,) = knl(queue, a=x_vec_dev)
evt, (out,) = knl(queue, a=x_vec_host)
# With host input, `out` is compared directly against a numpy array
# (no .get() call).
assert (out == (2*x_vec_host)).all()
# Swap the options: write_wrapper on, write_code off, then launch again.
knl = lp.set_options(knl, write_wrapper=True, write_code=False)
evt, (out,) = knl(queue, a=x_vec_host)


# Add a build option, fix the dtype of `a` to float32, then generate and
# print the device code and the first generated header.
knl = lp.set_options(knl, build_options=["-cl-mad-enable"])
typed_knl = lp.add_dtypes(knl, dict(a=np.float32))
code = lp.generate_code_v2(typed_knl).device_code()
print(code)
header = str(lp.generate_header(typed_knl)[0])
print(header)
# WARNING: Incorrect.
# Transpose `a` into `out`, then double `out`, with both statements in the
# same i,j loop nest and no explicit dependency between them.
# NOTE(review): presumably "incorrect" because the doubling statement can
# read entries of `out` that the transpose has not written yet -- confirm.
# This kernel is never launched: `knl` is rebound by the next make_kernel.
knl = lp.make_kernel(
    "{ [i,j]: 0<=i,j<n }",
    """
    out[j,i] = a[i,j]
    out[i,j] = 2*out[i,j]
    """,
    [lp.GlobalArg("out", shape=lp.auto, is_input=False), ...])
# WARNING: Incorrect.
# Same two statements, now with an explicit id/dep pair (the doubling
# depends on `transpose`) and an explicit kernel name. Both statements
# still share the i,j inames.
knl = lp.make_kernel(
    "{ [i,j]: 0<=i,j<n }",
    """
    out[j,i] = a[i,j] {id=transpose}
    out[i,j] = 2*out[i,j]  {dep=transpose}
    """,
    [lp.GlobalArg("out", shape=lp.auto, is_input=False), ...],
    name="transpose_and_dbl")
# Show the kernel including its instruction dependencies.
print(knl["transpose_and_dbl"].stringify(with_dependencies=True))

knl = lp.set_options(knl, write_code=True)
knl = lp.prioritize_loops(knl, "i,j")
evt, (out,) = knl(queue, a=a_mat_dev)
# The comparison with 2 * a^T is only printed, not asserted, so a mismatch
# does not stop the script here.
print((out.get() == a_mat_dev.get().T*2).all())
# Transpose-and-double again, but the doubling statement now iterates over
# its own inames (ii, jj) instead of sharing i, j with the transpose.
# The dependency on `transpose` is kept.
knl = lp.make_kernel(
    "{ [i,j,ii,jj]: 0<=i,j,ii,jj<n }",
    """
    out[j,i] = a[i,j] {id=transpose}
    out[ii,jj] = 2*out[ii,jj]  {dep=transpose}
    """,
    [lp.GlobalArg("out", shape=lp.auto, is_input=False), ...])
knl = lp.prioritize_loops(knl, "i,j,ii,jj")
knl = lp.set_options(knl, write_code=True)
evt, (out,) = knl(queue, a=a_mat_dev)
# Unlike the shared-iname variant, this result is asserted to equal 2 * a^T.
assert (out.get() == a_mat_dev.get().T*2).all()
# Zero-fill kernel over a 2D index space: a[i,j] = 0 for 0 <= i,j < n.
knl = lp.make_kernel(
    "{ [i,j]: 0<=i,j<n }",
    """
    a[i,j] = 0
    """)
# Loop priority is given as "j,i" here, the reverse of the subscript order.
knl = lp.prioritize_loops(knl, "j,i")
knl = lp.set_options(knl, write_code=True)
# `a` is both the argument passed in and the array the kernel writes; the
# single returned array is bound to `out`.
evt, (out,) = knl(queue, a=a_mat_dev)


# 1D zero-fill kernel: a[i] = 0 for 0 <= i < n, with the assumption n >= 1.
knl = lp.make_kernel(
    "{ [i]: 0<=i<n }",
    "a[i] = 0", assumptions="n>=1")
# Split iname `i` with a factor of 16; the resulting inames are referred to
# as i_outer and i_inner on the next line.
knl = lp.split_iname(knl, "i", 16)
knl = lp.prioritize_loops(knl, "i_outer,i_inner")
knl = lp.set_options(knl, write_code=True)
# NOTE(review): the THISONE marker presumably flags this launch as the one
# that triggers the problem this script reproduces -- confirm with the
# original report.
evt, (out,) = knl(queue, a=x_vec_dev) #THISONE
	# NOTE(review): from here on every line carries a leading tab and repeats
	# the script that starts at the top of the file. As plain Python this
	# indentation is unexpected at module level -- confirm whether this
	# section is paste residue before running the file.
	# Shared setup: imports, global loopy switches, an OpenCL context/queue,
	# and the random test data.
	import numpy as np
	import pyopencl as cl
	import pyopencl.array
	import pyopencl.clrandom
	import loopy as lp
	# Switch loopy caching off for the whole run.
	lp.set_caching_enabled(False)
	# NOTE(review): not referenced again in the visible script; presumably
	# imported only so loopy can detect the targeted language version --
	# confirm against the loopy documentation.
	from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
	from warnings import filterwarnings, catch_warnings
	# Escalate every LoopyWarning to an exception.
	filterwarnings('error', category=lp.LoopyWarning)
	# interactive=False: pick a context without prompting the user.
	ctx = cl.create_some_context(interactive=False)
	queue = cl.CommandQueue(ctx)
	# Problem size: 256 entries per vector, 256 x 256 per matrix.
	n = 16*16
	# Device-side float32 test data; not every array is used below.
	x_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
	y_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
	z_vec_dev = cl.clrandom.rand(queue, n, dtype=np.float32)
	a_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=np.float32)
	b_mat_dev = cl.clrandom.rand(queue, (n, n), dtype=np.float32)
	# Host-side (numpy) float32 test vectors.
	x_vec_host = np.random.randn(n).astype(np.float32)
	y_vec_host = np.random.randn(n).astype(np.float32)
	# NOTE: ALLOW_TERMINAL_COLORS is not a documented loopy interface; it is
	# set to False here directly on the loopy.options module.
	import loopy.options
	loopy.options.ALLOW_TERMINAL_COLORS = False
	# Kernel 1: out[i] = 2*a[i] for 0 <= i < n, launched several times below
	# with different option sets and with both device and host input.
	knl = lp.make_kernel(
	"{ [i]: 0<=i<n }",
	"out[i] = 2*a[i]")
	knl = lp.set_options(knl, allow_terminal_colors=False)
	print(knl)
	# Device input: the result is fetched with .get() before comparing.
	evt, (out,) = knl(queue, a=x_vec_dev)
	assert (out.get() == (2*x_vec_dev).get()).all()
	# Same kernel with write_code=True, launched with device input and then
	# with host (numpy) input.
	knl = lp.set_options(knl, write_code=True)
	evt, (out,) = knl(queue, a=x_vec_dev)
	evt, (out,) = knl(queue, a=x_vec_host)
	# With host input, `out` is compared directly against a numpy array
	# (no .get() call).
	assert (out == (2*x_vec_host)).all()
	# Swap the options: write_wrapper on, write_code off, then launch again.
	knl = lp.set_options(knl, write_wrapper=True, write_code=False)
	evt, (out,) = knl(queue, a=x_vec_host)


	# Add a build option, fix the dtype of `a` to float32, then generate and
	# print the device code and the first generated header.
	knl = lp.set_options(knl, build_options=["-cl-mad-enable"])
	typed_knl = lp.add_dtypes(knl, dict(a=np.float32))
	code = lp.generate_code_v2(typed_knl).device_code()
	print(code)
	header = str(lp.generate_header(typed_knl)[0])
	print(header)
	# WARNING: Incorrect.
	# Transpose `a` into `out`, then double `out`, with both statements in
	# the same i,j loop nest and no explicit dependency between them.
	# NOTE(review): presumably "incorrect" because the doubling statement can
	# read entries of `out` the transpose has not written yet -- confirm.
	# This kernel is never launched: `knl` is rebound by the next make_kernel.
	knl = lp.make_kernel(
	"{ [i,j]: 0<=i,j<n }",
	"""
	out[j,i] = a[i,j]
	out[i,j] = 2*out[i,j]
	""",
	[lp.GlobalArg("out", shape=lp.auto, is_input=False), ...])
	# WARNING: Incorrect.
	# Same two statements, now with an explicit id/dep pair (the doubling
	# depends on `transpose`) and an explicit kernel name. Both statements
	# still share the i,j inames.
	knl = lp.make_kernel(
	"{ [i,j]: 0<=i,j<n }",
	"""
	out[j,i] = a[i,j] {id=transpose}
	out[i,j] = 2*out[i,j] {dep=transpose}
	""",
	[lp.GlobalArg("out", shape=lp.auto, is_input=False), ...],
	name="transpose_and_dbl")
	# Show the kernel including its instruction dependencies.
	print(knl["transpose_and_dbl"].stringify(with_dependencies=True))

	knl = lp.set_options(knl, write_code=True)
	knl = lp.prioritize_loops(knl, "i,j")
	evt, (out,) = knl(queue, a=a_mat_dev)
	# The comparison with 2 * a^T is only printed, not asserted, so a
	# mismatch does not stop the script here.
	print((out.get() == a_mat_dev.get().T*2).all())
	# Transpose-and-double again, but the doubling statement now iterates
	# over its own inames (ii, jj) instead of sharing i, j with the
	# transpose. The dependency on `transpose` is kept.
	knl = lp.make_kernel(
	"{ [i,j,ii,jj]: 0<=i,j,ii,jj<n }",
	"""
	out[j,i] = a[i,j] {id=transpose}
	out[ii,jj] = 2*out[ii,jj] {dep=transpose}
	""",
	[lp.GlobalArg("out", shape=lp.auto, is_input=False), ...])
	knl = lp.prioritize_loops(knl, "i,j,ii,jj")
	knl = lp.set_options(knl, write_code=True)
	evt, (out,) = knl(queue, a=a_mat_dev)
	# Unlike the shared-iname variant, this result is asserted to equal
	# 2 * a^T.
	assert (out.get() == a_mat_dev.get().T*2).all()
	# Zero-fill kernel over a 2D index space: a[i,j] = 0 for 0 <= i,j < n.
	knl = lp.make_kernel(
	"{ [i,j]: 0<=i,j<n }",
	"""
	a[i,j] = 0
	""")
	# Loop priority is given as "j,i" here, the reverse of the subscript
	# order.
	knl = lp.prioritize_loops(knl, "j,i")
	knl = lp.set_options(knl, write_code=True)
	# `a` is both the argument passed in and the array the kernel writes;
	# the single returned array is bound to `out`.
	evt, (out,) = knl(queue, a=a_mat_dev)


	# 1D zero-fill kernel: a[i] = 0 for 0 <= i < n, with the assumption
	# n >= 1.
	knl = lp.make_kernel(
	"{ [i]: 0<=i<n }",
	"a[i] = 0", assumptions="n>=1")
	# Split iname `i` with a factor of 16; the resulting inames are referred
	# to as i_outer and i_inner on the next line.
	knl = lp.split_iname(knl, "i", 16)
	knl = lp.prioritize_loops(knl, "i_outer,i_inner")
	knl = lp.set_options(knl, write_code=True)
	# NOTE(review): the THISONE marker presumably flags this launch as the
	# one that triggers the problem this script reproduces -- confirm with
	# the original report.
	evt, (out,) = knl(queue, a=x_vec_dev) #THISONE