Skip to content

Instantly share code, notes, and snippets.

@lcw
Last active June 8, 2016 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lcw/64a131874f2da5c0506127d9a49a43ba to your computer and use it in GitHub Desktop.
Save lcw/64a131874f2da5c0506127d9a49a43ba to your computer and use it in GitHub Desktop.
#define int_floor_div_pos_b(a,b) ( ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) )
task void init_knl_inner(uniform int32 const N, uniform float *uniform a)
{
if (-1 + -8 * ((uniform int32) taskIndex0) + -1 * (varying int32) programIndex + N >= 0)
for (uniform int32 i = 0; i <= -1 + N; ++i)
a[8 * i + (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) % 8) + 8 * N * (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) / 8)] = 17.0;
}
export void init_knl(uniform int32 const N, uniform float *uniform a)
{
assert(programCount == 8);
launch[int_floor_div_pos_b(7 + N, 8)] init_knl_inner(N, a);
}
$ ispc --target=avx2-i32x8 knl.ispc -o knl.o
knl.ispc:7:18: Performance Warning: Modulus operator with varying types is very inefficient.
a[8 * i + (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) % 8) + 8 * N * (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) / 8)] = 17.0;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
knl.ispc:7:7: Performance Warning: Scatter required to store value.
a[8 * i + (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) % 8) + 8 * N * (((varying int32) programIndex + ((uniform int32) taskIndex0) * 8) / 8)] = 17.0;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
import loopy as lp
import numpy as np
def gen_code(knl):
knl = lp.preprocess_kernel(knl)
knl = lp.get_one_scheduled_kernel(knl)
codegen_result = lp.generate_code_v2(knl)
return codegen_result.device_code() + "\n" + codegen_result.host_code()
# {{{ settings
if 0:
DOF_DTYPE = np.float64
VEC_WIDTH = 4
else:
DOF_DTYPE = np.float32
VEC_WIDTH = 8
# }}}
# {{{ kernel creation
def make_init_knl(target):
knl = lp.make_kernel(
"{ [i,j]: "
"0<=i<N and 0<=j<N }",
"""
a[i,j] = 17
""",
target=target,
name="init_knl",
default_order="F")
knl = lp.add_and_infer_dtypes(knl, {"a": DOF_DTYPE})
knl = lp.set_argument_order(knl, "N,a")
knl = lp.split_iname(knl, "j", VEC_WIDTH, inner_tag="l.0", outer_tag="g.0")
knl = lp.split_array_dim(knl, ("a", 1, "F"), VEC_WIDTH,
auto_split_inames=False)
knl = lp.tag_data_axes(knl, "a", "N1,N0,N2")
return knl
# }}}
def main():
target = lp.ISPCTarget()
init_knl = make_init_knl(target)
ispc_code = gen_code(init_knl)
print(ispc_code)
if __name__ == "__main__":
main()
# vim: foldmethod=marker
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment