Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
Last active June 26, 2022 06:54
Show Gist options
Save kaushikcfd/cd7668ac6b68315d588be52c13eb87fd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dea29ba5",
"metadata": {},
"outputs": [],
"source": [
"import loopy as lp\n",
"from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9c739f94",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Old shape: (19, 19)\n",
"New shape: (100,)\n",
"#define lid(N) ((int) get_local_id(N))\n",
"#define gid(N) ((int) get_group_id(N))\n",
"#if __OPENCL_C_VERSION__ < 120\n",
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n",
"#endif\n",
"\n",
"__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double const *__restrict__ a, __global double *__restrict__ out)\n",
"{\n",
" double tmp_reindexed[100];\n",
"\n",
" for (int j = 0; j <= 9; ++j)\n",
" for (int i = 0; i <= 9; ++i)\n",
" {\n",
" tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))] = a[10 * i + j];\n",
" out[10 * i + j] = tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))] * tmp_reindexed[((-2 + 2 * i >= 0 && -2 + 2 * j >= 0) ? 5 * 2 * i + (2 * j) / 2 : ((2 * j == 0 && -2 + 2 * i >= 0) ? 5 * 2 * i : ((2 * i == 0 && -2 + 2 * j >= 0) ? (2 * j) / 2 : 0)))];\n",
" }\n",
"}\n"
]
}
],
"source": [
"tunit = lp.make_kernel(\n",
" \"{[i, j]: 0<=i,j<10}\",\n",
" \"\"\"\n",
" <> tmp[2*i, 2*j] = a[i, j]\n",
" out[i, j] = tmp[2*i, 2*j] ** 2\n",
" \"\"\")\n",
"\n",
"\n",
"tunit = lp.add_dtypes(tunit, {\"a\": \"float64\"})\n",
"print(\"Old shape:\", tunit.default_entrypoint.temporary_variables[\"tmp\"].shape)\n",
"knl = lp.reindex_using_sehgir_loechner_scheme(tunit.default_entrypoint,\n",
" \"tmp\")\n",
"print(\"New shape:\", knl.temporary_variables[\"tmp_reindexed\"].shape)\n",
"\n",
"tunit = tunit.with_kernel(knl)\n",
"print(lp.generate_code_v2(tunit).device_code())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2803e962",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#define lid(N) ((int) get_local_id(N))\n",
"#define gid(N) ((int) get_group_id(N))\n",
"#if __OPENCL_C_VERSION__ < 120\n",
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n",
"#endif\n",
"#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \\\n",
" MACRO_NAME(int8, char) \\\n",
" MACRO_NAME(int16, short) \\\n",
" MACRO_NAME(int32, int) \\\n",
" MACRO_NAME(int64, long)\n",
"#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \\\n",
" inline TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \\\n",
" { \\\n",
" if (a<0) \\\n",
" a = a - (b-1); \\\n",
" return a/b; \\\n",
" }\n",
"LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)\n",
"#undef LOOPY_DEFINE_FLOOR_DIV_POS_B\n",
"#undef LOOPY_CALL_WITH_INTEGER_TYPES\n",
"\n",
"__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double const *__restrict__ a, __global double *__restrict__ out)\n",
"{\n",
" double tmp_reindexed[55];\n",
"\n",
" for (int j = 0; j <= 9; ++j)\n",
" for (int i = 0; i <= j; ++i)\n",
" {\n",
" tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))] = a[10 * i + j];\n",
" out[10 * i + j] = tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))] * tmp_reindexed[((-1 + i >= 0 && -1 + -1 * i + j >= 0) ? (19 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) + j : ((-1 * i + j == 0 && -1 + i >= 0) ? (21 * i) / 2 + loopy_floor_div_pos_b_int32(-1 * i * i, 2) : ((i == 0 && -1 + j >= 0) ? j : 0)))];\n",
" }\n",
"}\n"
]
}
],
"source": [
"tunit = lp.make_kernel(\n",
" \"{[i, j]: 0<=i<=j<10}\",\n",
" \"\"\"\n",
" <> tmp[i, j] = a[i, j]\n",
" out[i, j] = tmp[i, j] ** 2\n",
" \"\"\")\n",
"\n",
"\n",
"tunit = lp.add_dtypes(tunit, {\"a\": \"float64\"})\n",
"knl = lp.reindex_using_sehgir_loechner_scheme(tunit.default_entrypoint,\n",
" \"tmp\")\n",
"tunit = tunit.with_kernel(knl)\n",
"print(lp.generate_code_v2(tunit).device_code())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0850b1a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment