Skip to content

Instantly share code, notes, and snippets.

@rrnewton
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rrnewton/9301898 to your computer and use it in GitHub Desktop.
Save rrnewton/9301898 to your computer and use it in GitHub Desktop.
Fission 2 way vs 4 way, sequential C
#include "stdlib.h"
#include "stdio.h"
#include "stdint.h"
#include "stdbool.h"
#include "math.h"
#define max(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; })
#define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; })
void build_evt144(int64_t inSize, int64_t inStride,
double* tmp_0_1266, double* tmp_0_1267, double* tmp_0_1268,
double v0111, double v0112, double v0113, double* aLt2_057,
double* aLt2_058, double* aLt2_059)
{
// Fold loop, reduction variable(s): [(v0111,Default,TDouble),(v0112,Default,TDouble),(v0113,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0111;
double eetmp1 = v0112;
double eetmp2 = v0113;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0111 = eetmp0;
double v0112 = eetmp1;
double v0113 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx23 = i4;
double gensym_102;
double gensym_103;
double gensym_104;
int64_t gensym_28;
int64_t gensym_29;
int64_t gensym_30;
int64_t gensym_78;
int64_t gensym_79;
int64_t gensym_80;
int64_t gensym_81;
int64_t gensym_33;
double gensym_82;
double gensym_83;
double gensym_84;
int64_t gensym_85;
int64_t gensym_86;
int64_t gensym_35;
double gensym_87;
double gensym_88;
double gensym_89;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_28 = (flatidx23 / (int64_t)((int64_t)(7500)));
gensym_29 = (flatidx23 % (int64_t)((int64_t)(7500)));
gensym_30 = (int64_t)((int64_t)(0));
gensym_78 = gensym_29;
gensym_79 = gensym_28;
gensym_80 = gensym_78;
gensym_81 = gensym_79;
gensym_33 = gensym_81;
gensym_82 = aLt2_057[gensym_33];
gensym_83 = aLt2_058[gensym_33];
gensym_84 = aLt2_059[gensym_33];
gensym_85 = gensym_78;
gensym_86 = gensym_79;
gensym_35 = gensym_85;
gensym_87 = aLt2_057[gensym_35];
gensym_88 = aLt2_058[gensym_35];
gensym_89 = aLt2_059[gensym_35];
e5 = gensym_88;
e6 = gensym_83;
e7 = gensym_89;
e8 = gensym_84;
e9 = gensym_82;
e10 = gensym_87;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_102 = ((e16 * e12) / e15);
gensym_103 = ((e16 * e13) / e15);
gensym_104 = ((e16 * e11) / e15);
}
else
{
gensym_102 = (double)(0.0);
gensym_103 = (double)(0.0);
gensym_104 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_102,gensym_103,gensym_104])
double v1114 = gensym_102;
double v1115 = gensym_103;
double v1116 = gensym_104;
double gensym_117;
double gensym_118;
double gensym_119;
gensym_117 = (v0111 + v1114);
gensym_118 = (v0112 + v1115);
gensym_119 = (v0113 + v1116);
v0111 = gensym_117;
v0112 = gensym_118;
v0113 = gensym_119;
}
// Write the single reduction result to each output array:
tmp_0_1266[(i3 / inStride)] = v0111;
tmp_0_1267[(i3 / inStride)] = v0112;
tmp_0_1268[(i3 / inStride)] = v0113;
}
}
void build_evt145(int64_t inSize, int64_t inStride,
double* tmp_0_1369, double* tmp_0_1370, double* tmp_0_1371,
double v0123, double v0124, double v0125, double* aLt2_160,
double* aLt2_161, double* aLt2_162)
{
// Fold loop, reduction variable(s): [(v0123,Default,TDouble),(v0124,Default,TDouble),(v0125,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0123;
double eetmp1 = v0124;
double eetmp2 = v0125;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0123 = eetmp0;
double v0124 = eetmp1;
double v0125 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx36 = i4;
double gensym_105;
double gensym_106;
double gensym_107;
int64_t gensym_41;
int64_t gensym_42;
int64_t gensym_43;
int64_t gensym_90;
int64_t gensym_91;
int64_t gensym_92;
int64_t gensym_93;
int64_t gensym_46;
double gensym_94;
double gensym_95;
double gensym_96;
int64_t gensym_97;
int64_t gensym_98;
int64_t gensym_48;
double gensym_99;
double gensym_100;
double gensym_101;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_41 = (flatidx36 / (int64_t)((int64_t)(7500)));
gensym_42 = (flatidx36 % (int64_t)((int64_t)(7500)));
gensym_43 = (int64_t)((int64_t)(0));
gensym_90 = gensym_42;
gensym_91 = gensym_41;
gensym_92 = gensym_90;
gensym_93 = gensym_91;
gensym_46 = gensym_93;
gensym_94 = aLt2_160[gensym_46];
gensym_95 = aLt2_161[gensym_46];
gensym_96 = aLt2_162[gensym_46];
gensym_97 = gensym_90;
gensym_98 = gensym_91;
gensym_48 = gensym_97;
gensym_99 = aLt2_160[gensym_48];
gensym_100 = aLt2_161[gensym_48];
gensym_101 = aLt2_162[gensym_48];
e5 = gensym_100;
e6 = gensym_95;
e7 = gensym_101;
e8 = gensym_96;
e9 = gensym_94;
e10 = gensym_99;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_105 = ((e16 * e12) / e15);
gensym_106 = ((e16 * e13) / e15);
gensym_107 = ((e16 * e11) / e15);
}
else
{
gensym_105 = (double)(0.0);
gensym_106 = (double)(0.0);
gensym_107 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_105,gensym_106,gensym_107])
double v1126 = gensym_105;
double v1127 = gensym_106;
double v1128 = gensym_107;
double gensym_129;
double gensym_130;
double gensym_131;
gensym_129 = (v0123 + v1126);
gensym_130 = (v0124 + v1127);
gensym_131 = (v0125 + v1128);
v0123 = gensym_129;
v0124 = gensym_130;
v0125 = gensym_131;
}
// Write the single reduction result to each output array:
tmp_0_1369[(i3 / inStride)] = v0123;
tmp_0_1370[(i3 / inStride)] = v0124;
tmp_0_1371[(i3 / inStride)] = v0125;
}
}
/*
 * Copies one element of the concatenated result.
 *
 * For flatidx49 below 7500 the element comes from index flatidx49 of the
 * first partial triple (tmp_0_1266/1267/1268); otherwise it comes from index
 * (flatidx49 - 7500) of the second partial triple (tmp_0_1369/1370/1371).
 * The three values are stored at index flatidx49 of tmp_063/064/065.
 * No bounds checking is done on any array.
 */
void kernelFun_evt146(int64_t flatidx49, double* tmp_063,
double* tmp_064, double* tmp_065, double* tmp_0_1266,
double* tmp_0_1267, double* tmp_0_1268, double* tmp_0_1369,
double* tmp_0_1370, double* tmp_0_1371);
void kernelFun_evt146(int64_t flatidx49, double* tmp_063,
double* tmp_064, double* tmp_065, double* tmp_0_1266,
double* tmp_0_1267, double* tmp_0_1268, double* tmp_0_1369,
double* tmp_0_1370, double* tmp_0_1371)
{
    const double* srcA;
    const double* srcB;
    const double* srcC;
    int64_t srcIdx;

    // Select the source triple and the index inside it.
    if (flatidx49 >= (int64_t)7500)
    {
        srcIdx = flatidx49 - (int64_t)7500;
        srcA = tmp_0_1369;
        srcB = tmp_0_1370;
        srcC = tmp_0_1371;
    }
    else
    {
        srcIdx = flatidx49;
        srcA = tmp_0_1266;
        srcB = tmp_0_1267;
        srcC = tmp_0_1268;
    }

    // Read all three values before storing any of them.
    const double valA = srcA[srcIdx];
    const double valB = srcB[srcIdx];
    const double valC = srcC[srcIdx];
    tmp_063[flatidx49] = valA;
    tmp_064[flatidx49] = valB;
    tmp_065[flatidx49] = valC;
}
/*
 * Runs kernelFun_evt146 once for every flat index in [0, sizeArg), in
 * increasing order, forwarding the output and source arrays unchanged.
 *
 * sizeArg - number of elements to produce; values <= 0 do nothing.
 * The remaining parameters are passed straight through to the kernel.
 */
void build_evt146(int64_t sizeArg, double* tmp_063, double* tmp_064
, double* tmp_065, double* tmp_0_1266, double* tmp_0_1267,
double* tmp_0_1268, double* tmp_0_1369, double* tmp_0_1370,
double* tmp_0_1371)
{
    // The counter has the same width as sizeArg so that a bound above
    // INT_MAX cannot overflow it.
    for (int64_t idx = 0; idx < sizeArg; idx++)
    {
        kernelFun_evt146(idx, tmp_063, tmp_064, tmp_065, tmp_0_1266,
                         tmp_0_1267, tmp_0_1268, tmp_0_1369, tmp_0_1370,
                         tmp_0_1371);
    }
}
// Input record handed to MainProg: one pointer slot per input array.
// Each slot is filled by the LoadArg_* function of the same name; the record
// stores pointers only.
struct ArgRecord
{ // These are all the Use arrays gathered from the Acc computation:
// MainProg passes the next three arrays to build_evt144.
double* aLt2_057;
double* aLt2_058;
double* aLt2_059;
// MainProg passes the next three arrays to build_evt145.
double* aLt2_160;
double* aLt2_161;
double* aLt2_162;
};
/*
 * Allocates an argument record on the heap.
 *
 * The record is zero-filled so that slots not yet set through LoadArg_* do
 * not hold indeterminate values (all-bits-zero reads as a null pointer on
 * the usual platforms).
 *
 * Returns the new record, or NULL when the allocation fails. Release it
 * with DestroyArgRecord.
 */
struct ArgRecord* CreateArgRecord()
{
    return calloc(1, sizeof(struct ArgRecord));
}
// Releases the argument record itself with free().
// The arrays referenced by its fields are not touched here.
//   arg0: record to free; forwarded directly to free().
void DestroyArgRecord(struct ArgRecord* arg0)
{
free(arg0);
}
// Stores the given array pointer in the aLt2_057 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg1: record to update (dereferenced unconditionally).
//   arg2: size argument; accepted but currently ignored.
//   arg3: pointer written to arg1->aLt2_057.
void LoadArg_aLt2_057(struct ArgRecord* arg1, int arg2,
double* arg3)
{
// In the future we could do something with the size argument.
arg1->aLt2_057 = arg3;
}
// Stores the given array pointer in the aLt2_058 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg4: record to update (dereferenced unconditionally).
//   arg5: size argument; accepted but currently ignored.
//   arg6: pointer written to arg4->aLt2_058.
void LoadArg_aLt2_058(struct ArgRecord* arg4, int arg5,
double* arg6)
{
// In the future we could do something with the size argument.
arg4->aLt2_058 = arg6;
}
// Stores the given array pointer in the aLt2_059 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg7: record to update (dereferenced unconditionally).
//   arg8: size argument; accepted but currently ignored.
//   arg9: pointer written to arg7->aLt2_059.
void LoadArg_aLt2_059(struct ArgRecord* arg7, int arg8,
double* arg9)
{
// In the future we could do something with the size argument.
arg7->aLt2_059 = arg9;
}
// Stores the given array pointer in the aLt2_160 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg10: record to update (dereferenced unconditionally).
//   arg11: size argument; accepted but currently ignored.
//   arg12: pointer written to arg10->aLt2_160.
void LoadArg_aLt2_160(struct ArgRecord* arg10, int arg11,
double* arg12)
{
// In the future we could do something with the size argument.
arg10->aLt2_160 = arg12;
}
// Stores the given array pointer in the aLt2_161 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg13: record to update (dereferenced unconditionally).
//   arg14: size argument; accepted but currently ignored.
//   arg15: pointer written to arg13->aLt2_161.
void LoadArg_aLt2_161(struct ArgRecord* arg13, int arg14,
double* arg15)
{
// In the future we could do something with the size argument.
arg13->aLt2_161 = arg15;
}
// Stores the given array pointer in the aLt2_162 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg16: record to update (dereferenced unconditionally).
//   arg17: size argument; accepted but currently ignored.
//   arg18: pointer written to arg16->aLt2_162.
void LoadArg_aLt2_162(struct ArgRecord* arg16, int arg17,
double* arg18)
{
// In the future we could do something with the size argument.
arg16->aLt2_162 = arg18;
}
// Output record filled in by MainProg: three result array pointers, an
// element count stored next to each, and one shape value shared by all three.
struct ResultRecord
{ // These are all the progResults arrays output from the Acc computation:
double* tmp_063;
int tmp_063_size; // MainProg stores 15000 here
double* tmp_064;
int tmp_064_size; // MainProg stores 15000 here
double* tmp_065;
int tmp_065_size; // MainProg stores 15000 here
// These provide (original) shape information for all progResults:
int tmp_0_shape; // MainProg assigns this from an int64_t local (narrowing)
};
/*
 * Allocates a result record on the heap.
 *
 * The record is zero-filled so that its pointers and sizes do not hold
 * indeterminate values before MainProg has filled them in (all-bits-zero
 * reads as a null pointer on the usual platforms).
 *
 * Returns the new record, or NULL when the allocation fails. Release it
 * with DestroyResultRecord.
 */
struct ResultRecord* CreateResultRecord()
{
    return calloc(1, sizeof(struct ResultRecord));
}
// Releases the result record with free().
//   arg19: record to free; forwarded directly to free().
// NOTE(review): only the record itself is freed in this function. The
// tmp_063 / tmp_064 / tmp_065 arrays it points to are not released here,
// which does not match the comment below - confirm who owns those arrays.
void DestroyResultRecord(struct ResultRecord* arg19)
{
// In the CURRENT protocol, we free all results SIMULTANEOUSLY, here:
free(arg19);
}
// Returns the tmp_063 array pointer stored in the result record (no copy).
//   arg20: record to read (dereferenced unconditionally).
double* GetResult_tmp_063(struct ResultRecord* arg20)
{
return arg20->tmp_063;
}
// Returns the tmp_063_size field of the result record.
//   arg21: record to read (dereferenced unconditionally).
int GetResultSize_tmp_063(struct ResultRecord* arg21)
{
return arg21->tmp_063_size;
}
// Returns the tmp_064 array pointer stored in the result record (no copy).
//   arg22: record to read (dereferenced unconditionally).
double* GetResult_tmp_064(struct ResultRecord* arg22)
{
return arg22->tmp_064;
}
// Returns the tmp_064_size field of the result record.
//   arg23: record to read (dereferenced unconditionally).
int GetResultSize_tmp_064(struct ResultRecord* arg23)
{
return arg23->tmp_064_size;
}
// Returns the tmp_065 array pointer stored in the result record (no copy).
//   arg24: record to read (dereferenced unconditionally).
double* GetResult_tmp_065(struct ResultRecord* arg24)
{
return arg24->tmp_065;
}
// Returns the tmp_065_size field of the result record.
//   arg25: record to read (dereferenced unconditionally).
int GetResultSize_tmp_065(struct ResultRecord* arg25)
{
return arg25->tmp_065_size;
}
// Here we provide getters for the (scalar) shape results of the program:
// Returns the tmp_0_shape field of the result record.
//   arg26: record to read (dereferenced unconditionally).
int GetResult_tmp_0_shape(struct ResultRecord* arg26)
{
return arg26->tmp_0_shape;
}
/*
 * Runs the whole two-piece program.
 *
 *  1. build_evt144 folds the first input triple (aLt2_057/058/059) into
 *     7500 partial results per component.
 *  2. build_evt145 does the same for the second triple (aLt2_160/161/162).
 *  3. build_evt146 concatenates both sets of partial results into three
 *     15000-element result arrays.
 *
 * argsRec    - input record; all six array pointers are read.
 * resultsRec - output record; receives the three result arrays, their sizes
 *              (15000 each) and the shared shape value (15000).
 *
 * The result arrays are heap-allocated here and handed to the caller through
 * resultsRec; the intermediate arrays are freed before returning.
 *
 * If any allocation fails, nothing is computed: every buffer obtained so far
 * is released and resultsRec is left with NULL arrays, zero sizes and a zero
 * shape.
 */
void MainProg(struct ArgRecord* argsRec,
struct ResultRecord* resultsRec)
{
    enum
    {
        FOLD_STRIDE = 7500,          // segment length of each fold
        FOLD_INPUT_SIZE = 56250000,  // flat index count of each fold
        RESULT_LEN = 15000           // elements in each final result array
    };
    const size_t partialBytes =
        sizeof(double) * (size_t)(FOLD_INPUT_SIZE / FOLD_STRIDE);
    const size_t resultBytes = sizeof(double) * (size_t)RESULT_LEN;

    // Output space of the first reduction.
    double* tmp_0_1266 = malloc(partialBytes);
    double* tmp_0_1267 = malloc(partialBytes);
    double* tmp_0_1268 = malloc(partialBytes);
    // Output space of the second reduction.
    double* tmp_0_1369 = malloc(partialBytes);
    double* tmp_0_1370 = malloc(partialBytes);
    double* tmp_0_1371 = malloc(partialBytes);
    // Final results, owned by the caller after a successful run.
    double* tmp_063 = malloc(resultBytes);
    double* tmp_064 = malloc(resultBytes);
    double* tmp_065 = malloc(resultBytes);

    if (tmp_0_1266 == NULL || tmp_0_1267 == NULL || tmp_0_1268 == NULL ||
        tmp_0_1369 == NULL || tmp_0_1370 == NULL || tmp_0_1371 == NULL ||
        tmp_063 == NULL || tmp_064 == NULL || tmp_065 == NULL)
    {
        // Running the kernels on a NULL buffer would crash; report an empty
        // result instead.
        free(tmp_0_1266);
        free(tmp_0_1267);
        free(tmp_0_1268);
        free(tmp_0_1369);
        free(tmp_0_1370);
        free(tmp_0_1371);
        free(tmp_063);
        free(tmp_064);
        free(tmp_065);
        resultsRec->tmp_063 = NULL;
        resultsRec->tmp_063_size = 0;
        resultsRec->tmp_064 = NULL;
        resultsRec->tmp_064_size = 0;
        resultsRec->tmp_065 = NULL;
        resultsRec->tmp_065_size = 0;
        resultsRec->tmp_0_shape = 0;
        return;
    }

    // Execute each array operation in order; both folds start from 0.0.
    build_evt144(FOLD_INPUT_SIZE, FOLD_STRIDE, tmp_0_1266, tmp_0_1267,
                 tmp_0_1268, 0.0, 0.0, 0.0, argsRec->aLt2_057,
                 argsRec->aLt2_058, argsRec->aLt2_059);
    build_evt145(FOLD_INPUT_SIZE, FOLD_STRIDE, tmp_0_1369, tmp_0_1370,
                 tmp_0_1371, 0.0, 0.0, 0.0, argsRec->aLt2_160,
                 argsRec->aLt2_161, argsRec->aLt2_162);
    build_evt146(RESULT_LEN, tmp_063, tmp_064, tmp_065, tmp_0_1266,
                 tmp_0_1267, tmp_0_1268, tmp_0_1369, tmp_0_1370, tmp_0_1371);

    // Write the final output to the results record.
    resultsRec->tmp_063 = tmp_063;
    resultsRec->tmp_063_size = RESULT_LEN;
    resultsRec->tmp_064 = tmp_064;
    resultsRec->tmp_064_size = RESULT_LEN;
    resultsRec->tmp_065 = tmp_065;
    resultsRec->tmp_065_size = RESULT_LEN;
    resultsRec->tmp_0_shape = RESULT_LEN;

    // Free every array that is neither an input nor an output.
    free(tmp_0_1266);
    free(tmp_0_1267);
    free(tmp_0_1268);
    free(tmp_0_1369);
    free(tmp_0_1370);
    free(tmp_0_1371);
}
#include "stdlib.h"
#include "stdio.h"
#include "stdint.h"
#include "stdbool.h"
#include "math.h"
#define max(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; })
#define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; })
void build_evt272(int64_t inSize, int64_t inStride,
double* tmp_0_20120, double* tmp_0_20121, double* tmp_0_20122,
double v0207, double v0208, double v0209, double* aLt2_0105,
double* aLt2_0106, double* aLt2_0107)
{
// Fold loop, reduction variable(s): [(v0207,Default,TDouble),(v0208,Default,TDouble),(v0209,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0207;
double eetmp1 = v0208;
double eetmp2 = v0209;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0207 = eetmp0;
double v0208 = eetmp1;
double v0209 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx41 = i4;
double gensym_192;
double gensym_193;
double gensym_194;
int64_t gensym_46;
int64_t gensym_47;
int64_t gensym_48;
int64_t gensym_144;
int64_t gensym_145;
int64_t gensym_146;
int64_t gensym_147;
int64_t gensym_51;
double gensym_148;
double gensym_149;
double gensym_150;
int64_t gensym_151;
int64_t gensym_152;
int64_t gensym_53;
double gensym_153;
double gensym_154;
double gensym_155;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_46 = (flatidx41 / (int64_t)((int64_t)(3750)));
gensym_47 = (flatidx41 % (int64_t)((int64_t)(3750)));
gensym_48 = (int64_t)((int64_t)(0));
gensym_144 = gensym_47;
gensym_145 = gensym_46;
gensym_146 = gensym_144;
gensym_147 = gensym_145;
gensym_51 = gensym_147;
gensym_148 = aLt2_0105[gensym_51];
gensym_149 = aLt2_0106[gensym_51];
gensym_150 = aLt2_0107[gensym_51];
gensym_151 = gensym_144;
gensym_152 = gensym_145;
gensym_53 = gensym_151;
gensym_153 = aLt2_0105[gensym_53];
gensym_154 = aLt2_0106[gensym_53];
gensym_155 = aLt2_0107[gensym_53];
e5 = gensym_154;
e6 = gensym_149;
e7 = gensym_155;
e8 = gensym_150;
e9 = gensym_148;
e10 = gensym_153;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_192 = ((e16 * e12) / e15);
gensym_193 = ((e16 * e13) / e15);
gensym_194 = ((e16 * e11) / e15);
}
else
{
gensym_192 = (double)(0.0);
gensym_193 = (double)(0.0);
gensym_194 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_192,gensym_193,gensym_194])
double v1210 = gensym_192;
double v1211 = gensym_193;
double v1212 = gensym_194;
double gensym_213;
double gensym_214;
double gensym_215;
gensym_213 = (v0207 + v1210);
gensym_214 = (v0208 + v1211);
gensym_215 = (v0209 + v1212);
v0207 = gensym_213;
v0208 = gensym_214;
v0209 = gensym_215;
}
// Write the single reduction result to each output array:
tmp_0_20120[(i3 / inStride)] = v0207;
tmp_0_20121[(i3 / inStride)] = v0208;
tmp_0_20122[(i3 / inStride)] = v0209;
}
}
void build_evt273(int64_t inSize, int64_t inStride,
double* tmp_0_21123, double* tmp_0_21124, double* tmp_0_21125,
double v0219, double v0220, double v0221, double* aLt2_1108,
double* aLt2_1109, double* aLt2_1110)
{
// Fold loop, reduction variable(s): [(v0219,Default,TDouble),(v0220,Default,TDouble),(v0221,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0219;
double eetmp1 = v0220;
double eetmp2 = v0221;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0219 = eetmp0;
double v0220 = eetmp1;
double v0221 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx54 = i4;
double gensym_195;
double gensym_196;
double gensym_197;
int64_t gensym_59;
int64_t gensym_60;
int64_t gensym_61;
int64_t gensym_156;
int64_t gensym_157;
int64_t gensym_158;
int64_t gensym_159;
int64_t gensym_64;
double gensym_160;
double gensym_161;
double gensym_162;
int64_t gensym_163;
int64_t gensym_164;
int64_t gensym_66;
double gensym_165;
double gensym_166;
double gensym_167;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_59 = (flatidx54 / (int64_t)((int64_t)(3750)));
gensym_60 = (flatidx54 % (int64_t)((int64_t)(3750)));
gensym_61 = (int64_t)((int64_t)(0));
gensym_156 = gensym_60;
gensym_157 = gensym_59;
gensym_158 = gensym_156;
gensym_159 = gensym_157;
gensym_64 = gensym_159;
gensym_160 = aLt2_1108[gensym_64];
gensym_161 = aLt2_1109[gensym_64];
gensym_162 = aLt2_1110[gensym_64];
gensym_163 = gensym_156;
gensym_164 = gensym_157;
gensym_66 = gensym_163;
gensym_165 = aLt2_1108[gensym_66];
gensym_166 = aLt2_1109[gensym_66];
gensym_167 = aLt2_1110[gensym_66];
e5 = gensym_166;
e6 = gensym_161;
e7 = gensym_167;
e8 = gensym_162;
e9 = gensym_160;
e10 = gensym_165;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_195 = ((e16 * e12) / e15);
gensym_196 = ((e16 * e13) / e15);
gensym_197 = ((e16 * e11) / e15);
}
else
{
gensym_195 = (double)(0.0);
gensym_196 = (double)(0.0);
gensym_197 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_195,gensym_196,gensym_197])
double v1222 = gensym_195;
double v1223 = gensym_196;
double v1224 = gensym_197;
double gensym_225;
double gensym_226;
double gensym_227;
gensym_225 = (v0219 + v1222);
gensym_226 = (v0220 + v1223);
gensym_227 = (v0221 + v1224);
v0219 = gensym_225;
v0220 = gensym_226;
v0221 = gensym_227;
}
// Write the single reduction result to each output array:
tmp_0_21123[(i3 / inStride)] = v0219;
tmp_0_21124[(i3 / inStride)] = v0220;
tmp_0_21125[(i3 / inStride)] = v0221;
}
}
void build_evt274(int64_t inSize, int64_t inStride,
double* tmp_0_22126, double* tmp_0_22127, double* tmp_0_22128,
double v0231, double v0232, double v0233, double* aLt2_2111,
double* aLt2_2112, double* aLt2_2113)
{
// Fold loop, reduction variable(s): [(v0231,Default,TDouble),(v0232,Default,TDouble),(v0233,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0231;
double eetmp1 = v0232;
double eetmp2 = v0233;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0231 = eetmp0;
double v0232 = eetmp1;
double v0233 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx67 = i4;
double gensym_198;
double gensym_199;
double gensym_200;
int64_t gensym_72;
int64_t gensym_73;
int64_t gensym_74;
int64_t gensym_168;
int64_t gensym_169;
int64_t gensym_170;
int64_t gensym_171;
int64_t gensym_77;
double gensym_172;
double gensym_173;
double gensym_174;
int64_t gensym_175;
int64_t gensym_176;
int64_t gensym_79;
double gensym_177;
double gensym_178;
double gensym_179;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_72 = (flatidx67 / (int64_t)((int64_t)(3750)));
gensym_73 = (flatidx67 % (int64_t)((int64_t)(3750)));
gensym_74 = (int64_t)((int64_t)(0));
gensym_168 = gensym_73;
gensym_169 = gensym_72;
gensym_170 = gensym_168;
gensym_171 = gensym_169;
gensym_77 = gensym_171;
gensym_172 = aLt2_2111[gensym_77];
gensym_173 = aLt2_2112[gensym_77];
gensym_174 = aLt2_2113[gensym_77];
gensym_175 = gensym_168;
gensym_176 = gensym_169;
gensym_79 = gensym_175;
gensym_177 = aLt2_2111[gensym_79];
gensym_178 = aLt2_2112[gensym_79];
gensym_179 = aLt2_2113[gensym_79];
e5 = gensym_178;
e6 = gensym_173;
e7 = gensym_179;
e8 = gensym_174;
e9 = gensym_172;
e10 = gensym_177;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_198 = ((e16 * e12) / e15);
gensym_199 = ((e16 * e13) / e15);
gensym_200 = ((e16 * e11) / e15);
}
else
{
gensym_198 = (double)(0.0);
gensym_199 = (double)(0.0);
gensym_200 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_198,gensym_199,gensym_200])
double v1234 = gensym_198;
double v1235 = gensym_199;
double v1236 = gensym_200;
double gensym_237;
double gensym_238;
double gensym_239;
gensym_237 = (v0231 + v1234);
gensym_238 = (v0232 + v1235);
gensym_239 = (v0233 + v1236);
v0231 = gensym_237;
v0232 = gensym_238;
v0233 = gensym_239;
}
// Write the single reduction result to each output array:
tmp_0_22126[(i3 / inStride)] = v0231;
tmp_0_22127[(i3 / inStride)] = v0232;
tmp_0_22128[(i3 / inStride)] = v0233;
}
}
void build_evt275(int64_t inSize, int64_t inStride,
double* tmp_0_23129, double* tmp_0_23130, double* tmp_0_23131,
double v0243, double v0244, double v0245, double* aLt2_3114,
double* aLt2_3115, double* aLt2_3116)
{
// Fold loop, reduction variable(s): [(v0243,Default,TDouble),(v0244,Default,TDouble),(v0245,Default,TDouble)]
// First, some temporaries to back up the inital state
// (we're going to stomp on the reduction vars / formal params):
double eetmp0 = v0243;
double eetmp1 = v0244;
double eetmp2 = v0245;
for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride))
{
// Fresh round, new accumulator with the identity:
// We shadow the formal params as a hack:
double v0243 = eetmp0;
double v0244 = eetmp1;
double v0245 = eetmp2;
for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1))
{
// (1) create input: we run the generator to produce one or more inputs
int64_t flatidx80 = i4;
double gensym_201;
double gensym_202;
double gensym_203;
int64_t gensym_85;
int64_t gensym_86;
int64_t gensym_87;
int64_t gensym_180;
int64_t gensym_181;
int64_t gensym_182;
int64_t gensym_183;
int64_t gensym_90;
double gensym_184;
double gensym_185;
double gensym_186;
int64_t gensym_187;
int64_t gensym_188;
int64_t gensym_92;
double gensym_189;
double gensym_190;
double gensym_191;
double e5;
double e6;
double e7;
double e8;
double e9;
double e10;
double e11;
double e12;
double e13;
double e14;
double e15;
double e16;
gensym_85 = (flatidx80 / (int64_t)((int64_t)(3750)));
gensym_86 = (flatidx80 % (int64_t)((int64_t)(3750)));
gensym_87 = (int64_t)((int64_t)(0));
gensym_180 = gensym_86;
gensym_181 = gensym_85;
gensym_182 = gensym_180;
gensym_183 = gensym_181;
gensym_90 = gensym_183;
gensym_184 = aLt2_3114[gensym_90];
gensym_185 = aLt2_3115[gensym_90];
gensym_186 = aLt2_3116[gensym_90];
gensym_187 = gensym_180;
gensym_188 = gensym_181;
gensym_92 = gensym_187;
gensym_189 = aLt2_3114[gensym_92];
gensym_190 = aLt2_3115[gensym_92];
gensym_191 = aLt2_3116[gensym_92];
e5 = gensym_190;
e6 = gensym_185;
e7 = gensym_191;
e8 = gensym_186;
e9 = gensym_184;
e10 = gensym_189;
if ((!(((e9 == e10) && ((e6 == e5) && (e8 == e7))))))
{
e11 = (e7 - e8);
e12 = (e10 - e9);
e13 = (e5 - e6);
e14 = (((e12 * e12) + (e13 * e13)) + (e11 * e11));
e15 = (sqrt(e14));
e16 = (((double)(1.0) * (double)(1.0)) / e14);
gensym_201 = ((e16 * e12) / e15);
gensym_202 = ((e16 * e13) / e15);
gensym_203 = ((e16 * e11) / e15);
}
else
{
gensym_201 = (double)(0.0);
gensym_202 = (double)(0.0);
gensym_203 = (double)(0.0);
}
// (2) do the reduction with the resulting values ([gensym_201,gensym_202,gensym_203])
double v1246 = gensym_201;
double v1247 = gensym_202;
double v1248 = gensym_203;
double gensym_249;
double gensym_250;
double gensym_251;
gensym_249 = (v0243 + v1246);
gensym_250 = (v0244 + v1247);
gensym_251 = (v0245 + v1248);
v0243 = gensym_249;
v0244 = gensym_250;
v0245 = gensym_251;
}
// Write the single reduction result to each output array:
tmp_0_23129[(i3 / inStride)] = v0243;
tmp_0_23130[(i3 / inStride)] = v0244;
tmp_0_23131[(i3 / inStride)] = v0245;
}
}
/*
 * Copies one element of the four-way concatenated result.
 *
 * flatidx93 selects the source triple and the index inside it:
 *   below 3750   -> tmp_0_20120/20121/20122 at flatidx93
 *   below 7500   -> tmp_0_21123/21124/21125 at flatidx93 - 3750
 *   below 11250  -> tmp_0_22126/22127/22128 at flatidx93 - 7500
 *   otherwise    -> tmp_0_23129/23130/23131 at flatidx93 - 11250
 * The three values are stored at index flatidx93 of tmp_0117/0118/0119.
 * No bounds checking is done on any array.
 */
void kernelFun_evt276(int64_t flatidx93, double* tmp_0117,
double* tmp_0118, double* tmp_0119, double* tmp_0_20120,
double* tmp_0_20121, double* tmp_0_20122, double* tmp_0_21123,
double* tmp_0_21124, double* tmp_0_21125, double* tmp_0_22126,
double* tmp_0_22127, double* tmp_0_22128, double* tmp_0_23129,
double* tmp_0_23130, double* tmp_0_23131);
void kernelFun_evt276(int64_t flatidx93, double* tmp_0117,
double* tmp_0118, double* tmp_0119, double* tmp_0_20120,
double* tmp_0_20121, double* tmp_0_20122, double* tmp_0_21123,
double* tmp_0_21124, double* tmp_0_21125, double* tmp_0_22126,
double* tmp_0_22127, double* tmp_0_22128, double* tmp_0_23129,
double* tmp_0_23130, double* tmp_0_23131)
{
    const double* srcA;
    const double* srcB;
    const double* srcC;
    int64_t srcIdx;

    // Select the source triple and the index inside it.
    if (flatidx93 < (int64_t)3750)
    {
        srcIdx = flatidx93;
        srcA = tmp_0_20120;
        srcB = tmp_0_20121;
        srcC = tmp_0_20122;
    }
    else if (flatidx93 < (int64_t)7500)
    {
        srcIdx = flatidx93 - (int64_t)3750;
        srcA = tmp_0_21123;
        srcB = tmp_0_21124;
        srcC = tmp_0_21125;
    }
    else if (flatidx93 < (int64_t)11250)
    {
        srcIdx = flatidx93 - (int64_t)7500;
        srcA = tmp_0_22126;
        srcB = tmp_0_22127;
        srcC = tmp_0_22128;
    }
    else
    {
        srcIdx = flatidx93 - (int64_t)11250;
        srcA = tmp_0_23129;
        srcB = tmp_0_23130;
        srcC = tmp_0_23131;
    }

    // Read all three values before storing any of them.
    const double valA = srcA[srcIdx];
    const double valB = srcB[srcIdx];
    const double valC = srcC[srcIdx];
    tmp_0117[flatidx93] = valA;
    tmp_0118[flatidx93] = valB;
    tmp_0119[flatidx93] = valC;
}
/*
 * Runs kernelFun_evt276 once for every flat index in [0, sizeArg), in
 * increasing order, forwarding the output and source arrays unchanged.
 *
 * sizeArg - number of elements to produce; values <= 0 do nothing.
 * The remaining parameters are passed straight through to the kernel.
 */
void build_evt276(int64_t sizeArg, double* tmp_0117,
double* tmp_0118, double* tmp_0119, double* tmp_0_20120,
double* tmp_0_20121, double* tmp_0_20122, double* tmp_0_21123,
double* tmp_0_21124, double* tmp_0_21125, double* tmp_0_22126,
double* tmp_0_22127, double* tmp_0_22128, double* tmp_0_23129,
double* tmp_0_23130, double* tmp_0_23131)
{
    // The counter has the same width as sizeArg so that a bound above
    // INT_MAX cannot overflow it.
    for (int64_t idx = 0; idx < sizeArg; idx++)
    {
        kernelFun_evt276(idx, tmp_0117, tmp_0118, tmp_0119, tmp_0_20120,
                         tmp_0_20121, tmp_0_20122, tmp_0_21123, tmp_0_21124,
                         tmp_0_21125, tmp_0_22126, tmp_0_22127, tmp_0_22128,
                         tmp_0_23129, tmp_0_23130, tmp_0_23131);
    }
}
// Input record handed to MainProg: one pointer slot per input array.
// Each slot is filled by the LoadArg_* function of the same name; the record
// stores pointers only. The twelve slots form four groups of three arrays.
struct ArgRecord
{ // These are all the Use arrays gathered from the Acc computation:
// Group 0
double* aLt2_0105;
double* aLt2_0106;
double* aLt2_0107;
// Group 1
double* aLt2_1108;
double* aLt2_1109;
double* aLt2_1110;
// Group 2
double* aLt2_2111;
double* aLt2_2112;
double* aLt2_2113;
// Group 3
double* aLt2_3114;
double* aLt2_3115;
double* aLt2_3116;
};
/*
 * Allocates an argument record on the heap.
 *
 * The record is zero-filled so that slots not yet set through LoadArg_* do
 * not hold indeterminate values (all-bits-zero reads as a null pointer on
 * the usual platforms).
 *
 * Returns the new record, or NULL when the allocation fails. Release it
 * with DestroyArgRecord.
 */
struct ArgRecord* CreateArgRecord()
{
    return calloc(1, sizeof(struct ArgRecord));
}
// Releases the argument record itself with free().
// The arrays referenced by its fields are not touched here.
//   arg0: record to free; forwarded directly to free().
void DestroyArgRecord(struct ArgRecord* arg0)
{
free(arg0);
}
// Stores the given array pointer in the aLt2_0105 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg1: record to update (dereferenced unconditionally).
//   arg2: size argument; accepted but currently ignored.
//   arg3: pointer written to arg1->aLt2_0105.
void LoadArg_aLt2_0105(struct ArgRecord* arg1, int arg2,
double* arg3)
{
// In the future we could do something with the size argument.
arg1->aLt2_0105 = arg3;
}
// Stores the given array pointer in the aLt2_0106 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg4: record to update (dereferenced unconditionally).
//   arg5: size argument; accepted but currently ignored.
//   arg6: pointer written to arg4->aLt2_0106.
void LoadArg_aLt2_0106(struct ArgRecord* arg4, int arg5,
double* arg6)
{
// In the future we could do something with the size argument.
arg4->aLt2_0106 = arg6;
}
// Stores the given array pointer in the aLt2_0107 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg7: record to update (dereferenced unconditionally).
//   arg8: size argument; accepted but currently ignored.
//   arg9: pointer written to arg7->aLt2_0107.
void LoadArg_aLt2_0107(struct ArgRecord* arg7, int arg8,
double* arg9)
{
// In the future we could do something with the size argument.
arg7->aLt2_0107 = arg9;
}
// Stores the given array pointer in the aLt2_1108 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg10: record to update (dereferenced unconditionally).
//   arg11: size argument; accepted but currently ignored.
//   arg12: pointer written to arg10->aLt2_1108.
void LoadArg_aLt2_1108(struct ArgRecord* arg10, int arg11,
double* arg12)
{
// In the future we could do something with the size argument.
arg10->aLt2_1108 = arg12;
}
// Stores the given array pointer in the aLt2_1109 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg13: record to update (dereferenced unconditionally).
//   arg14: size argument; accepted but currently ignored.
//   arg15: pointer written to arg13->aLt2_1109.
void LoadArg_aLt2_1109(struct ArgRecord* arg13, int arg14,
double* arg15)
{
// In the future we could do something with the size argument.
arg13->aLt2_1109 = arg15;
}
// Stores the given array pointer in the aLt2_1110 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg16: record to update (dereferenced unconditionally).
//   arg17: size argument; accepted but currently ignored.
//   arg18: pointer written to arg16->aLt2_1110.
void LoadArg_aLt2_1110(struct ArgRecord* arg16, int arg17,
double* arg18)
{
// In the future we could do something with the size argument.
arg16->aLt2_1110 = arg18;
}
// Stores the given array pointer in the aLt2_2111 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg19: record to update (dereferenced unconditionally).
//   arg20: size argument; accepted but currently ignored.
//   arg21: pointer written to arg19->aLt2_2111.
void LoadArg_aLt2_2111(struct ArgRecord* arg19, int arg20,
double* arg21)
{
// In the future we could do something with the size argument.
arg19->aLt2_2111 = arg21;
}
// Stores the given array pointer in the aLt2_2112 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg22: record to update (dereferenced unconditionally).
//   arg23: size argument; accepted but currently ignored.
//   arg24: pointer written to arg22->aLt2_2112.
void LoadArg_aLt2_2112(struct ArgRecord* arg22, int arg23,
double* arg24)
{
// In the future we could do something with the size argument.
arg22->aLt2_2112 = arg24;
}
// Stores the given array pointer in the aLt2_2113 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg25: record to update (dereferenced unconditionally).
//   arg26: size argument; accepted but currently ignored.
//   arg27: pointer written to arg25->aLt2_2113.
void LoadArg_aLt2_2113(struct ArgRecord* arg25, int arg26,
double* arg27)
{
// In the future we could do something with the size argument.
arg25->aLt2_2113 = arg27;
}
// Stores the given array pointer in the aLt2_3114 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg28: record to update (dereferenced unconditionally).
//   arg29: size argument; accepted but currently ignored.
//   arg30: pointer written to arg28->aLt2_3114.
void LoadArg_aLt2_3114(struct ArgRecord* arg28, int arg29,
double* arg30)
{
// In the future we could do something with the size argument.
arg28->aLt2_3114 = arg30;
}
// Stores the given array pointer in the aLt2_3115 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg31: record to update (dereferenced unconditionally).
//   arg32: size argument; accepted but currently ignored.
//   arg33: pointer written to arg31->aLt2_3115.
void LoadArg_aLt2_3115(struct ArgRecord* arg31, int arg32,
double* arg33)
{
// In the future we could do something with the size argument.
arg31->aLt2_3115 = arg33;
}
// Stores the given array pointer in the aLt2_3116 slot of the record.
// Only the pointer is stored; nothing is copied.
//   arg34: record to update (dereferenced unconditionally).
//   arg35: size argument; accepted but currently ignored.
//   arg36: pointer written to arg34->aLt2_3116.
void LoadArg_aLt2_3116(struct ArgRecord* arg34, int arg35,
double* arg36)
{
// In the future we could do something with the size argument.
arg34->aLt2_3116 = arg36;
}
// Output record: three result array pointers, an int size field stored next
// to each, and one int shape value shared by all three results.
// The fields are exposed through the GetResult_* / GetResultSize_* getters.
struct ResultRecord
{ // These are all the progResults arrays output from the Acc computation:
double* tmp_0117;
int tmp_0117_size;
double* tmp_0118;
int tmp_0118_size;
double* tmp_0119;
int tmp_0119_size;
// These provide (original) shape information for all progResults:
int tmp_0_shape;
};
/*
 * Allocates a result record on the heap.
 *
 * The record is zero-filled so that its pointers and sizes do not hold
 * indeterminate values before they have been filled in (all-bits-zero reads
 * as a null pointer on the usual platforms).
 *
 * Returns the new record, or NULL when the allocation fails. Release it
 * with DestroyResultRecord.
 */
struct ResultRecord* CreateResultRecord()
{
    return calloc(1, sizeof(struct ResultRecord));
}
// Releases the result record with free().
//   arg37: record to free; forwarded directly to free().
// NOTE(review): only the record itself is freed in this function. The
// tmp_0117 / tmp_0118 / tmp_0119 arrays it points to are not released here,
// which does not match the comment below - confirm who owns those arrays.
void DestroyResultRecord(struct ResultRecord* arg37)
{
// In the CURRENT protocol, we free all results SIMULTANEOUSLY, here:
free(arg37);
}
// Returns the tmp_0117 array pointer stored in the result record (no copy).
//   arg38: record to read (dereferenced unconditionally).
double* GetResult_tmp_0117(struct ResultRecord* arg38)
{
return arg38->tmp_0117;
}
// Returns the tmp_0117_size field of the result record.
//   arg39: record to read (dereferenced unconditionally).
int GetResultSize_tmp_0117(struct ResultRecord* arg39)
{
return arg39->tmp_0117_size;
}
// Returns the tmp_0118 array pointer stored in the result record (no copy).
//   arg40: record to read (dereferenced unconditionally).
double* GetResult_tmp_0118(struct ResultRecord* arg40)
{
return arg40->tmp_0118;
}
// Returns the tmp_0118_size field of the result record.
//   arg41: record to read (dereferenced unconditionally).
int GetResultSize_tmp_0118(struct ResultRecord* arg41)
{
return arg41->tmp_0118_size;
}
// Returns the tmp_0119 array pointer stored in the result record (no copy).
//   arg42: record to read (dereferenced unconditionally).
double* GetResult_tmp_0119(struct ResultRecord* arg42)
{
return arg42->tmp_0119;
}
// Returns the tmp_0119_size field of the result record.
//   arg43: record to read (dereferenced unconditionally).
int GetResultSize_tmp_0119(struct ResultRecord* arg43)
{
return arg43->tmp_0119_size;
}
// Here we provide getters for the (scalar) shape results of the program:
int GetResult_tmp_0_shape(struct ResultRecord* arg44)
{
return arg44->tmp_0_shape;
}
// Executes the whole generated program.
//
// argsRec    - supplies the twelve 'Use'd input arrays (aLt2_0105 .. aLt2_3116).
// resultsRec - receives the three output arrays, their element counts and the
//              shape value.
//
// Four reduction kernels (build_evt272 .. build_evt275) each write three
// intermediate arrays; build_evt276 then receives the three final output
// arrays together with all twelve intermediates (presumably concatenating
// them -- its body is not visible here).
//
// The three output arrays are malloc'ed here and handed over via resultsRec;
// they are not freed by this function. All intermediates are freed before
// returning.
//
// If any allocation fails, no kernel is run, everything allocated so far is
// released, a message is written to stderr, and resultsRec is filled with
// NULL arrays, zero sizes and a zero shape.
void MainProg(struct ArgRecord* argsRec,
              struct ResultRecord* resultsRec)
{
    // Sizes baked in by the code generator for this program instance.
    enum
    {
        FOLD_IN_SIZE   = 14062500,                   // 1st argument of each reduction kernel
        FOLD_STRIDE    = 3750,                       // 2nd argument of each reduction kernel
        FOLD_OUT_SIZE  = FOLD_IN_SIZE / FOLD_STRIDE, // doubles per intermediate array
        RESULT_SIZE    = 15000,                      // doubles per final output array
        NUM_FOLDS      = 4,
        NUM_COMPONENTS = 3
    };

    double* part[NUM_FOLDS][NUM_COMPONENTS] = {{NULL}};
    double* out[NUM_COMPONENTS] = {NULL};
    int allocated = 1;

    // Allocate all output space for the reduction operations up front, so a
    // failure is detected before any kernel touches a NULL pointer.
    for (int f = 0; f < NUM_FOLDS; f++)
    {
        for (int c = 0; c < NUM_COMPONENTS; c++)
        {
            part[f][c] = malloc(sizeof(double) * FOLD_OUT_SIZE);
            if (part[f][c] == NULL)
            {
                allocated = 0;
            }
        }
    }
    for (int c = 0; c < NUM_COMPONENTS; c++)
    {
        out[c] = malloc(sizeof(double) * RESULT_SIZE);
        if (out[c] == NULL)
        {
            allocated = 0;
        }
    }

    if (!allocated)
    {
        fprintf(stderr, "MainProg: out of memory\n");
        for (int c = 0; c < NUM_COMPONENTS; c++)
        {
            free(out[c]);
            out[c] = NULL;
        }
    }
    else
    {
        // EXECUTE the program by running each array op in order.
        // 'Use'd arrays are already available in the arguments record.
        // Each reduction starts from the accumulator (0.0, 0.0, 0.0).
        build_evt272(FOLD_IN_SIZE, (int64_t)FOLD_STRIDE,
                     part[0][0], part[0][1], part[0][2],
                     0.0, 0.0, 0.0,
                     argsRec->aLt2_0105, argsRec->aLt2_0106, argsRec->aLt2_0107);
        build_evt273(FOLD_IN_SIZE, (int64_t)FOLD_STRIDE,
                     part[1][0], part[1][1], part[1][2],
                     0.0, 0.0, 0.0,
                     argsRec->aLt2_1108, argsRec->aLt2_1109, argsRec->aLt2_1110);
        build_evt274(FOLD_IN_SIZE, (int64_t)FOLD_STRIDE,
                     part[2][0], part[2][1], part[2][2],
                     0.0, 0.0, 0.0,
                     argsRec->aLt2_2111, argsRec->aLt2_2112, argsRec->aLt2_2113);
        build_evt275(FOLD_IN_SIZE, (int64_t)FOLD_STRIDE,
                     part[3][0], part[3][1], part[3][2],
                     0.0, 0.0, 0.0,
                     argsRec->aLt2_3114, argsRec->aLt2_3115, argsRec->aLt2_3116);

        build_evt276(RESULT_SIZE, out[0], out[1], out[2],
                     part[0][0], part[0][1], part[0][2],
                     part[1][0], part[1][1], part[1][2],
                     part[2][0], part[2][1], part[2][2],
                     part[3][0], part[3][1], part[3][2]);
    }

    // We write the final output to the results record:
    resultsRec->tmp_0117 = out[0];
    resultsRec->tmp_0117_size = allocated ? RESULT_SIZE : 0;
    resultsRec->tmp_0118 = out[1];
    resultsRec->tmp_0118_size = allocated ? RESULT_SIZE : 0;
    resultsRec->tmp_0119 = out[2];
    resultsRec->tmp_0119_size = allocated ? RESULT_SIZE : 0;
    resultsRec->tmp_0_shape = allocated ? RESULT_SIZE : 0;

    // Finally, we free all arrays that are NOT either inputs or outputs:
    for (int f = 0; f < NUM_FOLDS; f++)
    {
        for (int c = 0; c < NUM_COMPONENTS; c++)
        {
            free(part[f][c]);
        }
    }
}
GPUProg {progBinds = [GPUProgBind {evtid = evt136,
evtdeps = [],
outarrs = [(aLt2_057, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [0.204377359711,-0.124071846716,9.23475595191e-2,0.855260848999,-0.355440662242]},
GPUProgBind {evtid = evt137,
evtdeps = [],
outarrs = [(aLt2_058, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [0.58752346877,-0.430925352499,0.471875966527,0.17408952117,0.29258855246]},
GPUProgBind {evtid = evt138,
evtdeps = [],
outarrs = [(aLt2_059, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [0.466465813108,0.818155869842,-0.390485706739,-5.50553835928e-2,5.77864721417e-2]},
GPUProgBind {evtid = evt139,
evtdeps = [],
outarrs = [(aLt2_160, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [-0.699805021286,-0.225843770429,4.79635689408e-2,-0.108046301641,0.881816339679]},
GPUProgBind {evtid = evt140,
evtdeps = [],
outarrs = [(aLt2_161, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [-0.290639472194,-0.214849968441,0.789017047733,-0.568562230095,-0.122657088563]},
GPUProgBind {evtid = evt141,
evtdeps = [],
outarrs = [(aLt2_162, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = Use AccArray [5] [-0.194938545115,-0.948073166423,0.361016079783,-0.264976296574,-0.182035161182]},
GPUProgBind {evtid = evt144,
evtdeps = [],
outarrs = [(tmp_0_1266, Global, TArray 1 TDouble),
(tmp_0_1267, Global, TArray 1 TDouble),
(tmp_0_1268, Global, TArray 1 TDouble)],
decor = FreeVars [aLt2_057,aLt2_058,aLt2_059],
op = GenReduce {reducer = Lam [(v0111, Default, TDouble),
(v0112, Default, TDouble),
(v0113, Default, TDouble),
(v1114, Default, TDouble),
(v1115, Default, TDouble),
(v1116, Default, TDouble)]
(ScalarBlock [(gensym_117,
Default,
TDouble),
(gensym_118,
Default,
TDouble),
(gensym_119,
Default,
TDouble)]
[gensym_117,
gensym_118,
gensym_119]
[SSet gensym_117
(EPrimApp TDouble
(NP Add)
[EVr v0111,
EVr v1114]),
SSet gensym_118
(EPrimApp TDouble
(NP Add)
[EVr v0112,
EVr v1115]),
SSet gensym_119
(EPrimApp TDouble
(NP Add)
[EVr v0113,
EVr v1116])]),
generator = NonManifest (Gen (TrivConst 25)
(Lam [(flatidx23,
Default,
TInt)]
(ScalarBlock [(gensym_102,
Default,
TDouble),
(gensym_103,
Default,
TDouble),
(gensym_104,
Default,
TDouble),
(gensym_28,
Default,
TInt),
(gensym_29,
Default,
TInt),
(gensym_30,
Default,
TInt),
(gensym_78,
Default,
TInt),
(gensym_79,
Default,
TInt),
(gensym_80,
Default,
TInt),
(gensym_81,
Default,
TInt),
(gensym_33,
Default,
TInt),
(gensym_82,
Default,
TDouble),
(gensym_83,
Default,
TDouble),
(gensym_84,
Default,
TDouble),
(gensym_85,
Default,
TInt),
(gensym_86,
Default,
TInt),
(gensym_35,
Default,
TInt),
(gensym_87,
Default,
TDouble),
(gensym_88,
Default,
TDouble),
(gensym_89,
Default,
TDouble),
(e5,
Default,
TDouble),
(e6,
Default,
TDouble),
(e7,
Default,
TDouble),
(e8,
Default,
TDouble),
(e9,
Default,
TDouble),
(e10,
Default,
TDouble),
(e11,
Default,
TDouble),
(e12,
Default,
TDouble),
(e13,
Default,
TDouble),
(e14,
Default,
TDouble),
(e15,
Default,
TDouble),
(e16,
Default,
TDouble)]
[gensym_102,
gensym_103,
gensym_104]
[SSet gensym_28
(EPrimApp TInt
(IP Quot)
[EVr flatidx23,
EConst (I 5)]),
SSet gensym_29
(EPrimApp TInt
(IP Rem)
[EVr flatidx23,
EConst (I 5)]),
SSet gensym_30
(EConst (I 0)),
SSet gensym_78
(EVr gensym_29),
SSet gensym_79
(EVr gensym_28),
SSet gensym_80
(EVr gensym_78),
SSet gensym_81
(EVr gensym_79),
SSet gensym_33
(EVr gensym_81),
SSet gensym_82
(EIndexScalar aLt2_057
(EVr gensym_33)),
SSet gensym_83
(EIndexScalar aLt2_058
(EVr gensym_33)),
SSet gensym_84
(EIndexScalar aLt2_059
(EVr gensym_33)),
SSet gensym_85
(EVr gensym_78),
SSet gensym_86
(EVr gensym_79),
SSet gensym_35
(EVr gensym_85),
SSet gensym_87
(EIndexScalar aLt2_057
(EVr gensym_35)),
SSet gensym_88
(EIndexScalar aLt2_058
(EVr gensym_35)),
SSet gensym_89
(EIndexScalar aLt2_059
(EVr gensym_35)),
SSet e5
(EVr gensym_88),
SSet e6
(EVr gensym_83),
SSet e7
(EVr gensym_89),
SSet e8
(EVr gensym_84),
SSet e9
(EVr gensym_82),
SSet e10
(EVr gensym_87),
SCond (EPrimApp TBool
(BP Not)
[EPrimApp TBool
(BP And)
[EPrimApp TBool
(SP Eq)
[EVr e9,
EVr e10],
EPrimApp TBool
(BP And)
[EPrimApp TBool
(SP Eq)
[EVr e6,
EVr e5],
EPrimApp TBool
(SP Eq)
[EVr e8,
EVr e7]]]])
[SSet e11
(EPrimApp TDouble
(NP Sub)
[EVr e7,
EVr e8]),
SSet e12
(EPrimApp TDouble
(NP Sub)
[EVr e10,
EVr e9]),
SSet e13
(EPrimApp TDouble
(NP Sub)
[EVr e5,
EVr e6]),
SSet e14
(EPrimApp TDouble
(NP Add)
[EPrimApp TDouble
(NP Add)
[EPrimApp TDouble
(NP Mul)
[EVr e12,
EVr e12],
EPrimApp TDouble
(NP Mul)
[EVr e13,
EVr e13]],
EPrimApp TDouble
(NP Mul)
[EVr e11,
EVr e11]]),
SSet e15
(EPrimApp TDouble
(FP Sqrt)
[EVr e14]),
SSet e16
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EConst (D 1.0),
EConst (D 1.0)],
EVr e14]),
SSet gensym_102
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e12],
EVr e15]),
SSet gensym_103
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e13],
EVr e15]),
SSet gensym_104
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e11],
EVr e15])]
[SSet gensym_102
(EConst (D 0.0)),
SSet gensym_103
(EConst (D 0.0)),
SSet gensym_104
(EConst (D 0.0))]]))),
variant = Fold (ScalarBlock [(gensym_108,
Default,
TDouble),
(gensym_109,
Default,
TDouble),
(gensym_110,
Default,
TDouble)]
[gensym_108,
gensym_109,
gensym_110]
[SSet gensym_108
(EConst (D 0.0)),
SSet gensym_109
(EConst (D 0.0)),
SSet gensym_110
(EConst (D 0.0))]),
stride = StrideConst (EConst (I 5))}},
GPUProgBind {evtid = evt145,
evtdeps = [],
outarrs = [(tmp_0_1369, Global, TArray 1 TDouble),
(tmp_0_1370, Global, TArray 1 TDouble),
(tmp_0_1371, Global, TArray 1 TDouble)],
decor = FreeVars [aLt2_160,aLt2_161,aLt2_162],
op = GenReduce {reducer = Lam [(v0123, Default, TDouble),
(v0124, Default, TDouble),
(v0125, Default, TDouble),
(v1126, Default, TDouble),
(v1127, Default, TDouble),
(v1128, Default, TDouble)]
(ScalarBlock [(gensym_129,
Default,
TDouble),
(gensym_130,
Default,
TDouble),
(gensym_131,
Default,
TDouble)]
[gensym_129,
gensym_130,
gensym_131]
[SSet gensym_129
(EPrimApp TDouble
(NP Add)
[EVr v0123,
EVr v1126]),
SSet gensym_130
(EPrimApp TDouble
(NP Add)
[EVr v0124,
EVr v1127]),
SSet gensym_131
(EPrimApp TDouble
(NP Add)
[EVr v0125,
EVr v1128])]),
generator = NonManifest (Gen (TrivConst 25)
(Lam [(flatidx36,
Default,
TInt)]
(ScalarBlock [(gensym_105,
Default,
TDouble),
(gensym_106,
Default,
TDouble),
(gensym_107,
Default,
TDouble),
(gensym_41,
Default,
TInt),
(gensym_42,
Default,
TInt),
(gensym_43,
Default,
TInt),
(gensym_90,
Default,
TInt),
(gensym_91,
Default,
TInt),
(gensym_92,
Default,
TInt),
(gensym_93,
Default,
TInt),
(gensym_46,
Default,
TInt),
(gensym_94,
Default,
TDouble),
(gensym_95,
Default,
TDouble),
(gensym_96,
Default,
TDouble),
(gensym_97,
Default,
TInt),
(gensym_98,
Default,
TInt),
(gensym_48,
Default,
TInt),
(gensym_99,
Default,
TDouble),
(gensym_100,
Default,
TDouble),
(gensym_101,
Default,
TDouble),
(e5,
Default,
TDouble),
(e6,
Default,
TDouble),
(e7,
Default,
TDouble),
(e8,
Default,
TDouble),
(e9,
Default,
TDouble),
(e10,
Default,
TDouble),
(e11,
Default,
TDouble),
(e12,
Default,
TDouble),
(e13,
Default,
TDouble),
(e14,
Default,
TDouble),
(e15,
Default,
TDouble),
(e16,
Default,
TDouble)]
[gensym_105,
gensym_106,
gensym_107]
[SSet gensym_41
(EPrimApp TInt
(IP Quot)
[EVr flatidx36,
EConst (I 5)]),
SSet gensym_42
(EPrimApp TInt
(IP Rem)
[EVr flatidx36,
EConst (I 5)]),
SSet gensym_43
(EConst (I 0)),
SSet gensym_90
(EVr gensym_42),
SSet gensym_91
(EVr gensym_41),
SSet gensym_92
(EVr gensym_90),
SSet gensym_93
(EVr gensym_91),
SSet gensym_46
(EVr gensym_93),
SSet gensym_94
(EIndexScalar aLt2_160
(EVr gensym_46)),
SSet gensym_95
(EIndexScalar aLt2_161
(EVr gensym_46)),
SSet gensym_96
(EIndexScalar aLt2_162
(EVr gensym_46)),
SSet gensym_97
(EVr gensym_90),
SSet gensym_98
(EVr gensym_91),
SSet gensym_48
(EVr gensym_97),
SSet gensym_99
(EIndexScalar aLt2_160
(EVr gensym_48)),
SSet gensym_100
(EIndexScalar aLt2_161
(EVr gensym_48)),
SSet gensym_101
(EIndexScalar aLt2_162
(EVr gensym_48)),
SSet e5
(EVr gensym_100),
SSet e6
(EVr gensym_95),
SSet e7
(EVr gensym_101),
SSet e8
(EVr gensym_96),
SSet e9
(EVr gensym_94),
SSet e10
(EVr gensym_99),
SCond (EPrimApp TBool
(BP Not)
[EPrimApp TBool
(BP And)
[EPrimApp TBool
(SP Eq)
[EVr e9,
EVr e10],
EPrimApp TBool
(BP And)
[EPrimApp TBool
(SP Eq)
[EVr e6,
EVr e5],
EPrimApp TBool
(SP Eq)
[EVr e8,
EVr e7]]]])
[SSet e11
(EPrimApp TDouble
(NP Sub)
[EVr e7,
EVr e8]),
SSet e12
(EPrimApp TDouble
(NP Sub)
[EVr e10,
EVr e9]),
SSet e13
(EPrimApp TDouble
(NP Sub)
[EVr e5,
EVr e6]),
SSet e14
(EPrimApp TDouble
(NP Add)
[EPrimApp TDouble
(NP Add)
[EPrimApp TDouble
(NP Mul)
[EVr e12,
EVr e12],
EPrimApp TDouble
(NP Mul)
[EVr e13,
EVr e13]],
EPrimApp TDouble
(NP Mul)
[EVr e11,
EVr e11]]),
SSet e15
(EPrimApp TDouble
(FP Sqrt)
[EVr e14]),
SSet e16
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EConst (D 1.0),
EConst (D 1.0)],
EVr e14]),
SSet gensym_105
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e12],
EVr e15]),
SSet gensym_106
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e13],
EVr e15]),
SSet gensym_107
(EPrimApp TDouble
(FP FDiv)
[EPrimApp TDouble
(NP Mul)
[EVr e16,
EVr e11],
EVr e15])]
[SSet gensym_105
(EConst (D 0.0)),
SSet gensym_106
(EConst (D 0.0)),
SSet gensym_107
(EConst (D 0.0))]]))),
variant = Fold (ScalarBlock [(gensym_120,
Default,
TDouble),
(gensym_121,
Default,
TDouble),
(gensym_122,
Default,
TDouble)]
[gensym_120,
gensym_121,
gensym_122]
[SSet gensym_120
(EConst (D 0.0)),
SSet gensym_121
(EConst (D 0.0)),
SSet gensym_122
(EConst (D 0.0))]),
stride = StrideConst (EConst (I 5))}},
GPUProgBind {evtid = evtNew148,
evtdeps = [],
outarrs = [(tmp_063, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = NewArray (EConst (I 10))},
GPUProgBind {evtid = evtNew149,
evtdeps = [],
outarrs = [(tmp_064, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = NewArray (EConst (I 10))},
GPUProgBind {evtid = evtNew150,
evtdeps = [],
outarrs = [(tmp_065, Global, TArray 1 TDouble)],
decor = FreeVars [],
op = NewArray (EConst (I 10))},
GPUProgBind {evtid = evt146,
evtdeps = [evtNew148,evtNew149,evtNew150,evt144,evt144,evt144,
evt145,evt145,evt145],
outarrs = [],
decor = FreeVars [],
op = Kernel {kerniters = [(flatidx49, EConst (I 10))],
kernbod = Lam [(tmp_063, Global, TArray 1 TDouble),
(tmp_064, Global, TArray 1 TDouble),
(tmp_065, Global, TArray 1 TDouble),
(tmp_0_1266,
Global,
TArray 1 TDouble),
(tmp_0_1267,
Global,
TArray 1 TDouble),
(tmp_0_1268,
Global,
TArray 1 TDouble),
(tmp_0_1369,
Global,
TArray 1 TDouble),
(tmp_0_1370,
Global,
TArray 1 TDouble),
(tmp_0_1371,
Global,
TArray 1 TDouble)]
(ScalarBlock [(gensym_132,
Default,
TDouble),
(gensym_133,
Default,
TDouble),
(gensym_134,
Default,
TDouble),
(gensym_52,
Default,
TInt),
(gensym_53,
Default,
TInt),
(gensym_56,
Default,
TInt)]
[]
[SSet gensym_52
(EConst (I 0)),
SSet gensym_53
(EVr flatidx49),
SCond (EPrimApp TBool
(SP Lt)
[EVr gensym_53,
EConst (I 5)])
[SSet gensym_132
(EIndexScalar tmp_0_1266
(EVr gensym_53)),
SSet gensym_133
(EIndexScalar tmp_0_1267
(EVr gensym_53)),
SSet gensym_134
(EIndexScalar tmp_0_1268
(EVr gensym_53))]
[SSet gensym_56
(EPrimApp TInt
(NP Sub)
[EVr gensym_53,
EConst (I 5)]),
SSet gensym_132
(EIndexScalar tmp_0_1369
(EVr gensym_56)),
SSet gensym_133
(EIndexScalar tmp_0_1370
(EVr gensym_56)),
SSet gensym_134
(EIndexScalar tmp_0_1371
(EVr gensym_56))],
SArrSet tmp_063
(EVr flatidx49)
(EVr gensym_132),
SArrSet tmp_064
(EVr flatidx49)
(EVr gensym_133),
SArrSet tmp_065
(EVr flatidx49)
(EVr gensym_134)]),
kernargs = [EVr tmp_063,EVr tmp_064,EVr tmp_065,
EVr tmp_0_1266,EVr tmp_0_1267,
EVr tmp_0_1268,EVr tmp_0_1369,
EVr tmp_0_1370,EVr tmp_0_1371]}},
GPUProgBind {evtid = evt147,
evtdeps = [],
outarrs = [(tmp_0_shape, Default, TInt)],
decor = FreeVars [],
op = ScalarCode (ScalarBlock [(gensym_135, Default, TInt)]
[gensym_135]
[SSet gensym_135
(EConst (I 10))])}],
progResults = [(tmp_063, [tmp_0_shape]),(tmp_064, [tmp_0_shape]),
(tmp_065, [tmp_0_shape])],
uniqueCounter = 151,
progType = TArray 1 (TTuple [TDouble,TDouble,TDouble]),
sizeEnv = [(aLt2_057, (TDouble, TrivConst 5)),
(aLt2_058, (TDouble, TrivConst 5)),
(aLt2_059, (TDouble, TrivConst 5)),
(aLt2_160, (TDouble, TrivConst 5)),
(aLt2_161, (TDouble, TrivConst 5)),
(aLt2_162, (TDouble, TrivConst 5)),
(tmp_063, (TDouble, TrivConst 10)),
(tmp_064, (TDouble, TrivConst 10)),
(tmp_065, (TDouble, TrivConst 10)),
(tmp_0_1266, (TDouble, TrivConst 5)),
(tmp_0_1267, (TDouble, TrivConst 5)),
(tmp_0_1268, (TDouble, TrivConst 5)),
(tmp_0_1369, (TDouble, TrivConst 5)),
(tmp_0_1370, (TDouble, TrivConst 5)),
(tmp_0_1371, (TDouble, TrivConst 5)),
(tmp_1_1072, (TDouble, TrivConst 25)),
(tmp_1_1073, (TDouble, TrivConst 25)),
(tmp_1_1074, (TDouble, TrivConst 25)),
(tmp_1_975, (TDouble, TrivConst 25)),
(tmp_1_976, (TDouble, TrivConst 25)),
(tmp_1_977, (TDouble, TrivConst 25))],
lastwriteTable = [(aLt2_057, evt136),(aLt2_058, evt137),
(aLt2_059, evt138),(aLt2_160, evt139),(aLt2_161, evt140),
(aLt2_162, evt141),(tmp_1_975, evt142),(tmp_1_976, evt142),
(tmp_1_977, evt142),(tmp_1_1072, evt143),(tmp_1_1073, evt143),
(tmp_1_1074, evt143),(tmp_0_1266, evt144),(tmp_0_1267, evt144),
(tmp_0_1268, evt144),(tmp_0_1369, evt145),(tmp_0_1370, evt145),
(tmp_0_1371, evt145),(tmp_063, evt146),(tmp_064, evt146),
(tmp_065, evt146),(tmp_0_shape, evt147)]}
fission2.c(62): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission2.c(19): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission2.c(177): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission2.c(134): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission2.c(277): (col. 5) remark: loop was not vectorized: existence of vector dependence.
fission2.c(412): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission2.c(412): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission2.c(425): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission2.c(425): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission2.c(431): (col. 5) remark: loop was not vectorized: vectorization possible but seems inefficient.
fission4.c(62): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(19): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(177): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(134): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(292): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(249): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(407): (col. 38) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(364): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(535): (col. 5) remark: loop was not vectorized: existence of vector dependence.
fission4.c(726): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(726): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(739): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(739): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(752): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(752): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(765): (col. 5) remark: loop was not vectorized: operation cannot be vectorized.
fission4.c(765): (col. 5) remark: loop was not vectorized: unsupported loop structure.
fission4.c(771): (col. 5) remark: loop was not vectorized: vectorization possible but seems inefficient.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment