void *main_graph(float **a1, __int64 a2, float *a3)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]
  // PART IS OMITTED
  // Tanh node
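  // the intrinsic soup below appears to be the usual clamped rational approximation of tanh:
  // clamp x to roughly +/-7.999 (0x40FFF644), form x2 = x*x, run two fmaf chains for a
  // numerator p(x2) and a denominator q(x2), take x * p(x2) / q(x2), and for very small |x|
  // (threshold 970045207 interpreted as float bits, about 4e-4) just return x itself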
  v296 = ALIGN(malloc(0x64010uLL));
  for ( i25 = 0LL; i25 < 64; ++i25 )
  {
    for ( i26 = 0LL; i26 < 16; ++i26 )
    {
      for ( i27 = 0LL; i27 < 10; ++i27 )
      {
        for ( i28 = 0LL; i28 < 10; i28 = v53 + 1 )
        {
          v46 = (__m128)LODWORD(Tanh_16[1600 * i25 + 100 * i26 + 10 * i27 + i28]);
          v47 = _mm_cmpgt_ss((__m128)0x40FFF644u, v46);
          v48 = _mm_or_ps(_mm_andnot_ps(v47, (__m128)0x40FFF644u), _mm_and_ps(v47, v46));
          v49 = _mm_cmpgt_ss(v48, (__m128)0xC0FFF644);
          v50 = _mm_or_ps(_mm_andnot_ps(v49, (__m128)0xC0FFF644), _mm_and_ps(v49, v48));
          v287 = v50;
          v288 = _mm_and_si128((__m128i)v46, (__m128i)xmmword_55007040).m128i_u32[0];
          v50.m128_f32[0] = v50.m128_f32[0] * v50.m128_f32[0];
          *(__m128 *)v285 = v50;
          v46.m128_i32[0] = fmaf(v50.m128_f32[0], -2.7607684e-16, 2.0001879e-13);
          v46.m128_i32[0] = fmaf(v285[0], v46.m128_f32[0], -8.6046718e-11);
          v46.m128_i32[0] = fmaf(v285[0], v46.m128_f32[0], 0.000000051222973);
          v50.m128_f32[0] = fmaf(v285[0], v46.m128_f32[0], 0.000014857224);
          v50.m128_f32[0] = fmaf(v285[0], v50.m128_f32[0], 0.00063726195);
          v50.m128_f32[0] = fmaf(v285[0], v50.m128_f32[0], 0.0048935246);
          v51 = v287;
          v51.m128_f32[0] = v287.m128_f32[0] * v50.m128_f32[0];
          v286 = v51;
          v50.m128_f32[0] = fmaf(v285[0], 0.0000011982584, 0.00011853471);
          v50.m128_f32[0] = fmaf(v285[0], v50.m128_f32[0], 0.0022684347);
          v50.m128_f32[0] = fmaf(v285[0], v50.m128_f32[0], 0.004893525);
          v52 = v286;
          v53 = i28;
          v52.m128_f32[0] = v286.m128_f32[0] / v50.m128_f32[0];
          v51.m128_i32[0] = 970045207;
          v54 = _mm_cmplt_ss((__m128)v288, v51);
          LODWORD(Pad_17[1600 * i25 + 100 * i26 + 10 * i27 + i28]) = _mm_andnot_ps(v54, v52).m128_u32[0] | v54.m128_i32[0] & v287.m128_i32[0];
        }
      }
    }
  }
  // PART IS OMITTED
  float *v208 = ALIGN(malloc(0x20uLL));
  v208[0] = constant_8[0];                   // 0x40
  v208[1] = constant_9[0];                   // -1
  dim0 = v208[0];
  dim1 = v208[1];
  if ( !*v208 )                              // if(dim0 == 0) dim0 = 64;
    dim0 = 64LL;
  if ( dim0 == -1 )                          // if(dim0 == -1) dim0 = 1;
    dim0 = 1LL;
  if ( !dim1 )                               // if(dim1 == 0) dim1 = 120;
    dim1 = 120LL;
  if ( dim1 == -1 )                          // if(dim1 == -1) dim1 = 1; // we're here
    dim1 = 1LL;
  num_elems = dim1 * dim0;
  second_dim = dim0;                         // just set to the constant 64
  remaining = 0x1E00 / (__int128)num_elems;  // (120*64) / num_elements
  first_dim = dim1;
  if ( dim0 == -1 )
    second_dim = remaining;
  if ( dim1 == -1 )
    first_dim = remaining;                   // is now 120 (as 120*64//64 == 120)
  // first dim = 120, second dim = 64
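  // i.e. standard ONNX Reshape shape inference: a 0 in the target shape keeps the
  // corresponding input dim, and a -1 is inferred so the total element count (120*64) is preserved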
  // the operation right before the first custom operation
  // => generates the input for custom operation in customop_1_29
  // => gets input from the node before (which is the reshape node)
  float *customop_1_29 = ALIGN(malloc(336 * second_dim + 128));
  float tmp_float;
  float *tmp_float_ptr = &tmp_float;
  for ( i57 = 0LL; i57 < second_dim; ++i57 )
  {
    for ( i58 = 0LL; i58 < 84; ++i58 )
    {
      *tmp_float_ptr = 0.0;
      for ( i59 = 0LL; i59 < first_dim; ++i59 )
        *tmp_float_ptr = Reshape_22[i59 + first_dim * i57] * constant_10[120 * i58 + i59] + *tmp_float_ptr;
      customop_1_29[84 * i57 + i58] = *tmp_float_ptr + constant_11[i58];
    }
  }
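  // the loop above is just a Gemm / fully connected layer:
  // customop_1_29[64][84] = Reshape_22[64][120] x constant_10^T[120][84] + constant_11[84]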
  // begin of first custom operation
  float *cos_table = ALIGN(malloc(336 * second_dim + 16));
  for ( i60 = 0LL; i60 < second_dim; ++i60 )
  {
    for ( i61 = 0LL; i61 < 84; ++i61 )
    {
      cos_table[84 * i60 + i61] = cosf(customop_1_29[84 * i60 + i61]);
    }
  }
  float *sin_table = ALIGN(malloc(336 * second_dim + 16));
  for ( i62 = 0LL; i62 < second_dim; ++i62 )
  {
    for ( i63 = 0LL; i63 < 84; ++i63 )
    {
      sin_table[84 * i62 + i63] = sinf(customop_1_29[84 * i62 + i63]);
    }
  }
  float *customop_2_30 = ALIGN(malloc(336 * second_dim + 16));  // input buffer for next custom operation
  for ( i64 = 0LL; i64 < second_dim; ++i64 )
  {
    for ( i65 = 0LL; i65 < 84; ++i65 )
    {
      customop_2_30[84 * i64 + i65] = cos_table[84 * i64 + i65] + sin_table[84 * i64 + i65];
    }
  }
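  // => the first custom op is elementwise customop_2_30 = cos(x) + sin(x)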
  // begin second custom operation
  float *customop_2_30_tanh = ALIGN(malloc(336 * second_dim + 16));
  for ( i66 = 0LL; i66 < second_dim; ++i66 )  // kinda feels familiar :)
  {
    // same tanh kernel as the Tanh node above; the variable renaming had collapsed a couple of
    // temporaries here, so v163/v164 are reconstructed to mirror v287/v286 from that block
    for ( i67 = 0LL; i67 < 84; ++i67 )
    {
      v108 = (__m128)LODWORD(customop_2_30[84 * i66 + i67]);
      v109 = _mm_cmpgt_ss((__m128)0x40FFF644u, v108);
      v110 = _mm_or_ps(_mm_andnot_ps(v109, (__m128)0x40FFF644u), _mm_and_ps(v109, v108));
      v111 = _mm_cmpgt_ss(v110, (__m128)0xC0FFF644);
      v112 = _mm_or_ps(_mm_andnot_ps(v111, (__m128)0xC0FFF644), _mm_and_ps(v111, v110));
      v163 = v112;                            // clamped input (the v287 of the block above)
      v165 = _mm_and_si128((__m128i)v108, (__m128i)xmmword_55007040).m128i_u32[0];
      v112.m128_f32[0] = v112.m128_f32[0] * v112.m128_f32[0];
      *(__m128 *)v162 = v112;
      v108.m128_i32[0] = fmaf(v112.m128_f32[0], -2.7607684e-16, 2.0001879e-13);
      v108.m128_i32[0] = fmaf(v162[0], v108.m128_f32[0], -8.6046718e-11);
      v108.m128_i32[0] = fmaf(v162[0], v108.m128_f32[0], 0.000000051222973);
      v112.m128_f32[0] = fmaf(v162[0], v108.m128_f32[0], 0.000014857224);
      v112.m128_f32[0] = fmaf(v162[0], v112.m128_f32[0], 0.00063726195);
      v112.m128_f32[0] = fmaf(v162[0], v112.m128_f32[0], 0.0048935246);
      v164 = v163;
      v164.m128_f32[0] = v163.m128_f32[0] * v112.m128_f32[0];   // numerator (the v286 of the block above)
      v112.m128_f32[0] = fmaf(v162[0], 0.0000011982584, 0.00011853471);
      v112.m128_f32[0] = fmaf(v162[0], v112.m128_f32[0], 0.0022684347);
      v112.m128_f32[0] = fmaf(v162[0], v112.m128_f32[0], 0.004893525);
      v164.m128_f32[0] = v164.m128_f32[0] / v112.m128_f32[0];
      v112.m128_i32[0] = 970045207;
      v116 = _mm_cmplt_ss((__m128)v165, v112);
      customop_2_30_tanh[84 * i66 + i67] =
        _mm_andnot_ps(v116, v164).m128_u32[0] | v116.m128_i32[0] & v163.m128_i32[0];
    }
  }
  float *customop_2_31 = ALIGN(malloc(336 * second_dim + 16));  // input buffer for next custom operation
  for ( i68 = 0LL; i68 < second_dim; ++i68 )
  {
    for ( i69 = 0LL; i69 < 84; ++i69 )
    {
      customop_2_31[84 * i68 + i69] = customop_2_30[84 * i68 + i69] - customop_2_30_tanh[84 * i68 + i69];
    }
  }
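  // => the second custom op is elementwise customop_2_31 = x - tanh(x)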
  // begin third custom operation
  gemm_inp = ALIGN(malloc(336 * second_dim + 16));  // input for last operation (not custom)
  for ( i70 = 0LL; i70 < second_dim; ++i70 )
  {
    for ( i71 = 0LL; i71 < 84; i71++ )
    {
      elem = customop_2_31[84 * i70 + i71];
      mask = _mm_cmplt_ss(elem, 0);               // elem < 0 ? 0xffffffff : 0
      gemm_inp[84 * i70 + i71] =
        _mm_andnot_ps(mask, elem).m128_u32[0] |   // ~mask & elem
        mask.m128_i32[0] & COERCE_UNSIGNED_INT(customop_2_31[84 * i70 + i71] * 0.0099999998);  // mask & (elem / 100.0)
    }
  }
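  // => the third custom op is a LeakyReLU with slope 0.01: x >= 0 ? x : 0.01 * x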
  float *unaligned_output = malloc(40 * second_dim + 128);
  float *output = ALIGN(unaligned_output);
  for ( i72 = 0LL; i72 < second_dim; ++i72 )  // GEMM
  {
    for ( i73 = 0LL; i73 < 10; ++i73 )
    {
      *tmp_float_ptr = 0.0;
      for ( i74 = 0LL; i74 < 84; ++i74 )
        *tmp_float_ptr = (gemm_inp[84 * i72 + i74] * constant_12[84 * i73 + i74]) + *tmp_float_ptr;
      output[10 * i72 + i73] = *tmp_float_ptr + constant_13[i73];
    }
  }
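  // final Gemm: output[64][10] = gemm_inp[64][84] x constant_12^T[84][10] + constant_13[10]
  // the struct returned below looks like a rank-2 MemRef descriptor (onnx-mlir style):
  // { allocated ptr, aligned ptr, offset = 0, sizes = {second_dim, 10}, strides = {10, 1} }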
  // returns some kind of struct
  return struct data {
    unaligned_output,
    output,
    0,
    second_dim,
    10,
    10,
    1
  };
}