Created
July 9, 2018 15:48
-
-
Save ra1u/719fd7688abb0f0f57817bdd5a1828b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Board geometry: planes are kWidth x kHeight floats, stored row-major.
const uniform size_t kWidth = 8;
const uniform size_t kHeight = 8;
// Number of floats in one plane.
const uniform size_t kSquares = kWidth * kHeight;

// Tiles along one side of the board; tiles advance by two squares.
const uniform size_t kWtiles = 4;  // (kWidth + 1) / 2
// Tiles per plane (kWtiles in each direction).
const uniform size_t kTiles = kWtiles * kWtiles;  // 16

// Side length of the square input patch read for every tile.
const uniform size_t kWinogradAlpha = 4;
// Number of transformed values produced per tile.
const uniform size_t kWinogradTile = kWinogradAlpha * kWinogradAlpha;
// Winograd-style input transform of a batch of kWidth x kHeight planes.
//
// Every plane is covered by kWtiles x kWtiles tiles.  For each tile a
// kWinogradAlpha x kWinogradAlpha patch `x` is read whose top-left corner is
// (2 * block_y - 1, 2 * block_x - 1); squares outside the plane read as 0.
// The patch is transformed as  Bt * x * B  with
//
//        |  1  0 -1  0 |
//   Bt = |  0  1  1  0 |      B = transpose(Bt)
//        |  0 -1  1  0 |
//        |  0  1  0 -1 |
//
// and the 16 results are scattered to `output`.
//
// Parameters:
//   batch_size - number of positions in `input`.
//   input      - read as input[((batch * channels + channel) * kHeight + y)
//                * kWidth + x]; must hold batch_size * channels * kSquares
//                floats.
//   channels   - number of planes per position; channels are processed in
//                parallel across program instances.
//   output     - written as output[((e * batch_size + batch) * kTiles + tile)
//                * channels + channel], where e = 4 * row + col is the element
//                of the transformed patch and tile = block_y * kWtiles +
//                block_x; must hold 16 * batch_size * kTiles * channels floats.
export void transfer_in_ispc(uniform size_t batch_size,
                             const uniform float input[], uniform size_t channels,
                             uniform float output[])
{
  // Distance in `output` between two consecutive transform elements.  It is
  // the same for every batch, tile and channel, so compute it once as a
  // uniform value instead of per program instance inside the foreach.
  const uniform size_t V_incr = channels * kTiles * batch_size;

  for (uniform size_t batch_index = 0; batch_index < batch_size;
       batch_index++) {
    const uniform size_t input_batch = batch_index * kSquares * channels;
    const uniform size_t V_batch = channels * kTiles * batch_index;

    for (uniform int block_y = 0; block_y < kWtiles; block_y++) {
      for (uniform int block_x = 0; block_x < kWtiles; block_x++) {
        // Top-left corner of the patch; -1 on the first tile row/column, so
        // the patch overlaps the zero border.
        const uniform int yin = 2 * block_y - 1;
        const uniform int xin = 2 * block_x - 1;

        // Output offset of this tile for channel 0; uniform so that only the
        // channel term below is varying.
        const uniform size_t V_tile =
            V_batch + channels * (block_y * kWtiles + block_x);

        foreach (channel = 0 ... channels) {
          float x[kWinogradAlpha][kWinogradAlpha];
          float T1[kWinogradAlpha][kWinogradAlpha];

          const size_t input_channel = input_batch + channel * kSquares;

          // Load the patch, substituting zeros outside the plane.  The bounds
          // test is uniform, so all program instances take the same branch.
          for (uniform int i = 0; i < kWinogradAlpha; i++) {
            for (uniform int j = 0; j < kWinogradAlpha; j++) {
              const uniform int row = yin + i;
              const uniform int col = xin + j;
              if (row >= 0 && col >= 0 && row < kHeight && col < kWidth) {
                x[i][j] = input[input_channel + row * kWidth + col];
              } else {
                x[i][j] = 0.0f;
              }
            }
          }

          // T1 = Bt * x, one column at a time (Bt is hard-coded for 4x4).
          for (uniform int j = 0; j < kWinogradAlpha; j++) {
            T1[0][j] = x[0][j] - x[2][j];
            T1[1][j] = x[1][j] + x[2][j];
            T1[2][j] = x[2][j] - x[1][j];
            T1[3][j] = x[1][j] - x[3][j];
          }

          // output = T1 * B, one row at a time; element (i, k) is stored at
          // V_incr * (4 * i + k) past this tile/channel's base offset.
          for (uniform int i = 0; i < kWinogradAlpha; i++) {
            const uniform size_t row_offset = V_incr * (i * kWinogradAlpha);
            const size_t out_index = V_tile + channel + row_offset;

            output[out_index] = T1[i][0] - T1[i][2];
            output[out_index + V_incr] = T1[i][1] + T1[i][2];
            output[out_index + V_incr * 2] = T1[i][2] - T1[i][1];
            output[out_index + V_incr * 3] = T1[i][1] - T1[i][3];
          }
        }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment