Created
February 20, 2019 05:45
-
-
Save jamesr66a/e07b9c4377ccebb97830cbdab87e908d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
T* out = BaseType::getBuf() + | |
g * this->packedBufferSize(block.row_size, block.col_size); | |
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { | |
+ auto r = i; | |
+ int32_t block_row_id = r / BaseType::blockRowSize(); | |
+ int32_t brow_offset = (block_row_id * BaseType::blockCols()) * | |
+ (BaseType::blockRowSize() * BaseType::blockColSize()); | |
+ | |
+ int32_t inblock_offset_row_cpt = (r % BaseType::blockRowSize() / row_interleave_) * | |
+ BaseType::blockColSize() * row_interleave_ + r % row_interleave_; | |
+ | |
+ int32_t block_col_id = block.col_start / BaseType::blockColSize(); | |
+ int32_t block_col_offs = block.col_start % BaseType::blockColSize(); | |
for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { | |
T val = tr ? smat_[i + (g * block.col_size + j) * ld_] | |
: smat_[(g * block.row_size + i) * ld_ + j]; | |
- out[addr(i, j)] = tconv(val, out[addr(i, j)]); | |
+ int32_t bcol_offset = | |
+ block_col_id * BaseType::blockRowSize() * BaseType::blockColSize(); | |
+ int32_t block_offset = brow_offset + bcol_offset; | |
+ int32_t inblock_offset = inblock_offset_row_cpt + (block_col_offs) * row_interleave_; | |
+ int32_t index = block_offset + inblock_offset; | |
+ out[index] = tconv(val, out[index]); | |
+ | |
+ block_col_offs++; | |
+ if (block_col_offs == BaseType::blockColSize()) { | |
+ block_col_offs = 0; | |
+ block_col_id++; | |
+ } | |
} | |
} | |
// Fill the remainder with zeros. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment