Skip to content

Instantly share code, notes, and snippets.

@jamesr66a
Created February 20, 2019 05:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamesr66a/e07b9c4377ccebb97830cbdab87e908d to your computer and use it in GitHub Desktop.
Save jamesr66a/e07b9c4377ccebb97830cbdab87e908d to your computer and use it in GitHub Desktop.
T* out = BaseType::getBuf() +
g * this->packedBufferSize(block.row_size, block.col_size);
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
+ auto r = i;
+ int32_t block_row_id = r / BaseType::blockRowSize();
+ int32_t brow_offset = (block_row_id * BaseType::blockCols()) *
+ (BaseType::blockRowSize() * BaseType::blockColSize());
+
+ int32_t inblock_offset_row_cpt = (r % BaseType::blockRowSize() / row_interleave_) *
+ BaseType::blockColSize() * row_interleave_ + r % row_interleave_;
+
+ int32_t block_col_id = block.col_start / BaseType::blockColSize();
+ int32_t block_col_offs = block.col_start % BaseType::blockColSize();
for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
T val = tr ? smat_[i + (g * block.col_size + j) * ld_]
: smat_[(g * block.row_size + i) * ld_ + j];
- out[addr(i, j)] = tconv(val, out[addr(i, j)]);
+ int32_t bcol_offset =
+ block_col_id * BaseType::blockRowSize() * BaseType::blockColSize();
+ int32_t block_offset = brow_offset + bcol_offset;
+ int32_t inblock_offset = inblock_offset_row_cpt + (block_col_offs) * row_interleave_;
+ int32_t index = block_offset + inblock_offset;
+ out[index] = tconv(val, out[index]);
+
+ block_col_offs++;
+ if (block_col_offs == BaseType::blockColSize()) {
+ block_col_offs = 0;
+ block_col_id++;
+ }
}
}
// fill the remaining with zero.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment