// Source: GitHub gist exjam/ce2de9fcc1e98ad0e12251c12bad5234 by @exjam, created October 30, 2018 10:48.
#pragma once
#include <cstdint>
#include <cstring>
namespace gpu7::tiler
{
/**
 * Surface tiling modes understood by the tiler.
 *
 * The numeric values are part of the interface: TiledSurface::tileMode stores
 * them as a raw uint32_t and untile() casts that value back to this enum.
 * NOTE(review): the values presumably mirror AMD addrlib's ADDR_TM_* tile
 * modes - confirm against the hardware documentation.
 */
enum TileMode : uint32_t
{
   // Linear layouts: untile() treats these as already untiled.
   LinearGeneral  = 0x00,
   LinearAligned  = 0x01,

   // 1D layouts: untile() handles these through the micro tile path.
   Tiled1DThin1   = 0x02,
   Tiled1DThick   = 0x03,

   // 2D / 2B / 3D / 3B layouts: untile() does not handle these.
   Tiled2DThin1   = 0x04,
   Tiled2DThin2   = 0x05,
   Tiled2DThin4   = 0x06,
   Tiled2DThick   = 0x07,
   Tiled2BThin1   = 0x08,
   Tiled2BThin2   = 0x09,
   Tiled2BThin4   = 0x0A,
   Tiled2BThick   = 0x0B,
   Tiled3DThin1   = 0x0C,
   Tiled3DThick   = 0x0D,
   Tiled3BThin1   = 0x0E,
   Tiled3BThick   = 0x0F,

   // Additional linear layout: untile() treats it as already untiled.
   LinearSpecial  = 0x10,
};
/**
 * Description of a tiled surface that is passed to untile().
 *
 * Every member has a default initialiser so that a default-constructed
 * TiledSurface holds deterministic values instead of indeterminate ones.
 * The struct remains an aggregate, so brace initialisation in declaration
 * order keeps working.
 */
struct TiledSurface
{
   // Start of the tiled image data; the untiling code reads from here.
   void *image = nullptr;

   // Bits per element; the untiling code divides this by 8 to get bytes.
   uint32_t bpp = 0;

   // One of the TileMode values, stored as a raw integer.
   uint32_t tileMode = 0;

   // Not read by the untiling code in this file.
   uint32_t swizzle = 0;

   // Row length in elements; multiplied by the element size to get the
   // byte stride of the untiled output.
   uint32_t pitch = 0;

   // Number of rows in elements.
   uint32_t height = 0;

   // Not read by the untiling code in this file.
   uint32_t depth = 0;

   // Not read by the untiling code in this file.
   uint32_t numSamples = 0;

   // When true the depth micro tile layout is used regardless of bpp.
   bool isDepth = false;

   // Not read by the untiling code in this file.
   uint32_t bankSwizzle = 0;

   // Not read by the untiling code in this file.
   uint32_t pipeSwizzle = 0;
};
namespace detail
{
// Dimensions of one micro tile, in elements. Declared `inline constexpr` so
// that every translation unit including this header refers to the same
// entity, and given an explicit type instead of a deduced one.
inline constexpr int MicroTileWidth = 8;
inline constexpr int MicroTileHeight = 8;
struct MicroTiler8
{
   /*
      8 bits per element:
       0:   0,  1,  2,  3,  4,  5,  6,  7,
       8:  16, 17, 18, 19, 20, 21, 22, 23,
      16:   8,  9, 10, 11, 12, 13, 14, 15,
      24:  24, 25, 26, 27, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 36, 37, 38, 39,
      40:  48, 49, 50, 51, 52, 53, 54, 55,
      48:  40, 41, 42, 43, 44, 45, 46, 47,
      56:  56, 57, 58, 59, 60, 61, 62, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile.
   */

   /**
    * Copies one micro tile of 8-bit elements from tiled to untiled order.
    *
    * Within every group of four rows the two middle rows are exchanged; each
    * row is copied as a whole.
    *
    * Static so that it can be called as MicroTiler8::apply(...) without an
    * instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      static constexpr std::size_t rowSize = MicroTileWidth * sizeof(uint8_t);

      auto srcRow = [=](int row) { return src + static_cast<std::size_t>(row) * srcStrideBytes; };
      auto dstRow = [=](int row) { return dst + static_cast<std::size_t>(row) * dstStrideBytes; };

      for (int y = 0; y < MicroTileHeight; y += 4) {
         std::memcpy(dstRow(y + 0), srcRow(y + 0), rowSize);
         // Rows 1 and 2 of each group of four are swapped in the tiled layout.
         std::memcpy(dstRow(y + 1), srcRow(y + 2), rowSize);
         std::memcpy(dstRow(y + 2), srcRow(y + 1), rowSize);
         std::memcpy(dstRow(y + 3), srcRow(y + 3), rowSize);
      }
   }
};
struct MicroTiler16
{
   /*
      16 bits per element:
       0:   0,  1,  2,  3,  4,  5,  6,  7,
       8:   8,  9, 10, 11, 12, 13, 14, 15,
      16:  16, 17, 18, 19, 20, 21, 22, 23,
      24:  24, 25, 26, 27, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 36, 37, 38, 39,
      40:  40, 41, 42, 43, 44, 45, 46, 47,
      48:  48, 49, 50, 51, 52, 53, 54, 55,
      56:  56, 57, 58, 59, 60, 61, 62, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile (identity here).
   */

   /**
    * Copies one micro tile of 16-bit elements from tiled to untiled order.
    *
    * The element order is unchanged, so this is a row-by-row copy that only
    * converts between the two row strides.
    *
    * Static so that it can be called as MicroTiler16::apply(...) without an
    * instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      static constexpr std::size_t rowSize = MicroTileWidth * sizeof(uint16_t);

      for (int y = 0; y < MicroTileHeight; ++y) {
         std::memcpy(dst, src, rowSize);
         src += srcStrideBytes;
         dst += dstStrideBytes;
      }
   }
};
struct MicroTiler32
{
   /*
      32 bits per element:
       0:   0,  1,  2,  3,  8,  9, 10, 11,
       8:   4,  5,  6,  7, 12, 13, 14, 15,
      16:  16, 17, 18, 19, 24, 25, 26, 27,
      24:  20, 21, 22, 23, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 40, 41, 42, 43,
      40:  36, 37, 38, 39, 44, 45, 46, 47,
      48:  48, 49, 50, 51, 56, 57, 58, 59,
      56:  52, 53, 54, 55, 60, 61, 62, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile.
   */

   /**
    * Copies one micro tile of 32-bit elements from tiled to untiled order.
    *
    * Elements move in groups of four. Within every pair of rows the second
    * group of the first row and the first group of the second row are
    * exchanged.
    *
    * Static so that it can be called as MicroTiler32::apply(...) without an
    * instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      static constexpr std::size_t elemBytes = sizeof(uint32_t);
      static constexpr std::size_t groupBytes = 4 * elemBytes;

      // An element index is split into row (idx / width) and column
      // (idx % width). Only the column is scaled by the element size; the row
      // is covered by the stride, so it must not be counted a second time.
      auto srcAt = [=](int idx) {
         return src + static_cast<std::size_t>(idx / MicroTileWidth) * srcStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };
      auto dstAt = [=](int idx) {
         return dst + static_cast<std::size_t>(idx / MicroTileWidth) * dstStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };

      for (int y = 0; y < MicroTileHeight; y += 2) {
         const int base = y * MicroTileWidth;
         std::memcpy(dstAt(base + 0), srcAt(base + 0), groupBytes);
         std::memcpy(dstAt(base + 8), srcAt(base + 4), groupBytes);
         std::memcpy(dstAt(base + 4), srcAt(base + 8), groupBytes);
         std::memcpy(dstAt(base + 12), srcAt(base + 12), groupBytes);
      }
   }
};
struct MicroTiler64
{
   /*
      64 bits per element:
       0:   0,  1,  4,  5,  8,  9, 12, 13,
       8:   2,  3,  6,  7, 10, 11, 14, 15,
      16:  16, 17, 20, 21, 24, 25, 28, 29,
      24:  18, 19, 22, 23, 26, 27, 30, 31,
      32:  32, 33, 36, 37, 40, 41, 44, 45,
      40:  34, 35, 38, 39, 42, 43, 46, 47,
      48:  48, 49, 52, 53, 56, 57, 60, 61,
      56:  50, 51, 54, 55, 58, 59, 62, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile.
   */

   /**
    * Copies one micro tile of 64-bit elements from tiled to untiled order.
    *
    * Elements move in groups of two. For the untiled 2x2 block whose top-left
    * element is (x, y), the top pair comes from tiled index 8*y + 2*x and the
    * bottom pair from the two tiled elements directly after it.
    *
    * Static so that it can be called as MicroTiler64::apply(...) without an
    * instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      static constexpr std::size_t elemBytes = sizeof(uint64_t);
      static constexpr std::size_t groupBytes = 2 * elemBytes;

      // Element index -> address: the row is covered by the stride, the
      // column is scaled by the size of one element (not of one group).
      auto srcAt = [=](int idx) {
         return src + static_cast<std::size_t>(idx / MicroTileWidth) * srcStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };
      auto dstAt = [=](int idx) {
         return dst + static_cast<std::size_t>(idx / MicroTileWidth) * dstStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };

      for (int y = 0; y < MicroTileHeight; y += 2) {
         for (int x = 0; x < MicroTileWidth; x += 2) {
            const int dstIdx = y * MicroTileWidth + x;
            // Each untiled column pair consumes four consecutive tiled
            // elements, so the tiled index advances twice as fast as x.
            const int srcIdx = y * MicroTileWidth + 2 * x;
            std::memcpy(dstAt(dstIdx), srcAt(srcIdx), groupBytes);
            std::memcpy(dstAt(dstIdx + MicroTileWidth), srcAt(srcIdx + 2), groupBytes);
         }
      }
   }
};
struct MicroTiler128
{
   /*
      128 bits per element:
       0:   0,  2,  4,  6,  8, 10, 12, 14,
       8:   1,  3,  5,  7,  9, 11, 13, 15,
      16:  16, 18, 20, 22, 24, 26, 28, 30,
      24:  17, 19, 21, 23, 25, 27, 29, 31,
      32:  32, 34, 36, 38, 40, 42, 44, 46,
      40:  33, 35, 37, 39, 41, 43, 45, 47,
      48:  48, 50, 52, 54, 56, 58, 60, 62,
      56:  49, 51, 53, 55, 57, 59, 61, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile.
   */

   /**
    * Copies one micro tile of 128-bit elements from tiled to untiled order.
    *
    * Within every pair of rows the tiled data alternates between the two
    * rows: untiled (x, y) comes from tiled index 8*y + 2*x and untiled
    * (x, y + 1) from the tiled element directly after it.
    *
    * Static so that it can be called as MicroTiler128::apply(...) without an
    * instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      // A 128-bit element occupies 16 bytes.
      static constexpr std::size_t elemBytes = 128 / 8;

      // Element index -> address: the row is covered by the stride, the
      // column is scaled by the element size.
      auto srcAt = [=](int idx) {
         return src + static_cast<std::size_t>(idx / MicroTileWidth) * srcStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };
      auto dstAt = [=](int idx) {
         return dst + static_cast<std::size_t>(idx / MicroTileWidth) * dstStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };

      for (int y = 0; y < MicroTileHeight; y += 2) {
         for (int x = 0; x < MicroTileWidth; ++x) {
            const int dstIdx = y * MicroTileWidth + x;
            const int srcIdx = y * MicroTileWidth + 2 * x;
            std::memcpy(dstAt(dstIdx), srcAt(srcIdx), elemBytes);
            std::memcpy(dstAt(dstIdx + MicroTileWidth), srcAt(srcIdx + 1), elemBytes);
         }
      }
   }
};
struct MicroTilerDepth
{
   /*
      depth elements:
       0:   0,  1,  4,  5, 16, 17, 20, 21,
       8:   2,  3,  6,  7, 18, 19, 22, 23,
      16:   8,  9, 12, 13, 24, 25, 28, 29,
      24:  10, 11, 14, 15, 26, 27, 30, 31,
      32:  32, 33, 36, 37, 48, 49, 52, 53,
      40:  34, 35, 38, 39, 50, 51, 54, 55,
      48:  40, 41, 44, 45, 56, 57, 60, 61,
      56:  42, 43, 46, 47, 58, 59, 62, 63,

      Each table entry is the element index inside the tiled micro tile that
      ends up at that position of the untiled micro tile.
   */

   /**
    * Copies one micro tile of depth elements from tiled to untiled order.
    *
    * The tile is processed as four 4x4 quadrants. Each quadrant occupies 16
    * consecutive tiled elements and is copied in groups of two elements.
    *
    * Static so that it can be called as MicroTilerDepth::apply(...) without
    * an instance; calling it on an instance still works.
    *
    * @param src            First byte of the tiled micro tile.
    * @param srcStrideBytes Byte distance between consecutive rows of src.
    * @param dst            Destination of the tile's first element.
    * @param dstStrideBytes Byte distance between consecutive rows of dst.
    * @param bpp            Size of one element in BYTES (despite the name):
    *                       it is used directly in byte offset arithmetic.
    */
   static void apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes, unsigned bpp)
   {
      const std::size_t elemBytes = bpp;
      const std::size_t groupBytes = 2 * elemBytes;

      // Element index -> address: the row is covered by the stride, the
      // column is scaled by the element size.
      auto srcAt = [=](int idx) {
         return src + static_cast<std::size_t>(idx / MicroTileWidth) * srcStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };
      auto dstAt = [=](int idx) {
         return dst + static_cast<std::size_t>(idx / MicroTileWidth) * dstStrideBytes
                    + static_cast<std::size_t>(idx % MicroTileWidth) * elemBytes;
      };

      // { untiled offset, tiled offset } of every two-element group inside
      // one quadrant, relative to the quadrant's base index on each side.
      static constexpr int groups[8][2] = {
         {  0,  0 }, {  2,  4 },
         {  8,  2 }, { 10,  6 },
         { 16,  8 }, { 18, 12 },
         { 24, 10 }, { 26, 14 },
      };

      for (int y = 0; y < MicroTileHeight; y += 4) {
         for (int x = 0; x < MicroTileWidth; x += 4) {
            // Untiled side: the quadrant starts at column x of row y.
            const int dstBase = y * MicroTileWidth + x;
            // Tiled side: each quadrant is 16 consecutive elements, ordered
            // left-to-right and then top-to-bottom.
            const int srcBase = y * MicroTileWidth + x * 4;

            for (const auto &group : groups) {
               std::memcpy(dstAt(dstBase + group[0]), srcAt(srcBase + group[1]), groupBytes);
            }
         }
      }
   }
};
template<typename MicroTiler>
void applyMicroTiler(TiledSurface &tiled, unsigned sliceOffset, unsigned microTileBytes)
{
auto bytesPerPixel = tiled.bpp / 8;
auto microTilesPerRow = tiled.pitch / MicroTileWidth;
auto microTilesNumRows = tiled.height / MicroTileHeight;
auto microTileOffset = sliceOffset;
auto dstStrideBytes = tiled.pitch * bytesPerPixel;
for (auto microTileIndexY = 0; microTileIndexY < microTilesNumRows; ++microTileIndexY) {
for (auto microTileIndexX = 0; microTileIndexX < microTilesPerRow; ++microTileIndexX) {
auto pixelX = microTileIndexX * MicroTileWidth;
auto pixelY = microTileIndexY * MicroTileHeight;
auto dstOffset = (pixelX + pixelY * tiled.pitch) * bytesPerPixel;
MicroTiler::apply(static_cast<uint8_t *>(tiled.image) + microTileOffset,
MicroTileWidth * bytesPerPixel,
static_cast<uint8_t *>(dst) + dstOffset,
dstStrideBytes);
microTileOffset += microTileBytes;
}
}
}
bool untileMicroTiledSurface(TiledSurface &tiled, void *dst, int slice)
{
auto bytesPerPixel = tiled.bpp / 8;
auto microTileThickness = (tiled.tileMode == TileMode::Tiled1DThick) ? 4 : 1;
auto microTileBytes = MicroTileWidth * MicroTileHeight * microTileThickness * bytesPerPixel;
// Calculate slice offset
auto microTileIndexZ = slice / microTileThickness;
auto sliceBytes = tiled.pitch * tiled.height * microTileThickness * bytesPerPixel;
auto sliceOffset = microTileIndexZ * sliceBytes;
if (tiled.isDepth) {
applyMicroTiler<MicroTilerDepth>(tiled, sliceOffset, microTileBytes);
return true;
}
switch (tiled.bpp) {
case 8:
applyMicroTiler<MicroTiler8>(tiled, sliceOffset, microTileBytes);
break;
case 16:
applyMicroTiler<MicroTiler16>(tiled, sliceOffset, microTileBytes);
break;
case 32:
applyMicroTiler<MicroTiler32>(tiled, sliceOffset, microTileBytes);
break;
case 64:
applyMicroTiler<MicroTiler64>(tiled, sliceOffset, microTileBytes);
break;
case 128:
applyMicroTiler<MicroTiler128>(tiled, sliceOffset, microTileBytes);
break;
default:
return false;
}
return true;
}
} // namespace detail
/**
 * Untiles slice 0 of a surface into dst.
 *
 * Declared inline because it is defined in a header; without it every
 * translation unit including this file would emit its own definition.
 *
 * @param tiled  Description of the tiled surface.
 * @param dst    Output buffer for the untiled image.
 * @return true for linear tile modes and for micro tiled surfaces that were
 *         processed; false for macro tiled or unknown tile modes and whenever
 *         the micro tiled path rejects the surface.
 *
 * Linear tile modes return true WITHOUT writing anything to dst: the data in
 * tiled.image is considered usable as-is and the caller must read it from
 * there.
 */
inline bool untile(TiledSurface &tiled, void *dst)
{
   switch (static_cast<TileMode>(tiled.tileMode)) {
   // Linear
   case TileMode::LinearGeneral:
   case TileMode::LinearAligned:
   case TileMode::LinearSpecial:
      // Already "untiled"
      return true;

   // MicroTiled
   case TileMode::Tiled1DThin1:
   case TileMode::Tiled1DThick:
      return detail::untileMicroTiledSurface(tiled, dst, 0);

   // MacroTiled
   case TileMode::Tiled2DThin1:
   case TileMode::Tiled2DThin2:
   case TileMode::Tiled2DThin4:
   case TileMode::Tiled2DThick:
   case TileMode::Tiled2BThin1:
   case TileMode::Tiled2BThin2:
   case TileMode::Tiled2BThin4:
   case TileMode::Tiled2BThick:
   case TileMode::Tiled3DThin1:
   case TileMode::Tiled3DThick:
   case TileMode::Tiled3BThin1:
   case TileMode::Tiled3BThick:
      // TODO: not implemented. The reference computation for these modes is
      // ComputeSurfaceAddrFromCoordMacroTiled(x, y, slice, sample, bpp, pitch,
      // height, numSamples, tileMode, isDepth, tileBase, compBits,
      // pipeSwizzle, bankSwizzle, &bitPosition).
      return false;

   default:
      return false;
   }
}
} // namespace gpu7::tiler
// End of gist content (GitHub page footer omitted).