Skip to content

Instantly share code, notes, and snippets.

@profi200
Last active August 30, 2023 22:08
Show Gist options
  • Save profi200/e31df7d31b8c2cdccc0608aaa71681fc to your computer and use it in GitHub Desktop.
Save profi200/e31df7d31b8c2cdccc0608aaa71681fc to your computer and use it in GitHub Desktop.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <3ds.h>
#include <citro3d.h>
#include <string.h>
#include "vshader_shbin.h"
// Color value handed to C3D_RenderTargetClear() every frame in main().
#define CLEAR_COLOR (0x68B0D8FF)
// Flags handed to C3D_RenderTargetSetOutput() in main():
// no vertical flip, linear (non-tiled) output, no raw copy,
// RGB8 input and output formats, no downscaling.
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
// Parsed vertex shader binary. Set in sceneInit(), freed in sceneExit().
static DVLB_s* vshader_dvlb;
// Shader program built from vshader_dvlb. Initialized in sceneInit(), freed in sceneExit().
static shaderProgram_s program;
// Location of the "projection" uniform in the vertex shader, looked up in sceneInit().
static int uLoc_projection;
// Projection matrix computed once in sceneInit() and uploaded each frame in sceneRender().
static C3D_Mtx projection;
// The texture filled by loadTexFile() and bound to texture unit 0 in sceneInit().
static C3D_Tex maintex;
__attribute__((always_inline))
static inline u32 denormalize(const float r, const float g, const float b, const float a)
{
return lroundf(255 * a)<<24 | lroundf(255 * b)<<16 |
lroundf(255 * g)<<8 | lroundf(255 * r);
}
/**
 * Initializes `tex` as a 512x512 GPU_RGB8 texture and fills it from a raw
 * image file.
 *
 * The file is read as 160 rows of 240 pixels, 3 bytes per pixel, with no
 * header. Rows are placed at the start of the staging buffer using the full
 * 512 pixel row pitch, and the rest of the buffer is zero-filled. The staging
 * buffer is then copied into the texture with a display transfer that
 * requests tiled output.
 *
 * Failure handling (the function returns nothing, so it degrades instead):
 * - If the texture cannot be initialized, nothing else is done.
 * - If the staging buffer cannot be allocated, the texture is left
 *   initialized but its contents are not written.
 * - If the file cannot be opened or is too short, the missing rows stay zero.
 *
 * @param path Path of the raw image file.
 * @param tex  Texture object to initialize and fill.
 */
static void loadTexFile(const char *const path, C3D_Tex *const tex)
{
	enum
	{
		TEX_DIM      = 512, // Texture width and height in pixels.
		IMG_WIDTH    = 240, // Width of the image stored in the file.
		IMG_HEIGHT   = 160, // Height of the image stored in the file.
		BYTES_PER_PX = 3
	};
	const size_t texSize  = (size_t)TEX_DIM * TEX_DIM * BYTES_PER_PX;
	const size_t texPitch = (size_t)TEX_DIM * BYTES_PER_PX;
	const size_t imgPitch = (size_t)IMG_WIDTH * BYTES_PER_PX;

	if(!C3D_TexInit(tex, TEX_DIM, TEX_DIM, GPU_RGB8)) return;

	u8 *const linearTex = linearAlloc(texSize);
	if(linearTex == NULL) return;

	// Everything outside the image would otherwise be uninitialized memory
	// that ends up in the texture.
	memset(linearTex, 0, texSize);

	FILE *const f = fopen(path, "rb");
	if(f != NULL)
	{
		u8 *rowPtr = linearTex;
		for(u32 row = 0; row < IMG_HEIGHT; row++)
		{
			// Stop at the first short read. The remaining rows stay zero.
			if(fread(rowPtr, imgPitch, 1, f) != 1) break;
			rowPtr += texPitch;
		}
		fclose(f);
	}

	// Write the CPU side data back to memory before the transfer reads it.
	GSPGPU_FlushDataCache(linearTex, texSize);

	const u32 flags = GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(1) | GX_TRANSFER_RAW_COPY(0) |
	                  GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
	                  GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO);
	C3D_SyncDisplayTransfer((u32*)linearTex, GX_BUFFER_DIM(TEX_DIM, TEX_DIM),
	                        (u32*)tex->data, GX_BUFFER_DIM(TEX_DIM, TEX_DIM), flags);

	linearFree(linearTex);

	// Disabled debugging aid: dump the converted texture data.
	/*f = fopen("sdmc:/texture_dump.bgr", "wb");
	fwrite(tex->data, 512 * 512 * 3, 1, f);
	fclose(f);*/
}
// One-time scene setup: loads the shader, configures vertex attributes and
// the projection matrix, loads and binds the texture, and programs texture
// environment (TEV) stages 0-4 with a color correction pipeline.
//
// Per the operands and functions configured below, the RGB pipeline is:
//   stage 0: tex * tex
//   stage 1: previous * 0.93
//   stage 2: const2 * previous.r
//   stage 3: const3 * buffer.g + previous
//   stage 4: const4 * buffer.b + previous
// where "buffer" is GPU_PREVIOUS_BUFFER.
static void sceneInit(void)
{
// Load the vertex shader, create a shader program and bind it
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
C3D_BindProgram(&program);
// Get the location of the uniforms
uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
// Configure attributes for use with the vertex shader
// Attribute format and element count are ignored in immediate mode
C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
AttrInfo_Init(attrInfo);
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position xyzw. w not used.
AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 2); // v1=texcoord uv (xy coordinates).
// Compute the projection matrix.
// Orthographic, x in 0-400 and y in 0-240, matching the coordinates used by sceneRender().
Mtx_OrthoTilt(&projection, 0.f, 400.f, 0.f, 240.f, 0.f, 1.f, true);
// Load the source image and bind it to texture unit 0.
loadTexFile("sdmc:/rgb_gba_test.bgr", &maintex);
C3D_TexSetFilter(&maintex, GPU_LINEAR, GPU_NEAREST); // Set to nearest, nearest for 1:1 scale.
C3D_TexSetWrap(&maintex, GPU_CLAMP_TO_EDGE, GPU_CLAMP_TO_EDGE);
C3D_TexBind(0, &maintex);
// Disabled alternative: a single stage that outputs texture 0 unchanged (GPU_REPLACE).
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
/*C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_Both, GPU_TEXTURE0, 0, 0);
C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);*/
/*
* Input
* [r]
* [g]
* [b]
*
* Correction Output
* [ r][rg][rb] [r]
* [gr][ g][gb] [g]
* [br][bg][ b] [b]
*/
// Reference matrix the stage constants below are taken from.
// Note: OpenGL uses column-major format.
/*const mat4 GBA_sRGB = mat4(
0.80, 0.135, 0.195, 0.0, //red channel
0.275, 0.64, 0.155, 0.0, //green channel
-0.075, 0.225, 0.65, 0.0, //blue channel
0.0, 0.0, 0.0, 0.93 //alpha channel
);*/
// Stage 0: texture color multiplied by itself.
C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_RGB, GPU_TEXTURE0, GPU_TEXTURE0, 0);
C3D_TexEnvOpRgb(env, GPU_TEVOP_RGB_SRC_COLOR, GPU_TEVOP_RGB_SRC_COLOR, 0);
C3D_TexEnvFunc(env, C3D_RGB, GPU_MODULATE); // pow(screen, 2.0).
// NOTE(review): presumably selects which stage output feeds GPU_PREVIOUS_BUFFER
// for stages 3 and 4 - confirm the mask meaning against the citro3d docs.
C3D_TexEnvBufUpdate(C3D_RGB, 1u<<1);
// Stage 1: scale the squared color by the constant 0.93.
C3D_TexEnv* env1 = C3D_GetTexEnv(1);
C3D_TexEnvInit(env1);
C3D_TexEnvSrc(env1, C3D_RGB, GPU_PREVIOUS, GPU_CONSTANT, 0);
C3D_TexEnvOpRgb(env1, GPU_TEVOP_RGB_SRC_COLOR, GPU_TEVOP_RGB_SRC_COLOR, 0);
C3D_TexEnvFunc(env1, C3D_RGB, GPU_MODULATE);
C3D_TexEnvColor(env1, denormalize(0.93f, 0.93f, 0.93f, 0.f)); // screen *= 0.93;
// Stage 2: red contribution. The constant is multiplied by the red channel
// of the previous stage (GPU_TEVOP_RGB_SRC_R).
C3D_TexEnv* env2 = C3D_GetTexEnv(2);
C3D_TexEnvInit(env2);
C3D_TexEnvSrc(env2, C3D_RGB, GPU_CONSTANT, GPU_PREVIOUS, 0); // Previous buffer update is delayed by one stage. Come on, really?
C3D_TexEnvOpRgb(env2, GPU_TEVOP_RGB_SRC_COLOR, GPU_TEVOP_RGB_SRC_R, 0);
C3D_TexEnvFunc(env2, C3D_RGB, GPU_MODULATE);
C3D_TexEnvColor(env2, denormalize(0.8f, 0.135f, 0.195f, 0.f)); // r*r, rg*r, rb*r
// Stage 3: green contribution. constant * buffer.g, added to the stage 2 result.
C3D_TexEnv* env3 = C3D_GetTexEnv(3);
C3D_TexEnvInit(env3);
C3D_TexEnvSrc(env3, C3D_RGB, GPU_CONSTANT, GPU_PREVIOUS_BUFFER, GPU_PREVIOUS);
C3D_TexEnvOpRgb(env3, GPU_TEVOP_RGB_SRC_COLOR, GPU_TEVOP_RGB_SRC_G, GPU_TEVOP_RGB_SRC_COLOR);
C3D_TexEnvFunc(env3, C3D_RGB, GPU_MULTIPLY_ADD);
C3D_TexEnvColor(env3, denormalize(0.275f, 0.64f, 0.155f, 0.f)); // gr*g, g*g, gb*g
// Stage 4: blue contribution. constant * buffer.b, added to the stage 3 result.
// This is the only stage configured for both RGB and alpha (C3D_Both).
// NOTE(review): the reference matrix has -0.075 as the first blue coefficient
// while 0.f is used here - presumably because denormalize() packs unsigned
// bytes and cannot express a negative constant. Confirm this is intended.
C3D_TexEnv* env4 = C3D_GetTexEnv(4);
C3D_TexEnvInit(env4);
C3D_TexEnvSrc(env4, C3D_Both, GPU_CONSTANT, GPU_PREVIOUS_BUFFER, GPU_PREVIOUS);
C3D_TexEnvOpRgb(env4, GPU_TEVOP_RGB_SRC_COLOR, GPU_TEVOP_RGB_SRC_B, GPU_TEVOP_RGB_SRC_COLOR);
C3D_TexEnvFunc(env4, C3D_Both, GPU_MULTIPLY_ADD);
C3D_TexEnvColor(env4, denormalize(0.f, 0.225f, 0.65f, 1.f)); // br*b, bg*b, b*b
}
/**
 * Draws one frame: uploads the projection matrix and emits a single textured
 * quad in immediate mode.
 *
 * The quad shows the 240x160 region of the 512x512 texture, scaled by 1.5
 * and centered in the 400x240 coordinate space of the projection matrix.
 */
static void sceneRender(void)
{
	// Upload the projection matrix uniform.
	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);

	// Texture and sub-image dimensions in pixels.
	const float texW = 512.f;
	const float texH = 512.f;
	const float subTexW = 240.f; // Alternative: 360.f
	const float subTexH = 160.f; // Alternative: 240.f
	const float scale = 1.5f;    // Alternative: 1.f

	// Offsets that center the scaled frame.
	const float xShift = (400.f - subTexW * scale) / 2;
	const float yShift = (240.f - subTexH * scale) / 2;

	// Quad edges. Vertex coordinates with this projection matrix start at bottom left.
	const float left   = xShift;
	const float bottom = yShift;
	const float right  = subTexW * scale + xShift;
	const float top    = subTexH * scale + yShift;

	// Texture coordinates start at bottom left and are 0-1.
	// The sub texture is located top left, so v runs from vMin up to 1.
	const float uMax = subTexW / texW;
	const float vMin = 1.f - subTexH / texH;

	const float vertZ = 0.5f;
	const float vertW = 1.f; // Forced to 1 in vertex shader.

	// Each vertex is two attributes: v0=position xyzw, then v1=texcoord uv.
	C3D_ImmDrawBegin(GPU_TRIANGLE_STRIP);

	// Bottom left corner.
	C3D_ImmSendAttrib(left, bottom, vertZ, vertW);
	C3D_ImmSendAttrib(0.f, vMin, 0.f, 0.f);

	// Bottom right corner.
	C3D_ImmSendAttrib(right, bottom, vertZ, vertW);
	C3D_ImmSendAttrib(uMax, vMin, 0.f, 0.f);

	// Top left corner.
	C3D_ImmSendAttrib(left, top, vertZ, vertW);
	C3D_ImmSendAttrib(0.f, 1.f, 0.f, 0.f);

	// Top right corner.
	C3D_ImmSendAttrib(right, top, vertZ, vertW);
	C3D_ImmSendAttrib(uMax, 1.f, 0.f, 0.f);

	C3D_ImmDrawEnd();
}
// Releases what sceneInit() created: the shader program, the parsed shader
// binary it was built from, and the texture.
static void sceneExit(void)
{
// Free the shader program before the shader binary it refers to.
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
// Delete texture.
C3D_TexDelete(&maintex);
}
/**
 * Program entry point.
 *
 * Initializes graphics and citro3d, creates the top screen render target,
 * sets up the scene and renders it every frame until START is pressed or the
 * applet loop ends.
 *
 * @return 0 on a normal exit, EXIT_FAILURE if citro3d or the render target
 *         could not be initialized.
 */
int main(void)
{
	// Initialize graphics.
	gfxInit(GSP_BGR8_OES, GSP_RGB565_OES, false);
	if(!C3D_Init(C3D_DEFAULT_CMDBUF_SIZE))
	{
		gfxExit();
		return EXIT_FAILURE;
	}

	// Initialize the render target.
	C3D_RenderTarget *const topScreenTarget = C3D_RenderTargetCreate(240, 400, GPU_RB_RGB8, GPU_RB_DEPTH24_STENCIL8);
	if(topScreenTarget == NULL)
	{
		// Without a target there is nothing to draw on. Undo what was set up so far.
		C3D_Fini();
		gfxExit();
		return EXIT_FAILURE;
	}

	// Only a single quad is drawn, so all per-fragment tests are disabled.
	C3D_StencilTest(false, GPU_ALWAYS, 0, 0, 0);
	C3D_EarlyDepthTest(false, GPU_EARLYDEPTH_GEQUAL, 0);
	C3D_DepthTest(false, GPU_ALWAYS, GPU_WRITE_COLOR);
	C3D_AlphaTest(false, GPU_ALWAYS, 0);
	C3D_RenderTargetSetOutput(topScreenTarget, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);

	// Initialize the scene.
	sceneInit();

	while(aptMainLoop())
	{
		hidScanInput();
		if(hidKeysDown() & KEY_START) break;

		// Render the scene.
		C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
		{
			C3D_RenderTargetClear(topScreenTarget, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
			C3D_FrameDrawOn(topScreenTarget);
			sceneRender();
		}
		C3D_FrameEnd(0);
	}

	// Deinitialize the scene.
	sceneExit();
	C3D_RenderTargetDelete(topScreenTarget);

	// Deinitialize graphics.
	C3D_Fini();
	gfxExit();

	return 0;
}
// Link with: -Wl,-wrap=GX_ProcessCommandList
// The linker then routes every call to GX_ProcessCommandList() to the
// __wrap_ function below and makes the original reachable as __real_.
Result __real_GX_ProcessCommandList(u32* buf0a, u32 buf0s, u8 flags);

/**
 * Writes `size` bytes starting at `buf` to a new file at `path`.
 *
 * This is a best-effort debugging aid. If the file cannot be opened nothing
 * is written, and write errors are not reported.
 */
static void dumpCmdList(const char *const path, const u32 *const buf, const u32 size)
{
	FILE *const f = fopen(path, "wb");
	if(f == NULL) return;

	fwrite(buf, size, 1, f);
	fclose(f);
}

/**
 * Debugging hook around GX_ProcessCommandList().
 *
 * Dumps the buffers of the first two calls to the SD card, then forwards
 * every call unchanged to the real implementation.
 *
 * @param buf0a Command buffer passed on to the real function.
 * @param buf0s Size passed on to the real function. It is used as the byte
 *              count of the dump - NOTE(review): confirm the unit is bytes.
 * @param flags Flags passed on to the real function.
 * @return      Whatever the real GX_ProcessCommandList() returns.
 */
Result __wrap_GX_ProcessCommandList(u32* buf0a, u32 buf0s, u8 flags)
{
	static const char *const dumpPaths[2] =
	{
		"sdmc:/first_cmd_list.bin",
		"sdmc:/second_cmd_list.bin"
	};
	static u32 dumped = 0;

	// The counter advances even if a dump fails, so each call is attempted once.
	if(dumped < 2)
	{
		dumpCmdList(dumpPaths[dumped], buf0a, buf0s);
		dumped++;
	}

	return __real_GX_ProcessCommandList(buf0a, buf0s, flags);
}
; PICA200 vertex shader: transforms the input position by the projection
; matrix and passes the texture coordinate through unchanged.
; Uniforms
; Projection matrix, one vector per row (each row is dotted with the position below).
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.5)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outtc0 texcoord0
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias intex v1
.proc main
; Force the w component of inpos to be 1.0
; Only xyz is copied from the input, so the incoming w is ignored.
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
; One dot product per output component.
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outtc0 = intex
mov outtc0, intex
; We're finished
end
.end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment