Skip to content

Instantly share code, notes, and snippets.

@ISSOtm
Last active February 24, 2019 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ISSOtm/c99b525019454d21a769d67c691f8fa4 to your computer and use it in GitHub Desktop.
Save ISSOtm/c99b525019454d21a769d67c691f8fa4 to your computer and use it in GitHub Desktop.
A documentation-less raster FX lib for the Game Boy (consider this a draft for an upcoming GitHub repo)
; TODO: timings have changed, verify this comment
;
; Be careful with effects on consecutive lines!
; A "double" effect will end a handful of cycles too late if the preceding scanline was really busy
; (approx. 25 M-cycles, HBlank can be as short as 22 cycles plus 1~2 cycles of latency explained below)
; The textbox appears to last up to 22 M-cycles so it should be fine
; The LY=LYC interrupt appears to be unable to trigger in the first (first two?) cycles of a scanline, giving extra leeway
; Anyways, using an effect just after either of the previous conditions may slightly delay it, and repeating the condition will accumulate the delays
; Since Mode 2 is 20 cycles long, it should be possible to stack the delays somewhat before visible breakage happens, but it's better to avoid doing so altogether
SECTION "Raster fx helper functions", ROM0
; Compute the low byte of the scanline buffer that is NOT currently in use.
; Works by flipping the bits in which the two buffers' low bytes differ,
; so hWhichScanlineBuffer must always hold one of those two low bytes.
; @return a Low byte of the free buffer's address
; @return c Same value as a
; @destroy a c (flags are set by the xor)
GetFreeScanlineBuf::
    ldh a, [hWhichScanlineBuffer]                           ; a = buffer in use
    xor LOW(hScanlineFXBuffer1) ^ LOW(hScanlineFXBuffer2)   ; toggle to the other one
    ld c, a
    ret
; Make the currently free scanline buffer the active one.
; After this, hWhichScanlineBuffer names the buffer that was free on entry.
; @return b Low byte of the buffer that is now in use
; @return c Low byte of the buffer that has just been freed
; @destroy a c (a ends up equal to c)
SwitchScanlineBuf::
    ; Same computation as GetFreeScanlineBuf, done in place
    ldh a, [hWhichScanlineBuffer]
    xor LOW(hScanlineFXBuffer1) ^ LOW(hScanlineFXBuffer2)
    ldh [hWhichScanlineBuffer], a   ; The free buffer becomes the active one
    ld b, a
    ; Toggling a second time yields the buffer we just switched away from
    xor LOW(hScanlineFXBuffer1) ^ LOW(hScanlineFXBuffer2)
    ld c, a
    ret
; Switches to the currently free scanline buffer, then copies its contents into
; the buffer that was just freed, so that both buffers hold the same effect list.
;
; The list is copied one (scanline, addr, value) entry at a time, and the copy
; stops right after a scanline byte equal to $FF (the terminator) has been copied.
; Only the scanline byte is tested: an addr or value byte that happens to be $FF
; must not end the copy, otherwise the destination would be left truncated
; and without a terminator.
;
; The source list MUST contain a $FF scanline terminator; there is no length check.
; @destroy a bc hl
SwitchAndCopyScanlineBuf::
    call SwitchScanlineBuf          ; b = buffer now in use (source), c = freed buffer (destination)
    ld l, b
    ld h, HIGH(hScanlineFXBuffer1)  ; Both buffers share the same high byte
.loop
    ; Scanline byte: copy it, then stop if it was the terminator
    ld a, [hli]
    ld [$ff00+c], a
    inc c
    inc a                           ; $FF + 1 = 0 sets Z
    ret z
    ; Port (addr) byte: copied verbatim, never treated as a terminator
    ld a, [hli]
    ld [$ff00+c], a
    inc c
    ; Value byte: copied verbatim, never treated as a terminator
    ld a, [hli]
    ld [$ff00+c], a
    inc c
    jr .loop
; ---- Raster FX state ----
; NOTE(review): no SECTION directive is visible between the ROM0 section above and
; these labels, yet every access to them uses `ldh` / `[$ff00+c]`.
; They presumably belong in an HRAM section; confirm against the real project.
; The code also builds pointers from HIGH(hScanlineFXBuffer1) plus a low byte only,
; so both buffers must share the same high address byte.

; Low byte of the current scanline buffer
; Permits double-buffering
; Holds LOW(hScanlineFXBuffer1) or LOW(hScanlineFXBuffer2); the buffer helpers toggle between the two with a XOR
hWhichScanlineBuffer::
db
; Low byte of byte read by STAT handler
; NO TOUCHY
; (Reloaded from hWhichScanlineBuffer by the VBlank handler, then advanced by the STAT handlers)
hScanlineFXIndex::
db
; Scanline FX buffers (scanline, addr, value)
; Double-buffering used to prevent race conditions
; Each buffer has room for 5 three-byte entries plus 1 extra byte
; The list ends with a scanline byte of $FF (that is what the init code stores in an empty list)
hScanlineFXBuffer1::
ds 3 * 5 + 1
hScanlineFXBuffer2::
ds 3 * 5 + 1
; Addr/value pair to allow writing to 2 regs in the same scanline
; Used by the STAT handler when bit 7 of an entry's addr byte is set
; hSecondFXAddr is the low byte of the $FFxx port to write hSecondFXValue to
hSecondFXAddr::
db
hSecondFXValue::
db
; Raster FX setup: run this at game init, or when soft-resetting
    ; Make buffer 1 an empty effect list by placing the $FF terminator in its first byte
    ld a, $FF
    ldh [hScanlineFXBuffer1], a
    ; Select buffer 1 as the active buffer
    ld a, LOW(hScanlineFXBuffer1)
    ldh [hWhichScanlineBuffer], a
    ; Write STATF_LYC to STAT (the handlers in this file are driven by the LY=LYC interrupt)
    ld a, STATF_LYC
    ldh [rSTAT], a
; Interrupt entry stubs, placed at a fixed address starting at $0040.
; The stubs are laid out back to back with explicit `ds 7` padding after the
; one-byte `reti` stubs, so the size of every stub matters: do not add or remove
; instructions here without re-checking where the following stubs end up.
SECTION "Interrupt vectors", ROM0[$0040]
; transfer_reg NAME
; Copies the byte at hNAME to rNAME (e.g. `transfer_reg LCDC` copies hLCDC to rLCDC)
; Destroys a
transfer_reg: MACRO
ldh a, [h\1]
ldh [r\1], a
ENDM
; VBlank
; Saves af, refreshes LCDC from its shadow right away, then continues in VBlankHandler
; (which therefore starts with af already on the stack)
push af
transfer_reg LCDC
jp VBlankHandler
; LCD
; Saves af, then dispatches on the current LYC value:
; LYC = 0 goes to MusicHandler, anything else goes to LCDHandler
; Both handlers are entered with af already on the stack
push af
ldh a, [rLYC]
and a ; Check if on scanline 0, which means music
; Scanline 0 FX are handled by the music, kinda
jr nz, LCDHandler ; Jump to LCD handler, because it's the most likely case
jr MusicHandler
; Stubbed int handlers for the example, but you can put anything
; Timer
reti
ds 7 ; Padding up to the next stub
; Serial
reti
ds 7 ; Padding up to the next stub
; Joypad (usually empty, but you can put something here if you want)
reti
; LCDHandler: LY=LYC interrupt body for every LYC value other than 0.
; Entered from the LCD vector with af already pushed.
;
; What it does, in order:
;  1. Reads the current entry's port byte through hScanlineFXIndex
;  2. Programs rLYC with (next entry's scanline - 1) and advances hScanlineFXIndex to the next entry's port byte
;  3. Reads the current entry's value, then busy-waits a little
;  4. Port byte = 0: runs the textbox path instead of a plain write
;     Otherwise: writes the value to $FF00 + (port with bit 7 cleared);
;     if bit 7 of the port byte was set, also writes hSecondFXValue to $FF00 + hSecondFXAddr
;  5. Restores bc and af, and returns with `reti`
;
; Timing notes from the original author follow. The file header says the timings
; have changed since they were written, so treat every figure as unverified.
;
; Dispatching a LY=LYC interrupt can take between 5 and 12 cycles
; Adding the 11 cycles of the code above, we're between 16 and 23.
; Mode 3 takes up to 72 cycles, and the write must not occur before that, and it must not occur after the next Mode 2 ends (42 cycle leeway)
; So, we need to wait at least 72 - 16 = 52 cycles, part of which will be spent doing updates: TIME TO CYCLE-COUNT!
; NOTE(review): 72 - 16 is 56, not 52. The budget below is built on 52, so re-derive it before relying on it.
LCDHandler:
push bc
ldh a, [hScanlineFXIndex]
ld c, a
ld a, [$ff00+c] ; Get port ID
ld b, a ; Save port ID for later
inc c
inc c
ld a, [$ff00+c] ; Get next effect's scanline
dec a ; Compensate for processing time
ldh [rLYC], a ; Get set up (this should reset the STAT interrupt trigger line)
ld a, c ; Point to next effect's port ID
inc a
ldh [hScanlineFXIndex], a
dec c
ld a, [$FF00+c] ; Get effect's value
ld c, a ; Since we don't need the read index anymore, use c to retrieve the value faster later
; Wait a bit to write during HBlank, to avoid gfx artifacts
; We spent 28 cycles above, out of the required 52
; That leaves 20 cycles
; However, the first write occurs 8 cycles after the loop exits, so there's really 12 cycles to be waited
; Each iteration of the loop takes 4 cycles, except the last one which only takes 3
; This means we need to do (12 - 3) / 4 + 1 = 2 iterations + 1 cycle
; This one cycle will only appear on the textbox, and shouldn't be problematic unless the preceding scanline is busy
; And, HBlank is really short so we need to finish up quickly
; NOTE(review): 52 - 28 is 24, not 20, and with a = 2 the loop below runs exactly twice
; (one taken jump, then one fall-through). Check both against the intended budget.
ld a, 2
.waitMode0
dec a
jr nz, .waitMode0
; Check if we're trying to write to $FF*00* (rP1)
ld a, b
and a ; Note: `and $7F` can be used instead to have control on bit 7 (if ever needed)
ld a, c ; Get back value
; `ld` leaves the flags alone, so Z still reflects the port check above
; $00 (rP1) is hooked to instead perform textbox ops, since writing to it has no use
jr z, .textbox ; The textbox performs its write slightly earlier, so use the extra jump cycle to delay it slightly
ld c, b ; Retrieve port
res 7, c ; Bit 7 is only a flag (tested below), not part of the port address
ld [$ff00+c], a ; Apply FX
bit 7, b ; Bit 7 set = a second register must be written on this scanline
jr z, .onlyOneEffect
ldh a, [hSecondFXAddr]
ld c, a
ldh a, [hSecondFXValue]
ld [$ff00+c], a
.onlyOneEffect
pop bc
pop af ; Pushed by the LCD vector
reti
; Textbox path: a holds the entry's value on entry
.textbox
ldh [rSCY], a ; Store value, which is actually for SCY (dat plot twist, eh?)
xor a
ldh [rSCX], a ; SCX is forced to 0
ldh a, [hLCDC] ; Retrieve LCDC value
and ~(LCDCF_WINON | LCDCF_OBJON) ; Clear the window and OBJ enable bits
or LCDCF_BG9C00 | LCDCF_BG8000 ; Set the BG9C00 and BG8000 bits
ldh [rLCDC], a ; Only the hardware register is written, the hLCDC shadow is left untouched
ldh a, [hTextboxBGP]
ldh [rBGP], a
; Note: this is scrapped support for sprites on the textbox
; It was initially planned for JP diacritics.
; If for whatever reason, you need to re-activate this feature...
; ...uncomment this, and remove "LCDCF_OBJON" from above.
;
; ; Perform OAM DMA to get textbox's sprites
; ; Luckily, sprites are hidden during DMA
; ; Also no sprites should be present on the textbox 1st row, hiding our trickery >:P
; ld a, HIGH(wTextboxOAM)
; call hOAMDMA
; ; Reload OAM on next frame
; ldh a, [hCurrentOAMBuffer]
; ldh [hOAMBuffer], a
pop bc
pop af ; Pushed by the LCD vector
reti
; MusicHandler: LY=LYC interrupt body used when LYC is 0.
; Entered from the LCD vector with af already pushed.
; It arms the first raster effect of the frame, then leaves room for the music code:
;  - reads the scanline byte that hScanlineFXIndex points to, and writes (scanline - 1) to rLYC
;  - leaves hScanlineFXIndex pointing one byte further, at that entry's port byte, which is where LCDHandler expects it
;
; Cycle counting is important here as well: if an effect needs to trigger on scanline 1, its code must start running on scanline 0
; But, we hooked scanline 0 for our own purposes! And, scanline 0 triggers partway through VBlank! (111 cycles)
; ...Actually, according to @liji32, the interrupt will trigger again at the beginning of render line 0, so we don't need to worry
MusicHandler:
push bc
; Set up things for the actual raster FX
ldh a, [hScanlineFXIndex]
ld c, a ; c = low byte of the first entry's scanline byte
inc a
ldh [hScanlineFXIndex], a ; Index now points to the first entry's port byte
ld a, [$FF00+c] ; Get scanline
dec a ; Usual accounting
ldh [rLYC], a
; The music code needs to be interruptable, especially if we need a scanline 0 int
ei
; Insert jump (or call) to music code here
pop bc
pop af ; Pushed by the LCD vector
ret ; Plain `ret`: interrupts were already re-enabled by the `ei` above
; VBlankHandler: continuation of the VBlank vector.
; Entered with af already pushed and LCDC already copied from its shadow (both done in the vector stub).
; Only the start of the handler is shown here: it pushes bc, refreshes the
; hardware registers from their HRAM shadows and re-arms the raster FX system.
; No matching `pop bc` / `pop af` / return is part of this fragment; whatever
; code follows must restore both registers before returning.
SECTION "VBlank handler", ROM0
VBlankHandler:
push bc
; ============= Here are things that need to be updated, even on lag frames ==============
; Update IO from HRAM shadow
; (each line copies hNAME to rNAME, and destroys a)
transfer_reg SCY
transfer_reg SCX
transfer_reg WY
transfer_reg WX
transfer_reg BGP
transfer_reg OBP0
transfer_reg OBP1
; Prepare raster FX stuff
; NOTE: this assumes no effect is scheduled on line 0
; This should never happen; instead, use the HRAM shadow regs (hSCY, etc.)
; Rewind the read index to the start of the active buffer
ldh a, [hWhichScanlineBuffer]
ldh [hScanlineFXIndex], a
; Set int to happen on first scanline, for music (partway into VBlank, but oh well)
; LYC = 0 is what makes the LCD vector pick MusicHandler over LCDHandler
xor a
ldh [rLYC], a
; ... anything can follow
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment