feilipu/Screenshot from 2018-05-23 01-00-22.png

## mandel-feilipu-rc2014-lut.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
;
; Assemble with z88dk for RC2014 CP/M
; zcc +rc2014 -subtype=cpm -v -m --list mandel-feilipu-rc2014-lut.asm -o mand-lut -create-app
;
; Assemble with z88dk for RC2014 BASIC
; zcc +rc2014 -subtype=basic -v -m --list mandel-feilipu-rc2014-lut.asm -o mand-lut -create-app
;
; CP/M
; Upload using XMODEM; mandel-lut.bin -> mand-lut.com
;
; MS Basic
; Upload using hload or SCM: cat > /dev/ttyUSB0 < ./mand-lut.ihx
;
; To calculate the theoretical minimum time at 115200 baud.
; Normally 10 colour codes, and 1 character per point.
; A line is (3 x 80) x 11 + CR + LF = 2642 characters
; There are 10 x 60 / 4 lines = 150 lines
; Therefore 396,300 characters need to be transmitted.
; Serial rate is 115200 baud or 14,400 8 bit characters per second
;
; Therefore the theoretical minimum time is 27.52 seconds.
;
; Results          FCPU         Original    Table mlt   LUT mlt
; RC2014 CP/M    7.432MHz         4'58"       3'48"       2'55"
; RC2014 BASIC   7.432MHz         4'58"       3'55"
;
; Results          FCPU         Original    Optimised   z180 mlt
; YAZ180 CP/M   18.432MHz         1'40"       1'24"       1'00"
; YAZ180 yabios 18.432Mhz                     1'14"         54"
; YAZ180 CP/M   36.864MHz         1'06"         58"         46"
; YAZ180 yabios 36.864MHz                       56"         45"
;
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

include "config_rc2014_private.inc"


defc _CPM       = 0

defc cr         = $0D                     ; carriage return
defc lf         = $0A                     ; line feed
defc esc        = $1B                     ; escape

IF _CPM                                   ; cp/m ram model
defc bdos       = $05                     ; bdos vector
defc condio     = 6                       ; console direct I/O call
defc prints     = 9                       ; print string bdos call

defc eos        = '$'                     ; end of string marker
ELSE
defc eos        = $00                     ; end of string marker
ENDIF

defc pixel      = '#'                     ; character to output for pixel
defc SCALE      = 256                     ; Do NOT change this - the
                                          ; arithmetic routines rely on
                                          ; this scaling factor! :-)

;------------------------------------------------------------------------------

SECTION  data_user

x:              defw    0                       ; x-coordinate
x_start:        defw    -2 * SCALE              ; Minimum x-coordinate
x_end:          defw    1 * SCALE               ; Maximum x-coordinate
x_step:         defw    SCALE / 80              ; x-coordinate step-width

y:              defw    0                       ; y-coordinate
y_start:        defw    -5 * SCALE / 4          ; Minimum y-coordinate
y_end:          defw    5 * SCALE / 4           ; Maximum y-coordinate
y_step:         defw    SCALE / 60              ; y-coordinate step-width

iteration_max:  defb    30                      ; How many iterations
divergent:      defw    SCALE * 4
scale:          defw    SCALE
                defw    0

z_0:            defs    4,0
z_1:            defs    4,0

z_2:            defw    0

z_0_square:
z_0_square_low: defw    0
z_0_square_high:defw    0
z_1_square:
z_1_square_low: defw    0
z_1_square_high:defw    0


display:        defm    " .-+*=#@"              ; 8 characters for the display

hsv:            defm 0                          ; hsv color table
                defm 201, 200, 199, 198, 197
                defm 196, 202, 208, 214, 220
                defm 226, 190, 154, 118,  82
                defm 46,   47,  48,  49,  50
                defm 51,   45,  39,  33,  27
                defm 21,   57,  93, 129, 165

welcome:        defm    "Generating a Mandelbrot set"
crlf:           defm    cr, lf, eos
finished:       defm    esc, "[0mComputation finished.", cr, lf, eos
ansifg:         defm    esc, "[38;5;", eos
ansibg:         defm    esc, "[48;5;", eos
cls:            defm    esc, "[2J",eos

;------------------------------------------------------------------------------

SECTION code_user

PUBLIC _main

._main
        ld      de, cls                 ; VT100 clear screen
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

        ld      de, welcome             ; Print a welcome message
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
        ld      hl, (y_start)           ; y = y_start
        ld      (y), hl
.outer_loop
        ld      hl, (y_end)             ; Is y <= y_end?
        ld      de, (y)
        and     a                       ; Clear carry
        sbc     hl, de                  ; Perform the comparison
        jp      M, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
        ld      hl, (x_start)           ; x = x_start
        ld      (x), hl
.inner_loop
        ld      hl, (x_end)             ; Is x <= x_end?
        ld      de, (x)
        and     a
        sbc     hl, de
        jp      M, inner_loop_end       ; End of inner loop reached

;      z_0 = z_1 = 0;
        ld      hl, 0
        ld      (z_0), hl
        ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
        ld      a, (iteration_max)
        ld      b, a

.iteration_loop
        push    bc                      ; iteration -> stack

;        z2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld      hl, (z_1)               ; Compute DE HL = z_1 * z_1
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_1_square_low), hl    ; z_1 ** 2 is needed later again
        ld      (z_1_square_high), de

        ld      hl, (z_0)               ; Compute DE HL = z_0 * z_0
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_0_square_low), hl    ; z_1 ** 2 will be also needed
        ld      (z_0_square_high), de

        and     a                       ; Compute subtraction
        ld      bc, (z_1_square_low)
        sbc     hl, bc
        push    hl                      ; Save lower 16 bit of result
        ld      h, d
        ld      l, e
        ld      bc, (z_1_square_high)
        sbc     hl, bc
        pop     bc                      ; HL BC = z_0 ^ 2 - z_1 ^ 2

        ld      c, b                    ; Divide by scale = 256
        ld      b, l                    ; Discard the rest
        push    bc                      ; We need BC later

;        z_3 = 2 * z_0 * z_1 / scale;
        ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
        add     hl, hl
        ld      de, (z_1)
        call    l_muls_32_16x16

        ld      b, e                    ; Divide by scale (= 256)
        ld      c, h                    ; BC contains now z_3

;        z_1 = z_3 + y;
        ld      hl, (y)
        add     hl, bc
        ld      (z_1), hl

;        z_0 = z_2 + x;
        pop     bc                      ; Here BC is needed again :-)
        ld      hl, (x)
        add     hl, bc
        ld      (z_0), hl

;        if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
        ld      hl, (z_0_square_low)    ; Use the squares computed
        ld      de, (z_1_square_low)    ; above
        add     hl, de
        ld      b, h                    ; BC contains lower word of sum
        ld      c, l

        ld      hl, (z_0_square_high)
        ld      de, (z_1_square_high)
        adc     hl, de

        ld      h, l                    ; HL now contains (z_0 ^ 2 -
        ld      l, b                    ; z_1 ^ 2) / scale

        ld      (z_2),hl                ; save z_2

        ld      bc,(divergent)
        and     a
        sbc     hl, bc

;          break;
        jr      C, iteration_dec        ; No break
        pop     bc                      ; Get latest iteration counter
        jr      iteration_end           ; Exit loop

;        iteration++;
.iteration_dec
        pop     bc                      ; Get iteration counter
        djnz    iteration_loop          ; We might fall through!
;      }
.iteration_end
;      printf("%c", display[iteration % 7]);
;       call    asciipixel              ; Print the character
        call    colorpixel              ; Print the character

IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08
ENDIF

        ld      de, (x_step)            ; x += x_step
        ld      hl, (x)
        add     hl, de
        ld      (x), hl

        jp      inner_loop
;    }
;    printf("\n");
.inner_loop_end
        ld      de, crlf                ; Print a CR/LF pair
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        ld      de, (y_step)            ; y += y_step
        ld      hl, (y)
        add     hl, de
        ld      (y), hl                 ; Store new y-value

        jp      outer_loop
; }

.mandel_end
        call    asm_delay
        call    asm_delay
        ld      de, finished            ; Print finished-message
IF _CPM                                 ; cp/m ram model
        jp      prtmesg                 ; Return to CP/M
ELSE
        jp      asm_pstring             ; Return to Basic
ENDIF


.colorpixel
        ld      a,b                     ; iter count in B -> C
        and     $1F                     ; lower five bits only
        ld      c,a
        ld      b,0
        ld      hl, hsv                 ; get ANSI color code
        add     hl, bc
        ld      a,(hl)
        call    setcolor
        ld      e, pixel                ; show pixel
        ret

.asciipixel
        ld      a, b                    ; iter count in B -> L
        and     $07                     ; lower three bits only
        sbc     hl, hl
        ld      l, a
        ld      de, display             ; Get start of character array
        add     hl, de                  ; address and load the
        ld      e, (hl)                 ; character to be printed
        ret

.setcolor
        push    af                      ; save accumulator
        ld      de,ansifg               ; start ANSI control sequence to set foreground color
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        pop     af
        call    printdec                ; print ANSI color code
IF _CPM                                 ; cp/m ram model
        ld      e,'m'                   ; finish control sequence
        ld      c, condio
        jp      bdos
ELSE
        ld      a,'m'                   ; finish control sequence
        jp      asm_pchar
ENDIF

.printdec
        ld      c,-100                  ; print 100s place
        call    pd1
        ld      c,-10                   ; 10s place
        call    pd1
        ld      c,-1                    ; 1s place
.pd1
        ld      e,'0'-1                 ; start ASCII right before 0
.pd2
        inc     e                       ; increment ASCII code
        add     a,c                     ; subtract 1 place value
        jr      C,pd2                   ; loop until negative
        sub     c                       ; add back the last value
        push    af                      ; save accumulator
        ld      a,-1                    ; are we in the ones place?
        cp      c
        jr      Z,pd3                   ; if so, skip to output
        ld      a,'0'                   ; don't print leading 0s
        cp      e
        jr      Z,pd4
.pd3
IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08h
ENDIF
.pd4
        pop     af                      ; restore accumulator
        ret

;==============================================================================
;
; Compute DEHL = DE * HL (signed): This routine is not too clever but it
; works. It is based on a standard 16-by-16 multiplication routine for unsigned
; integers. At the beginning the sign of the result is determined based on the
; signs of the operands which are negated if necessary. Then the unsigned
; multiplication takes place, followed by negating the result if necessary.
;

.l_muls_32_16x16
       ld b,d                       ; d = MSB of multiplicand
       ld c,h                       ; h = MSB of multiplier
       push bc                      ; save sign info

       bit 7,d
       jr Z,l_pos_de                ; take absolute value of multiplicand

       ld a,e
       cpl
       ld e,a
       ld a,d
       cpl
       ld d,a
       inc de

.l_pos_de
       bit 7,h
       jr Z,l_pos_hl                ; take absolute value of multiplier

       ld a,l
       cpl
       ld l,a
       ld a,h
       cpl
       ld h,a
       inc hl

.l_pos_hl                           ; prepare unsigned dehl = de x hl

.l_mulu_32_16x16
        ; multiplication of two 16-bit numbers into a 32-bit product
        ;
        ; enter : de = 16-bit multiplicand = x
        ;         hl = 16-bit multiplicand = y
        ;
        ; exit  : dehl = 32-bit product
        ;         carry reset
        ;
        ; uses  : af, bc, de, hl

        ld b,h                      ; 4  b = y1
        ld c,d                      ; 4  c = x1
        push bc                     ; 11 preserve y1*x1

        ld b,l                      ; 4  b = y0
        ld c,e                      ; 4  c = x0
        push bc                     ; 11 preserve y0*x0

;;; MLT HE (xBC) ;;;;;;;;;;;;;;;; y1*x0
        ld c,__IO_LUT_OPERAND_LATCH ; 7  operand latch address
        ld b,h                      ; 4  operand Y in B
        out (c),e                   ; 12 operand X from E
        in e,(c)                    ; 12 result Z LSB to E
        inc c                       ; 4  result MSB address
        in h,(c)                    ; 12 result Z MSB to H

;;; MLT DL (xBC) ;;;;;;;;;;;;;;;; x1*y0
        dec c                       ; 4  operand latch address
        ld b,d                      ; 4  operand Y in B
        out (c),l                   ; 12 operand X from L
        in l,(c)                    ; 12 result Z LSB to L
        inc c                       ; 4  result MSB address
        in d,(c)                    ; 12 result Z MSB to D

        xor a                       ; 4  zero A
        add hl,de                   ; 11 add cross products
        adc a,a                     ; 4  capture carry

        pop de                      ; 10 restore y0*x0

;;; MLT DE (xBC) ;;;;;;;;;;;;;;;; y0*x0
        dec c                       ; 4  operand latch address
        ld b,d                      ; 4  operand Y in B
        out (c),e                   ; 12 operand X from A
        in e,(c)                    ; 12 result Z LSB to E
        inc c                       ; 4  result MSB address
        in d,(c)                    ; 12 result Z MSB to D

        ld b,a                      ; 4  carry from cross products

        ld a,d                      ; 4
        add a,l                     ; 4
        ld d,a                      ; 4  DE = final LSW

        ld l,h                      ; 4  LSB of MSW from cross products
        ld h,b                      ; 4  carry from cross products

        ex (sp),hl                  ; 19 restore y1*x1, stack interim p3 p2

;;; MLT HL (xBC) ;;;;;;;;;;;;;;;; x1*y1
        dec c                       ; 4  operand latch address
        ld b,h                      ; 4  operand Y in B
        out (c),l                   ; 12 operand X from L
        in l,(c)                    ; 12 result Z LSB to L
        inc c                       ; 4  result MSB address
        in h,(c)                    ; 12 result Z MSB to H

        pop bc                      ; 10 destack interim p3 p2

        adc hl,bc                   ; 15 hl = final MSW
        ex de,hl                    ; 4  DEHL = final product

;------------------

        pop bc                      ; recover sign info from multiplicand and multiplier
        ld a,b
        xor c
        ret P                       ; return if positive product

        ld a,l                      ; negate product and return
        cpl
        ld l,a
        ld a,h
        cpl
        ld h,a
        ld a,e
        cpl
        ld e,a
        ld a,d
        cpl
        ld d,a
        inc l
        ret NZ
        inc h
        ret NZ
        inc de
        ret

;==============================================================================
;       DELAY SUBROUTINE
;

.asm_delay
        push bc
        ld bc, $00
.asm_delay_loop
        ex (sp), hl
        ex (sp), hl
        djnz asm_delay_loop
        pop bc
        ret

;==============================================================================
;       OUTPUT SUBROUTINES
;

IF _CPM

;   Print message pointed to by (DE). It will end with a '$'.
;   modifies AF, DE, & HL
.prtmesg
        ld      a,(de)              ; Get character from DE address
        cp      eos
        ret     Z
        inc     de
        push    de                  ;otherwise, bump pointer and print it.
        ld      e,a
        ld      c, condio
        call    bdos
        pop     de
        jp      prtmesg

ELSE

defc    asm_pchar = 0008h           ; address of pchar

; print string from location in DE to ACIA, modifies AF, DE, & HL
.asm_pstring
        ld a,(de)                   ; Get character from DE address
        or a                        ; Is it $00 ?
        ret Z                       ; Then return on terminator
        rst 08h                     ; Print it
        inc de                      ; Point to next character
        jp asm_pstring              ; Continue until $00

; print contents of HL as 16 bit number in ASCII HEX, modifies AF & HL
.asm_phexwd
        push hl
        ld l,h                      ; high byte to L
        call asm_phex
        pop hl                      ; recover HL, for low byte in L
        call asm_phex
        ret

; print contents of L as 8 bit number in ASCII HEX to ACIA, modifies AF & HL
.asm_phex
        ld a,l                      ; modifies AF, HL
        push af
        rrca
        rrca
        rrca
        rrca
        call  asm_phex_conv
        pop af
.asm_phex_conv
        and  $0F
        add  a,$90
        daa
        adc  a,$40
        daa
        jp asm_pchar                ; print character on ACIA

ENDIF                               ; IF _CPM

;==============================================================================


## mandel-feilipu-rc2014.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
;
; Assemble with z88dk for RC2014 CP/M
; zcc +rc2014 -subtype=cpm -v -m --list mandel-feilipu-rc2014.asm -o mandel -create-app
;
; Assemble with z88dk for RC2014 BASIC
; zcc +rc2014 -subtype=basic -v -m --list mandel-feilipu-rc2014.asm -o mandel -create-app
;
; CP/M
; Upload using XMODEM; mandel.bin -> mandel.com
;
; MS Basic
; Upload using hload or SCM: cat > /dev/ttyUSB0 < ./mandel.ihx
;
; To calculate the theoretical minimum time at 115200 baud.
; Normally 10 colour codes, and 1 character per point.
; A line is (3 x 80) x 11 + CR + LF = 2642 characters
; There are 10 x 60 / 4 lines = 150 lines
; Therefore 396,300 characters need to be transmitted.
; Serial rate is 115200 baud or 14,400 8 bit characters per second
;
; Therefore the theoretical minimum time is 27.52 seconds.
;
; Results          FCPU         Original    Table mlt   Runer mlt
; RC2014 CP/M    7.432MHz         4'58"       3'48"       2'53"
; RC2014 BASIC   7.432MHz         4'58"       3'55"
;
; Results          FCPU         Original    Optimised   z180 mlt
; YAZ180 CP/M   18.432MHz         1'40"       1'24"       1'00"
; YAZ180 yabios 18.432Mhz                     1'14"         54"
; YAZ180 CP/M   36.864MHz         1'06"         58"         46"
; YAZ180 yabios 36.864MHz                       56"         45"
;
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

include "config_rc2014_private.inc"


defc _CPM        = 0
defc _MULU_TABLE = 0

defc cr         = $0D                     ; carriage return
defc lf         = $0A                     ; line feed
defc esc        = $1B                     ; escape

IF _CPM                                   ; cp/m ram model
defc bdos       = $05                     ; bdos vector
defc condio     = 6                       ; console direct I/O call
defc prints     = 9                       ; print string bdos call

defc eos        = '$'                     ; end of string marker
ELSE
defc eos        = $00                     ; end of string marker
ENDIF

defc pixel      = '#'                     ; character to output for pixel
defc SCALE      = 256                     ; Do NOT change this - the
                                          ; arithmetic routines rely on
                                          ; this scaling factor! :-)

;------------------------------------------------------------------------------

SECTION  data_user

x:              defw    0                       ; x-coordinate
x_start:        defw    -2 * SCALE              ; Minimum x-coordinate
x_end:          defw    1 * SCALE               ; Maximum x-coordinate
x_step:         defw    SCALE / 80              ; x-coordinate step-width

y:              defw    0                       ; y-coordinate
y_start:        defw    -5 * SCALE / 4          ; Minimum y-coordinate
y_end:          defw    5 * SCALE / 4           ; Maximum y-coordinate
y_step:         defw    SCALE / 60              ; y-coordinate step-width

iteration_max:  defb    30                      ; How many iterations
divergent:      defw    SCALE * 4
scale:          defw    SCALE
                defw    0

z_0:            defs    4,0
z_1:            defs    4,0

z_2:            defw    0

z_0_square:
z_0_square_low: defw    0
z_0_square_high:defw    0
z_1_square:
z_1_square_low: defw    0
z_1_square_high:defw    0


display:        defm    " .-+*=#@"              ; 8 characters for the display

hsv:            defm 0                          ; hsv color table
                defm 201, 200, 199, 198, 197
                defm 196, 202, 208, 214, 220
                defm 226, 190, 154, 118,  82
                defm 46,   47,  48,  49,  50
                defm 51,   45,  39,  33,  27
                defm 21,   57,  93, 129, 165

welcome:        defm    "Generating a Mandelbrot set"
crlf:           defm    cr, lf, eos
finished:       defm    esc, "[0mComputation finished.", cr, lf, eos
ansifg:         defm    esc, "[38;5;", eos
ansibg:         defm    esc, "[48;5;", eos
cls:            defm    esc, "[2J",eos

;------------------------------------------------------------------------------

SECTION code_user

PUBLIC _main

._main
        ld      de, cls                 ; VT100 clear screen
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

        ld      de, welcome             ; Print a welcome message
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
        ld      hl, (y_start)           ; y = y_start
        ld      (y), hl
.outer_loop
        ld      hl, (y_end)             ; Is y <= y_end?
        ld      de, (y)
        and     a                       ; Clear carry
        sbc     hl, de                  ; Perform the comparison
        jp      M, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
        ld      hl, (x_start)           ; x = x_start
        ld      (x), hl
.inner_loop
        ld      hl, (x_end)             ; Is x <= x_end?
        ld      de, (x)
        and     a
        sbc     hl, de
        jp      M, inner_loop_end       ; End of inner loop reached

;      z_0 = z_1 = 0;
        ld      hl, 0
        ld      (z_0), hl
        ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
        ld      a, (iteration_max)
        ld      b, a

.iteration_loop
        push    bc                      ; iteration -> stack

;        z2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld      hl, (z_1)               ; Compute DE HL = z_1 * z_1
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_1_square_low), hl    ; z_1 ** 2 is needed later again
        ld      (z_1_square_high), de

        ld      hl, (z_0)               ; Compute DE HL = z_0 * z_0
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_0_square_low), hl    ; z_1 ** 2 will be also needed
        ld      (z_0_square_high), de

        and     a                       ; Compute subtraction
        ld      bc, (z_1_square_low)
        sbc     hl, bc
        push    hl                      ; Save lower 16 bit of result
        ld      h, d
        ld      l, e
        ld      bc, (z_1_square_high)
        sbc     hl, bc
        pop     bc                      ; HL BC = z_0 ^ 2 - z_1 ^ 2

        ld      c, b                    ; Divide by scale = 256
        ld      b, l                    ; Discard the rest
        push    bc                      ; We need BC later

;        z_3 = 2 * z_0 * z_1 / scale;
        ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
        add     hl, hl
        ld      de, (z_1)
        call    l_muls_32_16x16

        ld      b, e                    ; Divide by scale (= 256)
        ld      c, h                    ; BC contains now z_3

;        z_1 = z_3 + y;
        ld      hl, (y)
        add     hl, bc
        ld      (z_1), hl

;        z_0 = z_2 + x;
        pop     bc                      ; Here BC is needed again :-)
        ld      hl, (x)
        add     hl, bc
        ld      (z_0), hl

;        if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
        ld      hl, (z_0_square_low)    ; Use the squares computed
        ld      de, (z_1_square_low)    ; above
        add     hl, de
        ld      b, h                    ; BC contains lower word of sum
        ld      c, l

        ld      hl, (z_0_square_high)
        ld      de, (z_1_square_high)
        adc     hl, de

        ld      h, l                    ; HL now contains (z_0 ^ 2 -
        ld      l, b                    ; z_1 ^ 2) / scale

        ld      (z_2),hl                ; save z_2

        ld      bc,(divergent)
        and     a
        sbc     hl, bc

;          break;
        jr      C, iteration_dec        ; No break
        pop     bc                      ; Get latest iteration counter
        jr      iteration_end           ; Exit loop

;        iteration++;
.iteration_dec
        pop     bc                      ; Get iteration counter
        djnz    iteration_loop          ; We might fall through!
;      }
.iteration_end
;      printf("%c", display[iteration % 7]);
;       call    asciipixel              ; Print the character
        call    colorpixel              ; Print the character

IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08
ENDIF

        ld      de, (x_step)            ; x += x_step
        ld      hl, (x)
        add     hl, de
        ld      (x), hl

        jp      inner_loop
;    }
;    printf("\n");
.inner_loop_end
        ld      de, crlf                ; Print a CR/LF pair
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        ld      de, (y_step)            ; y += y_step
        ld      hl, (y)
        add     hl, de
        ld      (y), hl                 ; Store new y-value

        jp      outer_loop
; }

.mandel_end
        call    asm_delay
        call    asm_delay
        ld      de, finished            ; Print finished-message
IF _CPM                                 ; cp/m ram model
        jp      prtmesg                 ; Return to CP/M
ELSE
        jp      asm_pstring             ; Return to Basic
ENDIF


.colorpixel
        ld      a,b                     ; iter count in B -> C
        and     $1F                     ; lower five bits only
        ld      c,a
        ld      b,0
        ld      hl, hsv                 ; get ANSI color code
        add     hl, bc
        ld      a,(hl)
        call    setcolor
        ld      e, pixel                ; show pixel
        ret

.asciipixel
        ld      a, b                    ; iter count in B -> L
        and     $07                     ; lower three bits only
        sbc     hl, hl
        ld      l, a
        ld      de, display             ; Get start of character array
        add     hl, de                  ; address and load the
        ld      e, (hl)                 ; character to be printed
        ret

.setcolor
        push    af                      ; save accumulator
        ld      de,ansifg               ; start ANSI control sequence to set foreground color
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        pop     af
        call    printdec                ; print ANSI color code
IF _CPM                                 ; cp/m ram model
        ld      e,'m'                   ; finish control sequence
        ld      c, condio
        jp      bdos
ELSE
        ld      a,'m'                   ; finish control sequence
        jp      asm_pchar
ENDIF

.printdec
        ld      c,-100                  ; print 100s place
        call    pd1
        ld      c,-10                   ; 10s place
        call    pd1
        ld      c,-1                    ; 1s place
.pd1
        ld      e,'0'-1                 ; start ASCII right before 0
.pd2
        inc     e                       ; increment ASCII code
        add     a,c                     ; subtract 1 place value
        jr      C,pd2                   ; loop until negative
        sub     c                       ; add back the last value
        push    af                      ; save accumulator
        ld      a,-1                    ; are we in the ones place?
        cp      c
        jr      Z,pd3                   ; if so, skip to output
        ld      a,'0'                   ; don't print leading 0s
        cp      e
        jr      Z,pd4
.pd3
IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08h
ENDIF
.pd4
        pop     af                      ; restore accumulator
        ret

;==============================================================================
;
; Compute DEHL = DE * HL (signed): This routine is not too clever but it
; works. It is based on a standard 16-by-16 multiplication routine for unsigned
; integers. At the beginning the sign of the result is determined based on the
; signs of the operands which are negated if necessary. Then the unsigned
; multiplication takes place, followed by negating the result if necessary.
;

.l_muls_32_16x16
       ld b,d                       ; d = MSB of multiplicand
       ld c,h                       ; h = MSB of multiplier
       push bc                      ; save sign info

       bit 7,d
       jr Z,l_pos_de                ; take absolute value of multiplicand

       ld a,e
       cpl
       ld e,a
       ld a,d
       cpl
       ld d,a
       inc de

.l_pos_de
       bit 7,h
       jr Z,l_pos_hl                ; take absolute value of multiplier

       ld a,l
       cpl
       ld l,a
       ld a,h
       cpl
       ld h,a
       inc hl

.l_pos_hl                           ; prepare unsigned dehl = de x hl

IF _MULU_TABLE
                                     ; prepare unsigned dehl = de x hl
.l_mulu_32_16x16
        ; multiplication of two 16-bit numbers into a 32-bit product
        ;
        ; enter : de = 16-bit multiplicand = y
        ;         hl = 16-bit multiplicand = x
        ;
        ; exit  : dehl = 32-bit product
        ;         carry reset
        ;
        ; uses  : af, bc, de, hl

        ld b,l                      ; x0
        ld c,e                      ; y0
        ld e,l                      ; x0
        ld l,d
        push hl                     ; x1 y1
        push bc                     ; x0 y0
        ld l,c                      ; y0

        ; de = y1 x0
        ; hl = x1 y0
        ; stack = x1 y1
        ; stack = x0 y0

        call l_z80_mulu_de          ; y1*x0
        ex de,hl
        call l_z80_mulu_de          ; x1*y0

        xor a                       ; zero A
        add hl,de                   ; sum cross products p2 p1
        adc a,a                     ; capture carry p3

        pop de                      ; x0 y0
        ex af,af
        call l_z80_mulu_de          ; y0*x0
        ex af,af
        ld b,a                      ; carry from cross products
        ld c,h                      ; LSB of MSW from cross products

        ld a,d
        add a,l
        ld h,a
        ld l,e                      ; LSW in HL p1 p0

        pop de                      ; x1 y1
        push bc
        ex af,af
        call l_z80_mulu_de          ; x1*y1
        ex af,af
        pop bc
        ex de,hl
        adc hl,bc
        ex de,hl                    ; de = final MSW

;------------------

        pop bc                      ; recover sign info from multiplicand and multiplier
        ld a,b
        xor c
        ret P                       ; return if positive product

        ld a,l                      ; negate product and return
        cpl
        ld l,a
        ld a,h
        cpl
        ld h,a
        ld a,e
        cpl
        ld e,a
        ld a,d
        cpl
        ld d,a
        inc l
        ret NZ
        inc h
        ret NZ
        inc de
        ret

;------------------------------------------------------------------------------
;
; Fast mulu_16_8x8 using a 512 byte table
;
; x*y = ((x+y)/2)2 - ((x-y)/2)2           <- if x+y is even
;     = ((x+y-1)/2)2 - ((x-y-1)/2)2 + y   <- if x+y is odd and x>=y
;
; enter : d = 8-bit multiplicand
;         e = 8-bit multiplicand
;
; uses  : af, bc
;
; exit  : de = 16-bit product

.l_z80_mulu_de
        ld a,d                  ; put largest in d
        cp e
        jr NC,lnc
        ld d,e
        ld e,a

.lnc                            ; with largest in d
        xor a
        or e
        jr Z,lzeroe             ; multiply by 0

        ld b,d                  ; keep larger -> b
        ld c,e                  ; keep smaller -> c

        ld a,d
        sub e
        rra
        ld d,a                  ; (x-y)/2 -> d

        ld a,b
        add a,c
        rra                     ; check for odd/even

        push hl                 ; preserve hl

        ld l,a                  ; (x+y)/2 -> l
        ld h,sqrlo/$100         ; loads sqrlo page
        ld a,(hl)               ; LSB ((x+y)/2)2 -> a
        ld e,l                  ; (x+y)/2 -> e
        ld l,d                  ; (x-y)/2 -> l (for index)
        jr NC,leven
                                ; odd tail
        sub (hl)                ; LSB ((x+y)/2)2 - ((x-y)/2)2
        ld l,e                  ; (x+y)/2 -> l
        ld e,a                  ; LSB ((x+y)/2)2 - ((x-y)/2)2 -> e
        inc h                   ; loads sqrhi page
        ld a,(hl)               ; MSB ((x+y)/2)2 -> a
        ld l,d                  ; (x-y)/2 -> l
        sbc a,(hl)              ; MSB ((x+y)/2)2 - ((x-y)/2)2 -> a
        ld d,a                  ; MSB ((x+y)/2)2 - ((x-y)/2)2 -> d

        ld a,e
        add a,c                 ; add smaller y
        ld e,a
        ld a,d
        adc a,0
        ld d,a

        pop hl
        ret

.leven                          ; even tail
        sub (hl)                ; LSB ((x+y)/2)2 - ((x-y)/2)2
        ld l,e                  ; (x+y)/2 -> l
        ld e,a                  ; LSB ((x+y)/2)2 - ((x-y)/2)2 -> e
        inc h                   ; loads sqrhi page
        ld a,(hl)               ; MSB ((x+y)/2)2 -> a
        ld l,d                  ; (x-y)/2 -> l
        sbc a,(hl)              ; MSB ((x+y)/2)2 - ((x-y)/2)2 -> a
        ld d,a                  ; MSB ((x+y)/2)2 - ((x-y)/2)2 -> d

        pop hl
        ret

.lzeroe
        ld d,e
        ret


SECTION rodata_align_256

ALIGN $100

.sqrlo ;low(x*x) should located on the page border
        defb $00,$01,$04,$09,$10,$19,$24,$31,$40,$51,$64,$79,$90,$a9,$c4,$e1
        defb $00,$21,$44,$69,$90,$b9,$e4,$11,$40,$71,$a4,$d9,$10,$49,$84,$c1
        defb $00,$41,$84,$c9,$10,$59,$a4,$f1,$40,$91,$e4,$39,$90,$e9,$44,$a1
        defb $00,$61,$c4,$29,$90,$f9,$64,$d1,$40,$b1,$24,$99,$10,$89,$04,$81
        defb $00,$81,$04,$89,$10,$99,$24,$b1,$40,$d1,$64,$f9,$90,$29,$c4,$61
        defb $00,$a1,$44,$e9,$90,$39,$e4,$91,$40,$f1,$a4,$59,$10,$c9,$84,$41
        defb $00,$c1,$84,$49,$10,$d9,$a4,$71,$40,$11,$e4,$b9,$90,$69,$44,$21
        defb $00,$e1,$c4,$a9,$90,$79,$64,$51,$40,$31,$24,$19,$10,$09,$04,$01
        defb $00,$01,$04,$09,$10,$19,$24,$31,$40,$51,$64,$79,$90,$a9,$c4,$e1
        defb $00,$21,$44,$69,$90,$b9,$e4,$11,$40,$71,$a4,$d9,$10,$49,$84,$c1
        defb $00,$41,$84,$c9,$10,$59,$a4,$f1,$40,$91,$e4,$39,$90,$e9,$44,$a1
        defb $00,$61,$c4,$29,$90,$f9,$64,$d1,$40,$b1,$24,$99,$10,$89,$04,$81
        defb $00,$81,$04,$89,$10,$99,$24,$b1,$40,$d1,$64,$f9,$90,$29,$c4,$61
        defb $00,$a1,$44,$e9,$90,$39,$e4,$91,$40,$f1,$a4,$59,$10,$c9,$84,$41
        defb $00,$c1,$84,$49,$10,$d9,$a4,$71,$40,$11,$e4,$b9,$90,$69,$44,$21
        defb $00,$e1,$c4,$a9,$90,$79,$64,$51,$40,$31,$24,$19,$10,$09,$04,$01
.sqrhi ;high(x*x) located on next page (automatically)
        defb $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
        defb $01,$01,$01,$01,$01,$01,$01,$02,$02,$02,$02,$02,$03,$03,$03,$03
        defb $04,$04,$04,$04,$05,$05,$05,$05,$06,$06,$06,$07,$07,$07,$08,$08
        defb $09,$09,$09,$0a,$0a,$0a,$0b,$0b,$0c,$0c,$0d,$0d,$0e,$0e,$0f,$0f
        defb $10,$10,$11,$11,$12,$12,$13,$13,$14,$14,$15,$15,$16,$17,$17,$18
        defb $19,$19,$1a,$1a,$1b,$1c,$1c,$1d,$1e,$1e,$1f,$20,$21,$21,$22,$23
        defb $24,$24,$25,$26,$27,$27,$28,$29,$2a,$2b,$2b,$2c,$2d,$2e,$2f,$30
        defb $31,$31,$32,$33,$34,$35,$36,$37,$38,$39,$3a,$3b,$3c,$3d,$3e,$3f
        defb $40,$41,$42,$43,$44,$45,$46,$47,$48,$49,$4a,$4b,$4c,$4d,$4e,$4f
        defb $51,$52,$53,$54,$55,$56,$57,$59,$5a,$5b,$5c,$5d,$5f,$60,$61,$62
        defb $64,$65,$66,$67,$69,$6a,$6b,$6c,$6e,$6f,$70,$72,$73,$74,$76,$77
        defb $79,$7a,$7b,$7d,$7e,$7f,$81,$82,$84,$85,$87,$88,$8a,$8b,$8d,$8e
        defb $90,$91,$93,$94,$96,$97,$99,$9a,$9c,$9d,$9f,$a0,$a2,$a4,$a5,$a7
        defb $a9,$aa,$ac,$ad,$af,$b1,$b2,$b4,$b6,$b7,$b9,$bb,$bd,$be,$c0,$c2
        defb $c4,$c5,$c7,$c9,$cb,$cc,$ce,$d0,$d2,$d4,$d5,$d7,$d9,$db,$dd,$df
        defb $e1,$e2,$e4,$e6,$e8,$ea,$ec,$ee,$f0,$f2,$f4,$f6,$f8,$fa,$fc,$fe

ELSE

;------------------------------------------------------------------------------
;
; Made by Runer112
; Analysed by Zeda
; https://raw.githubusercontent.com/Zeda/z80float/master/common/mul16.z80
; Tested by jacobly
;
; DE*HL --> DEHL
;
; enter : de   = 16-bit multiplicand  = x
;         hl   = 16-bit multiplier = y
;
; exit  : dehl = 32-bit product
;
; uses  : af, bc, de, hl

.l_mulu_32_16x16

        ld a,d
        ld d,0
        ld b,h
        ld c,l

        add a,a
        jr C,bit14
        add a,a
        jr C,bit13
        add a,a
        jr C,bit12
        add a,a
        jr C,bit11
        add a,a
        jr C,bit10
        add a,a
        jr C,bit9
        add a,a
        jr C,bit8
        add a,a
        jr C,bit7

        ld a,e
        and %11111110
        add a,a
        jr C,bit6
        add a,a
        jr C,bit5
        add a,a
        jr C,bit4
        add a,a
        jr C,bit3
        add a,a
        jr C,bit2
        add a,a
        jr C,bit1
        add a,a
        jr C,bit0
        rr e
        jr C,continue

        ld h,d
        ld l,e
        jp continue

.bit14
        add hl,hl
        adc a,a
        jr NC,bit13
        add hl,bc
        adc a,d

.bit13
        add hl,hl
        adc a,a
        jr NC,bit12
        add hl,bc
        adc a,d

.bit12
        add hl,hl
        adc a,a
        jr NC,bit11
        add hl,bc
        adc a,d

.bit11
        add hl,hl
        adc a,a
        jr NC,bit10
        add hl,bc
        adc a,d

.bit10
        add hl,hl
        adc a,a
        jr NC,bit9
        add hl,bc
        adc a,d

.bit9
        add hl,hl
        adc a,a
        jr NC,bit8
        add hl,bc
        adc a,d

.bit8
        add hl,hl
        adc a,a
        jr NC,bit7
        add hl,bc
        adc a,d

.bit7
        ld d,a
        ld a,e
        and %11111110
        add hl,hl
        adc a,a
        jr NC,bit6
        add hl,bc
        adc a,0

.bit6
        add hl,hl
        adc a,a
        jr NC,bit5
        add hl,bc
        adc a,0

.bit5
        add hl,hl
        adc a,a
        jr NC,bit4
        add hl,bc
        adc a,0

.bit4
        add hl,hl
        adc a,a
        jr NC,bit3
        add hl,bc
        adc a,0

.bit3
        add hl,hl
        adc a,a
        jr NC,bit2
        add hl,bc
        adc a,0

.bit2
        add hl,hl
        adc a,a
        jr NC,bit1
        add hl,bc
        adc a,0

.bit1
        add hl,hl
        adc a,a
        jr NC,bit0
        add hl,bc
        adc a,0

.bit0
        add hl,hl
        adc a,a
        jr C,funkyCarry
        rr e
        ld e,a
        jr NC,continue
        add hl,bc
        jr NC,continue
        inc e
        jr NZ,continue
        inc d

.continue
;------------------

        pop bc                      ; recover sign info from multiplicand and multiplier
        ld a,b
        xor c
        ret P                       ; return if positive product

        ld a,l                      ; negate product and return
        cpl
        ld l,a
        ld a,h
        cpl
        ld h,a
        ld a,e
        cpl
        ld e,a
        ld a,d
        cpl
        ld d,a
        inc l
        ret NZ
        inc h
        ret NZ
        inc de
        ret

.funkyCarry
        inc d
        rr e
        ld e,a
        ret NC
        add hl,bc
        ret NC
        inc e
        jp continue

ENDIF                               ; IF _MULU_TABLE

;==============================================================================
;       DELAY SUBROUTINE
;

.asm_delay
        push bc
        ld bc, $00
.asm_delay_loop
        ex (sp), hl
        ex (sp), hl
        djnz asm_delay_loop
        pop bc
        ret

;==============================================================================
;       OUTPUT SUBROUTINES
;

IF _CPM

;   Print message pointed to by (DE). It will end with a '$'.
;   modifies AF, DE, & HL
.prtmesg
        ld      a,(de)              ; Get character from DE address
        cp      eos
        ret     Z
        inc     de
        push    de                  ;otherwise, bump pointer and print it.
        ld      e,a
        ld      c, condio
        call    bdos
        pop     de
        jp      prtmesg

ELSE

defc    asm_pchar = 0008h           ; address of pchar

; print string from location in DE to ACIA, modifies AF, DE, & HL
.asm_pstring
        ld a,(de)                   ; Get character from DE address
        or a                        ; Is it $00 ?
        ret Z                       ; Then return on terminator
        rst 08h                     ; Print it
        inc de                      ; Point to next character
        jp asm_pstring              ; Continue until $00

; print contents of HL as 16 bit number in ASCII HEX, modifies AF & HL
.asm_phexwd
        push hl
        ld l,h                      ; high byte to L
        call asm_phex
        pop hl                      ; recover HL, for low byte in L
        call asm_phex
        ret

; print contents of L as 8 bit number in ASCII HEX to ACIA, modifies AF & HL
.asm_phex
        ld a,l                      ; modifies AF, HL
        push af
        rrca
        rrca
        rrca
        rrca
        call  asm_phex_conv
        pop af
.asm_phex_conv
        and  $0F
        add  a,$90
        daa
        adc  a,$40
        daa
        jp asm_pchar                ; print character on ACIA

ENDIF                               ; IF _CPM

;==============================================================================


## mandel-feilipu-yaz180.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
;
; Assemble with z88dk for YAZ180 CP/M
; zcc +yaz180 -subtype=cpm -v -m --list mandel-feilipu-yaz180.asm -o mand180 -create-app
;
; CP/M
; Upload using XMODEM; mandel.bin -> mandel.com
;
; Assemble with z88dk for YABIOS Am9511A APU
; zcc +yaz180 -subtype=app -v -m --list mandel-feilipu-yaz180.asm -o mandapu -create-app
;
; yabios
; Upload using loadh: cat > /dev/ttyUSB0 < ./mandel.ihx
;
; To calculate the theoretical minimum time at 115200 baud.
; Normally 10 colour codes, and 1 character per point.
; A line is (3 x 80) x 11 + CR + LF = 2642 characters
; There are 10 x 60 / 4 lines = 150 lines
; Therefore 396,300 characters need to be transmitted.
; Serial rate is 115200 baud or 14,400 8 bit characters per second
;
; Therefore the theoretical minimum time is 27.52 seconds.
;
; Results          FCPU         Original    Table mlt   Runer mlt
; RC2014 CP/M    7.432MHz         4'58"       3'48"       2'53"
; RC2014 BASIC   7.432MHz         4'58"       3'55"
;
; Results          FCPU         Original    Optimised   z180 mlt
; YAZ180 CP/M   18.432MHz         1'40"       1'24"       1'00"
; YAZ180 yabios 18.432Mhz                     1'14"         54"
; YAZ180 CP/M   36.864MHz         1'06"         58"         46"
; YAZ180 yabios 36.864MHz                       56"         45"
;
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

include "config_yaz180_private.inc"
include "config_am9511_private.inc"

EXTERN asm_phexwd, asm_phex, asm_pchar, asm_pstring

EXTERN asm_am9511a_reset

defc _CPM       = 1
defc _APU       = 0
defc _DOUBLE    = 0

defc cr         = $0D                     ; carriage return
defc lf         = $0A                     ; line feed
defc esc        = $1B                     ; escape

IF _CPM                                   ; cp/m ram model
defc bdos       = $05                     ; bdos vector
defc condio     = 6                       ; console direct I/O call
defc prints     = 9                       ; print string bdos call

defc eos        = '$'                     ; end of string marker
ELSE
defc eos        = $00                     ; end of string marker
ENDIF

defc pixel      = '#'                     ; character to output for pixel
defc SCALE      = 256                     ; Do NOT change this - the
                                          ; arithmetic routines rely on
                                          ; this scaling factor! :-)

;------------------------------------------------------------------------------

SECTION  data_user

x:              defw    0                       ; x-coordinate
x_start:        defw    -2 * SCALE              ; Minimum x-coordinate
x_end:          defw    1 * SCALE               ; Maximum x-coordinate
x_step:         defw    SCALE / 80              ; x-coordinate step-width

y:              defw    0                       ; y-coordinate
y_start:        defw    -5 * SCALE / 4          ; Minimum y-coordinate
y_end:          defw    5 * SCALE / 4           ; Maximum y-coordinate
y_step:         defw    SCALE / 60              ; y-coordinate step-width

iteration_max:  defb    30                      ; How many iterations
divergent:      defw    SCALE * 4
scale:          defw    SCALE
                defw    0

z_0:            defs    4,0
z_1:            defs    4,0

z_2:            defw    0

z_0_square:
z_0_square_low: defw    0
z_0_square_high:defw    0
z_1_square:
z_1_square_low: defw    0
z_1_square_high:defw    0


display:        defm    " .-+*=#@"              ; 8 characters for the display

hsv:            defm 0                          ; hsv color table
                defm 201, 200, 199, 198, 197
                defm 196, 202, 208, 214, 220
                defm 226, 190, 154, 118,  82
                defm 46,   47,  48,  49,  50
                defm 51,   45,  39,  33,  27
                defm 21,   57,  93, 129, 165

IF _APU
welcome:        defm    "APU: Generating a Mandelbrot set"
ELSE
welcome:        defm    "Z180: Generating a Mandelbrot set"
ENDIF

crlf:           defm    cr, lf, eos
finished:       defm    esc, "[0mComputation finished.", cr, lf, eos
ansifg:         defm    esc, "[38;5;", eos
ansibg:         defm    esc, "[48;5;", eos
cls:            defm    esc, "[2J",eos

;------------------------------------------------------------------------------

SECTION code_user

PUBLIC _main

._main
        ld      de, cls                 ; VT100 clear screen
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

        ld      de, welcome             ; Print a welcome message
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

IF _APU
        ld      a,DCNTL_MWI1|DCNTL_IWI1
        out0    (DCNTL),a

        call    asm_am9511a_reset       ; INITIALISE THE APU
        call    delay
ENDIF

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
        ld      hl, (y_start)           ; y = y_start
        ld      (y), hl
.outer_loop
        ld      hl, (y_end)             ; Is y <= y_end?
        ld      de, (y)
        and     a                       ; Clear carry
        sbc     hl, de                  ; Perform the comparison
        jp      M, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
        ld      hl, (x_start)           ; x = x_start
        ld      (x), hl
.inner_loop
        ld      hl, (x_end)             ; Is x <= x_end?
        ld      de, (x)
        and     a
        sbc     hl, de
        jp      M, inner_loop_end       ; End of inner loop reached

IF 0
        push af
        push hl
        ld hl,(x)
        call asm_phexwd
        ld l,':'
        call asm_pchar
        ld hl,(y)
        call asm_phexwd
        ld l,'>'
        call asm_pchar
        call delay_short
        pop hl
        pop af
ENDIF

;      z_0 = z_1 = 0;
        ld      hl, 0
        ld      (z_0), hl
        ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
        ld      a, (iteration_max)
        ld      b, a

.iteration_loop
        push    bc                      ; iteration -> stack

IF _APU
        call    apu_calc
        ld      hl,(z_2)                ; recover z_2 into HL
ELSE

;        z2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld      hl, (z_1)               ; Compute DE HL = z_1 * z_1
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_1_square_low), hl    ; z_1 ** 2 is needed later again
        ld      (z_1_square_high), de

        ld      hl, (z_0)               ; Compute DE HL = z_0 * z_0
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_0_square_low), hl    ; z_1 ** 2 will be also needed
        ld      (z_0_square_high), de

        and     a                       ; Compute subtraction
        ld      bc, (z_1_square_low)
        sbc     hl, bc
        push    hl                      ; Save lower 16 bit of result
        ld      h, d
        ld      l, e
        ld      bc, (z_1_square_high)
        sbc     hl, bc
        pop     bc                      ; HL BC = z_0 ^ 2 - z_1 ^ 2

        ld      c, b                    ; Divide by scale = 256
        ld      b, l                    ; Discard the rest
        push    bc                      ; We need BC later

;        z_3 = 2 * z_0 * z_1 / scale;
        ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
        add     hl, hl
        ld      de, (z_1)
        call    l_muls_32_16x16

        ld      b, e                    ; Divide by scale (= 256)
        ld      c, h                    ; BC contains now z_3

;        z_1 = z_3 + y;
        ld      hl, (y)
        add     hl, bc
        ld      (z_1), hl

;        z_0 = z_2 + x;
        pop     bc                      ; Here BC is needed again :-)
        ld      hl, (x)
        add     hl, bc
        ld      (z_0), hl

;        if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
        ld      hl, (z_0_square_low)    ; Use the squares computed
        ld      de, (z_1_square_low)    ; above
        add     hl, de
        ld      b, h                    ; BC contains lower word of sum
        ld      c, l

        ld      hl, (z_0_square_high)
        ld      de, (z_1_square_high)
        adc     hl, de

        ld      h, l                    ; HL now contains (z_0 ^ 2 -
        ld      l, b                    ; z_1 ^ 2) / scale

        ld      (z_2),hl                ; save z_2

ENDIF                                   ; IF _APU

        ld      bc,(divergent)
        and     a
        sbc     hl, bc

;          break;
        jr      C, iteration_dec        ; No break
        pop     bc                      ; Get latest iteration counter
        jr      iteration_end           ; Exit loop

;        iteration++;
.iteration_dec
        pop     bc                      ; Get iteration counter
        djnz    iteration_loop          ; We might fall through!
;      }
.iteration_end
;      printf("%c", display[iteration % 7]);
;       call    asciipixel              ; Print the character
        call    colorpixel              ; Print the character

IF 0
        ld l,' '
        call asm_pchar
        ld hl,(z_2)
        call asm_phexwd                 ; print final z_2
        ld l,' '
        call asm_pchar
        ld l,b
        call asm_phex                   ; print the iteration
        ld l,cr
        call asm_pchar
        ld l,lf
        call asm_pchar
        call delay_short
ENDIF

IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      l,e
        call    asm_pchar
ENDIF

        ld      de, (x_step)            ; x += x_step
        ld      hl, (x)
        add     hl, de
        ld      (x), hl

        jp      inner_loop
;    }
;    printf("\n");
.inner_loop_end
        ld      de, crlf                ; Print a CR/LF pair
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        ld      de, (y_step)            ; y += y_step
        ld      hl, (y)
        add     hl, de
        ld      (y), hl                 ; Store new y-value

        jp      outer_loop
; }

.mandel_end

IF _APU
        ld      a,DCNTL_MWI0|DCNTL_IWI0 ; return wait states to original
        out0    (DCNTL),a
ENDIF

        ld      de, finished            ; Print finished-message
IF _CPM                                 ; cp/m ram model
        jp      prtmesg                 ; Return to CP/M
ELSE
        jp      asm_pstring             ; Return to yabios
ENDIF


.colorpixel
        ld      a,b                     ; iter count in B -> C
        and     $1F                     ; lower five bits only
        ld      c,a
        ld      b,0
        ld      hl, hsv                 ; get ANSI color code
        add     hl, bc
        ld      a,(hl)
        call    setcolor
        ld      e, pixel                ; show pixel
        ret

.asciipixel
        ld      a, b                    ; iter count in B -> L
        and     $07                     ; lower three bits only
        sbc     hl, hl
        ld      l, a
        ld      de, display             ; Get start of character array
        add     hl, de                  ; address and load the
        ld      e, (hl)                 ; character to be printed
        ret

.setcolor
        push    af                      ; save accumulator
        ld      de,ansifg               ; start ANSI control sequence to set foreground color
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        pop     af
        call    printdec                ; print ANSI color code
IF _CPM                                 ; cp/m ram model
        ld      e,'m'                   ; finish control sequence
        ld      c, condio
        jp      bdos
ELSE
        ld      l,'m'                   ; finish control sequence
        jp      asm_pchar
ENDIF

.printdec
        ld      c,-100                  ; print 100s place
        call    pd1
        ld      c,-10                   ; 10s place
        call    pd1
        ld      c,-1                    ; 1s place
.pd1
        ld      e,'0'-1                 ; start ASCII right before 0
.pd2
        inc     e                       ; increment ASCII code
        add     a,c                     ; subtract 1 place value
        jr      C,pd2                   ; loop until negative
        sub     c                       ; add back the last value
        push    af                      ; save accumulator
        ld      a,-1                    ; are we in the ones place?
        cp      c
        jr      Z,pd3                   ; if so, skip to output
        ld      a,'0'                   ; don't print leading 0s
        cp      e
        jr      Z,pd4
.pd3
IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      l,e
        call    asm_pchar
ENDIF
.pd4
        pop     af                      ; restore accumulator

IF _APU
        jp      delay_short
ELSE
        ret
ENDIF

;==============================================================================

IF _APU

EXTERN  asm_am9511a_cmd, asm_am9511a_opp
EXTERN  asm_am9511a_isr, asm_am9511a_chk_idle

.apu_calc

IF _DOUBLE

;;;;;;; double calc

        ld hl,z_0                       ; Extend 16 bit z_0 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

        ld hl,z_1                       ; Extend 16 bit z_1 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld de,z_0
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_0_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (double) to z_0_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_1
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_1_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (double) to z_1_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DSUB           ; COMMAND for DSUB (subtract double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_0 = z_2 + x;

        ld de,x
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_3 = 2 * z_0 * z_1 / scale;

        ld de,z_0
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_1
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_1 = z_3 + y;

        ld c,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,y
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_1
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_1
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_0
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_0
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld de,z_0_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double) from z_0_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_1_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double) from z_1_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (double)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_2
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (single) to z_2
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

ELSE                                    ; IF !_DOUBLE

;;;;;;; float calc

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld de,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_0_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_0_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_1_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_1_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FSUB           ; COMMAND for FSUB (subtract float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_0 = z_2 + x;

        ld de,x
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_3 = 2 * z_0 * z_1 / scale;

        ld de,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        call asm_am9511a_cmd            ; ENTER a COMMAND


        ld de,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

;       z_1 = z_3 + y;

        ld de,y
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_1
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_1
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_0
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_0
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld de,z_0_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_0_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld de,z_1_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_1_square
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

        ld c,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld c,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        call asm_am9511a_cmd            ; ENTER a COMMAND

        ld de,z_2
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (single) to z_2
        call asm_am9511a_opp            ; POINTER TO OPERAND IN OPERAND BUFFER

ENDIF                                   ; IF _DOUBLE

        call asm_am9511a_isr            ; KICK OFF APU PROCESS, WHICH THEN INTERRUPTS

        jp asm_am9511a_chk_idle         ; CHECK, because it could be doing a last command

ELSE                                    ; IF !_APU


   ; signed multiplication of two 16-bit numbers into a 32-bit product.
   ; using the z180 hardware unsigned 8x8 multiply instruction
   ;
   ; enter : de = 16-bit multiplicand = y
   ;         hl = 16-bit multiplier = x
   ;
   ; exit  : dehl = 32-bit product
   ;         carry reset
   ;
   ; uses  : af, bc, de, hl

.l_muls_32_16x16
   ld b,d                       ; d = MSB of multiplicand
   ld c,h                       ; h = MSB of multiplier
   push bc                      ; save sign info

   bit 7,d
   jr Z,l_pos_de                ; take absolute value of multiplicand

   ld a,e
   cpl
   ld e,a
   ld a,d
   cpl
   ld d,a
   inc de

.l_pos_de
   bit 7,h
   jr Z,l_pos_hl                ; take absolute value of multiplier

   ld a,l
   cpl
   ld l,a
   ld a,h
   cpl
   ld h,a
   inc hl

.l_pos_hl                       ; prepare unsigned dehl = de x hl

    ; unsigned multiplication of two 16-bit numbers into a 32-bit product
    ;
    ; enter : de = 16-bit multiplicand = y
    ;         hl = 16-bit multiplicand = x
    ;
    ; exit  : dehl = 32-bit product
    ;         carry reset
    ;
    ; uses  : af, bc, de, hl

    ld b,l                       ; xl
    ld c,d                       ; yh
    ld d,l                       ; xl
    ld l,c
    push hl                      ; xh yh
    ld l,e                       ; yl

    ; bc = xl yh
    ; de = xl yl
    ; hl = xh yl
    ; stack = xh yh

    mlt de                       ; xl * yl

    mlt bc                       ; xl * yh
    mlt hl                       ; xh * yl

    add hl,bc                    ; sum cross products

    sbc a,a
    and $01
    ld b,a                       ; carry from cross products
    ld c,h                       ; LSB of MSW from cross products

    ld a,d
    add a,l
    ld d,a                       ; de = final product LSW

    pop hl
    mlt hl                       ; xh * yh

    adc hl,bc                    ; hl = final product MSW
    ex de,hl

    pop bc                       ; recover sign info from multiplicand and multiplier
    ld a,b
    xor c
    ret P                        ; return if positive product

    ld a,l                       ; negate product and return
    cpl
    ld l,a
    ld a,h
    cpl
    ld h,a
    ld a,e
    cpl
    ld e,a
    ld a,d
    cpl
    ld d,a
    inc l
    ret NZ
    inc h
    ret NZ
    inc de
    ret

ENDIF                           ; IF _APU

;==============================================================================
;       DELAY SUBROUTINE
;

.delay_short
    push bc
    ld b, $00
.delay_short_loop
    ex (sp), hl
    ex (sp), hl
    djnz delay_short_loop
    pop bc
    ret

.delay
    push bc
    ld bc, $00
.delay_loop
    ex (sp), hl
    ex (sp), hl
    djnz delay_loop
    dec c
    jr NZ,delay_loop
    pop bc
    ret

;==============================================================================
;       OUTPUT SUBROUTINE
;

IF _CPM

;   Print message pointed to by (DE). It will end with a '$'.
;   modifies AF, DE, & HL
.prtmesg
    ld      a,(de)              ; Get character from DE address
    cp      eos
    ret     Z
    inc     de
    push    de                  ;otherwise, bump pointer and print it.
    ld      e,a
    ld      c, condio
    call    bdos
    pop     de
    jp      prtmesg

ENDIF                               ; IF _CPM

;==============================================================================


## mandel-original.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
; Latest version at https://gist.github.com/jblang/3b17598ccfa0f7e5cca79ad826a399a9
; Assemble with sjasm
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

                org     100h

                jp      start

bdos            equ     05h                     ; bdos vector
conout          equ     2                       ; console output bdos call
prints          equ     9                       ; print string bdos call
cr              equ     $0D                     ; carriage return
lf              equ     $0A                     ; line feed
esc             equ     $1B                     ; escape
eos             equ     '$'                     ; end of string marker
pixel           equ     219                     ; character to output for pixel
scale           equ     256                     ; Do NOT change this - the
                                                ; arithmetic routines rely on
                                                ; this scaling factor! :-)
divergent       equ     scale * 4

iteration_max   defb    30                      ; How many iterations
x               defw    0                       ; x-coordinate
x_start         defw    -2 * scale              ; Minimum x-coordinate
x_end           defw    1 * scale               ; Maximum x-coordinate
x_step          defw    scale / 80              ; x-coordinate step-width
y               defw    -5 * scale / 4          ; Minimum y-coordinate
y_end           defw    5 * scale / 4           ; Maximum y-coordinate
y_step          defw    scale / 60              ; y-coordinate step-width
z_0             defw    0
z_1             defw    0
scratch_0       defw    0
z_0_square_high defw    0
z_0_square_low  defw    0
z_1_square_high defw    0
z_1_square_low  defw    0
display         defb    " .-+*=#@"              ; 8 characters for the display

hsv             defb 0                          ; hsv color table
                defb 201, 200, 199, 198, 197
                defb 196, 202, 208, 214, 220
                defb 226, 190, 154, 118, 82
                defb 46, 47, 48, 49, 50
                defb 51, 45, 39, 33, 27
                defb 21, 57, 93, 129, 165

welcome         defb    "Generating a Mandelbrot set"
crlf            defb    cr, lf, eos
finished        defb    esc, "[0mComputation finished.", cr, lf, eos
ansifg          defb    esc, "[38;5;", eos
ansibg          defb    esc, "[48;5;", eos


start           ld      de, welcome             ; Print a welcome message
                ld      c, prints
                call    bdos

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
outer_loop      ld      hl, (y_end)             ; Is y <= y_end?
                ld      de, (y)
                and     a                       ; Clear carry
                sbc     hl, de                  ; Perform the comparison
                jp      m, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
                ld      hl, (x_start)           ; x = x_start
                ld      (x), hl
inner_loop      ld      hl, (x_end)             ; Is x <= x_end?
                ld      de, (x)
                and     a
                sbc     hl, de
                jp      m, inner_loop_end       ; End of inner loop reached

;      z_0 = z_1 = 0;
                ld      hl, 0
                ld      (z_0), hl
                ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
                ld      a, (iteration_max)
                ld      b, a
iteration_loop  push    bc                      ; iteration -> stack
;        z2 = (z_0 * z_0 - z_1 * z_1) / SCALE;
                ld      de, (z_1)               ; Compute DE HL = z_1 * z_1
                ld      b, d
                ld      c, e
                call    mul_16
                ld      (z_0_square_low), hl    ; z_0 ** 2 is needed later again
                ld      (z_0_square_high), de

                ld      de, (z_0)               ; Compute DE HL = z_0 * z_0
                ld      b, d
                ld      c, e
                call    mul_16
                ld      (z_1_square_low), hl    ; z_1 ** 2 will be also needed
                ld      (z_1_square_high), de

                and     a                       ; Compute subtraction
                ld      bc, (z_0_square_low)
                sbc     hl, bc
                ld      (scratch_0), hl         ; Save lower 16 bit of result
                ld      h, d
                ld      l, e
                ld      bc, (z_0_square_high)
                sbc     hl, bc
                ld      bc, (scratch_0)         ; HL BC = z_0 ** 2 - z_1 ** 2

                ld      c, b                    ; Divide by scale = 256
                ld      b, l                    ; Discard the rest
                push    bc                      ; We need BC later

;        z3 = 2 * z0 * z1 / SCALE;
                ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
                add     hl, hl
                ld      d, h
                ld      e, l
                ld      bc, (z_1)
                call    mul_16

                ld      b, e                    ; Divide by scale (= 256)
                ld      c, h                    ; BC contains now z_3

;        z1 = z3 + y;
                ld      hl, (y)
                add     hl, bc
                ld      (z_1), hl

;        z_0 = z_2 + x;
                pop     bc                      ; Here BC is needed again :-)
                ld      hl, (x)
                add     hl, bc
                ld      (z_0), hl

;        if (z0 * z0 / SCALE + z1 * z1 / SCALE > 4 * SCALE)
                ld      hl, (z_0_square_low)    ; Use the squares computed
                ld      de, (z_1_square_low)    ; above
                add     hl, de
                ld      b, h                  ; BC contains lower word of sum
                ld      c, l

                ld      hl, (z_0_square_high)
                ld      de, (z_1_square_high)
                adc     hl, de

                ld      h, l                    ; HL now contains (z_0 ** 2 +
                ld      l, b                    ; z_1 ** 2) / scale

                ld      bc, divergent
                and     a
                sbc     hl, bc

;          break;
                jp      c, iteration_dec        ; No break
                pop     bc                      ; Get latest iteration counter
                jr      iteration_end           ; Exit loop

;        iteration++;
iteration_dec   pop     bc                      ; Get iteration counter
                djnz    iteration_loop          ; We might fall through!
;      }
iteration_end
;      printf("%c", display[iteration % 7]);
                call    colorpixel
                ld      c, conout               ; Print the character
                call    bdos

                ld      de, (x_step)            ; x += x_step
                ld      hl, (x)
                add     hl, de
                ld      (x), hl

                jp      inner_loop
;    }
;    printf("\n");
inner_loop_end
                ld      de, crlf
                ld      c, prints              ; Print a CR/LF pair
                call    bdos

                ld      de, (y_step)            ; y += y_step
                ld      hl, (y)
                add     hl, de
                ld      (y), hl                 ; Store new y-value

                jp      outer_loop
; }

mandel_end      ld      de, finished            ; Print finished-message
                ld      c, prints
                call bdos

                ret                             ; Return to CP/M

colorpixel      ld      c,b                     ; iter count in BC
                ld      b,0
                ld      hl, hsv                 ; get ANSI color code
                add     hl, bc
                ld      a,(hl)
                call    setcolor
                ld      e, pixel                ; show pixel
                ret

asciipixel      ld      a, b
                and     $7                      ; lower three bits only (c = 0)
                sbc     hl, hl
                ld      l, a
                ld      de, display             ; Get start of character array
                add     hl, de                  ; address and load the
                ld      e, (hl)                 ; character to be printed
                ret

setcolor        push    af                      ; save accumulator
                ld      de,ansifg               ; start ANSI control sequence
                ld      c,prints                ; to set foreground color
                call    bdos
                pop     af
                call    printdec                ; print ANSI color code
                ld      c,conout
                ld      e,'m'                   ; finish control sequence
                call    bdos
                ret

printdec        ld      c,-100                  ; print 100s place
                call    pd1
                ld      c,-10                   ; 10s place
                call    pd1
                ld      c,-1                    ; 1s place
pd1             ld      e,'0'-1                 ; start ASCII right before 0
pd2             inc     e                       ; increment ASCII code
                add     a,c                     ; subtract 1 place value
                jr      c,pd2                   ; loop until negative
                sub     c                       ; add back the last value
                push    af                      ; save accumulator
                ld      a,-1                    ; are we in the ones place?
                cp      c
                jr      z,pd3                   ; if so, skip to output
                ld      a,'0'                   ; don't print leading 0s
                cp      e
                jr      z,pd4
pd3             ld      c,conout
                call    bdos
pd4             pop     af                      ; restore accumulator
                ret


;
;   Compute DEHL = BC * DE (signed): This routine is not too clever but it
; works. It is based on a standard 16-by-16 multiplication routine for unsigned
; integers. At the beginning the sign of the result is determined based on the
; signs of the operands which are negated if necessary. Then the unsigned
; multiplication takes place, followed by negating the result if necessary.
;
mul_16          xor     a                       ; Clear carry and A (-> +)
                bit     7, b                    ; Is BC negative?
                jr      z, bc_positive          ; No
                sub     c                       ; A is still zero, complement
                ld      c, a
                ld      a, 0
                sbc     a, b
                ld      b, a
                scf                             ; Set carry (-> -)
bc_positive     bit     7, D                    ; Is DE negative?
                jr      z, de_positive          ; No
                push    af                      ; Remember carry for later!
                xor     a
                sub     e
                ld      e, a
                ld      a, 0
                sbc     a, d
                ld      d, a
                pop     af                      ; Restore carry for complement
                ccf                             ; Complement Carry (-> +/-?)
de_positive     push    af                      ; Remember state of carry
                and     a                       ; Start multiplication
                sbc     hl, hl
                ld      a, 16                   ; 16 rounds
mul_16_loop     add     hl, hl
                rl      e
                rl      d
                jr      nc, mul_16_exit
                add     hl, bc
                jr      nc, mul_16_exit
                inc     de
mul_16_exit     dec     a
                jr      nz, mul_16_loop
                pop     af                      ; Restore carry from beginning
                ret     nc                      ; No sign inversion necessary
                xor     a                       ; Complement DE HL
                sub     l
                ld      l, a
                ld      a, 0
                sbc     a, h
                ld      h, a
                ld      a, 0
                sbc     a, e
                ld      e, a
                ld      a, 0
                sbc     a, d
                ld      d, a
                ret

## mandel_APU.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
;
; Assemble with z88dk for RC2014 CP/M
; zcc +rc2014 -subtype=cpm -v -m --list --am9511 mandel_APU.asm -o mandapu -create-app
;
; Assemble with z88dk for RC2014 BASIC
; zcc +rc2014 -subtype=basic -v -m --list --am9511 mandel_APU.asm -o mandapu -create-app
;
; CP/M
; Upload using XMODEM; mandapu.bin -> mandapu.com
;
; MS Basic
; Upload using hload or SCM: cat > /dev/ttyUSB0 < ./mandapu.ihx
;
; To calculate the theoretical minimum time at 115200 baud.
; Normally 10 colour codes, and 1 character per point.
; A line is (3 x 80) x 11 + CR + LF = 2642 characters
; There are 10 x 60 / 4 lines = 150 lines
; Therefore 396,300 characters need to be transmitted.
; Serial rate is 115200 baud or 14,400 8 bit characters per second
;
; Therefore the theoretical minimum time is 27.52 seconds.
;
; Results          FCPU         Original    Table mlt   Runer mlt
; RC2014 CP/M    7.432MHz         4'58"       3'48"       2'53"
; RC2014 BASIC   7.432MHz         4'58"       3'55"
;
; Results          FCPU         Original    Optimised   z180 mlt
; YAZ180 CP/M   18.432MHz         1'40"       1'24"       1'00"
; YAZ180 yabios 18.432Mhz                     1'14"         54"
; YAZ180 CP/M   36.864MHz         1'06"         58"         46"
; YAZ180 yabios 36.864MHz                       56"         45"
;
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

include "config_rc2014_private.inc"

;
; Configuration of CP/M or Basic & APU Double or Float
; NOTE TO DO: APU Float is NOT CONVERTED FROM YAZ180 Calling, won't work as is with RC2014
;

defc _CPM       = 0
defc _APU       = 1
defc _DOUBLE    = 1

defc cr         = $0D                     ; carriage return
defc lf         = $0A                     ; line feed
defc esc        = $1B                     ; escape

IF _CPM                                   ; cp/m ram model
defc bdos       = $05                     ; bdos vector
defc condio     = 6                       ; console direct I/O call
defc prints     = 9                       ; print string bdos call

defc eos        = '$'                     ; end of string marker
ELSE
defc eos        = $00                     ; end of string marker
ENDIF

defc pixel      = '#'                     ; character to output for pixel
defc SCALE      = 256                     ; Do NOT change this - the
                                          ; arithmetic routines rely on
                                          ; this scaling factor! :-)

;------------------------------------------------------------------------------

SECTION  data_user

x:              defw    0                       ; x-coordinate
x_start:        defw    -2 * SCALE              ; Minimum x-coordinate
x_end:          defw    1 * SCALE               ; Maximum x-coordinate
x_step:         defw    SCALE / 80              ; x-coordinate step-width

y:              defw    0                       ; y-coordinate
y_start:        defw    -5 * SCALE / 4          ; Minimum y-coordinate
y_end:          defw    5 * SCALE / 4           ; Maximum y-coordinate
y_step:         defw    SCALE / 60              ; y-coordinate step-width

iteration_max:  defb    30                      ; How many iterations
divergent:      defw    SCALE * 4
scale:          defw    SCALE
                defw    0

z_0:            defs    4,0
z_1:            defs    4,0

z_2:            defw    0

z_0_square:
z_0_square_low: defw    0
z_0_square_high:defw    0
z_1_square:
z_1_square_low: defw    0
z_1_square_high:defw    0


display:        defm    " .-+*=#@"              ; 8 characters for the display

hsv:            defm 0                          ; hsv color table
                defm 201, 200, 199, 198, 197
                defm 196, 202, 208, 214, 220
                defm 226, 190, 154, 118,  82
                defm 46,   47,  48,  49,  50
                defm 51,   45,  39,  33,  27
                defm 21,   57,  93, 129, 165

welcome:        defm    "Generating a Mandelbrot set"
crlf:           defm    cr, lf, eos
finished:       defm    esc, "[0mComputation finished.", cr, lf, eos
ansifg:         defm    esc, "[38;5;", eos
ansibg:         defm    esc, "[48;5;", eos
cls:            defm    esc, "[2J",eos

;------------------------------------------------------------------------------

SECTION code_user

PUBLIC _main

._main
        ld      de, cls                 ; VT100 clear screen
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

        ld      de, welcome             ; Print a welcome message
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
        ld      hl, (y_start)           ; y = y_start
        ld      (y), hl
.outer_loop
        ld      hl, (y_end)             ; Is y <= y_end?
        ld      de, (y)
        and     a                       ; Clear carry
        sbc     hl, de                  ; Perform the comparison
        jp      M, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
        ld      hl, (x_start)           ; x = x_start
        ld      (x), hl
.inner_loop
        ld      hl, (x_end)             ; Is x <= x_end?
        ld      de, (x)
        and     a
        sbc     hl, de
        jp      M, inner_loop_end       ; End of inner loop reached

;       push af
;       push hl
;       ld hl,(x)
;       call asm_phexwd
;       ld a,':'
;       call asm_pchar
;       ld hl,(y)
;       call asm_phexwd
;       ld a,'>'
;       call asm_pchar
;       pop hl
;       pop af

;      z_0 = z_1 = 0;
        ld      hl, 0
        ld      (z_0), hl
        ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
        ld      a, (iteration_max)
        ld      b, a

.iteration_loop
        push    bc                      ; iteration -> stack

IF _APU
        call    apu_calc                ; HL now contains (z_0 ^ 2 - z_1 ^ 2) / scale
        ld      (z_2),hl                ; save z_2
ELSE

;        z2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld      hl, (z_1)               ; Compute DE HL = z_1 * z_1
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_1_square_low), hl    ; z_1 ** 2 is needed later again
        ld      (z_1_square_high), de

        ld      hl, (z_0)               ; Compute DE HL = z_0 * z_0
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_0_square_low), hl    ; z_1 ** 2 will be also needed
        ld      (z_0_square_high), de

        and     a                       ; Compute subtraction
        ld      bc, (z_1_square_low)
        sbc     hl, bc
        push    hl                      ; Save lower 16 bit of result
        ld      h, d
        ld      l, e
        ld      bc, (z_1_square_high)
        sbc     hl, bc
        pop     bc                      ; HL BC = z_0 ^ 2 - z_1 ^ 2

        ld      c, b                    ; Divide by scale = 256
        ld      b, l                    ; Discard the rest
        push    bc                      ; We need BC later

;        z_3 = 2 * z_0 * z_1 / scale;
        ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
        add     hl, hl
        ld      de, (z_1)
        call    l_muls_32_16x16

        ld      b, e                    ; Divide by scale (= 256)
        ld      c, h                    ; BC contains now z_3

;        z_1 = z_3 + y;
        ld      hl, (y)
        add     hl, bc
        ld      (z_1), hl

;        z_0 = z_2 + x;
        pop     bc                      ; Here BC is needed again :-)
        ld      hl, (x)
        add     hl, bc
        ld      (z_0), hl

;        if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
        ld      hl, (z_0_square_low)    ; Use the squares computed
        ld      de, (z_1_square_low)    ; above
        add     hl, de
        ld      b, h                    ; BC contains lower word of sum
        ld      c, l

        ld      hl, (z_0_square_high)
        ld      de, (z_1_square_high)
        adc     hl, de

        ld      h, l                    ; HL now contains (z_0 ^ 2 -
        ld      l, b                    ; z_1 ^ 2) / scale

        ld      (z_2),hl                ; save z_2

ENDIF                                   ; IF _APU

        ld      bc,(divergent)
        and     a
        sbc     hl, bc

;          break;
        jr      C, iteration_dec        ; No break
        pop     bc                      ; Get latest iteration counter
        jr      iteration_end           ; Exit loop

;        iteration++;
.iteration_dec
        pop     bc                      ; Get iteration counter
        djnz    iteration_loop          ; We might fall through!
;      }
.iteration_end
;      printf("%c", display[iteration % 7]);
;       call    asciipixel              ; Print the character
        call    colorpixel              ; Print the character

;       ld a,' '
;       call asm_pchar
;       ld hl,(z_2)
;       call asm_phexwd                 ; print final z_2
;       ld a,' '
;       call asm_pchar
;       ld l,b
;       call asm_phex                   ; print the iteration
;       ld a,cr
;       call asm_pchar
;       ld a,lf
;       call asm_pchar
;       call asm_delay
;       call asm_delay

IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08
ENDIF

        ld      de, (x_step)            ; x += x_step
        ld      hl, (x)
        add     hl, de
        ld      (x), hl

        jp      inner_loop
;    }
;    printf("\n");
.inner_loop_end
        ld      de, crlf                ; Print a CR/LF pair
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        ld      de, (y_step)            ; y += y_step
        ld      hl, (y)
        add     hl, de
        ld      (y), hl                 ; Store new y-value

        jp      outer_loop
; }

.mandel_end
        call    asm_delay
        call    asm_delay
        ld      de, finished            ; Print finished-message
IF _CPM                                 ; cp/m ram model
        jp      prtmesg                 ; Return to CP/M
ELSE
        jp      asm_pstring             ; Return to Basic
ENDIF


.colorpixel
        ld      a,b                     ; iter count in B -> C
        and     $1F                     ; lower five bits only
        ld      c,a
        ld      b,0
        ld      hl, hsv                 ; get ANSI color code
        add     hl, bc
        ld      a,(hl)
        call    setcolor
        ld      e, pixel                ; show pixel
        ret

.asciipixel
        ld      a, b                    ; iter count in B -> L
        and     $07                     ; lower three bits only
        sbc     hl, hl
        ld      l, a
        ld      de, display             ; Get start of character array
        add     hl, de                  ; address and load the
        ld      e, (hl)                 ; character to be printed
        ret

.setcolor
        push    af                      ; save accumulator
        ld      de,ansifg               ; start ANSI control sequence to set foreground color
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        pop     af
        call    printdec                ; print ANSI color code
IF _CPM                                 ; cp/m ram model
        ld      e,'m'                   ; finish control sequence
        ld      c, condio
        jp      bdos
ELSE
        ld      a,'m'                   ; finish control sequence
        jp      asm_pchar
ENDIF

.printdec
        ld      c,-100                  ; print 100s place
        call    pd1
        ld      c,-10                   ; 10s place
        call    pd1
        ld      c,-1                    ; 1s place
.pd1
        ld      e,'0'-1                 ; start ASCII right before 0
.pd2
        inc     e                       ; increment ASCII code
        add     a,c                     ; subtract 1 place value
        jr      C,pd2                   ; loop until negative
        sub     c                       ; add back the last value
        push    af                      ; save accumulator
        ld      a,-1                    ; are we in the ones place?
        cp      c
        jr      Z,pd3                   ; if so, skip to output
        ld      a,'0'                   ; don't print leading 0s
        cp      e
        jr      Z,pd4
.pd3
IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08h
ENDIF
.pd4
        pop     af                      ; restore accumulator
        ret

;==============================================================================

IF _APU

include "../libsrc/_DEVELOPMENT/target/am9511/config_am9511_private.inc"

EXTERN  asm_am9511_pushl_fastcall, asm_am9511_pushi_fastcall
EXTERN  asm_am9511_popl, asm_am9511_popi

.apu_calc

IF _DOUBLE

;;;;;;; double calc

        ld hl,z_0                       ; Extend 16 bit z_0 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

        ld hl,z_1                       ; Extend 16 bit z_1 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld hl,(z_0+2)
        ex de,hl
        ld hl,(z_0)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        call asm_am9511_popl            ; RESULT IN DEHL
        ld (z_0_square),hl
        ex de,hl
        ld (z_0_square+2),hl

        ld hl,(z_1+2)
        ex de,hl
        ld hl,(z_1)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        call asm_am9511_popl            ; RESULT IN DEHL
        ld (z_1_square),hl
        ex de,hl
        ld (z_1_square+2),hl

        ld a,__IO_APU_OP_DSUB           ; COMMAND for DSUB (subtract double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_0 = z_2 + x;

        ld hl,(x)
        call asm_am9511_pushi_fastcall  ; OPERAND IN HL

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_3 = 2 * z_0 * z_1 / scale;

        ld hl,(z_0+2)
        ex de,hl
        ld hl,(z_0)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,(z_1+2)
        ex de,hl
        ld hl,(z_1)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_1 = z_3 + y;

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,(y)
        call asm_am9511_pushi_fastcall  ; OPERAND IN HL

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        call asm_am9511_popi            ; RESULT IN HL
        ld (z_1),hl

        call asm_am9511_popi            ; RESULT IN HL
        ld (z_0),hl

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld hl,(z_0_square+2)
        ex de,hl
        ld hl,(z_0_square)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld hl,(z_1_square+2)
        ex de,hl
        ld hl,(z_1_square)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_pushl_fastcall  ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        call asm_am9511_popi            ; RESULT IN HL

ELSE                                    ; IF !_DOUBLE

;;;;;;; float calc

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld hl,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,z_0_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_0_square
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,z_1_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_1_square
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FSUB           ; COMMAND for FSUB (subtract float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_0 = z_2 + x;

        ld hl,x
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_3 = 2 * z_0 * z_1 / scale;

        ld hl,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND


        ld hl,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

;       z_1 = z_3 + y;

        ld hl,y
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,z_1
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_1
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_0
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_0
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld hl,z_0_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_0_square
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_1_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_1_square
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_opp             ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU_CONTROL),a        ; ENTER a COMMAND

        call asm_am9511_popi            ; RESULT IN HL

ENDIF                                   ; IF !_DOUBLE

ELSE                                    ; IF !_APU


;
; Compute DEHL = DE * HL (signed): This routine is not too clever but it
; works. It is based on a standard 16-by-16 multiplication routine for unsigned
; integers. At the beginning the sign of the result is determined based on the
; signs of the operands which are negated if necessary. Then the unsigned
; multiplication takes place, followed by negating the result if necessary.
;

.l_muls_32_16x16
       ld b,d                       ; d = MSB of multiplicand
       ld c,h                       ; h = MSB of multiplier
       push bc                      ; save sign info

       bit 7,d
       jr Z,l_pos_de                ; take absolute value of multiplicand

       ld a,e
       cpl
       ld e,a
       ld a,d
       cpl
       ld d,a
       inc de

.l_pos_de
       bit 7,h
       jr Z,l_pos_hl                ; take absolute value of multiplier

       ld a,l
       cpl
       ld l,a
       ld a,h
       cpl
       ld h,a
       inc hl

.l_pos_hl                           ; prepare unsigned dehl = de x hl

;------------------------------------------------------------------------------
;
; Made by Runer112
; Analysed by Zeda
; https://raw.githubusercontent.com/Zeda/z80float/master/common/mul16.z80
; Tested by jacobly
;
; DE*HL --> DEHL
;
; enter : de   = 16-bit multiplicand  = x
;         hl   = 16-bit multiplier = y
;
; exit  : dehl = 32-bit product
;
; uses  : af, bc, de, hl

.l_mulu_32_16x16

        ld a,d
        ld d,0
        ld b,h
        ld c,l

        add a,a
        jr C,bit14
        add a,a
        jr C,bit13
        add a,a
        jr C,bit12
        add a,a
        jr C,bit11
        add a,a
        jr C,bit10
        add a,a
        jr C,bit9
        add a,a
        jr C,bit8
        add a,a
        jr C,bit7

        ld a,e
        and %11111110
        add a,a
        jr C,bit6
        add a,a
        jr C,bit5
        add a,a
        jr C,bit4
        add a,a
        jr C,bit3
        add a,a
        jr C,bit2
        add a,a
        jr C,bit1
        add a,a
        jr C,bit0
        rr e
        jr C,continue

        ld h,d
        ld l,e
        jp continue

.bit14
        add hl,hl
        adc a,a
        jr NC,bit13
        add hl,bc
        adc a,d

.bit13
        add hl,hl
        adc a,a
        jr NC,bit12
        add hl,bc
        adc a,d

.bit12
        add hl,hl
        adc a,a
        jr NC,bit11
        add hl,bc
        adc a,d

.bit11
        add hl,hl
        adc a,a
        jr NC,bit10
        add hl,bc
        adc a,d

.bit10
        add hl,hl
        adc a,a
        jr NC,bit9
        add hl,bc
        adc a,d

.bit9
        add hl,hl
        adc a,a
        jr NC,bit8
        add hl,bc
        adc a,d

.bit8
        add hl,hl
        adc a,a
        jr NC,bit7
        add hl,bc
        adc a,d

.bit7
        ld d,a
        ld a,e
        and %11111110
        add hl,hl
        adc a,a
        jr NC,bit6
        add hl,bc
        adc a,0

.bit6
        add hl,hl
        adc a,a
        jr NC,bit5
        add hl,bc
        adc a,0

.bit5
        add hl,hl
        adc a,a
        jr NC,bit4
        add hl,bc
        adc a,0

.bit4
        add hl,hl
        adc a,a
        jr NC,bit3
        add hl,bc
        adc a,0

.bit3
        add hl,hl
        adc a,a
        jr NC,bit2
        add hl,bc
        adc a,0

.bit2
        add hl,hl
        adc a,a
        jr NC,bit1
        add hl,bc
        adc a,0

.bit1
        add hl,hl
        adc a,a
        jr NC,bit0
        add hl,bc
        adc a,0

.bit0
        add hl,hl
        adc a,a
        jr C,funkyCarry
        rr e
        ld e,a
        jr NC,continue
        add hl,bc
        jr NC,continue
        inc e
        jr NZ,continue
        inc d

.continue
;------------------

        pop bc                      ; recover sign info from multiplicand and multiplier
        ld a,b
        xor c
        ret P                       ; return if positive product

        ld a,l                      ; negate product and return
        cpl
        ld l,a
        ld a,h
        cpl
        ld h,a
        ld a,e
        cpl
        ld e,a
        ld a,d
        cpl
        ld d,a
        inc l
        ret NZ
        inc h
        ret NZ
        inc de
        ret

.funkyCarry
        inc d
        rr e
        ld e,a
        ret NC
        add hl,bc
        ret NC
        inc e
        jp continue

ENDIF                               ; IF _APU

;==============================================================================
;       DELAY SUBROUTINE
;

.asm_delay
        push bc
        ld bc, $00
.asm_delay_loop
        ex (sp), hl
        ex (sp), hl
        djnz asm_delay_loop
        pop bc
        ret

;==============================================================================
;       OUTPUT SUBROUTINES
;

IF _CPM

;   Print message pointed to by (DE). It will end with a '$'.
;   modifies AF, DE, & HL
.prtmesg
        ld      a,(de)              ; Get character from DE address
        cp      eos
        ret     Z
        inc     de
        push    de                  ;otherwise, bump pointer and print it.
        ld      e,a
        ld      c, condio
        call    bdos
        pop     de
        jp      prtmesg

ELSE

defc    asm_pchar = 0008h           ; address of pchar

; print string from location in DE to ACIA, modifies AF, DE, & HL
.asm_pstring
        ld a,(de)                   ; Get character from DE address
        or a                        ; Is it $00 ?
        ret Z                       ; Then return on terminator
        rst 08h                     ; Print it
        inc de                      ; Point to next character
        jp asm_pstring              ; Continue until $00

; print contents of HL as 16 bit number in ASCII HEX, modifies AF & HL
.asm_phexwd
        push hl
        ld l,h                      ; high byte to L
        call asm_phex
        pop hl                      ; recover HL, for low byte in L
        call asm_phex
        ret

; print contents of L as 8 bit number in ASCII HEX to ACIA, modifies AF & HL
.asm_phex
        ld a,l                      ; modifies AF, HL
        push af
        rrca
        rrca
        rrca
        rrca
        call  asm_phex_conv
        pop af
.asm_phex_conv
        and  $0F
        add  a,$90
        daa
        adc  a,$40
        daa
        jp asm_pchar                ; print character on ACIA

ENDIF                               ; IF _CPM

;==============================================================================


## mandel_multi_APU.asm
;
;  Compute a Mandelbrot set on a simple Z80 computer.
;
; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
; Adapted to CP/M and colorzied by J.B. Langston
;
; Assemble with z88dk for RC2014 CP/M
; zcc +rc2014 -subtype=cpm -v -m --list --am9511 mandel_multi_APU.asm -o mandel_multi -create-app
;
; Assemble with z88dk for RC2014 BASIC
; zcc +rc2014 -subtype=basic -v -m --list --am9511 mandel_multi_APU.asm -o mandel_multi -create-app
;
; CP/M
; Upload using XMODEM; mandel_multi.bin -> mandel-m.com
;
; MS Basic
; Upload using hload or SCM: cat > /dev/ttyUSB0 < ./mandel_multi.ihx
;
; To calculate the theoretical minimum time at 115200 baud.
; Normally 10 colour codes, and 1 character per point.
; A line is (3 x 80) x 11 + CR + LF = 2642 characters
; There are 10 x 60 / 4 lines = 150 lines
; Therefore 396,300 characters need to be transmitted.
; Serial rate is 115200 baud or 14,400 8 bit characters per second
;
; Therefore the theoretical minimum time is 27.52 seconds.
;
; Results          FCPU         Original    Table mlt   Runer mlt
; RC2014 CP/M    7.432MHz         4'58"       3'48"       2'53"
; RC2014 BASIC   7.432MHz         4'58"       3'55"
;
; Results          FCPU         Original    Optimised   z180 mlt
; YAZ180 CP/M   18.432MHz         1'40"       1'24"       1'00"
; YAZ180 yabios 18.432Mhz                     1'14"         54"
; YAZ180 CP/M   36.864MHz         1'06"         58"         46"
; YAZ180 yabios 36.864MHz                       56"         45"
;
;
; Porting this program to another Z80 platform should be easy and straight-
; forward: The only dependencies on my homebrew machine are the system-calls
; used to print strings and characters. These calls are performed by loading
; IX with the number of the system-call and performing an RST 08. To port this
; program to another operating system just replace these system-calls with
; the appropriate versions. Only three system-calls are used in the following:
; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
; which is expected in HL), and _putc: Print a single character which is
; expected in A. RST 0 give control back to the monitor.
;

include "config_rc2014_private.inc"

;
; Configuration of CP/M or Basic & APU Double or Float
; NOTE TO DO: APU Float is NOT CONVERTED FROM YAZ180 Calling, won't work as is with RC2014
;

defc _CPM       = 0
defc _APU       = 1
defc _DOUBLE    = 1

defc cr         = $0D                     ; carriage return
defc lf         = $0A                     ; line feed
defc esc        = $1B                     ; escape

IF _CPM                                   ; cp/m ram model
defc bdos       = $05                     ; bdos vector
defc condio     = 6                       ; console direct I/O call
defc prints     = 9                       ; print string bdos call

defc eos        = '$'                     ; end of string marker
ELSE
defc eos        = $00                     ; end of string marker
ENDIF

defc pixel      = '#'                     ; character to output for pixel
defc SCALE      = 256                     ; Do NOT change this - the
                                          ; arithmetic routines rely on
                                          ; this scaling factor! :-)

;------------------------------------------------------------------------------

SECTION  data_user

x:              defw    0                       ; x-coordinate
x_start:        defw    -2 * SCALE              ; Minimum x-coordinate
x_end:          defw    1 * SCALE               ; Maximum x-coordinate
x_step:         defw    SCALE / 80              ; x-coordinate step-width

y:              defw    0                       ; y-coordinate
y_start:        defw    -5 * SCALE / 4          ; Minimum y-coordinate
y_end:          defw    5 * SCALE / 4           ; Maximum y-coordinate
y_step:         defw    SCALE / 60              ; y-coordinate step-width

iteration_max:  defb    30                      ; How many iterations
divergent:      defw    SCALE * 4
scale:          defw    SCALE
                defw    0

z_0:            defs    4,0
z_1:            defs    4,0

z_2:            defw    0

z_0_square:
z_0_square_low: defw    0
z_0_square_high:defw    0
z_1_square:
z_1_square_low: defw    0
z_1_square_high:defw    0


display:        defm    " .-+*=#@"              ; 8 characters for the display

hsv:            defm 0                          ; hsv color table
                defm 201, 200, 199, 198, 197
                defm 196, 202, 208, 214, 220
                defm 226, 190, 154, 118,  82
                defm 46,   47,  48,  49,  50
                defm 51,   45,  39,  33,  27
                defm 21,   57,  93, 129, 165

welcome:        defm    "Generating a Mandelbrot set"
crlf:           defm    cr, lf, eos
finished:       defm    esc, "[0mComputation finished.", cr, lf, eos
ansifg:         defm    esc, "[38;5;", eos
ansibg:         defm    esc, "[48;5;", eos
cls:            defm    esc, "[2J",eos

;------------------------------------------------------------------------------

SECTION code_user

PUBLIC _main

._main
        ld      de, cls                 ; VT100 clear screen
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

        ld      de, welcome             ; Print a welcome message
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF

; for (y = <initial_value> ; y <= y_end; y += y_step)
; {
        ld      hl, (y_start)           ; y = y_start
        ld      (y), hl
.outer_loop
        ld      hl, (y_end)             ; Is y <= y_end?
        ld      de, (y)
        and     a                       ; Clear carry
        sbc     hl, de                  ; Perform the comparison
        jp      M, mandel_end           ; End of outer loop reached

;    for (x = x_start; x <= x_end; x += x_step)
;    {
        ld      hl, (x_start)           ; x = x_start
        ld      (x), hl
.inner_loop
        ld      hl, (x_end)             ; Is x <= x_end?
        ld      de, (x)
        and     a
        sbc     hl, de
        jp      M, inner_loop_end       ; End of inner loop reached

;       push af
;       push hl
;       ld hl,(x)
;       call asm_phexwd
;       ld a,':'
;       call asm_pchar
;       ld hl,(y)
;       call asm_phexwd
;       ld a,'>'
;       call asm_pchar
;       pop hl
;       pop af

;      z_0 = z_1 = 0;
        ld      hl, 0
        ld      (z_0), hl
        ld      (z_1), hl

;      for (iteration = iteration_max; iteration; iteration--)
;      {
        ld      a, (iteration_max)
        ld      b, a

.iteration_loop
        push    bc                      ; iteration -> stack

IF _APU
        call    apu_calc                ; HL now contains (z_0 ^ 2 - z_1 ^ 2) / scale
        ld      (z_2),hl                ; save z_2
ELSE

;        z2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld      hl, (z_1)               ; Compute DE HL = z_1 * z_1
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_1_square_low), hl    ; z_1 ** 2 is needed later again
        ld      (z_1_square_high), de

        ld      hl, (z_0)               ; Compute DE HL = z_0 * z_0
        ld      d, h
        ld      e, l
        call    l_muls_32_16x16
        ld      (z_0_square_low), hl    ; z_1 ** 2 will be also needed
        ld      (z_0_square_high), de

        and     a                       ; Compute subtraction
        ld      bc, (z_1_square_low)
        sbc     hl, bc
        push    hl                      ; Save lower 16 bit of result
        ld      h, d
        ld      l, e
        ld      bc, (z_1_square_high)
        sbc     hl, bc
        pop     bc                      ; HL BC = z_0 ^ 2 - z_1 ^ 2

        ld      c, b                    ; Divide by scale = 256
        ld      b, l                    ; Discard the rest
        push    bc                      ; We need BC later

;        z_3 = 2 * z_0 * z_1 / scale;
        ld      hl, (z_0)               ; Compute DE HL = 2 * z_0 * z_1
        add     hl, hl
        ld      de, (z_1)
        call    l_muls_32_16x16

        ld      b, e                    ; Divide by scale (= 256)
        ld      c, h                    ; BC contains now z_3

;        z_1 = z_3 + y;
        ld      hl, (y)
        add     hl, bc
        ld      (z_1), hl

;        z_0 = z_2 + x;
        pop     bc                      ; Here BC is needed again :-)
        ld      hl, (x)
        add     hl, bc
        ld      (z_0), hl

;        if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
        ld      hl, (z_0_square_low)    ; Use the squares computed
        ld      de, (z_1_square_low)    ; above
        add     hl, de
        ld      b, h                    ; BC contains lower word of sum
        ld      c, l

        ld      hl, (z_0_square_high)
        ld      de, (z_1_square_high)
        adc     hl, de

        ld      h, l                    ; HL now contains (z_0 ^ 2 -
        ld      l, b                    ; z_1 ^ 2) / scale

        ld      (z_2),hl                ; save z_2

ENDIF                                   ; IF _APU

        ld      bc,(divergent)
        and     a
        sbc     hl, bc

;          break;
        jr      C, iteration_dec        ; No break
        pop     bc                      ; Get latest iteration counter
        jr      iteration_end           ; Exit loop

;        iteration++;
.iteration_dec
        pop     bc                      ; Get iteration counter
        djnz    iteration_loop          ; We might fall through!
;      }
.iteration_end
;      printf("%c", display[iteration % 7]);
;       call    asciipixel              ; Print the character
        call    colorpixel              ; Print the character

;       ld a,' '
;       call asm_pchar
;       ld hl,(z_2)
;       call asm_phexwd                 ; print final z_2
;       ld a,' '
;       call asm_pchar
;       ld l,b
;       call asm_phex                   ; print the iteration
;       ld a,cr
;       call asm_pchar
;       ld a,lf
;       call asm_pchar
;       call asm_delay
;       call asm_delay

IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08
ENDIF

        ld      de, (x_step)            ; x += x_step
        ld      hl, (x)
        add     hl, de
        ld      (x), hl

        jp      inner_loop
;    }
;    printf("\n");
.inner_loop_end
        ld      de, crlf                ; Print a CR/LF pair
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        ld      de, (y_step)            ; y += y_step
        ld      hl, (y)
        add     hl, de
        ld      (y), hl                 ; Store new y-value

        jp      outer_loop
; }

.mandel_end
        call    asm_delay
        call    asm_delay
        ld      de, finished            ; Print finished-message
IF _CPM                                 ; cp/m ram model
        jp      prtmesg                 ; Return to CP/M
ELSE
        jp      asm_pstring             ; Return to Basic
ENDIF


.colorpixel
        ld      a,b                     ; iter count in B -> C
        and     $1F                     ; lower five bits only
        ld      c,a
        ld      b,0
        ld      hl, hsv                 ; get ANSI color code
        add     hl, bc
        ld      a,(hl)
        call    setcolor
        ld      e, pixel                ; show pixel
        ret

.asciipixel
        ld      a, b                    ; iter count in B -> L
        and     $07                     ; lower three bits only
        sbc     hl, hl
        ld      l, a
        ld      de, display             ; Get start of character array
        add     hl, de                  ; address and load the
        ld      e, (hl)                 ; character to be printed
        ret

.setcolor
        push    af                      ; save accumulator
        ld      de,ansifg               ; start ANSI control sequence to set foreground color
IF _CPM                                 ; cp/m ram model
        call    prtmesg
ELSE
        call    asm_pstring
ENDIF
        pop     af
        call    printdec                ; print ANSI color code
IF _CPM                                 ; cp/m ram model
        ld      e,'m'                   ; finish control sequence
        ld      c, condio
        jp      bdos
ELSE
        ld      a,'m'                   ; finish control sequence
        jp      asm_pchar
ENDIF

.printdec
        ld      c,-100                  ; print 100s place
        call    pd1
        ld      c,-10                   ; 10s place
        call    pd1
        ld      c,-1                    ; 1s place
.pd1
        ld      e,'0'-1                 ; start ASCII right before 0
.pd2
        inc     e                       ; increment ASCII code
        add     a,c                     ; subtract 1 place value
        jr      C,pd2                   ; loop until negative
        sub     c                       ; add back the last value
        push    af                      ; save accumulator
        ld      a,-1                    ; are we in the ones place?
        cp      c
        jr      Z,pd3                   ; if so, skip to output
        ld      a,'0'                   ; don't print leading 0s
        cp      e
        jr      Z,pd4
.pd3
IF _CPM                                 ; cp/m ram model
        ld      c, condio
        call    bdos
ELSE
        ld      a,e
        rst     08h
ENDIF
.pd4
        pop     af                      ; restore accumulator
        ret

;==============================================================================

IF _APU

include "../libsrc/_DEVELOPMENT/target/am9511/config_am9511_private.inc"

EXTERN  asm_am9511_0_pushl_fastcall, asm_am9511_0_pushi_fastcall
EXTERN  asm_am9511_0_popl, asm_am9511_0_popi

EXTERN  asm_am9511_1_pushl_fastcall, asm_am9511_1_pushi_fastcall
EXTERN  asm_am9511_1_popl, asm_am9511_1_popi

.apu_calc

IF _DOUBLE

;;;;;;; double calc

        ld hl,z_0                       ; Extend 16 bit z_0 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

        ld hl,z_1                       ; Extend 16 bit z_1 to 32 bit
        inc hl
        ld a,(hl)
        add a,a                         ; Put sign bit into carry
        sbc a,a                         ; A = 0 if carry == 0, $FF otherwise
        inc hl
        ld (hl),a
        inc hl
        ld (hl),a

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;
;       z_0 = z_2 + x;

;       z_3 = 2 * z_0 * z_1 / scale;
;       z_1 = z_3 + y;

        ld hl,(z_0+2)
        ex de,hl
        ld hl,(z_0)
        call asm_am9511_0_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,(z_0+2)
        ex de,hl
        ld hl,(z_0)
        call asm_am9511_1_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        ld hl,(z_1+2)
        ex de,hl
        ld hl,(z_1)
        call asm_am9511_1_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_0_popl          ; RESULT IN DEHL
        ld (z_0_square),hl
        ex de,hl
        ld (z_0_square+2),hl


        ld hl,(z_1+2)
        ex de,hl
        ld hl,(z_1)
        call asm_am9511_0_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_DMUL           ; COMMAND for DMUL (multiply lower)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_1_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOD           ; COMMAND for PTOD (push double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_0_popl          ; RESULT IN DEHL
        ld (z_1_square),hl
        ex de,hl
        ld (z_1_square+2),hl

        ld a,__IO_APU_OP_DSUB           ; COMMAND for DSUB (subtract double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_0_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        ld hl,(y)
        call asm_am9511_1_pushi_fastcall; POINTER TO OPERAND IN HL

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU1_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_1_popi          ; RESULT IN HL
        ld (z_1),hl

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,(x)
        call asm_am9511_0_pushi_fastcall; POINTER TO OPERAND IN HL

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_0_popi          ; RESULT IN HL
        ld (z_0),hl

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld hl,(z_0_square+2)
        ex de,hl
        ld hl,(z_0_square)
        call asm_am9511_0_pushl_fastcall; OPERAND IN DEHL

        ld hl,(z_1_square+2)
        ex de,hl
        ld hl,(z_1_square)
        call asm_am9511_0_pushl_fastcall ; OPERAND IN DEHL

        ld a,__IO_APU_OP_DADD           ; COMMAND for DADD (add double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,(scale+2)
        ex de,hl
        ld hl,(scale)
        call asm_am9511_0_pushl_fastcall; OPERAND IN DEHL

        ld a,__IO_APU_OP_DDIV           ; COMMAND for DDIV (divide double)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_POPS           ; COMMAND for POPS (pop single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_0_popi          ; RESULT IN HL

ELSE                                    ; IF !_DOUBLE

;;;;;;; float calc

;       z_2 = (z_0 * z_0 - z_1 * z_1) / scale;

        ld hl,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,z_0_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_0_square
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,z_1_square
        ld bc,__IO_APU_OP_REM32         ; REMOVE 32 bit (float) to z_1_square
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FSUB           ; COMMAND for FSUB (subtract float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

;       z_0 = z_2 + x;

        ld hl,x
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

;       z_3 = 2 * z_0 * z_1 / scale;

        ld hl,z_0
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_PTOF           ; COMMAND for PTOF (push float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND


        ld hl,z_1
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FMUL           ; COMMAND for FMUL (multiply float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (single)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

;       z_1 = z_3 + y;

        ld hl,y
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_SADD           ; COMMAND for SADD (add single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,z_1
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_1
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_0
        ld bc,__IO_APU_OP_REM16         ; REMOVE 16 bit (word) to z_0
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

;       if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)

        ld hl,z_0_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_0_square
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld hl,z_1_square
        ld bc,__IO_APU_OP_ENT32         ; ENTER 32 bit (float) from z_1_square
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FADD           ; COMMAND for FADD (add float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld hl,scale
        ld bc,__IO_APU_OP_ENT16         ; ENTER 16 bit (word)
        call asm_am9511_0_opp           ; POINTER TO OPERAND IN OPERAND BUFFER

        ld a,__IO_APU_OP_FLTS           ; COMMAND for FLTS (float single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FDIV           ; COMMAND for FDIV (divide float)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        ld a,__IO_APU_OP_FIXS           ; COMMAND for FIXS (fix single)
        out (__IO_APU0_CONTROL),a       ; ENTER a COMMAND

        call asm_am9511_0_popi          ; RESULT IN HL

ENDIF                                   ; IF !_DOUBLE

ELSE                                    ; IF !_APU


;
; Compute DEHL = DE * HL (signed): This routine is not too clever but it
; works. It is based on a standard 16-by-16 multiplication routine for unsigned
; integers. At the beginning the sign of the result is determined based on the
; signs of the operands which are negated if necessary. Then the unsigned
; multiplication takes place, followed by negating the result if necessary.
;

.l_muls_32_16x16
       ld b,d                       ; d = MSB of multiplicand
       ld c,h                       ; h = MSB of multiplier
       push bc                      ; save sign info

       bit 7,d
       jr Z,l_pos_de                ; take absolute value of multiplicand

       ld a,e
       cpl
       ld e,a
       ld a,d
       cpl
       ld d,a
       inc de

.l_pos_de
       bit 7,h
       jr Z,l_pos_fastcall          ; take absolute value of multiplier

       ld a,l
       cpl
       ld l,a
       ld a,h
       cpl
       ld h,a
       inc hl

.l_pos_fastcall                     ; prepare unsigned dehl = de x hl

;------------------------------------------------------------------------------
;
; Made by Runer112
; Analysed by Zeda
; https://raw.githubusercontent.com/Zeda/z80float/master/common/mul16.z80
; Tested by jacobly
;
; DE*HL --> DEHL
;
; enter : de   = 16-bit multiplicand  = x
;         hl   = 16-bit multiplier = y
;
; exit  : dehl = 32-bit product
;
; uses  : af, bc, de, hl

.l_mulu_32_16x16

        ld a,d
        ld d,0
        ld b,h
        ld c,l

        add a,a
        jr C,bit14
        add a,a
        jr C,bit13
        add a,a
        jr C,bit12
        add a,a
        jr C,bit11
        add a,a
        jr C,bit10
        add a,a
        jr C,bit9
        add a,a
        jr C,bit8
        add a,a
        jr C,bit7

        ld a,e
        and %11111110
        add a,a
        jr C,bit6
        add a,a
        jr C,bit5
        add a,a
        jr C,bit4
        add a,a
        jr C,bit3
        add a,a
        jr C,bit2
        add a,a
        jr C,bit1
        add a,a
        jr C,bit0
        rr e
        jr C,continue

        ld h,d
        ld l,e
        jp continue

.bit14
        add hl,hl
        adc a,a
        jr NC,bit13
        add hl,bc
        adc a,d

.bit13
        add hl,hl
        adc a,a
        jr NC,bit12
        add hl,bc
        adc a,d

.bit12
        add hl,hl
        adc a,a
        jr NC,bit11
        add hl,bc
        adc a,d

.bit11
        add hl,hl
        adc a,a
        jr NC,bit10
        add hl,bc
        adc a,d

.bit10
        add hl,hl
        adc a,a
        jr NC,bit9
        add hl,bc
        adc a,d

.bit9
        add hl,hl
        adc a,a
        jr NC,bit8
        add hl,bc
        adc a,d

.bit8
        add hl,hl
        adc a,a
        jr NC,bit7
        add hl,bc
        adc a,d

.bit7
        ld d,a
        ld a,e
        and %11111110
        add hl,hl
        adc a,a
        jr NC,bit6
        add hl,bc
        adc a,0

.bit6
        add hl,hl
        adc a,a
        jr NC,bit5
        add hl,bc
        adc a,0

.bit5
        add hl,hl
        adc a,a
        jr NC,bit4
        add hl,bc
        adc a,0

.bit4
        add hl,hl
        adc a,a
        jr NC,bit3
        add hl,bc
        adc a,0

.bit3
        add hl,hl
        adc a,a
        jr NC,bit2
        add hl,bc
        adc a,0

.bit2
        add hl,hl
        adc a,a
        jr NC,bit1
        add hl,bc
        adc a,0

.bit1
        add hl,hl
        adc a,a
        jr NC,bit0
        add hl,bc
        adc a,0

.bit0
        add hl,hl
        adc a,a
        jr C,funkyCarry
        rr e
        ld e,a
        jr NC,continue
        add hl,bc
        jr NC,continue
        inc e
        jr NZ,continue
        inc d

.continue
;------------------

        pop bc                      ; recover sign info from multiplicand and multiplier
        ld a,b
        xor c
        ret P                       ; return if positive product

        ld a,l                      ; negate product and return
        cpl
        ld l,a
        ld a,h
        cpl
        ld h,a
        ld a,e
        cpl
        ld e,a
        ld a,d
        cpl
        ld d,a
        inc l
        ret NZ
        inc h
        ret NZ
        inc de
        ret

.funkyCarry
        inc d
        rr e
        ld e,a
        ret NC
        add hl,bc
        ret NC
        inc e
        jp continue

ENDIF                               ; IF _APU

;==============================================================================
;       DELAY SUBROUTINE
;

.asm_delay
        push bc
        ld bc, $00
.asm_delay_loop
        ex (sp), hl
        ex (sp), hl
        djnz asm_delay_loop
        pop bc
        ret

;==============================================================================
;       OUTPUT SUBROUTINES
;

IF _CPM

;   Print message pointed to by (DE). It will end with a '$'.
;   modifies AF, DE, & HL
.prtmesg
        ld      a,(de)              ; Get character from DE address
        cp      eos
        ret     Z
        inc     de
        push    de                  ;otherwise, bump pointer and print it.
        ld      e,a
        ld      c, condio
        call    bdos
        pop     de
        jp      prtmesg

ELSE

defc    asm_pchar = 0008h           ; address of pchar

; print string from location in DE to ACIA, modifies AF, DE, & HL
.asm_pstring
        ld a,(de)                   ; Get character from DE address
        or a                        ; Is it $00 ?
        ret Z                       ; Then return on terminator
        rst 08h                     ; Print it
        inc de                      ; Point to next character
        jp asm_pstring              ; Continue until $00

; print contents of HL as 16 bit number in ASCII HEX, modifies AF & HL
.asm_phexwd
        push hl
        ld l,h                      ; high byte to L
        call asm_phex
        pop hl                      ; recover HL, for low byte in L
        call asm_phex
        ret

; print contents of L as 8 bit number in ASCII HEX to ACIA, modifies AF & HL
.asm_phex
        ld a,l                      ; modifies AF, HL
        push af
        rrca
        rrca
        rrca
        rrca
        call  asm_phex_conv
        pop af
.asm_phex_conv
        and  $0F
        add  a,$90
        daa
        adc  a,$40
        daa
        jp asm_pchar                ; print character on ACIA

ENDIF                               ; IF _CPM

;==============================================================================


## Screenshot from 2018-05-23 01-00-22.png

      
    Raw
  

              Screenshot from 2018-05-23 01-00-22.png
            
          
## slowprint.py
import sys
import serial

from time import sleep

path = '/dev/ttyUSB0'
baud = 115200

ser = serial.Serial(path, baud, dsrdtr=False)

# print(ser)

for line in sys.stdin:

    for ch in line:
        ser.write( ch );

    ser.write('\r');
    ser.flush();
    sleep(0.1);

ser.close()
	;
	; Compute a Mandelbrot set on a simple Z80 computer.
	;
	; From https://rosettacode.org/wiki/Mandelbrot_set#Z80_Assembly
	; Adapted to CP/M and colorzied by J.B. Langston
	;
	; Assemble with z88dk for RC2014 CP/M
	; zcc +rc2014 -subtype=cpm -v -m --list mandel-feilipu-rc2014-lut.asm -o mand-lut -create-app
	;
	; Assemble with z88dk for RC2014 BASIC
	; zcc +rc2014 -subtype=basic -v -m --list mandel-feilipu-rc2014-lut.asm -o mand-lut -create-app
	;
	; CP/M
	; Upload using XMODEM; mandel-lut.bin -> mand-lut.com
	;
	; MS Basic
	; Upload using hload or SCM: cat > /dev/ttyUSB0 < ./mand-lut.ihx
	;
	; To calculate the theoretical minimum time at 115200 baud.
	; Normally 10 colour codes, and 1 character per point.
	; A line is (3 x 80) x 11 + CR + LF = 2642 characters
	; There are 10 x 60 / 4 lines = 150 lines
	; Therefore 396,300 characters need to be transmitted.
	; Serial rate is 115200 baud or 14,400 8 bit characters per second
	;
	; Therefore the theoretical minimum time is 27.52 seconds.
	;
	; Results FCPU Original Table mlt LUT mlt
	; RC2014 CP/M 7.432MHz 4'58" 3'48" 2'55"
	; RC2014 BASIC 7.432MHz 4'58" 3'55"
	;
	; Results FCPU Original Optimised z180 mlt
	; YAZ180 CP/M 18.432MHz 1'40" 1'24" 1'00"
	; YAZ180 yabios 18.432Mhz 1'14" 54"
	; YAZ180 CP/M 36.864MHz 1'06" 58" 46"
	; YAZ180 yabios 36.864MHz 56" 45"
	;
	;
	; Porting this program to another Z80 platform should be easy and straight-
	; forward: The only dependencies on my homebrew machine are the system-calls
	; used to print strings and characters. These calls are performed by loading
	; IX with the number of the system-call and performing an RST 08. To port this
	; program to another operating system just replace these system-calls with
	; the appropriate versions. Only three system-calls are used in the following:
	; _crlf: Prints a CR/LF, _puts: Prints a 0-terminated string (the adress of
	; which is expected in HL), and _putc: Print a single character which is
	; expected in A. RST 0 give control back to the monitor.
	;

	include "config_rc2014_private.inc"


	defc _CPM = 0

	defc cr = $0D ; carriage return
	defc lf = $0A ; line feed
	defc esc = $1B ; escape

	IF _CPM ; cp/m ram model
	defc bdos = $05 ; bdos vector
	defc condio = 6 ; console direct I/O call
	defc prints = 9 ; print string bdos call

	defc eos = '$' ; end of string marker
	ELSE
	defc eos = $00 ; end of string marker
	ENDIF

	defc pixel = '#' ; character to output for pixel
	defc SCALE = 256 ; Do NOT change this - the
	; arithmetic routines rely on
	; this scaling factor! :-)

	;------------------------------------------------------------------------------

	SECTION data_user

	x: defw 0 ; x-coordinate
	x_start: defw -2 * SCALE ; Minimum x-coordinate
	x_end: defw 1 * SCALE ; Maximum x-coordinate
	x_step: defw SCALE / 80 ; x-coordinate step-width

	y: defw 0 ; y-coordinate
	y_start: defw -5 * SCALE / 4 ; Minimum y-coordinate
	y_end: defw 5 * SCALE / 4 ; Maximum y-coordinate
	y_step: defw SCALE / 60 ; y-coordinate step-width

	iteration_max: defb 30 ; How many iterations
	divergent: defw SCALE * 4
	scale: defw SCALE
	defw 0

	z_0: defs 4,0
	z_1: defs 4,0

	z_2: defw 0

	z_0_square:
	z_0_square_low: defw 0
	z_0_square_high:defw 0
	z_1_square:
	z_1_square_low: defw 0
	z_1_square_high:defw 0


	display: defm " .-+*=#@" ; 8 characters for the display

	hsv: defm 0 ; hsv color table
	defm 201, 200, 199, 198, 197
	defm 196, 202, 208, 214, 220
	defm 226, 190, 154, 118, 82
	defm 46, 47, 48, 49, 50
	defm 51, 45, 39, 33, 27
	defm 21, 57, 93, 129, 165

	welcome: defm "Generating a Mandelbrot set"
	crlf: defm cr, lf, eos
	finished: defm esc, "[0mComputation finished.", cr, lf, eos
	ansifg: defm esc, "[38;5;", eos
	ansibg: defm esc, "[48;5;", eos
	cls: defm esc, "[2J",eos

	;------------------------------------------------------------------------------

	SECTION code_user

	PUBLIC _main

	._main
	ld de, cls ; VT100 clear screen
	IF _CPM ; cp/m ram model
	call prtmesg
	ELSE
	call asm_pstring
	ENDIF

	ld de, welcome ; Print a welcome message
	IF _CPM ; cp/m ram model
	call prtmesg
	ELSE
	call asm_pstring
	ENDIF

	; for (y = <initial_value> ; y <= y_end; y += y_step)
	; {
	ld hl, (y_start) ; y = y_start
	ld (y), hl
	.outer_loop
	ld hl, (y_end) ; Is y <= y_end?
	ld de, (y)
	and a ; Clear carry
	sbc hl, de ; Perform the comparison
	jp M, mandel_end ; End of outer loop reached

	; for (x = x_start; x <= x_end; x += x_step)
	; {
	ld hl, (x_start) ; x = x_start
	ld (x), hl
	.inner_loop
	ld hl, (x_end) ; Is x <= x_end?
	ld de, (x)
	and a
	sbc hl, de
	jp M, inner_loop_end ; End of inner loop reached

	; z_0 = z_1 = 0;
	ld hl, 0
	ld (z_0), hl
	ld (z_1), hl

	; for (iteration = iteration_max; iteration; iteration--)
	; {
	ld a, (iteration_max)
	ld b, a

	.iteration_loop
	push bc ; iteration -> stack

	; z2 = (z_0 * z_0 - z_1 * z_1) / scale;

	ld hl, (z_1) ; Compute DE HL = z_1 * z_1
	ld d, h
	ld e, l
	call l_muls_32_16x16
	ld (z_1_square_low), hl ; z_1 ** 2 is needed later again
	ld (z_1_square_high), de

	ld hl, (z_0) ; Compute DE HL = z_0 * z_0
	ld d, h
	ld e, l
	call l_muls_32_16x16
	ld (z_0_square_low), hl ; z_1 ** 2 will be also needed
	ld (z_0_square_high), de

	and a ; Compute subtraction
	ld bc, (z_1_square_low)
	sbc hl, bc
	push hl ; Save lower 16 bit of result
	ld h, d
	ld l, e
	ld bc, (z_1_square_high)
	sbc hl, bc
	pop bc ; HL BC = z_0 ^ 2 - z_1 ^ 2

	ld c, b ; Divide by scale = 256
	ld b, l ; Discard the rest
	push bc ; We need BC later

	; z_3 = 2 * z_0 * z_1 / scale;
	ld hl, (z_0) ; Compute DE HL = 2 * z_0 * z_1
	add hl, hl
	ld de, (z_1)
	call l_muls_32_16x16

	ld b, e ; Divide by scale (= 256)
	ld c, h ; BC contains now z_3

	; z_1 = z_3 + y;
	ld hl, (y)
	add hl, bc
	ld (z_1), hl

	; z_0 = z_2 + x;
	pop bc ; Here BC is needed again :-)
	ld hl, (x)
	add hl, bc
	ld (z_0), hl

	; if (z_0 * z_0 / scale + z_1 * z_1 / scale > 4 * scale)
	ld hl, (z_0_square_low) ; Use the squares computed
	ld de, (z_1_square_low) ; above
	add hl, de
	ld b, h ; BC contains lower word of sum
	ld c, l

	ld hl, (z_0_square_high)
	ld de, (z_1_square_high)
	adc hl, de

	ld h, l ; HL now contains (z_0 ^ 2 -
	ld l, b ; z_1 ^ 2) / scale

	ld (z_2),hl ; save z_2

	ld bc,(divergent)
	and a
	sbc hl, bc

	; break;
	jr C, iteration_dec ; No break
	pop bc ; Get latest iteration counter
	jr iteration_end ; Exit loop

	; iteration++;
	.iteration_dec
	pop bc ; Get iteration counter
	djnz iteration_loop ; We might fall through!
	; }
	.iteration_end
	; printf("%c", display[iteration % 7]);
	; call asciipixel ; Print the character
	call colorpixel ; Print the character

	IF _CPM ; cp/m ram model
	ld c, condio
	call bdos
	ELSE
	ld a,e
	rst 08
	ENDIF

	ld de, (x_step) ; x += x_step
	ld hl, (x)
	add hl, de
	ld (x), hl

	jp inner_loop
	; }
	; printf("\n");
	.inner_loop_end
	ld de, crlf ; Print a CR/LF pair
	IF _CPM ; cp/m ram model
	call prtmesg
	ELSE
	call asm_pstring
	ENDIF
	ld de, (y_step) ; y += y_step
	ld hl, (y)
	add hl, de
	ld (y), hl ; Store new y-value

	jp outer_loop
	; }

	.mandel_end
	call asm_delay
	call asm_delay
	ld de, finished ; Print finished-message
	IF _CPM ; cp/m ram model
	jp prtmesg ; Return to CP/M
	ELSE
	jp asm_pstring ; Return to Basic
	ENDIF


	.colorpixel
	ld a,b ; iter count in B -> C
	and $1F ; lower five bits only
	ld c,a
	ld b,0
	ld hl, hsv ; get ANSI color code
	add hl, bc
	ld a,(hl)
	call setcolor
	ld e, pixel ; show pixel
	ret

	.asciipixel
	ld a, b ; iter count in B -> L
	and $07 ; lower three bits only
	sbc hl, hl
	ld l, a
	ld de, display ; Get start of character array
	add hl, de ; address and load the
	ld e, (hl) ; character to be printed
	ret

	.setcolor
	push af ; save accumulator
	ld de,ansifg ; start ANSI control sequence to set foreground color
	IF _CPM ; cp/m ram model
	call prtmesg
	ELSE
	call asm_pstring
	ENDIF
	pop af
	call printdec ; print ANSI color code
	IF _CPM ; cp/m ram model
	ld e,'m' ; finish control sequence
	ld c, condio
	jp bdos
	ELSE
	ld a,'m' ; finish control sequence
	jp asm_pchar
	ENDIF

	.printdec
	ld c,-100 ; print 100s place
	call pd1
	ld c,-10 ; 10s place
	call pd1
	ld c,-1 ; 1s place
	.pd1
	ld e,'0'-1 ; start ASCII right before 0
	.pd2
	inc e ; increment ASCII code
	add a,c ; subtract 1 place value
	jr C,pd2 ; loop until negative
	sub c ; add back the last value
	push af ; save accumulator
	ld a,-1 ; are we in the ones place?
	cp c
	jr Z,pd3 ; if so, skip to output
	ld a,'0' ; don't print leading 0s
	cp e
	jr Z,pd4
	.pd3
	IF _CPM ; cp/m ram model
	ld c, condio
	call bdos
	ELSE
	ld a,e
	rst 08h
	ENDIF
	.pd4
	pop af ; restore accumulator
	ret

	;==============================================================================
	;
	; Compute DEHL = DE * HL (signed): This routine is not too clever but it
	; works. It is based on a standard 16-by-16 multiplication routine for unsigned
	; integers. At the beginning the sign of the result is determined based on the
	; signs of the operands which are negated if necessary. Then the unsigned
	; multiplication takes place, followed by negating the result if necessary.
	;

	.l_muls_32_16x16
	ld b,d ; d = MSB of multiplicand
	ld c,h ; h = MSB of multiplier
	push bc ; save sign info

	bit 7,d
	jr Z,l_pos_de ; take absolute value of multiplicand

	ld a,e
	cpl
	ld e,a
	ld a,d
	cpl
	ld d,a
	inc de

	.l_pos_de
	bit 7,h
	jr Z,l_pos_hl ; take absolute value of multiplier

	ld a,l
	cpl
	ld l,a
	ld a,h
	cpl
	ld h,a
	inc hl

	.l_pos_hl ; prepare unsigned dehl = de x hl

	.l_mulu_32_16x16
	; multiplication of two 16-bit numbers into a 32-bit product
	;
	; enter : de = 16-bit multiplicand = x
	; hl = 16-bit multiplicand = y
	;
	; exit : dehl = 32-bit product
	; carry reset
	;
	; uses : af, bc, de, hl

	ld b,h ; 4 b = y1
	ld c,d ; 4 c = x1
	push bc ; 11 preserve y1*x1

	ld b,l ; 4 b = y0
	ld c,e ; 4 c = x0
	push bc ; 11 preserve y0*x0

	;;; MLT HE (xBC) ;;;;;;;;;;;;;;;; y1*x0
	ld c,__IO_LUT_OPERAND_LATCH ; 7 operand latch address
	ld b,h ; 4 operand Y in B
	out (c),e ; 12 operand X from E
	in e,(c) ; 12 result Z LSB to E
	inc c ; 4 result MSB address
	in h,(c) ; 12 result Z MSB to H

	;;; MLT DL (xBC) ;;;;;;;;;;;;;;;; x1*y0
	dec c ; 4 operand latch address
	ld b,d ; 4 operand Y in B
	out (c),l ; 12 operand X from L
	in l,(c) ; 12 result Z LSB to L
	inc c ; 4 result MSB address
	in d,(c) ; 12 result Z MSB to D

	xor a ; 4 zero A
	add hl,de ; 11 add cross products
	adc a,a ; 4 capture carry

	pop de ; 10 restore y0*x0

	;;; MLT DE (xBC) ;;;;;;;;;;;;;;;; y0*x0
	dec c ; 4 operand latch address
	ld b,d ; 4 operand Y in B
	out (c),e ; 12 operand X from A
	in e,(c) ; 12 result Z LSB to E
	inc c ; 4 result MSB address
	in d,(c) ; 12 result Z MSB to D

	ld b,a ; 4 carry from cross products

	ld a,d ; 4
	add a,l ; 4
	ld d,a ; 4 DE = final LSW

	ld l,h ; 4 LSB of MSW from cross products
	ld h,b ; 4 carry from cross products

	ex (sp),hl ; 19 restore y1*x1, stack interim p3 p2

	;;; MLT HL (xBC) ;;;;;;;;;;;;;;;; x1*y1
	dec c ; 4 operand latch address
	ld b,h ; 4 operand Y in B
	out (c),l ; 12 operand X from L
	in l,(c) ; 12 result Z LSB to L
	inc c ; 4 result MSB address
	in h,(c) ; 12 result Z MSB to H

	pop bc ; 10 destack interim p3 p2

	adc hl,bc ; 15 hl = final MSW
	ex de,hl ; 4 DEHL = final product

	;------------------

	pop bc ; recover sign info from multiplicand and multiplier
	ld a,b
	xor c
	ret P ; return if positive product

	ld a,l ; negate product and return
	cpl
	ld l,a
	ld a,h
	cpl
	ld h,a
	ld a,e
	cpl
	ld e,a
	ld a,d
	cpl
	ld d,a
	inc l
	ret NZ
	inc h
	ret NZ
	inc de
	ret

	;==============================================================================
	; DELAY SUBROUTINE
	;

	.asm_delay
	push bc
	ld bc, $00
	.asm_delay_loop
	ex (sp), hl
	ex (sp), hl
	djnz asm_delay_loop
	pop bc
	ret

	;==============================================================================
	; OUTPUT SUBROUTINES
	;

	IF _CPM

	; Print message pointed to by (DE). It will end with a '$'.
	; modifies AF, DE, & HL
	.prtmesg
	ld a,(de) ; Get character from DE address
	cp eos
	ret Z
	inc de
	push de ;otherwise, bump pointer and print it.
	ld e,a
	ld c, condio
	call bdos
	pop de
	jp prtmesg

	ELSE

	defc asm_pchar = 0008h ; address of pchar

	; print string from location in DE to ACIA, modifies AF, DE, & HL
	.asm_pstring
	ld a,(de) ; Get character from DE address
	or a ; Is it $00 ?
	ret Z ; Then return on terminator
	rst 08h ; Print it
	inc de ; Point to next character
	jp asm_pstring ; Continue until $00

	; print contents of HL as 16 bit number in ASCII HEX, modifies AF & HL
	.asm_phexwd
	push hl
	ld l,h ; high byte to L
	call asm_phex
	pop hl ; recover HL, for low byte in L
	call asm_phex
	ret

	; print contents of L as 8 bit number in ASCII HEX to ACIA, modifies AF & HL
	.asm_phex
	ld a,l ; modifies AF, HL
	push af
	rrca
	rrca
	rrca
	rrca
	call asm_phex_conv
	pop af
	.asm_phex_conv
	and $0F
	add a,$90
	daa
	adc a,$40
	daa
	jp asm_pchar ; print character on ACIA

	ENDIF ; IF _CPM

	;==============================================================================
	import sys
	import serial

	from time import sleep

	path = '/dev/ttyUSB0'
	baud = 115200

	ser = serial.Serial(path, baud, dsrdtr=False)

	# print(ser)

	for line in sys.stdin:

	for ch in line:
	ser.write( ch );

	ser.write('\r');
	ser.flush();
	sleep(0.1);

	ser.close()