|
;******************************* FLBOOT.TAL ******************************** |
|
; (C) Copyright 1987-1993 Computer System Architects, Provo UT. * |
|
; This program is the property of Computer System Architects (CSA) * |
|
; and is provided only as an example of a transputer/PC program for * |
|
; use with CSA's Transputer Education Kit and other transputer products. * |
|
; You may freely distribute copies or modifiy the program as a whole or in * |
|
; part, provided you insert in each copy appropriate copyright notices and * |
|
; disclaimer of warranty and send to CSA a copy of any modifications which * |
|
; you plan to distribute. * |
|
; This program is provided as is without warranty of any kind. CSA is not * |
|
; responsible for any damages arising out of the use of this program. * |
|
;***************************************************************************/ |
|
;**************************************************************************** |
|
; This program boots a transputer, does necessary initialization and |
|
; boots other transputers connected to it with an exact copy of itself. It |
|
; keeps track of links through which other transputers are booted. |
|
; After booting process, it loads the loader and sends copies of the loader |
|
; to other transputers connected. Then it starts executing the loader. |
|
;*************************************************************************** |
|
|
|
; Target processor directive.
        .t800

; ---------------------------------------------------------------------------
; val definitions
; ---------------------------------------------------------------------------

        .val    RESERVE,16      ; operand of the first ajw at START
        .val    CALLWSP,-4      ; local offset whose address is stored in WSPACE
        .val    INITIME,0       ; value handed to sttimer
        .val    DELAY,16*2      ; added to ldtimer before the tin at B4

        .val    T805, 0x0a      ; Transputer type constants
        .val    T800D, 0x60
        .val    T414B, 0x61

        .val    LOCTOP,16       ; Size of this program's workspace

; ---------------------------------------------------------------------------
; Workspace offsets (in words).  These are shared between FLBOOT and FLLOAD.
; Three pairs deliberately share a slot: CODELEN/BIDX at 12 and TLINK/WSP
; at 15.
; ---------------------------------------------------------------------------

        .val    MININT,1        ; Minimum integer
        .val    MEMSTART,2      ; Memory location of program start
        .val    BOOTIN,3        ; Transputer link that booted us
        .val    BOOTOUT,4       ; Other 3 xputer links; 5 and 6 also hold links out
        .val    LDSTART,7       ; FLLOAD: load here!  (zeroed during BOOTOUT save)
        .val    ENTRYP,8        ; FLLOAD: start running loaded code from here!
        .val    WSPACE,9        ; FLLOAD: loaded code uses this workspace address!
        .val    LDADDR,10       ; FLLOAD's pointer to where to load the next chunk
        .val    TRANTYPE,11     ; What kind of transputer are we using?
        .val    CODELEN,12      ; Code (chunk) size; high order bytes must be 0
        .val    BIDX,12         ; Temporary index into BOOTOUT (same slot as CODELEN)
        .val    LOOPA,13        ; Loop counter for lend instruction, 1 of 2
        .val    LOOPB,14        ; Loop counter for lend instruction, 2 of 2
        .val    TLINK,15        ; Temporary pointer to channel control word
        .val    WSP,15          ; Used to make workspaces for OUTBUF processes

; ---------------------------------------------------------------------------
; Constants for the OUTBUF process.
; ---------------------------------------------------------------------------

        .val    OBUFWS,6        ; Words allocated to each OUTBUF workspace
        .val    OLOCAL,3        ; OUTBUF wksp starts in the middle of those 6 words
        .val    OSTATIC,1       ; OUTBUF arg: static ptr to FLBOOT workspace
        .val    OLINK,2         ; OUTBUF arg: ptr to outbound channel control word
|
|
|
|
|
; ---------------------------------------------------------------------------
; START -- public entry point: workspace setup, code sizing, queue init.
;
; According to the original annotation (which cites
; https://www.transputer.net/iset/isbn-063201689-2/inside.pdf, PDF page 31),
; when this code has been loaded the workspace pointer addresses the first
; empty word past the end of the loaded code.  The ajw below takes an
; unusually *positive* operand and so moves the workspace pointer RESERVE
; (16) words up past the end of the code.  The call that follows then moves
; it back down four words, to end-of-code + 12 words, and stores
;       +0: return address   +1: Areg   +2: Breg   +3: Creg
; Only the saved Creg is used afterwards: offset 3 is BOOTIN, the link that
; delivered this code.
;
; That RESERVE equals the number of local offsets in the val table looks
; coincidental rather than required.
; ---------------------------------------------------------------------------

        .pub    START

        .align

START:  ajw     RESERVE                 ;reserve work space
        call    0                       ;save registers into workspace

; Absolute address of START: the constant is the distance from S1 back to
; START, and ldpi turns it into an address.  The two disabled lines are an
; alternative the original author left commented out.
        ldc     @START - @S1
        ldpi
;       ldl     0                       ;return address of call
;       adc     -3                      ;compute memory start
S1:     stl     MEMSTART                ;save memory start

; Code length = (Wptr - 12 words) - MEMSTART.  Wptr - 12 words is where the
; workspace pointer stood on entry, i.e. the end of the code.  This length is
; later sent with outbyte in OUTBUF, so it has to fit in a single byte.
        ldlp    4-RESERVE               ;calc code length
        ldl     MEMSTART
        diff
        stl     CODELEN

; Keep MININT in a local and use it to mark both scheduler queues as empty.
; (The original annotation guesses that a local copy is kept because
; "ldl MININT" encodes shorter than "mint".)
        mint
        stl     MININT                  ;save MIN INTEGER
        ldl     MININT                  ;init process front pointers
        stlf
        ldl     MININT
        sthf
|
|
|
; ---------------------------------------------------------------------------
; Identify the processor type, then clear the error state.
;
; The probe relies on operation 0x17c ("lddevid").  According to the original
; annotation and http://www.transputer.net/tn/61/tn61.html#x1-40003 :
;       T414/T212/T222  it is a no-op
;       T800            it pops the stack, leaving garbage in Areg
;       anything else   it pushes a device identifier
; Three values are stacked first so that the two stores afterwards give:
;
;                         LOOPA        LOOPB
;       T414/T212/T222    0            T414B
;       T800              garbage      T800D
;       anything else     device id    0
;
; So a zero LOOPB means LOOPA holds the type; otherwise LOOPB is the type.
; ---------------------------------------------------------------------------

        ldc     T800D                   ;find transputer type
        ldc     T414B
        ldc     0
        opr     0x17c
        stl     LOOPA
        stl     LOOPB

        ldl     LOOPB
        cj      @nfp1                   ; LOOPB = 0: the id is in LOOPA
        ldl     LOOPB                   ; otherwise LOOPB is our type
        stl     TRANTYPE
        j       @nfp2

nfp1:   ldl     LOOPA
        stl     TRANTYPE

; Clear all errors and make certain we don't halt on error either.  The FPU
; error flag is only touched when TRANTYPE is T800D or T805; every other type
; branches straight to I1.
nfp2:
        testerr                         ;clear error flag
        clrhalterr                      ;clear halt on error
        ldl     TRANTYPE                ;clear fpu error flag if T800 or T805
        eqc     T800D
        ldl     TRANTYPE
        eqc     T805
        or
        cj      @I1                     ; neither type matched: skip fptesterr
        fptesterr
|
|
|
; ---------------------------------------------------------------------------
; I1..I3 -- initialise the low-memory control words and start the timer.
;
; Stores MININT into the 11 words starting at address MININT.  Per the
; original annotation these are the link channel control words, the event
; control word and the two timer process queue pointers.
;
; LOOPA is the index and LOOPB the remaining count; the two adjacent words
; form the control block that lend operates on.
; ---------------------------------------------------------------------------

I1:     ldc     0                       ;init links and event
        stl     LOOPA
        ldc     11
        stl     LOOPB

I2:     ldl     MININT                  ; value to store
        ldl     LOOPA
        ldl     MININT
        wsub                            ; address = MININT + LOOPA words
        stnl    0
        ldlp    LOOPA
        ldc     I3-I2
        lend

; Now start the timer at time INITIME (0).
I3:     ldc     INITIME                 ;start timer
        sttimer
|
|
|
|
|
|
|
|
|
|
|
; ---------------------------------------------------------------------------
; bootstrap neighbors
;
; For each of the four links except the one that booted us, start an OUTBUF
; process that sends a copy of this program down the link.
;
; WSP walks through memory above our own locals in steps of OBUFWS (6)
; words, one step per OUTBUF started.  It begins at local LOCTOP-OLOCAL
; (13, the address of LOOPA) and is advanced *before* each use, so the first
; OUTBUF workspace pointer is our local 19.  Relative to a new workspace
; pointer W, this code fills in:
;       W-1         start address of OUTBUF (stored for runp)
;       W+OSTATIC   (W+1) pointer to this workspace, the "static link"
;       W+OLINK     (W+2) output channel control word to send on
; NOTE(review): the remaining words of each six-word slot are presumably
; scratch for outbyte and scheduler bookkeeping -- confirm against the
; instruction set manual.
;
; Link i's output channel word is at MININT + i words and its input channel
; word at MININT + (4+i) words; BOOTIN holds an input channel address.
; ---------------------------------------------------------------------------

        ldlp    LOCTOP-OLOCAL           ;init new work space ptr
        stl     WSP

        ldc     0                       ;loop i:= 0 for 4 each link
        stl     LOOPA
        ldc     4
        stl     LOOPB

B1:     ldl     LOOPA                   ;if LINKIN[i] <> BOOTIN
        ldl     MININT
        ldnlp   4
        wsub                            ; MININT + (4+i) words
        ldl     BOOTIN
        diff
        cj      @B3                     ; same link that booted us: skip it

        ldl     WSP                     ;alloc OUTBUF work space
        ldnlp   OBUFWS
        stl     WSP

        ldl     LOOPA                   ;save LINKOUT[i]
        ldl     MININT
        wsub                            ; MININT + i words
        ldl     WSP
        stnl    OLINK

        ldlp    0                       ;save static link
        ldl     WSP
        stnl    OSTATIC

        ldc     OUTBUF-B2               ;save iptr to OUTBUF
        ldpi                            ; offset is relative to B2
B2:     ldl     WSP
        stnl    -1

        ldl     WSP                     ;load new work space ptr
        runp                            ;run output buffer at high pri

; Bottom of the loop.  The original annotation notes that lend can also
; cause a switch to the OUTBUF processes just started.
B3:     ldlp    LOOPA
        ldc     B4-B1
        lend                            ;end loop

; Wait DELAY timer ticks so the transfers can finish.  Open questions from
; the original annotation: an OUTBUF talking down a link with nothing on the
; far end presumably just stays blocked; and would a topology with two routes
; to the same processor try to boot it twice?
B4:     ldtimer                         ;wait for any com. to finish
        adc     DELAY
        tin
|
|
|
|
|
|
|
; ---------------------------------------------------------------------------
; save BOOTOUT links and reset others except BOOTIN
;
; Walks all eight channel control words, MININT + i words for i = 0..7
; (0..3 are outputs, 4..7 the matching inputs).  For each one:
;
;   output (i < 4)
;     - BOOTOUT[i] is cleared.  BOOTOUT has three entries and i runs 0..3, so
;       i = 3 clears LDSTART as well; the val table notes this and LDSTART is
;       not in use yet.
;     - the output paired with BOOTIN is left alone.
;     - link 1 is left alone (MOD, see below).
;     - if the channel word is back at MININT its address is appended at
;       BOOTOUT[BIDX] and the channel is NOT reset; the original annotation
;       reads this as "a downstream transputer took the boot code".
;     - otherwise the channel is reset.
;   input (i >= 4)
;     - reset, unless it is BOOTIN itself.
;
; BIDX shares local 12 with CODELEN, so the boot-code length computed at
; START is overwritten from here on.  TLINK shares local 15 with WSP.
; ---------------------------------------------------------------------------

        ldc     0                       ;BIDX:= 0
        stl     BIDX

        ldc     0                       ;loop i:= 0 for 8 each hard link
        stl     LOOPA
        ldc     8
        stl     LOOPB

R1:     ldl     LOOPA                   ;TLINK:= LINK[i]
        ldl     MININT
        wsub
        stl     TLINK

        ldc     4                       ;if i < 4
        ldl     LOOPA
        gt                              ; 4 > i ?
        cj      @R2                     ; no: input link, go to the reset check

        ldc     0                       ;BOOTOUT[i]:= 0
        ldl     LOOPA
        ldlp    BOOTOUT
        wsub
        stnl    0

        ldl     TLINK                   ;if TLINK+4 <> BOOTIN
        ldnlp   4
        ldl     BOOTIN
        diff
        cj      @R3                     ; output half of the boot link: next i

; ## BEGIN MOD ##
; For B008, skip Link 1 altogether.  This stops us from attempting to use the
; board's T212, which is reached via Link 1 on slot 0; with Link 1 never used
; as a downstream link, the hope is to avoid the freeze seen when
; mandelzooming.
; NOTE(review): this exit goes to R3, so output link 1 is neither recorded in
; BOOTOUT nor reset here.
        ldl     LOOPA                   ; i
        adc     -1                      ; zero only when i = 1
        cj      @R3                     ; link 1: next i
; ## END MOD ##

        ldl     TLINK                   ;if *TLINK = MININT
        ldnl    0
        ldl     MININT
        diff
        eqc     0
        cj      @R2                     ; channel word is not MININT: reset it

        ldl     TLINK                   ;BOOTOUT[BIDX]:= TLINK
        ldl     BIDX
        ldlp    BOOTOUT
        wsub
        stnl    0
        ldl     BIDX                    ;BIDX:= BIDX+1
        adc     1
        stl     BIDX
        j       @R3                     ; keep this channel as it is

R2:     ldl     TLINK                   ;if TLINK <> BOOTIN
        ldl     BOOTIN
        diff
        cj      @R3                     ; never reset the link we boot from
        ldl     TLINK                   ;resetch(TLINK)
        resetch

R3:     ldlp    LOOPA
        ldc     R4-R1
        lend                            ;end loop

R4:
|
|
|
|
|
|
|
; ---------------------------------------------------------------------------
; loader and executer setup -- receive FLLOAD from upstream
;
; By now FLBOOT should be running on every transputer of the self-configured
; network, which is at most a tree and at its simplest a chain.  All of them
; are waiting for more code (FLLOAD), which the host sends to the root and
; which is then passed down the BOOTOUT links.
;
; TODO: Is there a race condition here?  How do we know for sure that the
; downstream transputers have reached this point?
;
; Protocol on BOOTIN: one length byte, then that many bytes of code.  The
; code is read in at MEMSTART, i.e. over the top of this program, so FLLOAD
; has to be short enough not to reach the instructions still to be executed.
;
; Only one byte of CODELEN is written by the first "in".  The val table says
; its high-order bytes must already be 0; the last thing stored in this slot
; was BIDX.
; ---------------------------------------------------------------------------

        ldlp    CODELEN                 ;input length of flood loader
        ldl     BOOTIN
        ldc     1
        in

        ldl     MEMSTART                ;input flood loader
        ldl     BOOTIN
        ldl     CODELEN
        in
|
; ---------------------------------------------------------------------------
; send loader to neighbor
;
; Forwards the code just received to each of the (up to) three links listed
; in BOOTOUT, one after another in this process -- no sub-processes this
; time.  The format is the one it arrived in: one length byte followed by
; CODELEN bytes starting at MEMSTART.  Entries equal to 0 are empty and are
; skipped.
; ---------------------------------------------------------------------------

H2:     ldc     0                       ;loop i:= 0 for 3 each BOOTOUT link
        stl     LOOPA
        ldc     3
        stl     LOOPB

H3:     ldl     LOOPA                   ;TLINK:= BOOTOUT[i]
        ldlp    BOOTOUT
        wsub
        ldnl    0
        stl     TLINK

        ldl     TLINK                   ;if TLINK <> 0
        cj      @H4                     ; empty entry: next i

        ldlp    CODELEN                 ;send out length to neighbors
        ldl     TLINK
        ldc     1
        out

        ldl     MEMSTART                ;send out code to neighbors
        ldl     TLINK
        ldl     CODELEN
        out

H4:     ldlp    LOOPA
        ldc     H5-H3
        lend                            ;end loop

H5:
|
; ---------------------------------------------------------------------------
; Set up FLLOAD's arguments in the shared workspace and enter it.
;
;       LDSTART := address of local LOCTOP, the first word past our locals;
;                  FLLOAD is told to load code starting there
;       ENTRYP  := LDSTART, so the loaded code is entered at its first byte
;       WSPACE  := address of local CALLWSP (-4), four words below our
;                  workspace; see FLLOAD for how it uses those four words
;
; FLLOAD was read in at MEMSTART, so that is where gcall transfers control.
; ---------------------------------------------------------------------------

        ldlp    LOCTOP                  ;init load start address
        stl     LDSTART
        ldl     LDSTART                 ;init entry point
        stl     ENTRYP
        ldlp    CALLWSP                 ;init work space
        stl     WSPACE

        ldl     MEMSTART                ;go to loader
        gcall
|
|
|
|
|
; ---------------------------------------------------------------------------
; output buffer
;
; OUTBUF -- process body started once per outbound link by the B1 loop.
;
; Arguments, as words of its own workspace:
;       OSTATIC   pointer to the FLBOOT workspace (the static link)
;       OLINK     pointer to the outbound channel control word
;
; Sends OSTATIC->CODELEN as a single byte down OLINK, then CODELEN bytes
; starting at OSTATIC->MEMSTART, then stops.  CODELEN is read a second time
; after the length byte has gone out.
; ---------------------------------------------------------------------------

OUTBUF: ldl     OLINK                   ;load output link
        ldl     OSTATIC                 ;load code length
        ldnl    CODELEN
        outbyte                         ;output code length

        ldl     OSTATIC                 ;load code start
        ldnl    MEMSTART
        ldl     OLINK                   ;load output link
        ldl     OSTATIC                 ;load code length
        ldnl    CODELEN
        out                             ;output code

        stopp

        .align

END:    .end
Cut off from the description: A slightly-modified version of the Mandelzoom source code can be browsed at https://github.com/axelmuhr/T-Mandel
Important: This annotated code contains a modification that limits the ability of the worm to use outbound link 1 out of any Transputer but that allows Mandelzoom to run on a B008-based Transputer network without attempting to use the B008's built-in T212 (a 16-bit device). A "stock" version of Mandelzoom would lock up if it attempted to use the T212.
More details of this change here: https://groups.google.com/g/comp.sys.transputer/c/Hy7RYG-6bSk/m/UySgqvP3BAAJ