onelivesleft/gist:6b91d51c13f38170659cd16739e184f4

## gistfile1.txt
/*
Original c code; this is a routine from stbsprintf distilled down to the main flow control
Lots of lines were omitted; where it looks like there is redundant flow, imagine lines of code exist
between the redundant statements.
*/

original :: () {
    for (;;) {
        if (0 == (bf = buf = callback(buf, user, len)))
            goto done;

        for (;;) {
            while (((stbsp__uintptr)f) & 3) {
            schk1:
            if (f[0] == '%')
                goto scandd;
            schk2:
            if (f[0] == 0)
                goto endfmt;
            }
            for (;;) {
            if (((v ^ 0x25252525) - 0x01010101) & c)
                goto schk1;
            if ((v - 0x01010101) & c)
                goto schk2;
            if (callback)
                if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
                    goto schk1;
            }
        }

    scandd:
        for (;;) {
            switch (f[0]) {
                case '-':
                    continue;
                case '0':
                    goto flags_done;
                default:
                    goto flags_done;
            }
        }

    flags_done:
        switch (f[0]) {
            case 's':
                goto scopy;

            case 'G': // float
                goto dofloatfromg;

            case 'E': // float
            case 'e': // float
            doexpfromg:
                if (dp == STBSP__SPECIAL) {
                    goto scopy;
                }
                goto flt_lead;

            case 'f': // float
            doafloat:
            dofloatfromg:
                if (dp == STBSP__SPECIAL) {
                    goto scopy;
                }

            flt_lead:
                goto scopy;

            case 'B': // upper binary
                goto radixnum;

            case 'p': // pointer
            radixnum:
                if (n64 == 0) {
                    goto scopy;
                }
                goto scopy;

            case 'u': // unsigned
            case 'i':
            case 'd': // integer
                if (fl & STBSP__METRIC_SUFFIX) {
                    goto doafloat;
                }

            scopy:
                break;

            default: // unknown, just copy code
                goto scopy;
        }
    endfmt:
    done:
}


/*
First a rewrite with a hypothetical `skip` statement, which is a goto that only goes forwards, to a #label.
We also pretend that we can label while loops (you can fake this with for currently).
This required a small amount of reordering in the switch, but was a pretty direct conversion.
*/

with_skip :: () {
    while main: true {
        if (0 == (bf = buf = callback(buf, user, len)))
            skip done;

        while scandd: true {
            while schk1: (((stbsp__uintptr)f) & 3) {
                if (f[0] == '%')
                    break scandd;
                while schk2: true {
                    if (f[0] == 0)
                        break main;

                    while true {
                        if (((v ^ 0x25252525) - 0x01010101) & c)
                            continue schk1;
                        if ((v - 0x01010101) & c)
                            continue schk2;
                        if (callback)
                            if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
                                continue schk1;
                    }
                }
            }
        }

        while flags: true {
            switch (f[0]) {
                case '-':
                    continue;
                case '0':
                    break flags;
                default:
                    break flags;
            }
        }

        switch (f[0]) {
            case 's':
                skip scopy;

            case 'G': // float
                skip dofloatfromg;

            case 'u': // unsigned
            case 'i':
            case 'd': // integer
                if (fl & STBSP__METRIC_SUFFIX) {
                    skip doafloat;
                }
                skip scopy
                // The code for scopy now sits in the default section, so there is a change in performance characteristics:
                // in the original this block is favoured, while default needs a jump, here it is the other way around
                // Is there a way to write the code this way, while being 100% true to the original? The reason we moved this
                // block is to reorder for doafloat - we could duplicate doafloat instead, which would restore performance, but
                // incur some duplication.  Perhaps its the case that for 100% accuracy to c performance, we need a full goto...

            case 'E': // float
            case 'e': // float
            doexpfromg:
                if (dp == STBSP__SPECIAL) {
                    skip scopy;
                }
                skip flt_lead;

            case 'f': // float
                #label doafloat;
                #label dofloatfromg;
                if (dp == STBSP__SPECIAL) {
                    skip scopy;
                }

                #label flt_lead;
                skip scopy;

            case 'B': // upper binary
                skip radixnum;

            case 'p': // pointer
                #label radixnum;
                if (n64 == 0) {
                    skip scopy;
                }
                skip scopy;

            default: // unknown, just copy code
                #label scopy;
        }
    }
    #label done;
}


/*
Next a rewrite where instead of a `skip` statement, we can label any block,
and use `break` to break out of it.
This required much more restructuring of the code, and resulted in a much deeper nesting (flat is better than nested!).
The nesting is also deceptive;  #label doafloat and #label radixnum are really conceptually the same "depth".
*/

with_labelled_blocks :: () {
    {
        while main: true {
            if (0 == (bf = buf = callback(buf, user, len)))
                break done;

            while scandd: true {
                while schk1: (((stbsp__uintptr)f) & 3) {
                    if (f[0] == '%')
                        break scandd;
                    while schk2: true {
                        if (f[0] == 0)
                            break main;

                        while true {
                            if (((v ^ 0x25252525) - 0x01010101) & c)
                                continue schk1;
                            if ((v - 0x01010101) & c)
                                continue schk2;
                            if (callback)
                                if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
                                    continue schk1;
                        }
                    }
                }
            }

            while flags: true {
                switch (f[0]) {
                    case '-':
                        continue;
                    case '0':
                        break flags;
                    default:
                        break flags;
                }
            }

            {
                {
                    {
                        {
                            {
                                switch (f[0]) {
                                    case 's':
                                        break scopy;

                                    case 'G': // float
                                        break dofloatfromg;

                                    case 'u': // unsigned
                                    case 'i':
                                    case 'd': // integer
                                        if (fl & STBSP__METRIC_SUFFIX) {
                                            break doafloat;
                                        }
                                        break scopy
                                        // same performance difference as with skip version, except here we incur it in default too...

                                    case 'E': // float
                                    case 'e': // float
                                    doexpfromg:
                                        if (dp == STBSP__SPECIAL) {
                                            break scopy;
                                        }
                                        break flt_lead;

                                    case 'f': // float
                                        break doafloat;

                                    case 'B': // upper binary
                                        break radixnum;

                                    case 'p': // pointer
                                        break radixnum;

                                    default: // unknown, just copy code
                                        break scopy;
                                }
                            } #label doafloat;
                        } #label dofloatfromg;
                        if (dp == STBSP__SPECIAL) {
                            break scopy;
                        }
                    } #label flt_lead;
                    break scopy;
                } #label radixnum;
                if (n64 == 0) {
                    break scopy;
                }
                break scopy;
            } #label scopy;
        }
    } label done;
}


/*
The above version is pretty bad, but it does seem to get some things a little bit right:
for instance, it puts scopy at the bottom, where the other labels flow into it.
Maybe we could have something a bit more structured?  The key property of this code which
goto is being used for is to have sections of code which have a single exit point but multiple
entry points.  Can we create a structure which embodies this pattern?
This is probably a bit abstract for this language:
We use a theoretical `flow` statement (name and syntax off the top of my head, are not great),
which lets us define blocks of code which are conceptually parallel and/or sequential to each other.
See note at relevant point.
*/

with_flow :: () {
    while main: true {
        if (0 == (bf = buf = callback(buf, user, len)))
            goto done;

        while scandd: true {
            while schk1: (((stbsp__uintptr)f) & 3) {
                if (f[0] == '%')
                    break scandd;
                while schk2: true {
                    if (f[0] == 0)
                        break main;

                    while true {
                        if (((v ^ 0x25252525) - 0x01010101) & c)
                            continue schk1;
                        if ((v - 0x01010101) & c)
                            continue schk2;
                        if (callback)
                            if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
                                continue schk1;
                    }
                }
            }
        }

        while flags: true {
            switch (f[0]) {
                case '-':
                    continue;
                case '0':
                    break flags;
                default:
                    break flags;
            }
        }

        switch (f[0]) {
            case 's':
                goto scopy;

            case 'G': // float
                goto dofloatfromg;

            case 'u': // unsigned
            case 'i':
            case 'd': // integer
                if (fl & STBSP__METRIC_SUFFIX) {
                    goto doafloat;
                }
                goto inline scopy;

            case 'E': // float
            case 'e': // float
                goto doexpfromg

            case 'f': // float
                goto inline doafloat

            case 'B': // upper binary
                goto radixnum;

            case 'p': // pointer
                goto inline radixnum;

            default: // unknown, just copy code
                goto scopy;
        }

        // Each `flow` construct is a list of blocks; each block being either
        // another flow construct or a labelled code block.
        // When we enter a flow block we start executing the first named code block
        // defined, unless the flow statement declares a label, in which case we
        // jump to it.  When a block finishes executing, control continues in the parent
        // block.
        //
        // For example, if in the above code we `goto dofloatFromg` then first we execute
        // the block `dofloatfromg`.  `doexpfromg` is its sibling; it is *not* executed next.
        // Instead flow continues through for the rest of `doafloat`, into `flt_lead`.  After that
        // `radixnum` is ignored (its on the same level as `doafloat`), and we end by executing
        // `scopy`.
        //
        // We put the flow construct at the point in our program where we want the final section
        // to exit.  The position of the flow construct is not binding on the compiler; the compiler
        // chooses where to put each code block inside the construct, generating the necessary jumps
        // to each entry point and from each exit point.
        // However, as can be seen above, the user can use a `goto inline` statement to tell the
        // compiler they want the code for that block to be placed at that location.

        flow scopy {
            flow [
                doafloat {
                    flow [
                        dofloatfromg {
                            if (dp == STBSP__SPECIAL) {
                                break doafloat;
                            }
                        },

                        doexpfromg {
                            if (dp == STBSP__SPECIAL) {
                                break doafloat;
                            }
                        }
                    ];

                    flow flt_lead {

                    }
                },

                radixnum {
                    if (n64 == 0) {
                        break radixnum;
                    }
                }
            ];

            flow scopy {

            }
        }
    }

    flow done {

    }
}
	/*
	Original c code; this is a routine from stbsprintf distilled down to the main flow control
	Lots of lines were omitted; where it looks like there is redundant flow, imagine lines of code exist
	between the redundant statements.
	*/

	original :: () {
	for (;;) {
	if (0 == (bf = buf = callback(buf, user, len)))
	goto done;

	for (;;) {
	while (((stbsp__uintptr)f) & 3) {
	schk1:
	if (f[0] == '%')
	goto scandd;
	schk2:
	if (f[0] == 0)
	goto endfmt;
	}
	for (;;) {
	if (((v ^ 0x25252525) - 0x01010101) & c)
	goto schk1;
	if ((v - 0x01010101) & c)
	goto schk2;
	if (callback)
	if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
	goto schk1;
	}
	}

	scandd:
	for (;;) {
	switch (f[0]) {
	case '-':
	continue;
	case '0':
	goto flags_done;
	default:
	goto flags_done;
	}
	}

	flags_done:
	switch (f[0]) {
	case 's':
	goto scopy;

	case 'G': // float
	goto dofloatfromg;

	case 'E': // float
	case 'e': // float
	doexpfromg:
	if (dp == STBSP__SPECIAL) {
	goto scopy;
	}
	goto flt_lead;

	case 'f': // float
	doafloat:
	dofloatfromg:
	if (dp == STBSP__SPECIAL) {
	goto scopy;
	}

	flt_lead:
	goto scopy;

	case 'B': // upper binary
	goto radixnum;

	case 'p': // pointer
	radixnum:
	if (n64 == 0) {
	goto scopy;
	}
	goto scopy;

	case 'u': // unsigned
	case 'i':
	case 'd': // integer
	if (fl & STBSP__METRIC_SUFFIX) {
	goto doafloat;
	}

	scopy:
	break;

	default: // unknown, just copy code
	goto scopy;
	}
	endfmt:
	done:
	}


	/*
	First a rewrite with a hypothetical `skip` statement, which is a goto that only goes forwards, to a #label.
	We also pretend that we can label while loops (you can fake this with for currently).
	This required a small amount of reordering in the switch, but was a pretty direct conversion.
	*/

	with_skip :: () {
	while main: true {
	if (0 == (bf = buf = callback(buf, user, len)))
	skip done;

	while scandd: true {
	while schk1: (((stbsp__uintptr)f) & 3) {
	if (f[0] == '%')
	break scandd;
	while schk2: true {
	if (f[0] == 0)
	break main;

	while true {
	if (((v ^ 0x25252525) - 0x01010101) & c)
	continue schk1;
	if ((v - 0x01010101) & c)
	continue schk2;
	if (callback)
	if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
	continue schk1;
	}
	}
	}
	}

	while flags: true {
	switch (f[0]) {
	case '-':
	continue;
	case '0':
	break flags;
	default:
	break flags;
	}
	}

	switch (f[0]) {
	case 's':
	skip scopy;

	case 'G': // float
	skip dofloatfromg;

	case 'u': // unsigned
	case 'i':
	case 'd': // integer
	if (fl & STBSP__METRIC_SUFFIX) {
	skip doafloat;
	}
	skip scopy
	// The code for scopy now sits in the default section, so there is a change in performance characteristics:
	// in the original this block is favoured, while default needs a jump, here it is the other way around
	// Is there a way to write the code this way, while being 100% true to the original? The reason we moved this
	// block is to reorder for doafloat - we could duplicate doafloat instead, which would restore performance, but
	// incur some duplication. Perhaps its the case that for 100% accuracy to c performance, we need a full goto...

	case 'E': // float
	case 'e': // float
	doexpfromg:
	if (dp == STBSP__SPECIAL) {
	skip scopy;
	}
	skip flt_lead;

	case 'f': // float
	#label doafloat;
	#label dofloatfromg;
	if (dp == STBSP__SPECIAL) {
	skip scopy;
	}

	#label flt_lead;
	skip scopy;

	case 'B': // upper binary
	skip radixnum;

	case 'p': // pointer
	#label radixnum;
	if (n64 == 0) {
	skip scopy;
	}
	skip scopy;

	default: // unknown, just copy code
	#label scopy;
	}
	}
	#label done;
	}


	/*
	Next a rewrite where instead of a `skip` statement, we can label any block,
	and use `break` to break out of it.
	This required much more restructuring of the code, and resulted in a much deeper nesting (flat is better than nested!).
	The nesting is also deceptive; #label doafloat and #label radixnum are really conceptually the same "depth".
	*/

	with_labelled_blocks :: () {
	{
	while main: true {
	if (0 == (bf = buf = callback(buf, user, len)))
	break done;

	while scandd: true {
	while schk1: (((stbsp__uintptr)f) & 3) {
	if (f[0] == '%')
	break scandd;
	while schk2: true {
	if (f[0] == 0)
	break main;

	while true {
	if (((v ^ 0x25252525) - 0x01010101) & c)
	continue schk1;
	if ((v - 0x01010101) & c)
	continue schk2;
	if (callback)
	if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
	continue schk1;
	}
	}
	}
	}

	while flags: true {
	switch (f[0]) {
	case '-':
	continue;
	case '0':
	break flags;
	default:
	break flags;
	}
	}

	{
	{
	{
	{
	{
	switch (f[0]) {
	case 's':
	break scopy;

	case 'G': // float
	break dofloatfromg;

	case 'u': // unsigned
	case 'i':
	case 'd': // integer
	if (fl & STBSP__METRIC_SUFFIX) {
	break doafloat;
	}
	break scopy
	// same performance difference as with skip version, except here we incur it in default too...

	case 'E': // float
	case 'e': // float
	doexpfromg:
	if (dp == STBSP__SPECIAL) {
	break scopy;
	}
	break flt_lead;

	case 'f': // float
	break doafloat;

	case 'B': // upper binary
	break radixnum;

	case 'p': // pointer
	break radixnum;

	default: // unknown, just copy code
	break scopy;
	}
	} #label doafloat;
	} #label dofloatfromg;
	if (dp == STBSP__SPECIAL) {
	break scopy;
	}
	} #label flt_lead;
	break scopy;
	} #label radixnum;
	if (n64 == 0) {
	break scopy;
	}
	break scopy;
	} #label scopy;
	}
	} label done;
	}


	/*
	The above version is pretty bad, but it does seem to get some things a little bit right:
	for instance, it puts scopy at the bottom, where the other labels flow into it.
	Maybe we could have something a bit more structured? The key property of this code which
	goto is being used for is to have sections of code which have a single exit point but multiple
	entry points. Can we create a structure which embodies this pattern?
	This is probably a bit abstract for this language:
	We use a theoretical `flow` statement (name and syntax off the top of my head, are not great),
	which lets us define blocks of code which are conceptually parallel and/or sequential to each other.
	See note at relevant point.
	*/

	with_flow :: () {
	while main: true {
	if (0 == (bf = buf = callback(buf, user, len)))
	goto done;

	while scandd: true {
	while schk1: (((stbsp__uintptr)f) & 3) {
	if (f[0] == '%')
	break scandd;
	while schk2: true {
	if (f[0] == 0)
	break main;

	while true {
	if (((v ^ 0x25252525) - 0x01010101) & c)
	continue schk1;
	if ((v - 0x01010101) & c)
	continue schk2;
	if (callback)
	if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
	continue schk1;
	}
	}
	}
	}

	while flags: true {
	switch (f[0]) {
	case '-':
	continue;
	case '0':
	break flags;
	default:
	break flags;
	}
	}

	switch (f[0]) {
	case 's':
	goto scopy;

	case 'G': // float
	goto dofloatfromg;

	case 'u': // unsigned
	case 'i':
	case 'd': // integer
	if (fl & STBSP__METRIC_SUFFIX) {
	goto doafloat;
	}
	goto inline scopy;

	case 'E': // float
	case 'e': // float
	goto doexpfromg

	case 'f': // float
	goto inline doafloat

	case 'B': // upper binary
	goto radixnum;

	case 'p': // pointer
	goto inline radixnum;

	default: // unknown, just copy code
	goto scopy;
	}

	// Each `flow` construct is a list of blocks; each block being either
	// another flow construct or a labelled code block.
	// When we enter a flow block we start executing the first named code block
	// defined, unless the flow statement declares a label, in which case we
	// jump to it. When a block finishes executing, control continues in the parent
	// block.
	//
	// For example, if in the above code we `goto dofloatFromg` then first we execute
	// the block `dofloatfromg`. `doexpfromg` is its sibling; it is not executed next.
	// Instead flow continues through for the rest of `doafloat`, into `flt_lead`. After that
	// `radixnum` is ignored (its on the same level as `doafloat`), and we end by executing
	// `scopy`.
	//
	// We put the flow construct at the point in our program where we want the final section
	// to exit. The position of the flow construct is not binding on the compiler; the compiler
	// chooses where to put each code block inside the construct, generating the necessary jumps
	// to each entry point and from each exit point.
	// However, as can be seen above, the user can use a `goto inline` statement to tell the
	// compiler they want the code for that block to be placed at that location.

	flow scopy {
	flow [
	doafloat {
	flow [
	dofloatfromg {
	if (dp == STBSP__SPECIAL) {
	break doafloat;
	}
	},

	doexpfromg {
	if (dp == STBSP__SPECIAL) {
	break doafloat;
	}
	}
	];

	flow flt_lead {

	}
	},

	radixnum {
	if (n64 == 0) {
	break radixnum;
	}
	}
	];

	flow scopy {

	}
	}
	}

	flow done {

	}
	}