rgchris/clean-script.reb

## clean-script.reb
Rebol [
    Title: "Rebol Script Cleaner (Pretty Printer)"
    Date: 10-Feb-2021
    File: %clean-script.reb
    Version: 1.2.3
    Type: module
    Name: rgchris.clean-script
    Exports: [clean-script]
    Author: "Christopher Ross-Gill"
    Purpose: {
        Cleans (pretty prints) Rebol scripts by parsing the Rebol code
        and supplying standard indentation and spacing.
    }
    History: [
        "Christopher Ross-Gill" 1.2.3 10-Feb-2021 "Clears extra newlines at start/end of containers"
        "Christopher Ross-Gill" 1.2.2 23-Jan-2021 "Further binary handling, introduce options"
        "Christopher Ross-Gill" 1.2.1 05-Jan-2020 "Indent binary values"
        "Christopher Ross-Gill" 1.2.0 05-Jan-2020 "Rewrite"
        "Carl Sassenrath" 1.1.0 29-May-2003 {Fixes indent and parse rule.}
        "Carl Sassenrath" 1.0.0 27-May-2000 "Original program."
    ]
]

list: make object! [
    new: does [
        copy [
            first _
            last _
            ; count 0
        ]
    ]

    make-node: func [] [
        copy [
            parent _
            back _
            next _
            type _
            value _
            is-split _
            is-open _
        ]
    ]

    insert-before: func [
        item [block!]
        <local> node
    ][
        node: make-node

        node/parent: item/parent
        ; node/parent/count: node/parent/count + 1
        node/back: item/back
        node/next: item

        either blank? item/back [
            item/parent/first: node
        ][
            item/back/next: node
        ]

        item/back: node
    ]

    insert-after: func [
        item [block!]
        <local> node
    ][
        node: make-node

        node/parent: item/parent
        ; node/parent/count: node/parent/count + 1
        node/back: item
        node/next: item/next

        either blank? item/next [
            item/parent/last: node
        ][
            item/next/back: node
        ]

        item/next: node
    ]

    insert: func [
        list [block!]
    ][
        either list/first [
            insert-before list/first
        ][
            list/first: list/last: make-node
            list/first/parent: list
            list/first
        ]
    ]

    append: func [
        list [block!]
    ][
        either list/last [
            insert-after list/last
        ][
            insert list
        ]
    ]

    remove: func [
        item [block!]
    ][
        either item/back [
            item/back/next: item/next
        ][
            item/parent/first: item/next
        ]

        either item/next [
            item/next/back: item/back
        ][
            item/parent/last: item/back
        ]

        item/parent: _
        item/next: _
        item/back: _

        item
    ]
]

mapify: func [
    spec [<opt> block! map!]
    <local> map keys
][
    switch type-of :spec [
        null [make map! []]

        map! [spec]

        block! [
            map: make map! []

            keys: collect [
                do map-each value spec [
                    either match set-word! value [
                        append to set-path! [map] keep to word! value
                    ][
                        :value
                    ]
                ]
            ]

            for-each key difference keys words-of map [
                put map key _
            ]

            map
        ]
    ]
]

clean-script: use [
    indents max-indent space chars
][
    max-indent: 16  ; adjust this number for more indent levels
    indents: collect [
        repeat x max-indent [
            keep rejoin collect [
                repeat x x [
                    keep either x = 1 [#{}] [#{20202020}]
                ]
            ]
        ]
    ]

    space: charset "^- "
    chars: complement charset "^/^- [](){}"

    func [
        "Returns source with standard spacing (pretty printed)."
        source [binary!] "Original source"
        /with "Specify optional parameters"
        options [map! block!] "Optional parameters"
        <local> tokens part mark here token indent in-binary
    ][
        options: mapify :options

        options/indent: switch type-of :options/indent [
            binary! [options/indent]
            text! [to binary! options/indent]
            default [#{}]
        ]

        tokens: list/new
        in-binary: no

        parse source [
            any [
                copy part [
                    "#!" some [
                        some chars | space
                        |
                        #"[" | #"]" | #"(" | #")" | #"{" | #"}"
                    ] newline
                ]
                (
                    token: list/append tokens
                    token/type: <comment>
                    token/value: part
                )
            ]

            any [
                mark:
                newline
                (
                    token: list/append tokens
                    token/type: <newline>
                    token/value: newline
                )
                |
                any space #"^@" to end
                (
                    token: list/append tokens
                    token/type: <null>
                    token/value: mark
                )
                |
                some space
                |
                #{E2808C}  ; used by Replpad
                |
                copy part [
                    #"[" | #"(" | "#[" | "#(" | "#{" | "2#{" | "16#{" | "64#{"
                ]
                (
                    token: list/append tokens

                    token/type: switch to text! part [
                        "[" [<block>]
                        "(" [<group>]
                        "#[" [<construct>]
                        "#(" [<map>]

                        "#{" "2#{" "16#{" "64#{" [
                            in-binary: yes
                            <binary>
                        ]
                    ]

                    token/value: part
                    token/is-open: yes
                )
                |
                copy part [#"]" | #")" | #"}"]
                (
                    token: list/append tokens
                    token/value: part

                    here: :token

                    until [
                        case [
                            not here: here/back [
                                ; this token is a orphan, reported as comment
                                token/type: <comment>
                                token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Match)"]

                                token: list/append tokens
                                token/type: <newline>
                                token/value: newline

                                true  ; at head -- no need to continue
                            ]

                            here/type = <newline> [
                                token/is-split: yes

                                false  ; continue looking
                            ]

                            not here/is-open [
                                false  ; continue looking
                            ]

                            all [
                                token/value = #{5D}  ; close bracket
                                find [<group> <map> <binary>] here/type
                            ][
                                ; the current tag is an widow
                                here/type: <comment>
                                here/value: rejoin [#{3B20} here/value to binary! " ; DETECTED WIDOW (Bad Opener)"]
                                here/is-open: no

                                here: list/insert-after here
                                here/type: <newline>
                                here/value: newline

                                here: here/back
                                token/is-split: yes

                                false  ; skip the orphan and continue looking
                            ]

                            all [
                                token/value = #{29}  ; close paren
                                find [<block> <construct> <binary>] here/type
                            ][
                                ; this token is a orphan
                                token/type: <comment>
                                token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Group/Map Opener)"]

                                token: list/append tokens
                                token/type: <newline>
                                token/value: newline

                                true  ; we're done
                            ]

                            all [
                                token/value = #{7D}  ; close curly brace
                                find [<block> <construct> <group> <map>] here/type
                            ][
                                ; this token is a orphan
                                token/type: <comment>
                                token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Binary Opener)"]

                                token: list/append tokens
                                token/type: <newline>
                                token/value: newline

                                true  ; we're done
                            ]

                            <else> [
                                ; we have a closer
                                here/is-open: no

                                token/type: switch here/type [
                                    <block> [</block>]
                                    <group> [</group>]
                                    <construct> [</construct>]
                                    <map> [</map>]

                                    <binary> [
                                        in-binary: no
                                        </binary>
                                    ]
                                ]

                                if token/is-split [
                                    if here/next/type = <comment> [
                                        here: here/next
                                    ]

                                    ; remove extraneous newlines at the start of this container
                                    while [
                                        here/next/type = <newline>
                                    ][
                                        list/remove here/next
                                    ]

                                    ; remove extraneous newlines at the end of this container
                                    while [
                                        token/back/type = <newline>
                                    ][
                                        list/remove token/back
                                    ]

                                    if not same? here token/back [
                                        here: list/insert-after here
                                        here/type: <newline>
                                        here/value: newline
                                    ]

                                    here: list/insert-before token
                                    here/type: <newline>
                                    here/value: newline
                                ]

                                true  ; we've found a match
                            ]
                        ]
                    ]
                )
                |
                #";" any space copy part any [
                    some chars | space
                    |
                    #"[" | #"]" | #"(" | #")" | #"{" | #"}"
                ]
                (
                    replace/all part #{E2808C} #{}  ; used by Replpad

                    token: list/append tokens
                    token/type: <comment>
                    token/value: either all [part not empty? part] [
                        rejoin [#{3B20} part]  ; comment
                    ][
                        #{3B}
                    ]
                )
                |
                skip
                (
                    token: list/append tokens
                    case [
                        ; might need more of these exceptions
                        did parse mark [
                            [
                                "<-" here: space
                                |
                                "@" some chars here:
                            ]
                        ][
                            token/type: <text>
                            token/value: copy/part mark here
                        ]

                        ; an indulgence--standardize case for 'Rebol'
                        did parse mark [
                            [#"R" | #"r"]
                            [#"E" | #"e"]
                            [#"B" | #"b"]
                            [#"O" | #"o"]
                            [#"L" | #"l"]
                            here: space
                        ][
                            token/type: <text>
                            token/value: "Rebol"
                        ]


                        all [
                            not in-binary
                            not error? trap [
                                set [part here] transcode/next mark
                            ]
                            ; Transcode/Date bug: https://github.com/metaeducation/ren-c/issues/1109
                            not date? part
                        ][
                            token/type: <text>
                            token/value: copy/part mark here
                        ]

                        did parse mark [some chars here:] [
                            ; binary blobs or kwatz!
                            token/type: <text>
                            token/value: copy/part mark here
                        ]

                        here: next mark [  ; ?!?
                            token/type: <comment>
                            rejoin [#{3B203F213F20} copy/part mark here]  ; "; ?!? "

                            token: list/insert-after token
                            token/type: <newline>
                            token/value: newline
                        ]
                    ]
                )

                :here
            ]
        ]

        rejoin collect [
            token: tokens/first
            indent: 1

            keep options/indent

            while [token] [
                if find [</block> </group> </construct> </map> </binary>] token/type [
                    indent: me - 1
                ]

                if token/is-open [
                    token/type: <comment>
                    token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED WIDOW (Unmatched Opener)"]

                    token: list/insert-after token
                    token/type: <newline>
                    token/value: newline

                    token: token/back
                ]

                case [
                    blank? token/back []

                    token/type = <null> []

                    token/type = <newline> []

                    token/back/type = <newline> [
                        keep options/indent
                        keep pick indents min max-indent indent
                    ]

                    token/type = <comment> [
                        keep #{2020}
                    ]

                    token/back/type = <null> [
                        keep #{20}
                    ]

                    find [<block> <group> <construct> <map> <binary>] token/back/type []

                    find [</block> </group> </construct> </map> </binary>] token/type []

                    all [
                        token/type = <block>
                        find [</block> </construct>] token/back/type
                        token/back/back/type = <newline>
                    ][]

                    <else> [
                        keep #{20}
                    ]
                ]

                if find [<block> <group> <construct> <map> <binary>] token/type [
                    indent: me + 1
                ]

                keep token/value
                token: token/next
            ]

            if all [
                tokens/last
                tokens/last/type <> <newline>
            ][
                keep newline
            ]
        ]
    ]
]
	Rebol [
	Title: "Rebol Script Cleaner (Pretty Printer)"
	Date: 10-Feb-2021
	File: %clean-script.reb
	Version: 1.2.3
	Type: module
	Name: rgchris.clean-script
	Exports: [clean-script]
	Author: "Christopher Ross-Gill"
	Purpose: {
	Cleans (pretty prints) Rebol scripts by parsing the Rebol code
	and supplying standard indentation and spacing.
	}
	History: [
	"Christopher Ross-Gill" 1.2.3 10-Feb-2021 "Clears extra newlines at start/end of containers"
	"Christopher Ross-Gill" 1.2.2 23-Jan-2021 "Further binary handling, introduce options"
	"Christopher Ross-Gill" 1.2.1 05-Jan-2020 "Indent binary values"
	"Christopher Ross-Gill" 1.2.0 05-Jan-2020 "Rewrite"
	"Carl Sassenrath" 1.1.0 29-May-2003 {Fixes indent and parse rule.}
	"Carl Sassenrath" 1.0.0 27-May-2000 "Original program."
	]
	]

	list: make object! [
	new: does [
	copy [
	first _
	last _
	; count 0
	]
	]

	make-node: func [] [
	copy [
	parent _
	back _
	next _
	type _
	value _
	is-split _
	is-open _
	]
	]

	insert-before: func [
	item [block!]
	<local> node
	][
	node: make-node

	node/parent: item/parent
	; node/parent/count: node/parent/count + 1
	node/back: item/back
	node/next: item

	either blank? item/back [
	item/parent/first: node
	][
	item/back/next: node
	]

	item/back: node
	]

	insert-after: func [
	item [block!]
	<local> node
	][
	node: make-node

	node/parent: item/parent
	; node/parent/count: node/parent/count + 1
	node/back: item
	node/next: item/next

	either blank? item/next [
	item/parent/last: node
	][
	item/next/back: node
	]

	item/next: node
	]

	insert: func [
	list [block!]
	][
	either list/first [
	insert-before list/first
	][
	list/first: list/last: make-node
	list/first/parent: list
	list/first
	]
	]

	append: func [
	list [block!]
	][
	either list/last [
	insert-after list/last
	][
	insert list
	]
	]

	remove: func [
	item [block!]
	][
	either item/back [
	item/back/next: item/next
	][
	item/parent/first: item/next
	]

	either item/next [
	item/next/back: item/back
	][
	item/parent/last: item/back
	]

	item/parent: _
	item/next: _
	item/back: _

	item
	]
	]

	mapify: func [
	spec [<opt> block! map!]
	<local> map keys
	][
	switch type-of :spec [
	null [make map! []]

	map! [spec]

	block! [
	map: make map! []

	keys: collect [
	do map-each value spec [
	either match set-word! value [
	append to set-path! [map] keep to word! value
	][
	:value
	]
	]
	]

	for-each key difference keys words-of map [
	put map key _
	]

	map
	]
	]
	]

	clean-script: use [
	indents max-indent space chars
	][
	max-indent: 16 ; adjust this number for more indent levels
	indents: collect [
	repeat x max-indent [
	keep rejoin collect [
	repeat x x [
	keep either x = 1 [#{}] [#{20202020}]
	]
	]
	]
	]

	space: charset "^- "
	chars: complement charset "^/^- [](){}"

	func [
	"Returns source with standard spacing (pretty printed)."
	source [binary!] "Original source"
	/with "Specify optional parameters"
	options [map! block!] "Optional parameters"
	<local> tokens part mark here token indent in-binary
	][
	options: mapify :options

	options/indent: switch type-of :options/indent [
	binary! [options/indent]
	text! [to binary! options/indent]
	default [#{}]
	]

	tokens: list/new
	in-binary: no

	parse source [
	any [
	copy part [
	"#!" some [
	some chars \| space
	\|
	#"[" \| #"]" \| #"(" \| #")" \| #"{" \| #"}"
	] newline
	]
	(
	token: list/append tokens
	token/type: <comment>
	token/value: part
	)
	]

	any [
	mark:
	newline
	(
	token: list/append tokens
	token/type: <newline>
	token/value: newline
	)
	\|
	any space #"^@" to end
	(
	token: list/append tokens
	token/type: <null>
	token/value: mark
	)
	\|
	some space
	\|
	#{E2808C} ; used by Replpad
	\|
	copy part [
	#"[" \| #"(" \| "#[" \| "#(" \| "#{" \| "2#{" \| "16#{" \| "64#{"
	]
	(
	token: list/append tokens

	token/type: switch to text! part [
	"[" [<block>]
	"(" [<group>]
	"#[" [<construct>]
	"#(" [<map>]

	"#{" "2#{" "16#{" "64#{" [
	in-binary: yes
	<binary>
	]
	]

	token/value: part
	token/is-open: yes
	)
	\|
	copy part [#"]" \| #")" \| #"}"]
	(
	token: list/append tokens
	token/value: part

	here: :token

	until [
	case [
	not here: here/back [
	; this token is a orphan, reported as comment
	token/type: <comment>
	token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Match)"]

	token: list/append tokens
	token/type: <newline>
	token/value: newline

	true ; at head -- no need to continue
	]

	here/type = <newline> [
	token/is-split: yes

	false ; continue looking
	]

	not here/is-open [
	false ; continue looking
	]

	all [
	token/value = #{5D} ; close bracket
	find [<group> <map> <binary>] here/type
	][
	; the current tag is an widow
	here/type: <comment>
	here/value: rejoin [#{3B20} here/value to binary! " ; DETECTED WIDOW (Bad Opener)"]
	here/is-open: no

	here: list/insert-after here
	here/type: <newline>
	here/value: newline

	here: here/back
	token/is-split: yes

	false ; skip the orphan and continue looking
	]

	all [
	token/value = #{29} ; close paren
	find [<block> <construct> <binary>] here/type
	][
	; this token is a orphan
	token/type: <comment>
	token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Group/Map Opener)"]

	token: list/append tokens
	token/type: <newline>
	token/value: newline

	true ; we're done
	]

	all [
	token/value = #{7D} ; close curly brace
	find [<block> <construct> <group> <map>] here/type
	][
	; this token is a orphan
	token/type: <comment>
	token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Binary Opener)"]

	token: list/append tokens
	token/type: <newline>
	token/value: newline

	true ; we're done
	]

	<else> [
	; we have a closer
	here/is-open: no

	token/type: switch here/type [
	<block> [</block>]
	<group> [</group>]
	<construct> [</construct>]
	<map> [</map>]

	<binary> [
	in-binary: no
	</binary>
	]
	]

	if token/is-split [
	if here/next/type = <comment> [
	here: here/next
	]

	; remove extraneous newlines at the start of this container
	while [
	here/next/type = <newline>
	][
	list/remove here/next
	]

	; remove extraneous newlines at the end of this container
	while [
	token/back/type = <newline>
	][
	list/remove token/back
	]

	if not same? here token/back [
	here: list/insert-after here
	here/type: <newline>
	here/value: newline
	]

	here: list/insert-before token
	here/type: <newline>
	here/value: newline
	]

	true ; we've found a match
	]
	]
	]
	)
	\|
	#";" any space copy part any [
	some chars \| space
	\|
	#"[" \| #"]" \| #"(" \| #")" \| #"{" \| #"}"
	]
	(
	replace/all part #{E2808C} #{} ; used by Replpad

	token: list/append tokens
	token/type: <comment>
	token/value: either all [part not empty? part] [
	rejoin [#{3B20} part] ; comment
	][
	#{3B}
	]
	)
	\|
	skip
	(
	token: list/append tokens
	case [
	; might need more of these exceptions
	did parse mark [
	[
	"<-" here: space
	\|
	"@" some chars here:
	]
	][
	token/type: <text>
	token/value: copy/part mark here
	]

	; an indulgence--standardize case for 'Rebol'
	did parse mark [
	[#"R" \| #"r"]
	[#"E" \| #"e"]
	[#"B" \| #"b"]
	[#"O" \| #"o"]
	[#"L" \| #"l"]
	here: space
	][
	token/type: <text>
	token/value: "Rebol"
	]


	all [
	not in-binary
	not error? trap [
	set [part here] transcode/next mark
	]
	; Transcode/Date bug: https://github.com/metaeducation/ren-c/issues/1109
	not date? part
	][
	token/type: <text>
	token/value: copy/part mark here
	]

	did parse mark [some chars here:] [
	; binary blobs or kwatz!
	token/type: <text>
	token/value: copy/part mark here
	]

	here: next mark [ ; ?!?
	token/type: <comment>
	rejoin [#{3B203F213F20} copy/part mark here] ; "; ?!? "

	token: list/insert-after token
	token/type: <newline>
	token/value: newline
	]
	]
	)

	:here
	]
	]

	rejoin collect [
	token: tokens/first
	indent: 1

	keep options/indent

	while [token] [
	if find [</block> </group> </construct> </map> </binary>] token/type [
	indent: me - 1
	]

	if token/is-open [
	token/type: <comment>
	token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED WIDOW (Unmatched Opener)"]

	token: list/insert-after token
	token/type: <newline>
	token/value: newline

	token: token/back
	]

	case [
	blank? token/back []

	token/type = <null> []

	token/type = <newline> []

	token/back/type = <newline> [
	keep options/indent
	keep pick indents min max-indent indent
	]

	token/type = <comment> [
	keep #{2020}
	]

	token/back/type = <null> [
	keep #{20}
	]

	find [<block> <group> <construct> <map> <binary>] token/back/type []

	find [</block> </group> </construct> </map> </binary>] token/type []

	all [
	token/type = <block>
	find [</block> </construct>] token/back/type
	token/back/back/type = <newline>
	][]

	<else> [
	keep #{20}
	]
	]

	if find [<block> <group> <construct> <map> <binary>] token/type [
	indent: me + 1
	]

	keep token/value
	token: token/next
	]

	if all [
	tokens/last
	tokens/last/type <> <newline>
	][
	keep newline
	]
	]
	]
	]