Last active
February 10, 2021 23:25
-
-
Save rgchris/7d307bd6b23341370e53573b14467285 to your computer and use it in GitHub Desktop.
Clean Script for R3C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rebol [ | |
Title: "Rebol Script Cleaner (Pretty Printer)" | |
Date: 10-Feb-2021 | |
File: %clean-script.reb | |
Version: 1.2.3 | |
Type: module | |
Name: rgchris.clean-script | |
Exports: [clean-script] | |
Author: "Christopher Ross-Gill" | |
Purpose: { | |
Cleans (pretty prints) Rebol scripts by parsing the Rebol code | |
and supplying standard indentation and spacing. | |
} | |
History: [ | |
"Christopher Ross-Gill" 1.2.3 10-Feb-2021 "Clears extra newlines at start/end of containers" | |
"Christopher Ross-Gill" 1.2.2 23-Jan-2021 "Further binary handling, introduce options" | |
"Christopher Ross-Gill" 1.2.1 05-Jan-2020 "Indent binary values" | |
"Christopher Ross-Gill" 1.2.0 05-Jan-2020 "Rewrite" | |
"Carl Sassenrath" 1.1.0 29-May-2003 {Fixes indent and parse rule.} | |
"Carl Sassenrath" 1.0.0 27-May-2000 "Original program." | |
] | |
] | |
list: make object! [ | |
new: does [ | |
copy [ | |
first _ | |
last _ | |
; count 0 | |
] | |
] | |
make-node: func [] [ | |
copy [ | |
parent _ | |
back _ | |
next _ | |
type _ | |
value _ | |
is-split _ | |
is-open _ | |
] | |
] | |
insert-before: func [ | |
item [block!] | |
<local> node | |
][ | |
node: make-node | |
node/parent: item/parent | |
; node/parent/count: node/parent/count + 1 | |
node/back: item/back | |
node/next: item | |
either blank? item/back [ | |
item/parent/first: node | |
][ | |
item/back/next: node | |
] | |
item/back: node | |
] | |
insert-after: func [ | |
item [block!] | |
<local> node | |
][ | |
node: make-node | |
node/parent: item/parent | |
; node/parent/count: node/parent/count + 1 | |
node/back: item | |
node/next: item/next | |
either blank? item/next [ | |
item/parent/last: node | |
][ | |
item/next/back: node | |
] | |
item/next: node | |
] | |
insert: func [ | |
list [block!] | |
][ | |
either list/first [ | |
insert-before list/first | |
][ | |
list/first: list/last: make-node | |
list/first/parent: list | |
list/first | |
] | |
] | |
append: func [ | |
list [block!] | |
][ | |
either list/last [ | |
insert-after list/last | |
][ | |
insert list | |
] | |
] | |
remove: func [ | |
item [block!] | |
][ | |
either item/back [ | |
item/back/next: item/next | |
][ | |
item/parent/first: item/next | |
] | |
either item/next [ | |
item/next/back: item/back | |
][ | |
item/parent/last: item/back | |
] | |
item/parent: _ | |
item/next: _ | |
item/back: _ | |
item | |
] | |
] | |
mapify: func [ | |
spec [<opt> block! map!] | |
<local> map keys | |
][ | |
switch type-of :spec [ | |
null [make map! []] | |
map! [spec] | |
block! [ | |
map: make map! [] | |
keys: collect [ | |
do map-each value spec [ | |
either match set-word! value [ | |
append to set-path! [map] keep to word! value | |
][ | |
:value | |
] | |
] | |
] | |
for-each key difference keys words-of map [ | |
put map key _ | |
] | |
map | |
] | |
] | |
] | |
clean-script: use [ | |
indents max-indent space chars | |
][ | |
max-indent: 16 ; adjust this number for more indent levels | |
indents: collect [ | |
repeat x max-indent [ | |
keep rejoin collect [ | |
repeat x x [ | |
keep either x = 1 [#{}] [#{20202020}] | |
] | |
] | |
] | |
] | |
space: charset "^- " | |
chars: complement charset "^/^- [](){}" | |
func [ | |
"Returns source with standard spacing (pretty printed)." | |
source [binary!] "Original source" | |
/with "Specify optional parameters" | |
options [map! block!] "Optional parameters" | |
<local> tokens part mark here token indent in-binary | |
][ | |
options: mapify :options | |
options/indent: switch type-of :options/indent [ | |
binary! [options/indent] | |
text! [to binary! options/indent] | |
default [#{}] | |
] | |
tokens: list/new | |
in-binary: no | |
parse source [ | |
any [ | |
copy part [ | |
"#!" some [ | |
some chars | space | |
| | |
#"[" | #"]" | #"(" | #")" | #"{" | #"}" | |
] newline | |
] | |
( | |
token: list/append tokens | |
token/type: <comment> | |
token/value: part | |
) | |
] | |
any [ | |
mark: | |
newline | |
( | |
token: list/append tokens | |
token/type: <newline> | |
token/value: newline | |
) | |
| | |
any space #"^@" to end | |
( | |
token: list/append tokens | |
token/type: <null> | |
token/value: mark | |
) | |
| | |
some space | |
| | |
#{E2808C} ; used by Replpad | |
| | |
copy part [ | |
#"[" | #"(" | "#[" | "#(" | "#{" | "2#{" | "16#{" | "64#{" | |
] | |
( | |
token: list/append tokens | |
token/type: switch to text! part [ | |
"[" [<block>] | |
"(" [<group>] | |
"#[" [<construct>] | |
"#(" [<map>] | |
"#{" "2#{" "16#{" "64#{" [ | |
in-binary: yes | |
<binary> | |
] | |
] | |
token/value: part | |
token/is-open: yes | |
) | |
| | |
copy part [#"]" | #")" | #"}"] | |
( | |
token: list/append tokens | |
token/value: part | |
here: :token | |
until [ | |
case [ | |
not here: here/back [ | |
; this token is a orphan, reported as comment | |
token/type: <comment> | |
token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Match)"] | |
token: list/append tokens | |
token/type: <newline> | |
token/value: newline | |
true ; at head -- no need to continue | |
] | |
here/type = <newline> [ | |
token/is-split: yes | |
false ; continue looking | |
] | |
not here/is-open [ | |
false ; continue looking | |
] | |
all [ | |
token/value = #{5D} ; close bracket | |
find [<group> <map> <binary>] here/type | |
][ | |
; the current tag is an widow | |
here/type: <comment> | |
here/value: rejoin [#{3B20} here/value to binary! " ; DETECTED WIDOW (Bad Opener)"] | |
here/is-open: no | |
here: list/insert-after here | |
here/type: <newline> | |
here/value: newline | |
here: here/back | |
token/is-split: yes | |
false ; skip the orphan and continue looking | |
] | |
all [ | |
token/value = #{29} ; close paren | |
find [<block> <construct> <binary>] here/type | |
][ | |
; this token is a orphan | |
token/type: <comment> | |
token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Group/Map Opener)"] | |
token: list/append tokens | |
token/type: <newline> | |
token/value: newline | |
true ; we're done | |
] | |
all [ | |
token/value = #{7D} ; close curly brace | |
find [<block> <construct> <group> <map>] here/type | |
][ | |
; this token is a orphan | |
token/type: <comment> | |
token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED ORPHAN (No Binary Opener)"] | |
token: list/append tokens | |
token/type: <newline> | |
token/value: newline | |
true ; we're done | |
] | |
<else> [ | |
; we have a closer | |
here/is-open: no | |
token/type: switch here/type [ | |
<block> [</block>] | |
<group> [</group>] | |
<construct> [</construct>] | |
<map> [</map>] | |
<binary> [ | |
in-binary: no | |
</binary> | |
] | |
] | |
if token/is-split [ | |
if here/next/type = <comment> [ | |
here: here/next | |
] | |
; remove extraneous newlines at the start of this container | |
while [ | |
here/next/type = <newline> | |
][ | |
list/remove here/next | |
] | |
; remove extraneous newlines at the end of this container | |
while [ | |
token/back/type = <newline> | |
][ | |
list/remove token/back | |
] | |
if not same? here token/back [ | |
here: list/insert-after here | |
here/type: <newline> | |
here/value: newline | |
] | |
here: list/insert-before token | |
here/type: <newline> | |
here/value: newline | |
] | |
true ; we've found a match | |
] | |
] | |
] | |
) | |
| | |
#";" any space copy part any [ | |
some chars | space | |
| | |
#"[" | #"]" | #"(" | #")" | #"{" | #"}" | |
] | |
( | |
replace/all part #{E2808C} #{} ; used by Replpad | |
token: list/append tokens | |
token/type: <comment> | |
token/value: either all [part not empty? part] [ | |
rejoin [#{3B20} part] ; comment | |
][ | |
#{3B} | |
] | |
) | |
| | |
skip | |
( | |
token: list/append tokens | |
case [ | |
; might need more of these exceptions | |
did parse mark [ | |
[ | |
"<-" here: space | |
| | |
"@" some chars here: | |
] | |
][ | |
token/type: <text> | |
token/value: copy/part mark here | |
] | |
; an indulgence--standardize case for 'Rebol' | |
did parse mark [ | |
[#"R" | #"r"] | |
[#"E" | #"e"] | |
[#"B" | #"b"] | |
[#"O" | #"o"] | |
[#"L" | #"l"] | |
here: space | |
][ | |
token/type: <text> | |
token/value: "Rebol" | |
] | |
all [ | |
not in-binary | |
not error? trap [ | |
set [part here] transcode/next mark | |
] | |
; Transcode/Date bug: https://github.com/metaeducation/ren-c/issues/1109 | |
not date? part | |
][ | |
token/type: <text> | |
token/value: copy/part mark here | |
] | |
did parse mark [some chars here:] [ | |
; binary blobs or kwatz! | |
token/type: <text> | |
token/value: copy/part mark here | |
] | |
here: next mark [ ; ?!? | |
token/type: <comment> | |
rejoin [#{3B203F213F20} copy/part mark here] ; "; ?!? " | |
token: list/insert-after token | |
token/type: <newline> | |
token/value: newline | |
] | |
] | |
) | |
:here | |
] | |
] | |
rejoin collect [ | |
token: tokens/first | |
indent: 1 | |
keep options/indent | |
while [token] [ | |
if find [</block> </group> </construct> </map> </binary>] token/type [ | |
indent: me - 1 | |
] | |
if token/is-open [ | |
token/type: <comment> | |
token/value: rejoin [#{3B20} token/value to binary! " ; DETECTED WIDOW (Unmatched Opener)"] | |
token: list/insert-after token | |
token/type: <newline> | |
token/value: newline | |
token: token/back | |
] | |
case [ | |
blank? token/back [] | |
token/type = <null> [] | |
token/type = <newline> [] | |
token/back/type = <newline> [ | |
keep options/indent | |
keep pick indents min max-indent indent | |
] | |
token/type = <comment> [ | |
keep #{2020} | |
] | |
token/back/type = <null> [ | |
keep #{20} | |
] | |
find [<block> <group> <construct> <map> <binary>] token/back/type [] | |
find [</block> </group> </construct> </map> </binary>] token/type [] | |
all [ | |
token/type = <block> | |
find [</block> </construct>] token/back/type | |
token/back/back/type = <newline> | |
][] | |
<else> [ | |
keep #{20} | |
] | |
] | |
if find [<block> <group> <construct> <map> <binary>] token/type [ | |
indent: me + 1 | |
] | |
keep token/value | |
token: token/next | |
] | |
if all [ | |
tokens/last | |
tokens/last/type <> <newline> | |
][ | |
keep newline | |
] | |
] | |
] | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment