Last active
January 26, 2021 14:54
-
-
Save rgchris/b31ce985f5260e5f5bd0be76255669ab to your computer and use it in GitHub Desktop.
Parse Machine for R3C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rebol [ | |
Title: "Parse Machine" | |
Author: "Christopher Ross-Gill" | |
Date: 24-Jan-2021 | |
Home: https://github.com/rgchris/Scripts | |
File: %parser.reb | |
Version: 0.2.2 | |
Purpose: { | |
Provides a versatile wrapper around PARSE for handling diverse | |
data formats. | |
} | |
Rights: http://opensource.org/licenses/Apache-2.0 | |
Type: module | |
Name: rgchris-parser | |
Exports: [parser] | |
Notes: { | |
The design of Parse Machine is intended to anticipate stream parsing, | |
but will be dependent on supplied rules to fulfil that potential. | |
As rules are just Parse fragments, there's not yet a useful way to | |
express the differing handling of end-of-stream vs. end-of-buffer | |
(the latter assuming there might be more to come). | |
} | |
] | |
; Bind the exported word PARSER (listed under Exports in the header) to the
; value of SELF. NOTE(review): SELF is presumably this module's own context,
; which would let callers write parser/init, parser/start, etc. -- confirm.
parser: :self | |
new: func [
    "Register a new Parse Machine-based codec"
    grammar [word!] "Grammar Name"
    suffixes [block!] "File Suffix(es)"
    options [block!] "Option flags"
    rules [block! map!] "Parse Machine States"
    <local> flags codec
][
    ; Reserve a slot in the codec registry when the grammar is not yet known.
    any [
        in system/codecs grammar
        extend system/codecs grammar _
    ]

    ; Build a map keyed by option name, pairing every name with blank.
    flags: make map! collect [
        keep []  ; force collect to return a block
        for-each option options [
            keep option
            keep _
        ]
    ]

    ; Assemble the codec object; the rules are bound to the parser module so
    ; that words used inside them resolve against this module's definitions.
    codec: make object! compose/only [
        name: quote (grammar)
        suffixes: (suffixes)
        identify?: _
        rules: (bind rules parser)
        options: (flags)
        decode: _
        encode: _
    ]

    ; Install the codec (replacing whatever occupied the slot) and return it.
    set in system/codecs grammar codec
]
; CUE makes SAVED-STATE the machine's active state by assigning it to the
; STATE word (STATE is not declared local here, so the assignment targets the
; module-level STATE). The assigned map is also the function's result.
cue: func [ | |
saved-state [map!] | |
][ | |
state: :saved-state | |
] | |
init: func [
    "Create a new Parse Machine instance"
    grammar [word! path!] "Language(/State) to use"
    source [binary! text!] "Series to use"
    <local> state codec
][
    ; A two-part path names both the grammar and the state to begin in;
    ; split it into GRAMMAR and the local STATE, rejecting any other shape.
    if all [
        path? grammar
        not parse grammar [set grammar word! set state word! end]
    ][
        fail "Could not engage Parse Machine"
    ]

    if not in system/codecs grammar [
        fail ["Grammar <" uppercase form grammar "> not available"]
    ]

    codec: system/codecs/:grammar

    ; No explicit starting state: fall back to the first entry of the
    ; grammar's rules (its first key when the rules are held in a map).
    if null? :state [
        state: first either map? get in codec 'rules [
            words of codec/rules
        ][
            codec/rules
        ]
    ]

    ; Build the fresh state map and install it as the active state.
    cue make map! reduce [
        'grammar grammar
        'current state
        'index source
        'prior _
        'return _
        'end-of-stream true
        'is-paused false
        'is-done false
        'rule _
        'active _
        'options codec/options
        'errors _
        'emit _
        'warn _
        'bomb _
    ]
]
; EMIT evaluates state/emit followed by VALUE and the current STATE, then
; returns VALUE. INIT stores blank in the emit slot.
; NOTE(review): presumably the caller installs a two-argument handler
; function there before parsing, making this line a call -- confirm.
emit: func [ | |
value | |
][ | |
state/emit value state | |
value | |
] | |
; WARN joins MESSAGE into one string with UNSPACED (ENSURE insists the result
; is text!) and evaluates state/warn followed by that string and the current
; STATE. The result is the original MESSAGE argument, not the joined string.
; NOTE(review): state/warn is blank after INIT; presumably the caller
; installs a handler function there -- confirm.
warn: func [ | |
message [text! block!] | |
][ | |
state/warn ensure text! unspaced message state | |
message | |
] | |
; BOMB joins MESSAGE into one string with UNSPACED (ENSURE insists the result
; is text!) and evaluates state/bomb followed by that string and the current
; STATE. Unlike WARN there is no trailing expression, so the function's
; result is whatever that last evaluation produces.
; NOTE(review): state/bomb is blank after INIT; presumably the caller
; installs a handler that reports a fatal error -- confirm.
bomb: func [ | |
message [text! block!] | |
][ | |
state/bomb ensure text! unspaced message state | |
] | |
; Module-level slot for the active machine state. It starts out blank and is
; replaced with a map! by CUE (which INIT calls with a freshly built map).
state: _ | |
use: func [
    "Switch the Parse Machine to the named state"
    'target [path! word!] "Name of the state to make current"
    /mark "Also remember the outgoing state so RETURN can come back to it"
    <local> rule
][
    if select state/options 'debug [
        probe to tag! form target
    ]

    ; Look the rule up before touching any bookkeeping, so an unknown state
    ; name fails without leaving prior/return/current half-updated.
    rule: any [
        select system/codecs/(state/grammar)/rules :target
        fail ["No Such Parser State:" uppercase form target]
    ]

    state/prior: state/current
    if mark [state/return: state/current]
    state/current: target

    ; Both slots receive the rule: ACTIVE is what CYCLE runs, while RULE keeps
    ; a copy that RESUME restores after PAUSE has swapped ACTIVE out.
    ; The rule is also the function's result.
    state/rule: state/active: :rule
]
return: func [
    "Return to the Marked State"
][
    ; Without a marked state there is nowhere to go back to; say so directly
    ; rather than letting a blank reach USE and trip its argument type check.
    if not state/return [
        fail "No marked state to return to"
    ]

    ; USE makes the marked state current and stores its rule in state/rule.
    use :state/return

    ; The mark is single-use: clear it once it has been consumed.
    state/return: _

    ; Hand back the rule that is now in effect.
    state/rule
]
; CYCLE is the rule START and RESUME hand to PARSE. Each pass of the WHILE
; loop names state/index and then state/active, so the rule that runs is
; whatever state/active holds at that moment (USE, PAUSE, STOP and RESUME all
; reassign it). NOTE(review): the set-path state/index: presumably records
; the current input position into the state on every pass -- confirm against
; this interpreter's PARSE.
cycle: [ | |
while [state/index: state/active] | |
] | |
start: func [
    "Start the parser cycle"
][
    ; Guard clauses: there must be an input series and a state to begin in.
    if not any-series? state/index [
        fail "Parser not initialized correctly"
    ]
    if not state/current [
        fail "No active parser state"
    ]

    ; Activate the rule for the current state, then run the cycle over the
    ; input (case-sensitively).
    use :state/current
    parse/case state/index cycle

    self
]
stop: func [
    "Stop the parser cycle"
][
    ; Mark the machine as finished; RESUME refuses to continue once this
    ; flag is set.
    state/is-done: true

    ; Replace the active rule with one that always fails, so CYCLE's loop
    ; ends the next time it consults state/active.
    state/active: [fail]

    self
]
pause: func [
    "Pause the parser cycle"
][
    ; Swap in an always-failing rule so CYCLE's loop ends on its next pass.
    ; state/rule is left alone, which is what RESUME copies back into
    ; state/active.
    state/active: [fail]
    state/is-paused: true

    self
]
resume: func [
    "Resume the parser cycle"
][
    ; Guard clauses: an input series, a current state, and a machine that
    ; has not already been stopped.
    if not any-series? state/index [
        fail "Parser not initialized correctly"
    ]
    if not state/current [
        fail "No active parser state"
    ]
    if state/is-done [
        fail "Parser has already concluded"
    ]

    ; Clear the pause flag and reinstate the rule that PAUSE displaced.
    state/is-paused: false
    state/active: state/rule

    ; Continue the cycle from the position held in state/index.
    parse/case state/index cycle

    self
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment