Skip to content

Instantly share code, notes, and snippets.

@meijeru
Created March 5, 2018 21:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meijeru/202c25f124e075ff5be85dff1beeda15 to your computer and use it in GitHub Desktop.
Save meijeru/202c25f124e075ff5be85dff1beeda15 to your computer and use it in GitHub Desktop.
REBOL [
Title: "Red Lexical Scanner"
Author: "Nenad Rakocevic"
File: %lexer.r
Tabs: 4
Rights: "Copyright (C) 2011-2015 Nenad Rakocevic. All rights reserved."
License: "BSD-3 - https://github.com/red/red/blob/master/BSD-3-License.txt"
Comments: {Adapted by Rudolf W. Meijer for making the Red concordance.
This involves parsing Red, Red/System and REBOL programs by one lexer.
The changes are:
- the header rule accepts any word or path followed by a block
- the escaped rule accepts anything within #[ and ]
- throw-error prints the filename; global source-file must be set
It is a testimony to the syntactic similarity of all three languages
that the changes are so few ...
}]
;-- Patch NEW-LINE and NEW-LINE? natives to accept paren! --
append first find third :new-line block! paren!
append first find third :new-line? block! paren!
lexer: context [
verbose: 0
old-line: none
line: none ;-- source code lines counter
count?: yes ;-- if TRUE, lines counter is enabled
cnt: none ;-- counts nested {} in multi-line strings
pos: none ;-- source input position (error reporting)
mark: none ;-- use for keeping input cursor at same position
path: none ;-- path input position (error reporting)
s: none ;-- mark start position of new value
e: none ;-- mark end position of new value
series: none ;-- temporary hold last stack series
value: none ;-- new value
value2: none ;-- secondary new value
fail?: none ;-- used for failing some parsing rules
type: none ;-- define the type of the new value
rs?: no ;-- if TRUE, do lexing for Red/System
neg?: no ;-- if TRUE, denotes a negative number value
base: 16 ;-- binary base
otag: none
ot: none
ct: none
sep: none
year: none
month: none
day: none
hour: none
mn: none
sec: none
date: none
ee: none
;====== Parsing rules ======
four: charset "01234"
half: charset "012345"
non-zero: charset "123456789"
digit: union non-zero charset "0"
dot: #"."
comma: #","
byte: [
"25" half
| #"2" four digit
| #"1" digit digit
| opt #"0" non-zero digit
| 0 2 #"0" digit
| 1 2 #"0"
]
hexa: union digit charset "ABCDEF"
hexa-char: union hexa charset "abcdef"
alpha: charset [#"A" - #"Z" #"a" - #"z"]
base64-char: union digit union alpha charset "+/="
;-- UTF-8 encoding rules from: http://tools.ietf.org/html/rfc3629#section-4
UTF-8-BOM: #{EFBBBF}
ws-ASCII: charset " ^-^M" ;-- ASCII common whitespaces
ws-U+2k: charset [#"^(80)" - #"^(8A)"] ;-- Unicode spaces in the U+2000-U+200A range
UTF8-tail: charset [#"^(80)" - #"^(BF)"]
UTF8-1: charset [#"^(00)" - #"^(7F)"]
UTF8-2: reduce [
charset [#"^(C2)" - #"^(DF)"]
UTF8-tail
]
UTF8-3: reduce [
#{E0} charset [#"^(A0)" - #"^(BF)"] UTF8-tail
'| charset [#"^(E1)" - #"^(EC)"] 2 UTF8-tail
'| #{ED} charset [#"^(80)" - #"^(9F)"] UTF8-tail
'| charset [#"^(EE)" - #"^(EF)"] 2 UTF8-tail
]
UTF8-4: reduce [
#{F0} charset [#"^(90)" - #"^(BF)"] 2 UTF8-tail
'| charset [#"^(F1)" - #"^(F3)"] 3 UTF8-tail
'| #{F4} charset [#"^(80)" - #"^(8F)"] 2 UTF8-tail
]
UTF8-char: [pos: UTF8-1 | UTF8-2 | UTF8-3 | UTF8-4]
not-word-char: charset {/\^^,[](){}"#%$@:;}
not-word-1st: union union not-word-char digit charset {'}
not-file-char: charset {[](){}"@:;}
not-url-char: charset {[](){}";}
not-email-char: union not-file-char union ws-ASCII charset "<^/"
not-str-char: #"^""
not-mstr-char: #"}"
not-tag-1st: complement union ws-ASCII charset "=><[](){};^""
not-tag-char: complement charset ">"
tag-char: charset "<>"
caret-char: charset [#"^(40)" - #"^(5F)"]
non-printable-char: charset [#"^(00)" - #"^(1F)"]
pair-end: charset {^{"[]();:}
integer-end: charset {^{"[]();:xX}
path-end: charset {^{"[]();}
file-end: charset {^{[]();}
date-sep: charset "/-"
time-sep: charset "T/"
stop: none
control-char: reduce [ ;-- Control characters
charset [#"^(00)" - #"^(1F)"] ;-- C0 control codes
'| #"^(C2)" charset [#"^(80)" - #"^(9F)"] ;-- C1 control codes (UTF-8 encoded)
]
UTF8-filtered-char: [
[pos: stop :pos (fail?: [end skip]) | UTF8-char e: (fail?: none)]
fail?
]
UTF8-printable: [
[non-printable-char | not-str-char (fail?: [end skip]) | UTF8-char (fail?: none)]
fail?
]
;-- Whitespaces list from: http://en.wikipedia.org/wiki/Whitespace_character
ws: [
#"^/" (
if count? [
line: line + 1
stack/nl?: yes
]
)
| ws-ASCII ;-- only the common whitespaces are matched
| #{C2} [
#{85} ;-- U+0085 (Newline)
| #{A0} ;-- U+00A0 (No-break space)
]
;| #{E1} [
; #{9A80} ;-- U+1680 (Ogham space mark)
; | #{A08E} ;-- U+180E (Mongolian vowel separator)
;]
;| #{E2} [
; #{80} [
; ws-U+2k ;-- U+2000-U+200A range
; | #{A8} ;-- U+2028 (Line separator)
; | #{A9} ;-- U+2029 (Paragraph separator)
; | #{AF} ;-- U+202F (Narrow no-break space)
; ]
; | #{819F} ;-- U+205F (Medium mathematical space)
;]
;| #{E38080} ;-- U+3000 (Ideographic space)
]
newline-char: [
#"^/"
| #{C285} ;-- U+0085 (Newline)
| #{E280} [
#{A8} ;-- U+2028 (Line separator)
| #{A9} ;-- U+2029 (Paragraph separator)
]
]
counted-newline: [pos: #"^/" (line: line + 1)]
ws-no-count: [(count?: no) ws (count?: yes)]
any-ws: [pos: any ws]
symbol-rule: [
(stop: [not-word-char | ws-no-count | control-char | tag-char] otag: #"<" ot: none)
some [
otag ot: [#"/" (otag: [end skip] ot: back ot) :ot | none] ;-- a</b>
| #">" ct: (if ot [otag: [end skip] ct: back ot]) :ct ;-- a<b>
| UTF8-filtered-char
] e:
]
begin-symbol-rule: [ ;-- 1st char in symbols is restricted
(stop: [not-word-1st | ws-no-count | control-char])
UTF8-filtered-char
opt symbol-rule
]
path-rule: [
pos: slash :pos ( ;-- path detection barrier
stack/allocate block! 4
stack/push to type copy/part s e ;-- push 1st path element
)
some [
slash
s: [
integer-number-rule
| begin-symbol-rule (type: word!)
| paren-rule (type: paren!)
| #":" s: begin-symbol-rule (type: get-word!)
;@@ add more datatypes here
] (
stack/push either type = paren! [ ;-- append path element
value
][
to type copy/part s e
]
type: path!
)
]
opt [#":" (type: set-path!)]
e: [path-end | ws-no-count | end | (pos: path throw-error)] :e ;-- detect invalid tail characters
(value: stack/pop type)
]
word-rule: [
(type: word!)
#"%" [ws-no-count | pos: file-end :pos | end] (value: "%") ;-- special case for remainder op!
| path: s: begin-symbol-rule [
url-rule
| path-rule ;-- path matched
| (value: copy/part s e) ;-- word matched
opt [#":" (type: set-word!)]
]
]
get-word-rule: [
#":" (type: get-word!) s: begin-symbol-rule [
path-rule (
value/1: to get-word! value/1 ;-- workaround missing get-path! in R2
)
| (
type: get-word!
value: copy/part s e ;-- word matched
)
]
]
lit-word-rule: [
#"'" (type: word!) [
#"/" (type: lit-word! value: "/")
| s: begin-symbol-rule [
path-rule (type: lit-path!) ;-- path matched
| (
type: lit-word!
value: copy/part s e ;-- word matched
)
]
][s: #":" :s (throw-error) | none]
]
map-rule: [
"#(" (stack/allocate block! 10) any-value #")" (
stack/prefix #!map!
value: stack/pop block!
)
]
issue-rule: [#"#" (type: issue!) s: symbol-rule]
refinement-rule: [slash (type: refinement!) s: symbol-rule]
slash-rule: [s: [slash opt slash] e:]
hexa-rule: [2 8 hexa e: #"h" pos: [integer-end | ws-no-count | end ] :pos (type: integer!)]
sticky-word-rule: [ ;-- protect from sticky words typos
mark: [integer-end | ws-no-count | end | (pos: s throw-error)] :mark
]
tuple-value-rule: [
(type: tuple!)
byte dot byte 1 12 [dot byte] e:
]
tuple-rule: [tuple-value-rule sticky-word-rule]
time-rule: [
s: positive-integer-rule [
decimal-number-rule (value: as-time 0 value load-number copy/part s e neg?) ;-- mm:ss.dd
| (value2: load-number copy/part s e) [
#":" s: positive-integer-rule opt decimal-number-rule
(value: as-time value value2 load-number copy/part s e neg?) ;-- hh:mm:ss[.dd]
| (value: as-time value value2 0 neg?) ;-- hh:mm
]
] (type: time!)
]
month-rule: [(m: none)
"January" (m: 1)
| "February" (m: 2)
| "March" (m: 3)
| "April" (m: 4)
| "May" (m: 5)
| "June" (m: 6)
| "July" (m: 7)
| "August" (m: 8)
| "September" (m: 9)
| "October" (m: 10)
| "November" (m: 11)
| "December" (m: 12)
]
mon-rule: [(m: none)
"Jan" (m: 1)
| "Feb" (m: 2)
| "Mar" (m: 3)
| "Apr" (m: 4)
| "May" (m: 5)
| "Jun" (m: 6)
| "Jul" (m: 7)
| "Aug" (m: 8)
| "Sep" (m: 9)
| "Oct" (m: 10)
| "Nov" (m: 11)
| "Dec" (m: 12)
]
day-year-rule: [
(neg?: no) opt [#"-" (neg?: yes)]
s: 3 4 digit e: (year: load-number copy/part s e if neg? [year: 65536 - year])
| 1 2 digit e: (
value: load-number copy/part s e no
either day [year: value + pick [2000 1900] 50 > value][day: value]
)
]
date-rule: [
pos: [opt #"-" 1 4 digit date-sep | 8 digit #"T"] :pos [ ;-- quick lookhead
s: 8 digit ee: #"T" ( ;-- yyyymmddT
year: load-number copy/part s 4
month: load-number copy/part skip s 4 2
day: load-number copy/part skip s 6 2
date: make date! reduce [day month year]
) :ee
| day-year-rule sep: date-sep (sep: sep/1) [
s: 1 2 digit e: (month: load-number copy/part s e no)
| some alpha e: (
fail?: either all [parse/all copy/part s e [month-rule | mon-rule] m][month: m none][[end skip]]
) fail?
]
sep day-year-rule (
fail?: either all [day month year][
date: make date! reduce [day month year]
none
][[end skip]]
) fail?
| s: 4 digit #"-" (
year: load-number copy/part s 4
date: make date! reduce [1 1 year]
)[
"W" s: 2 digit (ee: none) opt [#"-" ee: non-zero] ( ;-- yyyy-Www
date: to-iso-week date load-number copy/part s 2
if ee [date: to-weekday date to integer! s/4 - #"0"] ;-- yyyy-Www-d
)
| s: 3 digit (date: date + (load-number copy/part s 3) - 1) ;-- yyyy-ddd
] (month: -1)
](
type: date!
if all [
month <> -1 any [date/year <> year date/month <> month date/day <> day]
][throw-error]
day: month: year: none
) opt [
time-sep (ee: no) [
s: 6 digit opt [#"." 1 9 digit ee:] ( ;-- Thhmmss[.sss]
hour: load-number copy/part s 2
mn: load-number copy/part skip s 2 2
sec: load-number either ee [copy/part skip s 4 ee][copy/part skip s 4 2]
date/time: as-time hour mn sec no
)
| 4 digit ( ;-- Thhmm
hour: load-number copy/part s 2
mn: load-number copy/part skip s 2 2
date/time: as-time hour mn 0 no
)
| s: positive-integer-rule (value: load-number copy/part s e)
#":" [(neg?: no) time-rule (date/time: value) | (throw-error)]
]
opt [
#"Z" | [#"-" (neg?: yes) | #"+" (neg?: no)][
s: 4 digit ( ;-- +/-hhmm
hour: load-number copy/part s e: skip s 2
mn: load-number copy/part e e: skip e 2
)
| 1 2 digit e: (hour: load-number copy/part s e mn: none) ;-- +/-h, +/-hh
opt [#":" s: 2 digit e: (mn: load-number copy/part s e)]
](
either all [mn find [15 45] mn: round/floor/to mn 15][
date: reduce [#!date! date as-time hour mn 0 neg?] ;-- special encoding for 15/45
][
date/zone: as-time hour any [mn 0] 0 neg?
]
)
]
] sticky-word-rule (value: date)
]
positive-integer-rule: [digit any digit e: (type: integer!)]
integer-number-rule: [
opt [#"-" (neg?: yes) | #"+" (neg?: no)] digit any [digit | #"'" digit] e:
(type: integer!)
]
integer-rule: [
pos: decimal-special e: ;-- escape path for NaN, INFs
(type: issue! value: load-number copy/part s e)
| (neg?: no) integer-number-rule
opt [decimal-number-rule | decimal-exp-rule e: (type: decimal!)]
opt [#"%" e: (type: issue!)]
sticky-word-rule
(value: load-number copy/part s e)
opt [
[#"x" | #"X"] (
type: pair!
value2: to pair! reduce [value 0]
)
[s: integer-number-rule | (type: pair! throw-error)]
mark: [pair-end | ws-no-count | end | (type: pair! throw-error)] :mark
(value2/2: load-number copy/part s e value: value2)
]
opt [#":" [time-rule | (throw-error)]]
]
decimal-special: [
s: "-0.0" | (neg?: no) opt [#"-" (neg?: yes)] "1.#" s: [
[[#"N" | #"n"] [#"a" | #"A"] [#"N" | #"n"]]
| [[#"I" | #"i"] [#"N" | #"n"] [#"F" | #"f"]]
]
]
decimal-exp-rule: [
[[#"e" | #"E"] opt [#"-" | #"+"] 1 3 digit]
]
decimal-number-rule: [
[dot | comma] digit any [digit | #"'" digit]
opt decimal-exp-rule e: (type: decimal!)
]
decimal-rule: [
decimal-number-rule opt [#"%" e: (type: issue!)]
sticky-word-rule
]
block-rule: [#"[" (stack/allocate block! 10) any-value #"]" (value: stack/pop block!)]
paren-rule: [#"(" (stack/allocate paren! 10) any-value #")" (value: stack/pop paren!)]
escaped-char: [
"^^(" [
[ ;-- special case first
"null" (value: #"^(00)")
| "back" (value: #"^(08)")
| "tab" (value: #"^(09)")
| "line" (value: #"^(0A)")
| "page" (value: #"^(0C)")
| "esc" (value: #"^(1B)")
| "del" (value: #"^~")
]
| pos: [2 6 hexa-char] e: ( ;-- Unicode values allowed up to 10FFFFh
either rs? [
value: to-char to-integer debase/base copy/part pos e 16
][value: encode-UTF8-char pos e]
)
] #")"
| #"^^" [
[
#"/" (value: #"^/")
| #"-" (value: #"^-")
| #"~" (value: #"^(del)")
| #"^^" (value: #"^^") ;-- caret escaping case
| #"{" (value: #"{")
| #"}" (value: #"}")
| #"^"" (value: #"^"")
]
| pos: caret-char (value: pos/1 - 64)
]
]
char-rule: [
{#"} (type: char!) [
s: escaped-char
| copy value UTF8-printable (value: as-binary value)
| #"^-" (value: s/1)
] {"}
]
line-string: [
{"} s: (type: string! stop: [not-str-char | newline-char])
any [{^^"} | escaped-char | UTF8-filtered-char]
e: {"}
]
nested-curly-braces: [
(cnt: 1 fail?: none)
any [[
counted-newline
| "^^^^"
| "^^{"
| "^^}"
| #"{" (cnt: cnt + 1)
| e: #"}" (if zero? cnt: cnt - 1 [fail?: [end skip]])
| UTF8-char
] fail?
]
#"}" (old-line: line)
]
multiline-string: [#"{" s: (type: string!) nested-curly-braces]
string-rule: [line-string | multiline-string]
tag-rule: [
#"<" s: not-tag-1st (type: tag!)
any [#"^"" thru #"^"" | #"'" thru #"'" | not-tag-char] e: #">"
]
email-rule: [
(stop: [not-email-char])
s: opt [some UTF8-filtered-char] #"@" (type: email!)
any UTF8-filtered-char e: (value: dehex copy/part s e)
]
base-2-rule: [
"2#{" (type: binary!) [
s: any [counted-newline | 8 [#"0" | #"1" ] | ws-no-count | comment-rule]
e: #"}" (base: 2)
| (pos: skip s -3 throw-error)
]
]
base-16-rule: [
opt "16" "#{" (type: binary!) [
s: any [counted-newline | 2 hexa-char | ws-no-count | comment-rule]
e: #"}" (base: 16)
| (pos: skip s -2 throw-error)
]
]
base-64-rule: [
"64#{" (type: binary!) [
s: any [counted-newline | base64-char | ws-no-count | comment-rule]
e: #"}" (
cnt: offset? s e
if all [0 < cnt cnt < 4][pos: skip s -4 throw-error]
base: 64
)
| (pos: skip s -4 throw-error)
]
]
binary-rule: [[base-16-rule | base-64-rule | base-2-rule] (old-line: line)]
file-rule: [
pos: #"%" (type: file! stop: [not-file-char | ws-no-count]) [
#"{" (throw-error)
| line-string e: (value: encode-file s e)
| s: any UTF8-filtered-char e: (value: copy/part s e)
]
]
url-rule: [
#":" (type: url! stop: [not-url-char | ws-no-count])
some UTF8-filtered-char e: (value: dehex copy/part s e)
]
escaped-rule: [
"#["
any-value
; any-ws [
; "true" (value: true)
; | "false" (value: false)
; | s: [
; "none!" | "logic!" | "block!" | "integer!" | "word!"
; | "set-word!" | "get-word!" | "lit-word!" | "refinement!"
; | "binary!" | "string!" | "char!" | "bitset!" | "path!"
; | "set-path!" | "lit-path!" | "native!" | "action!"
; | "issue!" | "paren!" | "function!"
; ] e: (value: get to word! copy/part s e)
; | "none" (value: none)
; ] any-ws
#"]"
]
comment-rule: [#";" [to #"^/" | to end]]
wrong-end: [(
ending: either 1 < length? stack/stk [
value: switch type?/word stack/top [
block! [#"]"]
paren! [#")"]
]
first [(throw-error/with ["missing" mold value "character"])]
][none]
)
ending
]
literal-value: [
pos: (e: none) s: [
comment-rule
| escaped-rule (stack/push value)
| tuple-rule (stack/push load-tuple copy/part s e)
| hexa-rule (stack/push decode-hexa copy/part s e)
| binary-rule (stack/push load-binary s e base)
| email-rule (stack/push to email! value)
| date-rule (stack/push value)
| integer-rule (stack/push value)
| decimal-rule (stack/push load-decimal copy/part s e)
| tag-rule (stack/push to tag! copy/part s e)
| word-rule (stack/push to type value)
| lit-word-rule (stack/push to type value)
| get-word-rule (stack/push to type value)
| refinement-rule (stack/push to refinement! copy/part s e)
| slash-rule (stack/push to word! copy/part s e)
| file-rule (stack/push load-file value)
| char-rule (stack/push decode-UTF8-char value)
| block-rule (stack/push value)
| paren-rule (stack/push value)
| string-rule (stack/push load-string s e)
| map-rule (stack/push value)
| issue-rule (stack/push to issue! copy/part s e)
]
]
any-value: [pos: any [literal-value | ws]]
header: [
pos: ["Red/System" | "Red" | "REBOL"]
any-ws block-rule (stack/push value)
| (throw-error/with "Invalid Red/REBOL program") end skip
]
program: [
pos: opt UTF-8-BOM
header
any-value
opt wrong-end
]
;====== Helper functions ======
stack: context [
stk: []
nl?: no
allocate: func [type [datatype!] size [integer!] /local new pos][
pos: insert/only tail stk new: make type size
if nl? [new-line back pos yes nl?: no]
new
]
prefix: func [value][insert/only last stk :value]
push: func [value][
value: insert/only tail last stk :value
if nl? [new-line back value yes nl?: no]
value
]
pop: func [type [datatype!] /local pos][
pos: back tail stk
nl?: new-line? pos
either any [type = path! type = set-path!][
change/only pos to type pos/1
][
if type <> type? pos/1 [
throw-error/with ["invalid" mold type "closing delimiter"]
]
]
also pos/1 remove pos
]
top: does [last stk]
reset: does [clear stk]
]
throw-error: func [/with msg [string! block!]][
print rejoin [
"*** Syntax Error: " either with [
uppercase/part reform msg 1
][
reform ["Invalid" mold type "value"]
]
"^/*** line: " line
"^/*** at: " mold copy/part pos 40
"^/*** in: " mold source-file
]
; either encap? [quit][halt]
]
pad-head: func [s [string!]][
head insert/dup s #"0" 8 - length? s
]
encode-UTF8-char: func [s [string!] e [string!] /local c code new][
c: debase/base pad-head copy/part s e 16
while [c/1 = 0][c: next c] ;-- trim heading zeros
code: to integer! c
case [
code <= 127 [
new: to char! code ;-- c <= 7Fh
]
code <= 2047 [ ;-- c <= 07FFh
new: (shift/left (shift code 6) or #"^(C0)" 8)
or (code and #"^(3F)") or #"^(80)"
]
code <= 65535 [ ;-- c <= FFFFh
new: (shift/left (shift code 12) or #"^(E0)" 16)
or (shift/left (shift code 6) and #"^(3F)" or #"^(80)" 8)
or (code and #"^(3F)") or #"^(80)"
]
code <= 1114111 [ ;-- c <= 10FFFFh
new: (shift/left (shift code 18) or #"^(F0)" 24)
or (shift/left (shift code 12) and #"^(3F)" or #"^(80)" 16)
or (shift/left (shift code 6) and #"^(3F)" or #"^(80)" 8)
or (code and #"^(3F)") or #"^(80)"
]
'else [
throw-error/with "Codepoints above U+10FFFF are not supported"
]
]
if integer? new [
new: debase/base to-hex new 16
remove-each byte new [byte = #"^(null)"]
]
new
]
decode-UTF8-char: func [value /redbin][
if all [not redbin char? value][return encode-char to integer! value]
value: switch/default length? value [
1 [value]
2 [
value: value and #{1F3F}
value: add shift/left value/1 6 value/2
]
3 [
value: value and #{0F3F3F}
value: add add
shift/left value/1 12
shift/left value/2 6
value/3
]
4 [
value: value and #{073F3F3F}
value: add add add
shift/left value/1 18
shift/left value/2 12
shift/left value/3 6
value/4
]
][
throw-error/with "Unsupported or invalid UTF-8 encoding"
]
either redbin [value][
encode-char to integer! value ;-- special encoding for Unicode char!
]
]
decode-UTF8-string: func [str [string!] /local new s e][
new: make string! length? str
parse/all str [
some [
s: UTF8-char e: (
append new debase/base skip decode-UTF8-char as-binary copy/part s e 7 16
)
]
]
head change/part str new tail str
]
encode-char: func [value [integer!]][
head insert to-hex value #"'"
]
decode-hexa: func [s [string!]][
to integer! debase/base s 16
]
as-time: func [h [integer!] m [integer!] s [integer! decimal!] neg? [logic!] /local t][
if any [all [h <> 0 m < 0] all [s s < 0]][type: time! throw-error]
t: to time! reduce [abs h abs m abs s]
either neg? [negate t][t]
]
to-weekday: func [d [date!] wd [integer!]][
if negative? wd: wd - d/weekday [wd: 7 + wd]
d + wd
]
to-iso-week: func [d [date!] w [integer!] /local wd d1][
d1: make date! reduce [1 1 d/year]
wd: d1/weekday
d1 + (w - 1 * 7 + (either wd < 5 [1][8]) - wd)
]
load-tuple: func [s [string!] /local new byte p e][
new: join make issue! 1 + length? s #"~"
byte: [p: 1 3 digit e: (append new skip to-hex load copy/part p e 6)]
unless parse s [byte 2 11 [dot byte]][throw-error]
new
]
load-number: func [s [string!]][
switch/default type [
#[datatype! decimal!][s: load-decimal s]
#[datatype! issue! ][
if s = "-0.0" [s: "0-"] ;-- re-encoded for consistency
s: to issue! either #"%" = last s [s][join "." s]
if neg? [append s #"-"]
]
][
unless find [integer! decimal!] type?/word s: to integer! s [throw-error]
]
s
]
load-decimal: func [s [string!]][
unless attempt [s: to decimal! s][throw-error]
s
]
load-string: func [s [string!] e [string!] /local new filter][
new: make string! offset? s e ;-- allocated size close to final size
filter: get pick [UTF8-char UTF8-filtered-char] s/-1 = #"{"
parse/all/case copy/part s e [
any [
escaped-char (insert tail new value)
| s: filter e: (insert/part tail new s e)
] ;-- exit on matching " or }
]
new
]
load-binary: func [s [string!] e [string!] base [integer!] /local new str][
new: make string! offset? s e ;-- allocated size above final size
parse/all/case s [
some [
copy str some base64-char (insert tail new str)
| ws | comment-rule
| #"}" end skip
]
]
new: debase/base new base
if none? new [throw-error]
new
]
load-file: func [s [string!]][
parse s [any [#"%" [2 hexa | (pos: skip pos negate 1 + length? s throw-error)] | skip]]
to file! replace/all dehex s #"\" #"/"
]
encode-file: func [s [string!] e [string!]][
replace/all copy/part s back e "%" "%25"
]
process: func [src [string! binary!] /local blk][
old-line: line: 1
count?: yes
blk: stack/allocate block! 100 ;-- root block
unless parse/all/case src program [throw-error]
stack/reset
blk
]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment