Skip to content

Instantly share code, notes, and snippets.

@greggirwin
Last active June 2, 2016 03:55
Show Gist options
  • Save greggirwin/66d7c6892fc310097cd91ab354189542 to your computer and use it in GitHub Desktop.
Save greggirwin/66d7c6892fc310097cd91ab354189542 to your computer and use it in GitHub Desktop.
An updated `split` function for Red
map-each: function [
"Evaluates body for each value(s) in a series, returning all results."
'word [word! block!] "Word, or words, to set on each iteration"
data [series! map!]
body [block!]
] [
collect [
foreach :word data [
if not unset? set/any 'tmp do body [keep/only :tmp]
]
]
]
collect: function [
"Collect in a new block all the values passed to KEEP function from the body block"
body [block!] "Block to evaluate"
/into "Insert into a buffer instead (returns position after insert)"
collected [series!] "The buffer series (modified)"
][
keep: func [v /only][either only [append/only collected v][append collected v]]
unless collected [collected: make block! 16]
parse body rule: [ ;-- selective binding (needs BIND/ONLY support)
any [pos: ['keep | 'collected] (pos/1: bind pos/1 'keep) | any-string! | into rule | skip]
]
do body
either into [collected][head collected]
]
split: function [
"Split a series into pieces; fixed or variable size, fixed number, or at delimiters"
series [series!] "The series to split"
;!! If we support /at, dlm could be any-value.
dlm ;[block! integer! char! bitset! any-string! any-function!] "Split size, delimiter(s), predicate, or rule(s)."
/parts "If dlm is an integer, split into n pieces, rather than pieces of length n."
/at "Split into 2, at the index position if an integer or the first occurrence of the dlm"
][
if any-function? :dlm [
res: reduce [ copy [] copy [] ]
foreach value series [
append/only pick res make logic! dlm :value :value
]
return res
]
if at [
return reduce either integer? dlm [
[
copy/part series dlm
copy system/words/at series dlm + 1
]
][
;-- Without adding a /tail refinement, we don't know if they want
; to split at the head or tail of the delimiter, so we'll exclude
; the delimiter from the result entirely. They know what the dlm
; was that they passed in, so they can add it back to either side
; of the result if they want to.
[
copy/part series find series :dlm
copy find/tail series :dlm
]
]
]
;print ['split 'parts? parts mold series mold dlm]
either all [block? dlm parse dlm [some integer!]][
map-each len dlm [
either positive? len [
copy/part series series: skip series len
][
series: skip series negate len
() ;-- return unset so that nothing is added to output
]
]
][
size: dlm ;-- alias for readability
res: collect [
;print ['split 'parts? parts mold series mold dlm newline]
parse series case [
all [integer? dlm parts][
if size < 1 [cause-error 'Script 'invalid-arg size]
count: size - 1
piece-size: to integer! round/down divide length? series size
if zero? piece-size [piece-size: 1]
[
count [copy series piece-size skip (keep/only series)]
copy series to end (keep/only series)
]
]
integer? dlm [
if size < 1 [cause-error 'Script 'invalid-arg size]
[any [copy series 1 size skip (keep/only series)]]
]
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm]
[any [mk1: some [mk2: dlm break | skip] (keep/only copy/part mk1 mk2)]]
]
]
]
;-- Special processing, to handle cases where they spec'd more items in
; /parts than the series contains (so we want to append empty items),
; or where the dlm was a char/string/charset and it was the last char
; (so we want to append an empty field that the above rule misses).
fill-val: does [copy either any-block? series [ [] ][ "" ]]
add-fill-val: does [append/only res fill-val]
case [
all [integer? size parts][
;-- If the result is too short, i.e., less items than 'size, add
; empty items to fill it to 'size.
; We loop here, because insert/dup doesn't copy the value inserted.
if size > length? res [
loop (size - length? res) [add-fill-val]
]
]
;-- integer? size
; If they spec'd an integer size, but did not use /parts, there is
; no special filing to be done. The final element may be less than
; size, which is intentional.
;--
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm]
;-- If the last thing in the series is a delimiter, there is an
; implied empty field after it, which we add here.
case [
bitset? dlm [
;-- ATTEMPT is here because LAST will return NONE for an
; empty series, and finding none in a bitest is not allowed.
if attempt [find dlm last series][add-fill-val]
]
char? dlm [
if dlm = last series [add-fill-val]
]
string? dlm [
if all [
find series dlm
empty? find/last/tail series dlm
][add-fill-val]
]
]
]
]
res
]
]
test: func [block expected-result /local res err] [
if error? set/any 'err try [
print [mold/only :block newline tab mold res: do block]
if res <> expected-result [print [tab 'FAILED! tab 'expected mold expected-result]]
][
print [mold/only :block newline tab "ERROR!" mold err]
]
]
test [split "1234567812345678" 4] ["1234" "5678" "1234" "5678"]
test [split "1234567812345678" 3] ["123" "456" "781" "234" "567" "8"]
test [split "1234567812345678" 5] ["12345" "67812" "34567" "8"]
test [split/parts [1 2 3 4 5 6] 2] [[1 2 3] [4 5 6]]
test [split/parts "1234567812345678" 2] ["12345678" "12345678"]
test [split/parts "1234567812345678" 3] ["12345" "67812" "345678"]
test [split/parts "1234567812345678" 5] ["123" "456" "781" "234" "5678"]
; Dlm longer than series
test [split/parts "123" 6] ["1" "2" "3" "" "" ""] ;or ["1" "2" "3"]
test [split/parts [1 2 3] 6] [[1] [2] [3] [] [] []] ;or [1 2 3]
;test [split/parts [1 2 3] 6] [[1] [2] [3] none none none] ;or [1 2 3]
test [split [1 2 3 4 5 6] [2 1 3]] [[1 2] [3] [4 5 6]]
test [split "1234567812345678" [4 4 2 2 1 1 1 1]] ["1234" "5678" "12" "34" "5" "6" "7" "8"]
test [split first [(1 2 3 4 5 6 7 8 9)] 3] [(1 2 3) (4 5 6) (7 8 9)]
;!! Red doesn't have binary! yet
;test [split #{0102030405060708090A} [4 3 1 2]] [#{01020304} #{050607} #{08} #{090A}]
test [split [1 2 3 4 5 6] [2 1]] [[1 2] [3]]
test [split [1 2 3 4 5 6] [2 1 3 5]] [[1 2] [3] [4 5 6] []]
test [split [1 2 3 4 5 6] [2 1 6]] [[1 2] [3] [4 5 6]]
; Old design for negative skip vals
;test [split [1 2 3 4 5 6] [3 2 2 -2 2 -4 3]] [[1 2 3] [4 5] [6] [5 6] [3 4 5]]
; New design for negative skip vals
test [split [1 2 3 4 5 6] [2 -2 2]] [[1 2] [5 6]]
test [split "abc,de,fghi,jk" #","] ["abc" "de" "fghi" "jk"]
;!! Red doesn't have tag! yet
;test [split "abc<br>de<br>fghi<br>jk" <br>] ["abc" "de" "fghi" "jk"]
test [split "a.b.c" "."] ["a" "b" "c"]
test [split "c c" " "] ["c" "c"]
test [split "1,2,3" " "] ["1,2,3"]
test [split "1,2,3" ","] ["1" "2" "3"]
test [split "1,2,3," ","] ["1" "2" "3" ""]
test [split "1,2,3," charset ",."] ["1" "2" "3" ""]
test [split "1.2,3." charset ",."] ["1" "2" "3" ""]
test [split "-a-a" ["a"]] ["-" "-"]
test [split "-a-a'" ["a"]] ["-" "-" "'"]
;-------------------------------------------------------------------------------
; to/thru bitset! is broken in R3 now.
test [split "abc|de/fghi:jk" charset "|/:"] ["abc" "de" "fghi" "jk"]
; to/thru block! is broken in R3 now.
test [split "abc^M^Jde^Mfghi^Jjk" [crlf | #"^M" | newline]] ["abc" "de" "fghi" "jk"]
test [split "abc de fghi jk" [some #" "]] ["abc" "de" "fghi" "jk"]
;-------------------------------------------------------------------------------
test [split [1 2 3 4 5 6] :even?] [[2 4 6] [1 3 5]]
test [split [1 2 3 4 5 6] :odd?] [[1 3 5] [2 4 6]]
test [split [1 2.3 /a word "str" #iss x: :y] :refinement?] [[/a] [1 2.3 word "str" #iss x: :y]]
test [split [1 2.3 /a word "str" #iss x: :y] :number?] [[1 2.3] [/a word "str" #iss x: :y]]
test [split [1 2.3 /a word "str" #iss x: :y] :any-word?] [[/a word #iss x: :y] [1 2.3 "str"]]
;-------------------------------------------------------------------------------
test [split/at [1 2.3 /a word "str" #iss x: :y] 4] [[1 2.3 /a word] ["str" #iss x: :y]]
;!! Splitting /at with a non-integer excludes the delimiter from the result
test [split/at [1 2.3 /a word "str" #iss x: :y] "str"] [[1 2.3 /a word] [#iss x: :y]]
test [split/at [1 2.3 /a word "str" #iss x: :y] 'word] [[1 2.3 /a] ["str" #iss x: :y]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment