Red [
Title: "Partial grep implementation"
Purpose: {To search the input for lines containing a match
to the given pattern, specified as a regular expression}
Author: "Rudolf W. MEIJER"
Version: 0.4.0
Date: "24-Nov-2016"
Rights: "(c) Copyright 2016 Rudolf W. MEIJER"
History: [
[0.0.0 "2-Nov-2016" {Start of project}]
[0.1.0 "9-Nov-2016" {First incomplete working version}]
[0.2.0 "22-Nov-2016" {Ranges and repetition implemented}]
[0.3.0 "23-Nov-2016" {Refactoring}]
[0.4.0 "24-Nov-2016" {Added /quiet option}]
]
Notes: {see the GNU Grep 2.26 manual;
also the grep(1) Linux man page}
Language: 'English
]
grep-impl: context [
; character sets, all limited to 7-bit ASCII;
; the lower-case names match the class names registered in char-classes
_ASCII: charset [#"^(00)" - #"^(7F)"]
; letters and digits
_digit: charset [#"0" - #"9"]
_lower: charset [#"a" - #"z"]
_upper: charset [#"A" - #"Z"]
_alpha: union _lower _upper
_alnum: union _digit _alpha
_xdigit: union charset [#"a" - #"f" #"A" - #"F"] _digit
; white space (blank = space and tab) and control characters
_blank: charset [#" " #"^-"]
_space: union charset [#"^(0A)" - #"^(0D)"] _blank
_cntrl: charset [#"^(7F)" #"^(00)" - #"^(1F)"]
; punctuation, and the visible / printable sets derived from it
_punct: charset {!"#$%&'()*+,-./:;<=>?@[\]^^_`{|}~}
_graph: union _punct _alnum
_print: union charset " " _graph
; regex syntax characters:
; _meta   - characters that are not taken literally in a pattern
; _escape - characters accepted after a backslash
; _atom   - every ASCII character that is not in _meta
_meta: charset "|()?*+.[]{}\"
_escape: charset "|()?*+.[{\"
_atom: intersect _ASCII complement _meta
; lookup table for bracket-expression classes written as [:name:];
; keys are the class names, values are the charsets defined above
; (reduce replaces each _word by the bitset it refers to);
; queried with select by make-rules
char-classes: reduce [
    "upper" _upper
    "lower" _lower
    "alpha" _alpha
    "digit" _digit
    "xdigit" _xdigit
    "alnum" _alnum
    "blank" _blank
    "space" _space
    "cntrl" _cntrl
    "punct" _punct
    "graph" _graph
    "print" _print
]
; parse rules compiled from the pattern of the current grep call;
; assigned by grep, read by report
rules: none
; copies of grep's refinements, so that process and report can consult them;
; all fields are overwritten at the start of every grep call
options: context [
    case: none      ; /case   - case-sensitive comparison
    invert: none    ; /invert - select non-matching lines
    count: none     ; /count  - print only a count per source
    quiet: none     ; /quiet  - suppress output for sources without hits
]
; running totals for one grep call; reset by grep, updated by report,
; and printed in grep's closing summary line
stats: context [
    files: 0        ; number of files/urls handed to report with a block heading
    lines: 0        ; number of lines checked
    matched: 0      ; number of selected lines
    time: 0         ; start time of the call (grep stores now/time/precise here)
]
; make-rules: translate a regex string into a block of Red parse rules.
; The pattern is consumed one character at a time; the rule under
; construction is the first element of rule-stk, and "(" / ")" push and
; pop nested rule blocks. Returns a copy of the outermost rule block, or
; none after printing a "pattern error" message.
; NOTE(review): in the text as it stands, the lines that close the blocks
; of this definition (and separate either-branches) are absent - they must
; be restored before the function can be loaded. TODO confirm against the
; author's original.
make-rules: func [
{takes a regex and constructs the corresponding Red parser rules
which will do the matching; returns the rules}
regex [string!] "the pattern (regex)"
/local res pattern rule-stk rng-nr limits ll lo hi fch frst rng r qf
ch nr cr el lc rst class clb chset escaped compl brexp
; only 7-bit ASCII patterns are accepted (an empty regex also fails this
; test, since "some" needs at least one character)
unless parse regex [some _ASCII] [
print "pattern error, non-ASCII character(s) found"
return none
; set up: work on a copy of the regex; res is the outermost rule and is
; the only entry of the rule stack at the start
pattern: copy regex
rule-stk: copy []
; counter used to build the names of the words that hold bracket sets
rng-nr: 0
res: copy []
insert/only rule-stk res
escaped: false
; main loop: take the next pattern character and dispatch on it
while [not tail? pattern][
cr: first rule-stk ; current rule
ch: take pattern
case [
; literal character: an atom, a ")" while no group is open, or a "\"
; that is the very last character of the pattern
any [
find _atom ch
all [#")" = ch 1 = length? rule-stk]
all [#"\" = ch tail? pattern]
case [
; start a new literal string when the rule is empty or does not end
; in a string ...
any [
empty? cr
not string? last cr
insert tail cr form ch
; ... otherwise extend the literal string at the end of the rule
string? last cr [
insert tail last cr ch
escaped: false
; backslash: the character that follows must be one of _escape
#"\" = ch [
unless find _escape first pattern [
print ["pattern error: meta character expected after \, found" first pattern]
return none
; NOTE(review): escaped is assigned here and above, but no visible line
; ever reads it, so the character after "\" appears to be dispatched
; like an unescaped one on the next iteration - confirm
escaped: true
; "." matches any single character
#"." = ch [
insert tail cr 'skip
; "(" opens a group: a new empty rule is appended to the current rule
; and pushed on the stack so that it becomes the current rule
#"(" = ch [
insert/only tail cr nr: copy []
insert/only rule-stk nr ; new rule
; ")" closes the innermost group
#")" = ch [
remove rule-stk
; "|" separates alternatives
#"|" = ch [
insert tail cr '|
; "[" starts a bracket expression, collected into the bitset brexp
#"[" = ch [
; fch: candidate first character of a range; frst: still at the first
; position; compl: expression starts with "^"; rng: a "-" was seen
fch: none
frst: true
compl: false
rng: false
brexp: make bitset! 128
while [not tail? pattern][
ch: take pattern
; a "^" in first position complements the set; note that frst is
; cleared here, so a "]" directly after "^" already ends the expression
if all [frst #"^^" = ch][compl: true frst: false continue]
; a "]" ends the expression unless it is in first position
if all [not frst #"]" = ch][ break]
frst: false
; a "-" announces a range unless one is already pending
either all [not rng #"-" = ch]
rng: true
; range pending: ch is its upper end and must be greater than fch
; NOTE(review): fch may still be none here (e.g. "-" in first
; position), and a one-character range such as a-a is rejected - confirm
either rng
either ch > fch
brexp: union brexp charset compose [(fch) - (ch)]
rng: false
fch: none
print ["pattern error: range inverted" fch "-" ch]
return none
; "[" inside the expression must introduce a class of the form [:name:]
either #"["= ch
either #":" = first pattern
remove pattern
unless clb: find pattern ":]" [ ; closing bracket
print ["pattern error: expected :], found" pattern]
return none
; the class name is the text up to ":]"; the ":]" itself is dropped
class: take/part pattern clb
remove/part pattern 2
unless chset: select char-classes class [
print ["pattern error: wrong character class" class]
return none
brexp: union brexp chset
print ["pattern error: expected :, found" first pattern]
return none
; ordinary member character: remembered in fch as a possible range
; start and added to the set
either fch
brexp: union brexp charset fch
fch: none
fch: ch
brexp: union brexp charset ch
; the inner loop must have ended on "]", not on the end of the pattern
either #"]" <> ch
print "pattern error: range not closed"
return none
if compl [brexp: complement brexp]
; store the set in a freshly named word (_range1, _range2, ...) and
; reference that word from the rule
rng-nr: rng-nr + 1
r: to word! rejoin ["_range" rng-nr]
set r brexp
insert tail cr r
; "{" starts an interval: {n} or {n,m}
#"{" = ch [
unless clb: find pattern #"}" [
print ["pattern error: expected }, found" pattern]
return none
; take the text up to (not including) "}" and split it at the comma;
; the "}" stays in the pattern and is taken on the next iteration,
; where none of the visible case conditions matches it
limits: split take/part pattern clb #","
ll: length? limits
if any [ll < 1 ll > 2][
print ["pattern error, expected one or two numbers, found" limits]
return none
unless lo: attempt [to integer! limits/1] [
print ["pattern error: expected number, found" limits/1]
return none
unless lo >= 0 [
print ["pattern error: negative number" lo]
return none
if 2 = ll [
unless hi: attempt [to integer! limits/2] [
print ["pattern error: expected number, found" limits/2]
return none
; NOTE(review): this also rejects equal bounds such as {3,3};
; hi >= lo may have been intended - confirm
unless hi > lo [
print ["pattern error: upper bound lower than lower one" hi]
return none
; attach the counts to the preceding element; when that element is a
; literal string, its last character is split off so that the counts
; apply to that character only
el: last cr
either string? el
lc: last el
rst: head remove back tail el
either empty? rst
remove back tail cr
change back tail cr form rst
insert tail cr lo
if ll = 2 [insert tail cr hi]
insert tail cr form lc
; otherwise the counts are inserted in front of the last element
insert back tail cr lo
if ll = 2 [insert back tail cr hi]
; "?", "*" and "+" map to the parse keywords opt, any and some
any [
#"?" = ch #"*" = ch #"+" = ch
qf: switch ch [
#"?" ['opt]
#"*" ['any]
#"+" ['some]
; the keyword is attached in the same way as the interval counts above
el: last cr
either string? el
lc: last el
rst: head remove back tail el
either empty? rst
remove back tail cr
change back tail cr form rst
insert tail cr qf
insert tail cr form lc
insert back tail cr qf
; disabled debugging output
; print "after"
; print mold res
; final check for balanced ( ): only the outermost rule may remain
unless 1 = length? rule-stk [
print "pattern error: unmatched parentheses"
return none
copy res
; grep: public entry point (exported with set, so it is visible outside
; the context). Compiles the pattern, records the refinements in options,
; resets stats, dispatches on the type of input and finally prints a
; summary line. Every error branch now leaves the function right after
; its message; before, execution fell through and continued with an empty
; input, an empty pattern or rules set to none.
set 'grep func [
    {partial implementation of Unix/Linux grep pattern matching engine
documentation consulted: GNU Grep 2.26 and grep(1) - Linux man page}
    pattern [string!] "the pattern (extended regular expression) to match"
    input [string! file! url! block!] {a single string, possibly containing
newlines, a file, a url or a block of strings, or files and/or urls}
    /case "case-sensitive comparison"
    /invert "print only non-matching lines"
    /count "print a count of (non-)matching lines only"
    /quiet "suppress printing when no lines found"
    /local fst tm
][
    ; --- argument checks: report and stop
    if empty? input [
        print "empty input"
        exit
    ]
    if empty? pattern [
        print "error, no pattern specified"
        exit
    ]
    ; make-rules returns none (after printing its own message) on a bad pattern
    unless rules: make-rules pattern [
        print "error in rules"
        exit
    ]
    ; wrap the compiled rules as [thru <rules> to end], so that a match
    ; may start anywhere in a line
    rules: head insert/only next copy [thru to end] rules

    ; --- publish the refinements and reset the statistics
    options/case: case
    options/invert: invert
    options/count: count
    options/quiet: quiet
    stats/lines: 0
    stats/files: 0
    stats/matched: 0
    stats/time: now/time/precise

    ; --- dispatch on the kind of input
    switch type?/word input [
        string! [
            ; a string is matched line by line
            report split input #"^/" "matching text lines"
        ]
        file! [
            ; names starting with "/" are used as given, others are
            ; resolved against the current directory
            either #"/" = first input [
                process reduce [input] %""
            ][
                process reduce [input] what-dir
            ]
        ]
        url! [
            process reduce [input] %""
        ]
        block! [
            ; the first element decides how the whole block is treated
            fst: first input
            unless any [string? fst file? fst url? fst][
                print ["error, string, file or url expected:" fst]
                exit
            ]
            either string? fst [
                report input "matching text lines"
            ][
                process input what-dir
            ]
        ]
    ]

    ; --- summary: elapsed time in msec (operators evaluate left to right,
    ; so the time difference is scaled before the conversion)
    tm: to integer! now/time/precise - stats/time * 1000
    prin ["done, checked" stats/lines "lines in "]
    unless stats/files = 0 [
        prin [stats/files "files/urls in "]
    ]
    print [tm "msec, found" stats/matched "matches"]
]
; process: read every file/url of a block and hand its lines to report;
; directories are listed and processed recursively.
; A directory that cannot be listed is now skipped with the same message
; as an unreadable file (before, the read error was not caught), and the
; heading is reduced before it is passed on, so that report receives
; plain values instead of words bound to this function's locals.
process: func [
    {process a block of files/urls;
if some of the files are directories,
process the files in them recursively}
    input [block!] "block of files/urls"
    pdir [file!] "parent directory for files"
    /local lines src entries
][
    foreach src input [
        ; file names are resolved against the parent directory; urls are
        ; used as given
        if file? src [src: clean-path append copy pdir src]
        case [
            ; a url or a plain file: read it as a block of lines
            any [
                url? src
                all [file? src not dir? src]
            ][
                either lines: attempt [read/lines src][
                    report lines reduce ["matching" fmt length? lines 5 "lines from" mold src]
                ][
                    unless options/quiet [
                        print [mold src "cannot be read, skipped"]
                    ]
                ]
            ]
            ; any other file is a directory: descend into it, with the
            ; directory itself as parent of its entries
            file? src [
                either entries: attempt [read src][
                    process entries src
                ][
                    unless options/quiet [
                        print [mold src "cannot be read, skipped"]
                    ]
                ]
            ]
            true [
                print ["error, file or url expected, found:" type? src]
            ]
        ]
    ]
]
; report: match every line of a block against the compiled rules, update
; stats and print the result as requested by options.
; A heading given as a block marks the lines as coming from a file/url
; and increments stats/files. Printed lines are prefixed with their line
; number, right-aligned in 5 columns.
; A non-string element is now reported and skipped; before, it was
; reported and then still handed to parse.
report: func [
    {match and report according to options}
    lines [block!] "block of lines to match"
    text [string! block!] "heading for each string/file/url"
    /local buffer line res line-count i
][
    ; nothing to do for an empty source (it is not counted either)
    if any [not lines empty? lines][exit]
    if block? text [stats/files: stats/files + 1]
    stats/lines: stats/lines + length? lines

    buffer: copy []     ; formatted selected lines, in input order
    line-count: 0       ; number of selected lines
    repeat i length? lines [
        line: lines/:i
        unless string? line [
            print ["error, string expected, found:" type? line]
            continue
        ]
        res: either options/case [parse/case line rules][parse line rules]
        ; with /invert the non-matching lines are the selected ones
        if options/invert [res: not res]
        if res [
            line-count: line-count + 1
            insert tail buffer rejoin [fmt i 5 " " line]
        ]
    ]
    stats/matched: stats/matched + line-count

    ; with /quiet a source without selected lines prints nothing at all
    if any [
        line-count <> 0
        not options/quiet
    ][
        print text
        either options/count [
            print [fmt line-count 5 "matching line(s) found"]
        ][
            repeat i length? buffer [print buffer/:i]
        ]
    ]
]
; fmt: right-align an integer in a field of s characters (s is raised to
; at least 1). Returns a string: the number padded with blanks on the
; left, or s asterisks when the number does not fit.
fmt: func [
    {convert an integer to a right aligned string of given size;
pad with blanks on left; if too narrow, show ***}
    i [integer!] "the integer to convert"
    s [integer!] "the size in characters"
    /local f lf
][
    s: max 1 s
    f: form i
    lf: length? f
    either lf <= s [
        ; insert returns the position after the padding, head rewinds it
        head insert/dup f #" " s - lf
    ][
        head insert/dup copy "" #"*" s
    ]
]
