Skip to content

Instantly share code, notes, and snippets.

@iArnold
Forked from meijeru/%grep.red
Created November 25, 2016 22:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iArnold/e75b3ae233fad0cb73dd3fc44fc8caa2 to your computer and use it in GitHub Desktop.
Save iArnold/e75b3ae233fad0cb73dd3fc44fc8caa2 to your computer and use it in GitHub Desktop.
Red [
Title: "Partial grep implementation"
Purpose: {To search the input for lines containing a match
to the given pattern, specified as a regular expression}
Author: "Rudolf W. MEIJER"
File: %grep.red
Version: 0.4.0
Date: "24-Nov-2016"
Rights: "(c) Copyright 2016 Rudolf W. MEIJER"
History: [
[0.0.0 "2-Nov-2017" {Start of project}]
[0.1.0 "9-Nov-2017" {First incomplete working version}]
[0.2.0 "22-Nov-2016" {Ranges and repetition implemented}]
[0.3.0 "23-Nov-2016" {Refactoring}]
[0.4.0 "24-Nov-2017" {Added /quiet option}]
]
Notes: {see GNU Grep 2.26, http://www.gnu.org/software/grep/manual/grep,
also the grep(1) Linux man page, e.g. https://linux.die.net/man/1/grep,
and https://en.wikipedia.org/wiki/Regular_expression
}
Language: 'English
]
;---|----1----|----2----|----3----|----4----|----5----|----6----|----7----|-
grep-impl: context [
; charset constants
_ASCII: charset [#"^(00)" - #"^(7F)"]
_upper: charset [#"A" - #"Z"]
_lower: charset [#"a" - #"z"]
_alpha: union _upper _lower
_digit: charset [#"0" - #"9"]
_xdigit: union _digit charset [#"A" - #"F" #"a" - #"f"]
_alnum: union _alpha _digit
_blank: charset " ^-"
_space: union _blank charset [#"^(0A)" - #"^(0D)"]
_cntrl: charset [#"^(00)" - #"^(1F)" #"^(7F)"]
_punct: charset {!"#$%&'()*+,-./:;<=>?@[\]^^_`{|}~}
_graph: union _alnum _punct
_print: union _graph charset " "
_meta: charset "|()?*+.[]{}\"
_escape: charset "|()?*+.[{\"
_atom: intersect _ASCII complement _meta
char-classes: reduce [
"upper" _upper
"lower" _lower
"alpha" _alpha
"digit" _digit
"xdigit" _xdigit
"alnum" _alnum
"blank" _blank
"space" _space
"cntrl" _cntrl
"punct" _punct
"graph" _graph
"print" _print
]
rules: none
options: context [
case: none
invert: none
count: none
quiet: none
]
stats: context [
files: 0
lines: 0
matched: 0
time: 0
]
make-rules: func [
{takes a regex and constructs the corresponding Red parser rules
which will do the matching; returns the rules}
regex [string!] "the pattern (regex)"
/local res pattern rule-stk rng-nr limits ll lo hi fch frst rng r qf
ch nr cr el lc rst class clb chset escaped compl brexp
][
unless parse regex [some _ASCII] [
print "pattern error, non-ASCII character(s) found"
return none
]
; set up
pattern: copy regex
rule-stk: copy []
rng-nr: 0
res: copy []
insert/only rule-stk res
escaped: false
; main loop
while [not tail? pattern][
cr: first rule-stk ; current rule
ch: take pattern
case [
any [
escaped
find _atom ch
all [#")" = ch 1 = length? rule-stk]
all [#"\" = ch tail? pattern]
][
case [
any [
empty? cr
not string? last cr
][
insert tail cr form ch
]
string? last cr [
insert tail last cr ch
]
]
escaped: false
]
#"\" = ch [
unless find _escape first pattern [
print ["pattern error: meta character expected after \, found" first pattern]
return none
]
escaped: true
]
#"." = ch [
insert tail cr 'skip
]
#"(" = ch [
insert/only tail cr nr: copy []
insert/only rule-stk nr ; new rule
]
#")" = ch [
remove rule-stk
]
#"|" = ch [
insert tail cr '|
]
#"[" = ch [
fch: none
frst: true
compl: false
rng: false
brexp: make bitset! 128
while [not tail? pattern][
ch: take pattern
if all [frst #"^^" = ch][compl: true frst: false continue]
if all [not frst #"]" = ch][ break]
frst: false
either all [not rng #"-" = ch]
[
rng: true
][
either rng
[
either ch > fch
[
brexp: union brexp charset compose [(fch) - (ch)]
rng: false
fch: none
][
print ["pattern error: range inverted" fch "-" ch]
return none
]
][
either #"["= ch
[
either #":" = first pattern
[
remove pattern
unless clb: find pattern ":]" [ ; closing bracket
print ["pattern error: expected :], found" pattern]
return none
]
class: take/part pattern clb
remove/part pattern 2
unless chset: select char-classes class [
print ["pattern error: wrong character class" class]
return none
]
brexp: union brexp chset
][
print ["pattern error: expected :, found" first pattern]
return none
]
][
either fch
[
brexp: union brexp charset fch
fch: none
][
fch: ch
]
brexp: union brexp charset ch
]
]
]
]
either #"]" <> ch
[
print "pattern error: range not closed"
return none
][
if compl [brexp: complement brexp]
rng-nr: rng-nr + 1
r: to word! rejoin ["_range" rng-nr]
set r brexp
insert tail cr r
]
]
#"{" = ch [
unless clb: find pattern #"}" [
print ["pattern error: expected }, found" pattern]
return none
]
limits: split take/part pattern clb #","
ll: length? limits
if any [ll < 1 ll > 2][
print ["pattern error, expected one or two numbers, found" limits]
return none
]
unless lo: attempt [to integer! limits/1] [
print ["pattern error: expected number, found" limits/1]
return none
]
unless lo >= 0 [
print ["pattern error: negative number" lo]
return none
]
if 2 = ll [
unless hi: attempt [to integer! limits/2] [
print ["pattern error: expected number, found" limits/2]
return none
]
unless hi > lo [
print ["pattern error: upper bound lower than lower one" hi]
return none
]
]
el: last cr
either string? el
[
lc: last el
rst: head remove back tail el
either empty? rst
[
remove back tail cr
][
change back tail cr form rst
]
insert tail cr lo
if ll = 2 [insert tail cr hi]
insert tail cr form lc
][
insert back tail cr lo
if ll = 2 [insert back tail cr hi]
]
]
any [
#"?" = ch #"*" = ch #"+" = ch
][
qf: switch ch [
#"?" ['opt]
#"*" ['any]
#"+" ['some]
]
el: last cr
either string? el
[
lc: last el
rst: head remove back tail el
either empty? rst
[
remove back tail cr
][
change back tail cr form rst
]
insert tail cr qf
insert tail cr form lc
][
insert back tail cr qf
]
]
]
; print "after"
; print mold res
]
; final check for balanced ( )
unless 1 = length? rule-stk [
print "pattern error: unmatched parentheses"
return none
]
copy res
]
set 'grep func [
{partial implementation of Unix/Linux grep pattern matching engine
documentation consulted: GNU Grep 2.26 and grep(1) - Linux man page}
pattern [string!] "the pattern (extended regular expression) to match"
input [string! file! url! block!] {a single string, possibly containing
newlines, a file, a url or a block of strings, or files and/or urls}
/case "case-sensitive comparison"
/invert "print only non-matching lines"
/count "print a count of (non-)matching lines only"
/quiet "suppress printing when no lines found"
/local fst tm
][
if empty? input [
print "empty input"
exit
]
if empty? pattern [
print "error, no pattern specified"
exit
]
unless rules: make-rules pattern [
print "error in rules"
exit
]
rules: head insert/only next copy [thru to end] rules
options/case: case
options/invert: invert
options/count: count
options/quiet: quiet
stats/lines: 0
stats/files: 0
stats/matched: 0
stats/time: now/time/precise
switch type?/word input [
string! [
report split input #"^/" "matching text lines"
]
file! [
either #"/" = first input
[
process reduce [input] %""
][
process reduce [input] what-dir
]
]
url! [
process reduce [input] %""
]
block! [
fst: first input
unless any [string? fst file? fst url? fst][
print ["error, string, file or url expected:" fst]
exit
]
either string? fst
[
report input "matching text lines"
][
process input what-dir
]
]
]
tm: to integer! now/time/precise - stats/time * 1000
prin ["done, checked" stats/lines "lines in "]
unless stats/files = 0 [
prin [stats/files "files/urls in "]
]
print [tm "msec, found" stats/matched "matches"]
]
process: func [
{process a block of files/urls;
if some of the files are directories,
process the files in them recursively}
input [block!] "block of files/urls"
pdir [file!] "parent directory for files"
/local lines src
][
foreach src input [
if file? src [src: append copy pdir src]
case [
any [
url? src
all [file? src not dir? src]
][
either lines: attempt [read/lines src]
[
report lines ["matching" fmt length? lines 5 "lines from" mold src]
][
unless options/quiet [
print [mold src "cannot be read, skipped"]
]
]
]
file? src [ ; this is a dir!!!
process read src src
]
true [
print ["error, file or url expected, found:" type? src]
]
]
]
]
report: func [
{match and report according to options}
lines [block!] "block of lines to match"
text [string! block!] "heading for each string/file/url"
/local buffer line res line-count i
][
if any [not lines empty? lines ][exit]
if block? text [stats/files: stats/files + 1]
stats/lines: stats/lines + length? lines
buffer: copy []
line-count: 0
repeat i length? lines [
line: lines/:i
unless string? line [
print ["error, string expected, found:" type? line]
exit
]
res: either options/case [parse/case line rules][parse line rules]
if options/invert [res: not res]
if res [
line-count: line-count + 1
insert tail buffer rejoin [fmt i 5 " " line]
]
]
stats/matched: stats/matched + line-count
if any [
line-count <> 0
not options/quiet
][
print text
either options/count
[
print [fmt line-count 5 "matching line(s) found"]
][
repeat i length? buffer [print buffer/:i]
]
]
]
fmt: func [
{convert an integer to a right aligned string of given size;
pad with blanks on left; if too narrow, show ***}
i [integer!] "the integer to convert"
s [integer!] "the size in characters"
/local f lf
][
s: max 1 s
f: form i
lf: length? f
either lf <= s
[
f: head insert/dup f #" " s - lf
][
f: head insert/dup copy "" #"*" s
]
]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment