Created
June 22, 2017 17:18
-
-
Save aryszka/25a7f49973b67b43e0dcb4ef5aaffb07 to your computer and use it in GitHub Desktop.
Regexp replace on a stream, buffering only as many bytes as necessary (the whole input is buffered if there is no match).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Try: | |
cat | go run rxstream.go '(?ms)(.*)42' '{"chars before 42": "$1"}' | |
And input: | |
abc42abc42 | |
*/ | |
package main | |
import ( | |
"io" | |
"os" | |
"regexp" | |
) | |
// streamReplaceRx copies r to w, replacing every non-empty match of rx with
// replacement. The replacement is expanded as a regexp template, so it may
// refer to submatches ($1, ${name}, ...).
//
// Input is read in chunks of at most len(readBuf) bytes and appended to an
// internal buffer. After every read the buffer is searched for matches; each
// match is written out (preceded by the unmatched bytes in front of it) and
// dropped from the buffer, so only the bytes after the last match stay
// buffered. If nothing matches, the whole input is buffered until EOF.
//
// Because matching runs on whatever has been read so far, a match is accepted
// as soon as it is found in the buffered data; the result can therefore differ
// from matching the complete input at once (e.g. for greedy patterns). After a
// replacement, matching resumes on the remaining bytes as if they were the
// start of the text.
//
// Zero-length matches are never replaced: the bytes in front of such a match
// are flushed exactly once and more input is read.
//
// If readBuf is empty, a default-sized buffer is allocated, because reading
// into a zero-length buffer cannot make progress.
//
// It returns the number of replacements written and the first read error
// (other than io.EOF) or write error encountered.
func streamReplaceRx(
	w io.Writer,
	r io.Reader,
	rx *regexp.Regexp,
	replacement []byte,
	readBuf []byte,
) (int, error) {
	if len(readBuf) == 0 {
		readBuf = make([]byte, 4096)
	}

	var (
		matchCount int
		matchBuf   []byte
	)

	for {
		n, err := r.Read(readBuf)
		eof := err == io.EOF
		if err != nil && !eof {
			return matchCount, err
		}

		// A reader may return data together with io.EOF, so the bytes are
		// always stored and matched before the EOF is acted upon.
		matchBuf = append(matchBuf, readBuf[:n]...)

		// Consume all the matches in the currently available input.
		for {
			// Submatch indices are needed for the template expansion.
			m := rx.FindSubmatchIndex(matchBuf)
			if m == nil {
				// No match yet: a longer input may still match, read more.
				break
			}

			// The unmatched part in front of the match.
			if _, err := w.Write(matchBuf[:m[0]]); err != nil {
				return matchCount, err
			}

			if m[0] == m[1] {
				// Zero-length match: drop the prefix that was just written,
				// so it is not written a second time, and read more instead
				// of looping on the same position.
				matchBuf = matchBuf[m[0]:]
				break
			}

			matchCount++
			if _, err := w.Write(rx.Expand(nil, replacement, matchBuf, m)); err != nil {
				return matchCount, err
			}

			// Drop everything up to the end of the match and keep looking in
			// the rest of the available input.
			matchBuf = matchBuf[m[1]:]
		}

		if eof {
			// Output whatever is left unmatched.
			_, err := w.Write(matchBuf)
			return matchCount, err
		}
	}
}
func main() { | |
if len(os.Args) < 3 { | |
os.Exit(2) | |
} | |
rx := regexp.MustCompile(os.Args[1]) | |
readBuf := make([]byte, 4) | |
streamReplaceRx( | |
os.Stdout, | |
os.Stdin, | |
rx, | |
[]byte(os.Args[2]), | |
readBuf, | |
) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment