Skip to content

Instantly share code, notes, and snippets.

@aryszka
Created June 22, 2017 17:18
Show Gist options
  • Save aryszka/25a7f49973b67b43e0dcb4ef5aaffb07 to your computer and use it in GitHub Desktop.
Save aryszka/25a7f49973b67b43e0dcb4ef5aaffb07 to your computer and use it in GitHub Desktop.
Regexp replace in a stream, buffering only as many bytes as necessary (the whole input is buffered if there is no match).
/*
Try:
cat | go run rxstream.go '(?ms)(.*)42' '{"chars before 42": "$1"}'
And input:
abc42abc42
*/
package main
import (
"io"
"os"
"regexp"
)
// streamReplaceRx copies r to w, replacing every non-empty match of rx with
// replacement. The replacement is expanded as a template by rx.Expand, so $1,
// ${name}, ... refer to the submatches of the current match.
//
// The input is consumed in chunks of at most len(readBuf) bytes; a default
// buffer is allocated when readBuf is empty. After every read, the bytes
// buffered so far are searched: everything in front of a match is written
// through unchanged, the match is replaced, and the search continues behind
// it. Input that has not matched yet stays buffered, because more input may
// still complete a match; if nothing ever matches, the whole input is buffered
// and written out at EOF.
//
// Matching only sees what has been read so far, and the bytes up to the end of
// a match are dropped from the buffer. A match that would have continued past
// the data available at the time is therefore replaced in its shorter form,
// and the result can differ from running the expression on the whole input.
//
// Zero-length matches are never replaced. The input in front of such a match
// is passed through, and the search resumes once more input was read.
//
// It returns the number of replacements written, together with the first
// error reported by r (other than io.EOF) or by w.
func streamReplaceRx(
	w io.Writer,
	r io.Reader,
	rx *regexp.Regexp,
	replacement []byte,
	readBuf []byte,
) (int, error) {
	// A zero-length buffer would allow Read to return (0, nil) forever.
	if len(readBuf) == 0 {
		readBuf = make([]byte, 4096)
	}

	var (
		matchCount int
		matchBuf   []byte // input read but neither written nor replaced yet
		expanded   []byte // scratch space reused for the expanded template
	)

	for {
		n, err := r.Read(readBuf)

		// Store what was read. A reader may return data together with io.EOF,
		// so the bytes are taken over before the error is looked at.
		matchBuf = append(matchBuf, readBuf[:n]...)
		if err != nil && err != io.EOF {
			return matchCount, err
		}
		eof := err == io.EOF

		// Replace all the matches in the currently available input.
		for {
			// Submatch indices are needed for the template expansion.
			m := rx.FindSubmatchIndex(matchBuf)
			if m == nil {
				// No match: only more input can tell whether the expression
				// matches a longer buffer.
				break
			}

			// The unmatched part in front of the match.
			if _, err := w.Write(matchBuf[:m[0]]); err != nil {
				return matchCount, err
			}

			if m[0] == m[1] {
				// Avoid looping around a zero-length match by reading more.
				// The prefix was already written, so it has to leave the
				// buffer, otherwise it would be written again later.
				matchBuf = matchBuf[m[0]:]
				break
			}

			expanded = rx.Expand(expanded[:0], replacement, matchBuf, m)
			if _, err := w.Write(expanded); err != nil {
				return matchCount, err
			}
			matchCount++

			// Drop everything up to the end of the match and keep looking in
			// the rest of the available input.
			matchBuf = matchBuf[m[1]:]
		}

		if eof {
			// Nothing more can match: output the remaining buffer.
			_, err := w.Write(matchBuf)
			return matchCount, err
		}
	}
}
// main replaces, in the data read from stdin, the matches of the regular
// expression given as the first argument with the template given as the
// second argument, and writes the result to stdout.
//
// Exit status: 2 for missing arguments or an invalid expression, 1 when
// reading or writing fails.
func main() {
	if len(os.Args) < 3 {
		os.Stderr.WriteString("usage: rxstream <pattern> <replacement>\n")
		os.Exit(2)
	}

	// The pattern is user input: report a bad expression instead of panicking.
	rx, err := regexp.Compile(os.Args[1])
	if err != nil {
		os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(2)
	}

	// NOTE(review): the 4-byte read buffer is kept from the original;
	// presumably it is this small to make the incremental matching visible
	// with short inputs - confirm before raising it.
	readBuf := make([]byte, 4)

	_, err = streamReplaceRx(
		os.Stdout,
		os.Stdin,
		rx,
		[]byte(os.Args[2]),
		readBuf,
	)
	if err != nil {
		os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(1)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment