Skip to content

Instantly share code, notes, and snippets.

@AlbinoDrought
Created August 13, 2021 00:09
Show Gist options
  • Save AlbinoDrought/c292adc185ea6593338fb5b56ac89aa7 to your computer and use it in GitHub Desktop.
Save AlbinoDrought/c292adc185ea6593338fb5b56ac89aa7 to your computer and use it in GitHub Desktop.
Working around `bufio.ErrTooLong`: "bufio.Scanner: token too long" with and without Logrus

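You can avoid a bufio.ErrTooLong error ("bufio.Scanner: token too long") by using something like this as your scanner.Split() function. Lines that grow too long are handed back in chunks of at most maxTokenLength bytes instead of failing the scan: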

// maxTokenLength is the buffered size, in bytes, at which scanLinesOrGiveLong
// stops waiting for a line break and emits what it has. It is half of
// bufio.MaxScanTokenSize.
const maxTokenLength = bufio.MaxScanTokenSize / 2

// scanLinesOrGiveLong is a bufio.SplitFunc that behaves like bufio.ScanLines
// for ordinary input but never lets a single line grow past maxTokenLength.
//
// Whenever bufio.ScanLines produces a token, an advance or an error, that
// result is returned unchanged. If it produces nothing and fewer than
// maxTokenLength bytes are buffered, more data is requested. Otherwise the
// buffered prefix is emitted as a token of at most maxTokenLength bytes, which
// is what avoids bufio.ErrTooLong on very long lines.
//
// The forced cut is moved back by up to three bytes when it would otherwise
// fall inside a multi-byte UTF-8 sequence, so that callers using
// Scanner.Text() do not receive a rune torn across two tokens. The held-back
// bytes stay in the buffer and start the next token; no data is dropped.
func scanLinesOrGiveLong(data []byte, atEOF bool) (advance int, token []byte, err error) {
	advance, token, err = bufio.ScanLines(data, atEOF)
	if advance > 0 || token != nil || err != nil {
		// bufio.ScanLines found something, use it
		return advance, token, err
	}

	// bufio.ScanLines found nothing; while the buffer is still a reasonable
	// size, keep waiting for a regular line break
	if len(data) < maxTokenLength {
		return 0, nil, nil
	}

	// The buffer is getting massive: stop waiting and cut here. Look at the
	// last few bytes before the cut for the start of an unfinished UTF-8
	// sequence and, if there is one, cut just before it.
	cut := maxTokenLength
	for back := 1; back <= 3; back++ {
		b := data[cut-back]
		if b&0xC0 == 0x80 {
			// continuation byte: the lead byte is further back
			continue
		}

		// b begins a sequence; its high bits announce the sequence length
		need := 1
		switch {
		case b >= 0xF0:
			need = 4
		case b >= 0xE0:
			need = 3
		case b >= 0xC0:
			need = 2
		}
		if need > back {
			// the sequence would continue past the cut, leave it for the next token
			cut -= back
		}
		break
	}

	return cut, data[:cut], nil
}

Like this:

package main

import (
	"bufio"
	"os"
)

// main copies standard input to standard output token by token, using
// scanLinesOrGiveLong as the split function so that very long lines do not
// abort the scan with bufio.ErrTooLong.
//
// Tokens are written exactly as the scanner returns them; no separator is
// written between them. A failed write or a scanner error is reported on
// standard error and the process exits with status 1.
func main() {
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Split(scanLinesOrGiveLong)
	for scanner.Scan() {
		if _, err := os.Stdout.Write(scanner.Bytes()); err != nil {
			os.Stderr.WriteString("writing to stdout: " + err.Error() + "\n")
			os.Exit(1)
		}
	}
	// Scan returning false can mean EOF or a read failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		os.Stderr.WriteString("reading from stdin: " + err.Error() + "\n")
		os.Exit(1)
	}
}

I had this issue while using logrus's `Writer()` (the io.Writer returned by `logger.Writer()`), so I've included an extension for that below.

// Modification of https://github.com/sirupsen/logrus/blob/6cd8d684fda8caaa8d1fb8fa4b23b844850f4e7b/writer.go
// to support long-line bufio.Scanner.Split() function
// Original code under original MIT license at https://github.com/sirupsen/logrus/blob/6cd8d684fda8caaa8d1fb8fa4b23b844850f4e7b/LICENSE
// Modified code under the Unlicense, or public domain, or MIT, or whatever, it's like five lines of code
package logrusext
import (
"bufio"
"io"
"runtime"
"github.com/sirupsen/logrus"
)
// SafeWriterLevel gives back the write end of a pipe whose contents are logged
// through logger at the given level. It is the counterpart of
// logrus.WriterLevel that does not fail with bufio.ErrTooLong when fed long
// text without line breaks.
//
// The logger is wrapped in a new entry and the work is delegated to
// SafeEntryWriterLevel.
func SafeWriterLevel(logger *logrus.Logger, level logrus.Level) *io.PipeWriter {
	entry := logrus.NewEntry(logger)
	return SafeEntryWriterLevel(entry, level)
}
// SafeEntryWriterLevel gives back the write end of a pipe whose contents are
// logged through entry at the given level. It is the counterpart of
// logrus.Entry.WriterLevel that does not fail with bufio.ErrTooLong when fed
// long text without line breaks.
//
// A goroutine running entryWriterScanner reads the other end of the pipe. A
// finalizer that closes the writer is registered on the returned writer.
// Levels not listed in the table below are logged with entry.Print.
func SafeEntryWriterLevel(entry *logrus.Entry, level logrus.Level) *io.PipeWriter {
	// one print method per known level
	printers := map[logrus.Level]func(args ...interface{}){
		logrus.TraceLevel: entry.Trace,
		logrus.DebugLevel: entry.Debug,
		logrus.InfoLevel:  entry.Info,
		logrus.WarnLevel:  entry.Warn,
		logrus.ErrorLevel: entry.Error,
		logrus.FatalLevel: entry.Fatal,
		logrus.PanicLevel: entry.Panic,
	}
	printFunc, known := printers[level]
	if !known {
		printFunc = entry.Print
	}

	reader, writer := io.Pipe()
	go entryWriterScanner(entry, reader, printFunc)
	runtime.SetFinalizer(writer, writerFinalizer)
	return writer
}
// entryWriterScanner drains reader and passes every token to printFunc as a
// string. Tokens are produced by scanLinesOrGiveLong, so over-long lines
// arrive in pieces rather than stopping the scan. When scanning ends, any
// scanner error is logged through entry and the reader is closed.
func entryWriterScanner(entry *logrus.Entry, reader *io.PipeReader, printFunc func(args ...interface{})) {
	lines := bufio.NewScanner(reader)
	lines.Split(scanLinesOrGiveLong) // custom split function

	for {
		if !lines.Scan() {
			break
		}
		printFunc(lines.Text())
	}

	scanErr := lines.Err()
	if scanErr != nil {
		entry.Errorf("Error while reading from Writer: %s", scanErr)
	}
	reader.Close()
}
// maxTokenLength is the buffered size, in bytes, at which scanLinesOrGiveLong
// stops waiting for a line break and emits what it has. It is half of
// bufio.MaxScanTokenSize.
const maxTokenLength = bufio.MaxScanTokenSize / 2

// scanLinesOrGiveLong is a bufio.SplitFunc that behaves like bufio.ScanLines
// for ordinary input but never lets a single line grow past maxTokenLength.
//
// Whenever bufio.ScanLines produces a token, an advance or an error, that
// result is returned unchanged. If it produces nothing and fewer than
// maxTokenLength bytes are buffered, more data is requested. Otherwise the
// buffered prefix is emitted as a token of at most maxTokenLength bytes, which
// is what avoids bufio.ErrTooLong on very long lines.
//
// The forced cut is moved back by up to three bytes when it would otherwise
// fall inside a multi-byte UTF-8 sequence, so that callers using
// Scanner.Text() do not receive a rune torn across two tokens. The held-back
// bytes stay in the buffer and start the next token; no data is dropped.
func scanLinesOrGiveLong(data []byte, atEOF bool) (advance int, token []byte, err error) {
	advance, token, err = bufio.ScanLines(data, atEOF)
	if advance > 0 || token != nil || err != nil {
		// bufio.ScanLines found something, use it
		return advance, token, err
	}

	// bufio.ScanLines found nothing; while the buffer is still a reasonable
	// size, keep waiting for a regular line break
	if len(data) < maxTokenLength {
		return 0, nil, nil
	}

	// The buffer is getting massive: stop waiting and cut here. Look at the
	// last few bytes before the cut for the start of an unfinished UTF-8
	// sequence and, if there is one, cut just before it.
	cut := maxTokenLength
	for back := 1; back <= 3; back++ {
		b := data[cut-back]
		if b&0xC0 == 0x80 {
			// continuation byte: the lead byte is further back
			continue
		}

		// b begins a sequence; its high bits announce the sequence length
		need := 1
		switch {
		case b >= 0xF0:
			need = 4
		case b >= 0xE0:
			need = 3
		case b >= 0xC0:
			need = 2
		}
		if need > back {
			// the sequence would continue past the cut, leave it for the next token
			cut -= back
		}
		break
	}

	return cut, data[:cut], nil
}
// writerFinalizer closes the given pipe writer. SafeEntryWriterLevel registers
// it with runtime.SetFinalizer on the writers it returns. The error returned
// by Close is discarded.
func writerFinalizer(writer *io.PipeWriter) {
	_ = writer.Close()
}
package logrusext
import (
"bytes"
"testing"
"github.com/sirupsen/logrus"
)
// TestSafeWriterLevel doubles a payload until a write to the stock
// logger.Writer() returns an error, then writes that same payload through
// SafeWriterLevel and expects the write to succeed.
func TestSafeWriterLevel(t *testing.T) {
	logger := logrus.New()

	// grow the payload until the original writer reports a failure
	payload := []byte("meaty_guns-")
	for {
		payload = bytes.Repeat(payload, 2)
		stock := logger.Writer()
		_, stockErr := stock.Write(payload)
		stock.Close()
		if stockErr != nil {
			break
		}
	}
	logger.Info("These many bytes break the original writer: ", len(payload))

	// the safe writer is expected to accept the very same payload
	safe := SafeWriterLevel(logger, logrus.InfoLevel)
	_, safeErr := safe.Write(payload)
	safe.Close()
	if safeErr != nil {
		logger.Warn("safe writer didn't work :(")
		t.Error(safeErr)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment