Skip to content

Instantly share code, notes, and snippets.

@utrack
Last active January 15, 2020 10:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save utrack/3629bcf8106ed2c3a4c4289cc976b803 to your computer and use it in GitHub Desktop.
Save utrack/3629bcf8106ed2c3a4c4289cc976b803 to your computer and use it in GitHub Desktop.
bcp data slurper
package main
import (
"bufio"
"encoding/csv"
"os"
)
// main converts the separator-delimited dump named by the first command-line
// argument into CSV on standard output.
//
// The input is tokenized by scanTok on three control bytes:
//   - \001 ends a field;
//   - \002 ends a field and the current record, which is then written as one CSV row;
//   - \000 appends an empty-string field (any bytes in front of it in the same
//     token are discarded).
//
// A field with no bytes before its \001/\002 terminator is written as the
// literal \N (presumably the NULL marker expected downstream — confirm).
// Trailing bytes that are not followed by one of the separators are ignored.
//
// NOTE(review): a \000 immediately followed by \001 or \002 produces two
// fields ("" and then \N); confirm this matches the dump format.
func main() {
	// 32 KiB read buffer. `2 ^ 15` would be XOR (13), not a power of two.
	const bufSize = 1 << 15

	if len(os.Args) < 2 {
		os.Stderr.WriteString("usage: <program> <bcp-file>\n")
		os.Exit(2)
	}

	f, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer f.Close()

	w := csv.NewWriter(bufio.NewWriter(os.Stdout))

	scanner := bufio.NewScanner(bufio.NewReaderSize(f, bufSize))
	scanner.Split(scanTok)

	var record []string
	// addField appends one terminated field to the pending record, mapping
	// an empty field to \N.
	addField := func(v string) {
		if v == "" {
			v = "\\N"
		}
		record = append(record, v)
	}

	for scanner.Scan() {
		text := scanner.Text()
		if text == "" {
			continue
		}
		// The last byte of every token is the separator that ended it.
		last := len(text) - 1
		sig, field := text[last], text[:last]
		switch sig {
		case '\001':
			addField(field)
		case '\002':
			addField(field)
			if err := w.Write(record); err != nil {
				panic(err)
			}
			// Reuse the backing array for the next record.
			record = record[:0]
		case '\000':
			record = append(record, "")
		}
	}

	// Flush first so rows converted before a failure still reach stdout.
	w.Flush()
	// Scan stops silently on read errors and oversized tokens; surface them.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	if err := w.Error(); err != nil {
		panic(err)
	}
}
// scanTok is a bufio.SplitFunc that cuts the input after every \000, \001 or
// \002 byte. Each token includes the separator byte that terminated it.
//
// When no separator is present and more input may follow, it requests more
// data. At EOF the remaining bytes (possibly none) are returned as the last
// token together with bufio.ErrFinalToken, which stops the scan without
// making Scanner.Err report an error.
func scanTok(data []byte, atEOF bool) (advance int, token []byte, err error) {
	for pos, b := range data {
		// The three separators are exactly the byte values 0, 1 and 2.
		if b <= '\002' {
			end := pos + 1
			return end, data[:end], nil
		}
	}
	if atEOF {
		// Hand over whatever is left as the final token.
		return 0, data, bufio.ErrFinalToken
	}
	// No separator yet: ask the Scanner to read more.
	return 0, nil, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment