Skip to content

Instantly share code, notes, and snippets.

@cespare
Last active August 29, 2015 14:05
Show Gist options
  • Save cespare/b6d064187b5b2ec5bb61 to your computer and use it in GitHub Desktop.
Save cespare/b6d064187b5b2ec5bb61 to your computer and use it in GitHub Desktop.

Dump Redis RDB files to JSON. There are many limitations, including:

  • Only one DB per RDB file is supported
  • Only plain key/value pairs and hashes are supported (no sets, lists, or zsets for now)

3x faster than rdbtool for my test dump:

$ time rdb --command json dump.rdb > rdbtool.json 
rdb --command json  > rdbtool.json  264.65s user 0.80s system 99% cpu 4:25.93 total
$ time go run db2json.go dump.rdb > rdb2json.json
go run db2json.go  > rdb2json.json  25.37s user 62.78s system 98% cpu 1:29.59 total
package main
import (
"io"
"log"
"os"
"runtime"
"strconv"
"sync"
"github.com/cupcake/rdb"
)
// decoder streams the contents of an RDB file to w as JSON. Its methods are
// the callbacks invoked while the dump is being decoded; each one writes the
// corresponding JSON fragment straight to w.
type decoder struct {
	// w receives the generated JSON text.
	w io.Writer

	// dbStarted is consulted by StartDatabase to reject a second database
	// section within the same dump.
	dbStarted bool

	// firstKeySeen is a stack with one entry per JSON object currently open
	// (the database object, plus a hash while one is being written). The top
	// entry tells writeKey whether a "," separator is needed before the next
	// member.
	firstKeySeen []bool
}
// newDecoder returns a decoder that writes its JSON output to w. The
// separator stack starts with a single entry for the top-level database
// object, with no member written yet.
func newDecoder(w io.Writer) rdb.Decoder {
	d := new(decoder)
	d.w = w
	d.firstKeySeen = []bool{false}
	return d
}
// StartRDB writes the opening "[" of the JSON array that wraps the dump.
// The result of the write is not checked.
func (d *decoder) StartRDB() {
	_, _ = io.WriteString(d.w, "[")
}
// EndRDB writes the closing "]" of the JSON array that wraps the dump.
// The result of the write is not checked.
func (d *decoder) EndRDB() {
	_, _ = io.WriteString(d.w, "]")
}
// StartDatabase writes the opening "{" of the JSON object that holds the
// database's keys. The database number n is not included in the output.
//
// Only one database per dump is supported: a second call panics, because the
// separator state kept in firstKeySeen is not reset between databases and the
// resulting output would not be valid JSON.
func (d *decoder) StartDatabase(n int) {
	if d.dbStarted {
		panic("only one db per RDB file handled")
	}
	// Remember that a database has been opened. Without this assignment the
	// guard above can never fire.
	d.dbStarted = true
	io.WriteString(d.w, "{")
}
// EndDatabase writes the closing "}" of the database's JSON object. The
// database number n is not used.
func (d *decoder) EndDatabase(n int) {
	_, _ = io.WriteString(d.w, "}")
}
// Set writes a plain string entry as a "key":value member of the object
// currently being written.
func (d *decoder) Set(key, value []byte, expiry int64) {
	_ = expiry // the expiry is not represented in the JSON output

	d.writeKey(key)
	d.writeValue(value)
}
// StartHash begins a nested JSON object for the hash stored at key. The
// length and expiry arguments are not used.
func (d *decoder) StartHash(key []byte, length, expiry int64) {
	d.writeKey(key)

	// Push a fresh "no member written yet" marker for the nested object so
	// that its first field is not preceded by a comma.
	d.firstKeySeen = append(d.firstKeySeen, false)

	io.WriteString(d.w, "{")
}
// EndHash closes the nested JSON object opened by StartHash and pops that
// object's entry off the separator stack. The key argument is not used.
func (d *decoder) EndHash(key []byte) {
	io.WriteString(d.w, "}")

	enclosing := len(d.firstKeySeen) - 1
	d.firstKeySeen = d.firstKeySeen[:enclosing]
}
// Hset writes one hash field as a "field":value member of the object
// currently being written.
func (d *decoder) Hset(key, field, value []byte) {
	_ = key // the hash's own key was already written by StartHash

	d.writeKey(field)
	d.writeValue(value)
}
// Sets, lists and sorted sets are not supported. The callbacks below are
// placeholders: each one panics as soon as it is invoked.

// StartSet panics: sets are not supported.
func (d *decoder) StartSet(key []byte, cardinality, expiry int64) {
	panic("sets unimplemented")
}

// Sadd panics: sets are not supported.
func (d *decoder) Sadd(key, member []byte) {
	panic("sets unimplemented")
}

// EndSet panics: sets are not supported.
func (d *decoder) EndSet(key []byte) {
	panic("sets unimplemented")
}

// StartList panics: lists are not supported.
func (d *decoder) StartList(key []byte, length, expiry int64) {
	panic("lists unimplemented")
}

// Rpush panics: lists are not supported.
func (d *decoder) Rpush(key, value []byte) {
	panic("lists unimplemented")
}

// EndList panics: lists are not supported.
func (d *decoder) EndList(key []byte) {
	panic("lists unimplemented")
}

// StartZSet panics: sorted sets are not supported.
func (d *decoder) StartZSet(key []byte, cardinality, expiry int64) {
	panic("zsets unimplemented")
}

// Zadd panics: sorted sets are not supported.
func (d *decoder) Zadd(key []byte, score float64, member []byte) {
	panic("zsets unimplemented")
}

// EndZSet panics: sorted sets are not supported.
func (d *decoder) EndZSet(key []byte) {
	panic("zsets unimplemented")
}
// writeKey writes the "," separator needed before every member except the
// first one of the object currently being written, followed by key as a JSON
// string and the ":" that introduces the value.
func (d *decoder) writeKey(key []byte) {
	top := len(d.firstKeySeen) - 1
	if d.firstKeySeen[top] {
		io.WriteString(d.w, `,`)
	} else {
		d.firstKeySeen[top] = true
	}
	io.WriteString(d.w, jsonQuoteASCII(key))
	io.WriteString(d.w, `:`)
}

// writeValue writes value either as a bare JSON number or as a JSON string.
//
// Redis stores numbers as ascii :(
// Looks like rdbtool just guesses which are ints. Do that too as a quick
// hack: anything that looks like a non-negative decimal integer is written
// unquoted, and everything else is written as a string.
func (d *decoder) writeValue(value []byte) {
	if isJSONInt(value) {
		io.WriteString(d.w, string(value))
		return
	}
	io.WriteString(d.w, jsonQuoteASCII(value))
}

// isJSONInt reports whether value can be copied verbatim into the output as
// a JSON number: it must be non-empty, consist only of ASCII digits, and have
// no leading zero (JSON forbids "007"; a lone "0" is fine).
func isJSONInt(value []byte) bool {
	if len(value) == 0 {
		return false
	}
	if value[0] == '0' && len(value) > 1 {
		return false
	}
	for _, b := range value {
		if b < '0' || b > '9' {
			return false
		}
	}
	return true
}

// jsonQuoteASCII returns raw as a double-quoted, ASCII-only JSON string.
//
// strconv.QuoteToASCII is not suitable here because it produces Go escapes
// (\a, \v, \xNN, \UNNNNNNNN) that JSON parsers reject. This helper emits only
// the escapes JSON defines: \" \\ \b \f \n \r \t and \uXXXX, using a
// surrogate pair for code points above U+FFFF. Bytes that are not valid
// UTF-8 are written as \ufffd.
func jsonQuoteASCII(raw []byte) string {
	out := make([]byte, 0, len(raw)+2)
	out = append(out, '"')
	// Ranging over a string yields U+FFFD for every invalid UTF-8 byte.
	for _, r := range string(raw) {
		switch {
		case r == '"' || r == '\\':
			out = append(out, '\\', byte(r))
		case r == '\b':
			out = append(out, '\\', 'b')
		case r == '\f':
			out = append(out, '\\', 'f')
		case r == '\n':
			out = append(out, '\\', 'n')
		case r == '\r':
			out = append(out, '\\', 'r')
		case r == '\t':
			out = append(out, '\\', 't')
		case r >= 0x20 && r < 0x7f:
			// Printable ASCII is copied through unchanged.
			out = append(out, byte(r))
		case r > 0xffff:
			// Outside the BMP: encode as a UTF-16 surrogate pair.
			r -= 0x10000
			out = appendJSONUnicodeEscape(out, 0xd800+(r>>10))
			out = appendJSONUnicodeEscape(out, 0xdc00+(r&0x3ff))
		default:
			// Remaining control characters, DEL and non-ASCII BMP runes.
			out = appendJSONUnicodeEscape(out, r)
		}
	}
	out = append(out, '"')
	return string(out)
}

// appendJSONUnicodeEscape appends the six-character escape \uXXXX for the
// 16-bit code unit to dst, using lower-case hex digits zero-padded to four.
func appendJSONUnicodeEscape(dst []byte, unit rune) []byte {
	dst = append(dst, '\\', 'u')
	switch {
	case unit < 0x10:
		dst = append(dst, "000"...)
	case unit < 0x100:
		dst = append(dst, "00"...)
	case unit < 0x1000:
		dst = append(dst, '0')
	}
	return strconv.AppendUint(dst, uint64(unit), 16)
}
// process decodes the RDB file at filename and writes the resulting JSON to
// a new file named filename + ".json", truncating it if it already exists.
//
// It returns the first error encountered while opening either file, decoding
// the dump, or closing the output file. The close error matters: a write that
// only fails when the file is finalized would otherwise be reported as
// success.
func process(filename string) (err error) {
	in, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer in.Close() // read-only handle; nothing useful to report on close

	out, err := os.Create(filename + ".json")
	if err != nil {
		return err
	}
	defer func() {
		// Surface the close error unless an earlier error is already being
		// returned.
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	return rdb.Decode(in, newDecoder(out))
}
// main converts every RDB file named on the command line to JSON, running
// the conversions in parallel on a pool of worker goroutines.
//
// A failure on one file is logged and the remaining files are still
// processed; the program then exits with status 1. Calling log.Fatal from a
// worker would instead terminate the process while other workers were in the
// middle of writing their output files.
func main() {
	if len(os.Args) < 2 {
		log.Fatal("Usage: rdb2json FILE1 FILE2 ... FILEN")
	}
	filenames := os.Args[1:]

	workers := runtime.NumCPU()
	runtime.GOMAXPROCS(workers)
	// There is no point in starting more workers than there are files.
	if len(filenames) < workers {
		workers = len(filenames)
	}

	jobs := make(chan string) // Filenames as input
	var (
		wg     sync.WaitGroup
		mu     sync.Mutex // guards failed
		failed bool
	)
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for filename := range jobs {
				if err := process(filename); err != nil {
					log.Printf("%s: %v", filename, err)
					mu.Lock()
					failed = true
					mu.Unlock()
				}
			}
		}()
	}
	for _, filename := range filenames {
		jobs <- filename
	}
	close(jobs)
	wg.Wait()

	if failed {
		os.Exit(1)
	}
}
package main
import (
"io"
"log"
"os"
"strconv"
"github.com/cupcake/rdb"
)
// decoder streams the contents of an RDB file to w as JSON. Its methods are
// the callbacks invoked while the dump is being decoded; each one writes the
// corresponding JSON fragment straight to w.
type decoder struct {
	// w receives the generated JSON text.
	w io.Writer

	// dbStarted is consulted by StartDatabase to reject a second database
	// section within the same dump.
	dbStarted bool

	// firstKeySeen is a stack with one entry per JSON object currently open
	// (the database object, plus a hash while one is being written). The top
	// entry tells writeKey whether a "," separator is needed before the next
	// member.
	firstKeySeen []bool
}
// newDecoder returns a decoder that writes its JSON output to w. The
// separator stack starts with a single entry for the top-level database
// object, with no member written yet.
func newDecoder(w io.Writer) rdb.Decoder {
	d := new(decoder)
	d.w = w
	d.firstKeySeen = []bool{false}
	return d
}
// StartRDB writes the opening "[" of the JSON array that wraps the dump.
// The result of the write is not checked.
func (d *decoder) StartRDB() {
	_, _ = io.WriteString(d.w, "[")
}
// EndRDB writes the closing "]" of the JSON array that wraps the dump.
// The result of the write is not checked.
func (d *decoder) EndRDB() {
	_, _ = io.WriteString(d.w, "]")
}
// StartDatabase writes the opening "{" of the JSON object that holds the
// database's keys. The database number n is not included in the output.
//
// Only one database per dump is supported: a second call panics, because the
// separator state kept in firstKeySeen is not reset between databases and the
// resulting output would not be valid JSON.
func (d *decoder) StartDatabase(n int) {
	if d.dbStarted {
		panic("only one db per RDB file handled")
	}
	// Remember that a database has been opened. Without this assignment the
	// guard above can never fire.
	d.dbStarted = true
	io.WriteString(d.w, "{")
}
// EndDatabase writes the closing "}" of the database's JSON object. The
// database number n is not used.
func (d *decoder) EndDatabase(n int) {
	_, _ = io.WriteString(d.w, "}")
}
// Set writes a plain string entry as a "key":value member of the object
// currently being written.
func (d *decoder) Set(key, value []byte, expiry int64) {
	_ = expiry // the expiry is not represented in the JSON output

	d.writeKey(key)
	d.writeValue(value)
}
// StartHash begins a nested JSON object for the hash stored at key. The
// length and expiry arguments are not used.
func (d *decoder) StartHash(key []byte, length, expiry int64) {
	d.writeKey(key)

	// Push a fresh "no member written yet" marker for the nested object so
	// that its first field is not preceded by a comma.
	d.firstKeySeen = append(d.firstKeySeen, false)

	io.WriteString(d.w, "{")
}
// EndHash closes the nested JSON object opened by StartHash and pops that
// object's entry off the separator stack. The key argument is not used.
func (d *decoder) EndHash(key []byte) {
	io.WriteString(d.w, "}")

	enclosing := len(d.firstKeySeen) - 1
	d.firstKeySeen = d.firstKeySeen[:enclosing]
}
// Hset writes one hash field as a "field":value member of the object
// currently being written.
func (d *decoder) Hset(key, field, value []byte) {
	_ = key // the hash's own key was already written by StartHash

	d.writeKey(field)
	d.writeValue(value)
}
// Sets, lists and sorted sets are not supported. The callbacks below are
// placeholders: each one panics as soon as it is invoked.

// StartSet panics: sets are not supported.
func (d *decoder) StartSet(key []byte, cardinality, expiry int64) {
	panic("sets unimplemented")
}

// Sadd panics: sets are not supported.
func (d *decoder) Sadd(key, member []byte) {
	panic("sets unimplemented")
}

// EndSet panics: sets are not supported.
func (d *decoder) EndSet(key []byte) {
	panic("sets unimplemented")
}

// StartList panics: lists are not supported.
func (d *decoder) StartList(key []byte, length, expiry int64) {
	panic("lists unimplemented")
}

// Rpush panics: lists are not supported.
func (d *decoder) Rpush(key, value []byte) {
	panic("lists unimplemented")
}

// EndList panics: lists are not supported.
func (d *decoder) EndList(key []byte) {
	panic("lists unimplemented")
}

// StartZSet panics: sorted sets are not supported.
func (d *decoder) StartZSet(key []byte, cardinality, expiry int64) {
	panic("zsets unimplemented")
}

// Zadd panics: sorted sets are not supported.
func (d *decoder) Zadd(key []byte, score float64, member []byte) {
	panic("zsets unimplemented")
}

// EndZSet panics: sorted sets are not supported.
func (d *decoder) EndZSet(key []byte) {
	panic("zsets unimplemented")
}
// writeKey writes the "," separator needed before every member except the
// first one of the object currently being written, followed by key as a JSON
// string and the ":" that introduces the value.
func (d *decoder) writeKey(key []byte) {
	top := len(d.firstKeySeen) - 1
	if d.firstKeySeen[top] {
		io.WriteString(d.w, `,`)
	} else {
		d.firstKeySeen[top] = true
	}
	io.WriteString(d.w, jsonQuoteASCII(key))
	io.WriteString(d.w, `:`)
}

// writeValue writes value either as a bare JSON number or as a JSON string.
//
// Redis stores numbers as ascii :(
// Looks like rdbtool just guesses which are ints. Do that too as a quick
// hack: anything that looks like a non-negative decimal integer is written
// unquoted, and everything else is written as a string.
func (d *decoder) writeValue(value []byte) {
	if isJSONInt(value) {
		io.WriteString(d.w, string(value))
		return
	}
	io.WriteString(d.w, jsonQuoteASCII(value))
}

// isJSONInt reports whether value can be copied verbatim into the output as
// a JSON number: it must be non-empty, consist only of ASCII digits, and have
// no leading zero (JSON forbids "007"; a lone "0" is fine).
func isJSONInt(value []byte) bool {
	if len(value) == 0 {
		return false
	}
	if value[0] == '0' && len(value) > 1 {
		return false
	}
	for _, b := range value {
		if b < '0' || b > '9' {
			return false
		}
	}
	return true
}

// jsonQuoteASCII returns raw as a double-quoted, ASCII-only JSON string.
//
// strconv.QuoteToASCII is not suitable here because it produces Go escapes
// (\a, \v, \xNN, \UNNNNNNNN) that JSON parsers reject. This helper emits only
// the escapes JSON defines: \" \\ \b \f \n \r \t and \uXXXX, using a
// surrogate pair for code points above U+FFFF. Bytes that are not valid
// UTF-8 are written as \ufffd.
func jsonQuoteASCII(raw []byte) string {
	out := make([]byte, 0, len(raw)+2)
	out = append(out, '"')
	// Ranging over a string yields U+FFFD for every invalid UTF-8 byte.
	for _, r := range string(raw) {
		switch {
		case r == '"' || r == '\\':
			out = append(out, '\\', byte(r))
		case r == '\b':
			out = append(out, '\\', 'b')
		case r == '\f':
			out = append(out, '\\', 'f')
		case r == '\n':
			out = append(out, '\\', 'n')
		case r == '\r':
			out = append(out, '\\', 'r')
		case r == '\t':
			out = append(out, '\\', 't')
		case r >= 0x20 && r < 0x7f:
			// Printable ASCII is copied through unchanged.
			out = append(out, byte(r))
		case r > 0xffff:
			// Outside the BMP: encode as a UTF-16 surrogate pair.
			r -= 0x10000
			out = appendJSONUnicodeEscape(out, 0xd800+(r>>10))
			out = appendJSONUnicodeEscape(out, 0xdc00+(r&0x3ff))
		default:
			// Remaining control characters, DEL and non-ASCII BMP runes.
			out = appendJSONUnicodeEscape(out, r)
		}
	}
	out = append(out, '"')
	return string(out)
}

// appendJSONUnicodeEscape appends the six-character escape \uXXXX for the
// 16-bit code unit to dst, using lower-case hex digits zero-padded to four.
func appendJSONUnicodeEscape(dst []byte, unit rune) []byte {
	dst = append(dst, '\\', 'u')
	switch {
	case unit < 0x10:
		dst = append(dst, "000"...)
	case unit < 0x100:
		dst = append(dst, "00"...)
	case unit < 0x1000:
		dst = append(dst, '0')
	}
	return strconv.AppendUint(dst, uint64(unit), 16)
}
// main decodes the single RDB file named on the command line and writes the
// JSON rendering to standard output. Any failure is logged and terminates
// the program.
func main() {
	if argc := len(os.Args); argc != 2 {
		log.Fatal("Usage: rdb2json FILENAME")
	}

	in, openErr := os.Open(os.Args[1])
	if openErr != nil {
		log.Fatal(openErr)
	}

	jsonOut := newDecoder(os.Stdout)
	decodeErr := rdb.Decode(in, jsonOut)
	if decodeErr != nil {
		log.Fatal(decodeErr)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment