Skip to content

Instantly share code, notes, and snippets.

@florinutz
Created March 29, 2019 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save florinutz/24d972665e1f9a047b9697a4fceebf4d to your computer and use it in GitHub Desktop.
Save florinutz/24d972665e1f9a047b9697a4fceebf4d to your computer and use it in GitHub Desktop.
Compressing a boltdb database using gz
package main
import (
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"encoding/gob"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/florinutz/filme/pkg/collector"
	"github.com/florinutz/filme/pkg/collector/google/search"
	"github.com/pkg/errors"
	bolt "go.etcd.io/bbolt"
)
// urls is the list of addresses main fetches; it holds only search.TestSearch.
var urls = []string{search.TestSearch}
// BucketName is the name of the bolt bucket that main writes its entries to.
const BucketName = "store"
func main() {
if len(os.Args) < 2 {
log.Fatal("output path is required as argument")
}
outputPath := strings.Join(os.Args[1:], " ")
reqs, err := collector.GenerateSimpleRequests(urls, func(req *http.Request) {
req.Header.Set("Accept-Language", "en-US;q=0.8,es;q=0.5,fr;q=0.3")
})
if err != nil {
log.Fatal(err)
}
// load db from compressed outputPath of create a new tmp file for it
db, err := loadDbFromGz(outputPath)
if err != nil {
var tmpFile *os.File
tmpFile, err = ioutil.TempFile("", "bolt-*.db")
if err != nil {
log.Fatal("cannot create temporary file")
}
defer os.Remove(tmpFile.Name())
db, err = bolt.Open(tmpFile.Name(), 0600, &bolt.Options{Timeout: 1 * time.Second})
if err != nil {
log.Fatal("cannot create temporary db")
}
}
defer db.Close()
responses, errs := collector.FetchUrls(reqs, *http.DefaultClient)
for _, err := range errs {
fmt.Fprintln(os.Stderr, err)
}
if err = db.Update(func(tx *bolt.Tx) error {
bucket, err := tx.CreateBucketIfNotExists([]byte(BucketName))
if err != nil {
return fmt.Errorf("create bucket: %s", err)
}
if bucket == nil {
return fmt.Errorf("Could not retrieve bucket '%s'\n", BucketName)
}
for _, resp := range responses {
var buf bytes.Buffer
encoder := gob.NewEncoder(&buf)
err := encoder.Encode(resp.Request)
if err != nil {
return errors.Wrapf(err, "couldn't encode a key from the request into bucket %s", BucketName)
}
b := sha256.Sum256(buf.Bytes())
if err = bucket.Put(b[:], buf.Bytes()); err != nil {
return errors.Wrapf(err, "couldn't save response into bucket '%s'", BucketName)
}
}
return nil
}); err != nil {
log.Fatal(err)
}
err = dumpDbToGz(db, outputPath)
if err != nil {
log.Fatal(err)
}
}
// loadDbFromGz decompresses the gzip file at filename into a new temporary
// file and opens that file as a bolt db.
//
// On success the temporary file is left on disk because the returned db is
// backed by it; the caller should close the db and then remove db.Path().
// On failure the temporary file is deleted and a wrapped error is returned.
func loadDbFromGz(filename string) (*bolt.DB, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, errors.Wrapf(err, "could not open file '%s'", filename)
	}
	defer f.Close()

	zr, err := gzip.NewReader(f)
	if err != nil {
		return nil, errors.Wrapf(err, "could not instantiate gz reader from '%s'", filename)
	}
	defer zr.Close()

	tmpFile, err := ioutil.TempFile("", "bolt-*.db")
	if err != nil {
		return nil, errors.Wrap(err, "cannot create temporary file")
	}
	tmpPath := tmpFile.Name()

	// Stream the whole decompressed payload into the temp file. A single Read
	// into a nil slice would transfer zero bytes and yield an empty db.
	if _, err = io.Copy(tmpFile, zr); err != nil {
		tmpFile.Close()
		os.Remove(tmpPath)
		return nil, errors.Wrapf(err, "could not read gz contents from '%s'", filename)
	}
	// Close before bolt opens the path so all data is handed to the OS and no
	// descriptor is leaked.
	if err = tmpFile.Close(); err != nil {
		os.Remove(tmpPath)
		return nil, errors.Wrap(err, "can't write contents to tmp file")
	}

	db, err := bolt.Open(tmpPath, 0640, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		os.Remove(tmpPath)
		return nil, errors.Wrapf(err, "couldn't create/open bolt db at path '%s'", tmpPath)
	}
	return db, nil
}
// dumpDbToGz writes a gzip-compressed copy of db to filename, creating the
// file if needed and replacing any previous contents. It returns the first
// error encountered while opening, writing or closing.
func dumpDbToGz(db *bolt.DB, filename string) error {
	// O_TRUNC discards earlier contents, so a dump shorter than the previous
	// one does not leave stale bytes after the end of the gzip stream.
	f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return err
	}

	zw := gzip.NewWriter(f)
	zw.Comment = "unit test db"

	err = db.View(func(tx *bolt.Tx) error {
		_, err := tx.WriteTo(zw)
		return err
	})

	// Closing the gzip writer flushes buffered data and writes the footer, so
	// its error (and the file's) decides whether the archive is complete.
	if closeErr := zw.Close(); err == nil {
		err = closeErr
	}
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	return err
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment