Skip to content

Instantly share code, notes, and snippets.

@campoy
Last active August 1, 2019 23:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save campoy/fc62ad4457fc5aa4ab1878458d4d6bad to your computer and use it in GitHub Desktop.
Save campoy/fc62ad4457fc5aa4ab1878458d4d6bad to your computer and use it in GitHub Desktop.
Estimating size for protobuf in memory
package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"unsafe"
"github.com/creachadair/misctools/sizeof"
"github.com/dgraph-io/dgo/protos/api"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/golang/protobuf/proto"
)
// verbose is the -v command-line flag; when set, logf prints its output.
var verbose = flag.Bool("v", false, "verbose")

// logf formats its arguments like fmt.Printf and writes them to standard
// output, but only when the -v flag is set. Otherwise it does nothing.
//
// Arguments are evaluated by the caller before logf runs, so any cost of
// building them is paid even when verbose output is disabled.
func logf(format string, args ...interface{}) {
	if !*verbose {
		return
	}
	fmt.Printf(format, args...)
}
// main reads every file named on the command line, decodes it as a
// pb.PostingList and prints one CSV-like row per file comparing the encoded
// size with two in-memory size estimates: the hand-written sizePostingList
// and sizeof.DeepSize. After all files it prints the average of each ratio.
//
// The program exits with an error when no files are given, when a file
// cannot be read or decoded, or when a file is empty (its ratio would be a
// division by zero).
func main() {
	flag.Parse()
	// Without inputs the averages below would be 0/0 and print NaN.
	if flag.NArg() == 0 {
		log.Fatal("no input files: pass one or more files containing an encoded PostingList")
	}

	ratioSum := 0.0
	ratioSum2 := 0.0
	fmt.Printf("% 10s, % 10s, % 10s, % 10s, % 10s, % 10s, % 10s\n",
		"encoded", "in mem", "in mem 2", "ratio", "ratio2", "blocks", "postings")
	for _, arg := range flag.Args() {
		b, err := ioutil.ReadFile(arg)
		if err != nil {
			log.Fatal(err)
		}
		encoded := len(b)
		// A zero-length file would make both ratios +Inf and poison the
		// averages, so reject it explicitly.
		if encoded == 0 {
			log.Fatalf("%s: file is empty, cannot compute a size ratio", arg)
		}
		var pl pb.PostingList
		if err := proto.Unmarshal(b, &pl); err != nil {
			// The decode error does not mention the file, so add it.
			log.Fatalf("%s: %v", arg, err)
		}

		inMem := sizePostingList(&pl)
		inMem2 := sizeof.DeepSize(pl)
		ratio := float64(inMem) / float64(encoded)
		ratioSum += ratio
		ratio2 := float64(inMem2) / float64(encoded)
		ratioSum2 += ratio2

		// Pack is a pointer and may be nil (sizePack handles that case too);
		// dereferencing it unconditionally would panic.
		blocks := 0
		if pl.Pack != nil {
			blocks = len(pl.Pack.Blocks)
		}
		postings := len(pl.Postings)
		fmt.Printf("% 10d, % 10d, % 10d, % 10.2f, % 10.2f, % 10d, % 10d\n",
			encoded, inMem, inMem2, ratio, ratio2, blocks, postings)
	}
	fmt.Printf("avg ratio: %.2f\n", ratioSum/float64(flag.NArg()))
	fmt.Printf("avg ratio 2: %.2f\n", ratioSum2/float64(flag.NArg()))
}
// sizePostingList estimates how many bytes pl occupies in memory.
//
// The estimate is the size of the PostingList struct itself, plus the
// sizePack estimate for pl.Pack, plus the sizePosting estimate for every
// element of pl.Postings, plus the length of pl.XXX_unrecognized. The
// pointers stored in the Postings slice's backing array are not counted.
// It returns 0 when pl is nil.
//
// NOTE(review): any other variable-length fields of pb.PostingList are not
// accounted for here — confirm against the message definition.
func sizePostingList(pl *pb.PostingList) uintptr {
	logf("PostingList: %s", proto.MarshalTextString(pl))
	if pl == nil {
		return 0
	}
	// Measure the struct, not the pointer: unsafe.Sizeof(pl) would only be
	// the size of one pointer and undercount the struct's own fields.
	s := unsafe.Sizeof(*pl)
	s += sizePack(pl.Pack)
	for _, p := range pl.Postings {
		s += sizePosting(p)
	}
	s += uintptr(len(pl.XXX_unrecognized))
	return s
}
// sizePack estimates how many bytes the UidPack p occupies in memory.
//
// It adds up the size of the UidPack struct, and for every block the size of
// the block struct plus the lengths of its Deltas and XXX_unrecognized, and
// finally the length of the pack's own XXX_unrecognized. A nil p yields 0.
func sizePack(p *pb.UidPack) uintptr {
	logf("UidPack: %s", proto.MarshalTextString(p))
	if p == nil {
		return 0
	}

	total := unsafe.Sizeof(*p) + uintptr(len(p.XXX_unrecognized))
	for i := range p.Blocks {
		blk := p.Blocks[i]
		total += unsafe.Sizeof(*blk) +
			uintptr(len(blk.Deltas)) +
			uintptr(len(blk.XXX_unrecognized))
	}
	return total
}
// sizePosting estimates how many bytes the Posting p occupies in memory.
//
// The result is the size of the Posting struct plus the lengths of its
// Value, LangTag, Label and XXX_unrecognized fields, plus the sizeFacet
// estimate of each entry in Facets. A nil p yields 0.
func sizePosting(p *pb.Posting) uintptr {
	logf("Posting: %s", proto.MarshalTextString(p))
	if p == nil {
		return 0
	}

	// Fixed-size part plus the payload of every variable-length field.
	total := unsafe.Sizeof(*p) +
		uintptr(len(p.Value)) +
		uintptr(len(p.LangTag)) +
		uintptr(len(p.Label)) +
		uintptr(len(p.XXX_unrecognized))

	for i := range p.Facets {
		total += sizeFacet(p.Facets[i])
	}
	return total
}
// sizeFacet estimates how many bytes the Facet f occupies in memory.
//
// The estimate is the size of the Facet struct itself plus the lengths of
// Key, Value, every string in Tokens, Alias and XXX_unrecognized. The string
// headers stored in the Tokens slice's backing array are not counted.
// It returns 0 when f is nil.
func sizeFacet(f *api.Facet) uintptr {
	logf("Facet: %s", proto.MarshalTextString(f))
	if f == nil {
		return 0
	}
	// Measure the struct, not the pointer: unsafe.Sizeof(f) would only be
	// the size of one pointer and undercount the struct's own fields.
	s := unsafe.Sizeof(*f)
	s += uintptr(len(f.Key))
	s += uintptr(len(f.Value))
	for _, t := range f.Tokens {
		s += uintptr(len(t))
	}
	s += uintptr(len(f.Alias))
	s += uintptr(len(f.XXX_unrecognized))
	return s
}
encoded, in mem, in mem 2, ratio, ratio2, blocks, postings
4394, 12186, 40890, 2.77, 9.31, 41, 41
41824, 47604, 108308, 1.14, 2.59, 107, 0
44259, 50470, 111382, 1.14, 2.52, 115, 0
4481, 5031, 13759, 1.12, 3.07, 9, 0
4459, 4955, 12787, 1.11, 2.87, 8, 0
4692, 5240, 13560, 1.12, 2.89, 9, 0
5086, 5690, 14954, 1.12, 2.94, 10, 0
6026, 6681, 20033, 1.11, 3.32, 11, 0
7344, 8160, 25856, 1.11, 3.52, 14, 0
5397, 5999, 16295, 1.11, 3.02, 10, 0
22497, 25236, 53972, 1.12, 2.40, 50, 0
13902, 15466, 31834, 1.11, 2.29, 28, 0
9692, 10775, 28655, 1.11, 2.96, 19, 0
6939, 7701, 19293, 1.11, 2.78, 13, 0
5864, 6519, 19919, 1.11, 3.40, 11, 0
4502, 5052, 13660, 1.12, 3.03, 9, 0
6731, 7493, 22157, 1.11, 3.29, 13, 0
6931, 7693, 24085, 1.11, 3.47, 13, 0
6068, 6780, 19132, 1.12, 3.15, 12, 0
4864, 5412, 13444, 1.11, 2.76, 9, 0
6944, 7706, 24554, 1.11, 3.54, 13, 0
5177, 5778, 14850, 1.12, 2.87, 10, 0
4554, 5105, 12825, 1.12, 2.82, 9, 0
22256, 25003, 52419, 1.12, 2.36, 50, 0
7282, 8097, 23465, 1.11, 3.22, 14, 0
4624, 5172, 13204, 1.12, 2.86, 9, 0
8122, 9046, 28966, 1.11, 3.57, 16, 0
5242, 5844, 15636, 1.11, 2.98, 10, 0
5879, 6534, 18038, 1.11, 3.07, 11, 0
4369, 4864, 11808, 1.11, 2.70, 8, 0
4545, 5095, 13439, 1.12, 2.96, 9, 0
15556, 17335, 35055, 1.11, 2.25, 32, 0
5435, 6037, 17725, 1.11, 3.26, 10, 0
4610, 5158, 13142, 1.12, 2.85, 9, 0
7440, 8257, 25929, 1.11, 3.49, 14, 0
5757, 6413, 18517, 1.11, 3.22, 11, 0
7955, 8824, 25400, 1.11, 3.19, 15, 0
4573, 5123, 13179, 1.12, 2.88, 9, 0
7159, 7975, 22815, 1.11, 3.19, 14, 0
7242, 8057, 25537, 1.11, 3.53, 14, 0
10200, 11336, 26728, 1.11, 2.62, 20, 0
7426, 8241, 26297, 1.11, 3.54, 14, 0
6297, 7006, 20078, 1.11, 3.19, 12, 0
6236, 6944, 19200, 1.11, 3.08, 12, 0
5493, 6095, 16391, 1.11, 2.98, 10, 0
8306, 9228, 30540, 1.11, 3.68, 16, 0
4787, 5335, 13679, 1.11, 2.86, 9, 0
9660, 10742, 27494, 1.11, 2.85, 19, 0
4493, 5043, 12907, 1.12, 2.87, 9, 0
5917, 6572, 17932, 1.11, 3.03, 11, 0
5192, 5793, 15273, 1.12, 2.94, 10, 0
5620, 6275, 17851, 1.12, 3.18, 11, 0
5975, 6686, 15918, 1.12, 2.66, 12, 0
6430, 7139, 21243, 1.11, 3.30, 12, 0
13439, 15003, 30195, 1.12, 2.25, 28, 0
9739, 10821, 28365, 1.11, 2.91, 19, 0
4973, 5579, 12899, 1.12, 2.59, 10, 0
4919, 5467, 13715, 1.11, 2.79, 9, 0
21960, 24593, 52569, 1.12, 2.39, 48, 0
4671, 5219, 13659, 1.12, 2.92, 9, 0
39749, 45287, 101343, 1.14, 2.55, 102, 0
9259, 10349, 25397, 1.12, 2.74, 19, 0
10364, 11616, 29184, 1.12, 2.82, 22, 0
5756, 6470, 15318, 1.12, 2.66, 12, 0
6514, 7222, 21638, 1.11, 3.32, 12, 0
13635, 15205, 37837, 1.12, 2.77, 28, 0
7703, 8632, 20824, 1.12, 2.70, 16, 0
7094, 7915, 18435, 1.12, 2.60, 14, 0
7434, 8249, 24577, 1.11, 3.31, 14, 0
6606, 7315, 21035, 1.11, 3.18, 12, 0
16753, 18688, 40704, 1.12, 2.43, 35, 0
6858, 7619, 20411, 1.11, 2.98, 13, 0
8658, 9639, 25151, 1.11, 2.90, 17, 0
9095, 10129, 25625, 1.11, 2.82, 18, 0
10327, 11523, 28027, 1.12, 2.71, 21, 0
4538, 5088, 12832, 1.12, 2.83, 9, 0
5991, 6703, 16295, 1.12, 2.72, 12, 0
7319, 8134, 25302, 1.11, 3.46, 14, 0
11422, 12719, 31303, 1.11, 2.74, 23, 0
49624, 56748, 142444, 1.14, 2.87, 132, 0
49459, 56528, 138064, 1.14, 2.79, 131, 0
7915, 8782, 22526, 1.11, 2.85, 15, 0
9580, 10667, 27683, 1.11, 2.89, 19, 0
9444, 10473, 30769, 1.11, 3.26, 18, 0
6228, 6936, 20536, 1.11, 3.30, 12, 0
6346, 7057, 16985, 1.11, 2.68, 12, 0
4630, 5180, 12636, 1.12, 2.73, 9, 0
10326, 11463, 26879, 1.11, 2.60, 20, 0
19989, 22353, 49593, 1.12, 2.48, 43, 0
8523, 9500, 26428, 1.11, 3.10, 17, 0
6903, 7664, 20144, 1.11, 2.92, 13, 0
8076, 9000, 26952, 1.11, 3.34, 16, 0
5378, 5980, 15964, 1.11, 2.97, 10, 0
5706, 6361, 18177, 1.11, 3.19, 11, 0
12879, 14453, 36317, 1.12, 2.82, 28, 0
11961, 13371, 32499, 1.12, 2.72, 25, 0
32842, 37195, 78243, 1.13, 2.38, 80, 0
5459, 6060, 17196, 1.11, 3.15, 10, 0
20961, 23489, 49865, 1.12, 2.38, 46, 0
4970, 5576, 14504, 1.12, 2.92, 10, 0
avg ratio: 1.13
avg ratio 2: 3.00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment