Skip to content

Instantly share code, notes, and snippets.

@harshavardhana
Created August 11, 2019 23:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harshavardhana/2ffc990bf3d3ee7ee935f5b146413a72 to your computer and use it in GitHub Desktop.
Save harshavardhana/2ffc990bf3d3ee7ee935f5b146413a72 to your computer and use it in GitHub Desktop.
go test -bench .
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"errors"
"fmt"
"os"
"strconv"
"testing"
"time"
jsoniter "github.com/json-iterator/go"
"github.com/valyala/fastjson"
)
/* Benchmarks to compare the performance of parsing of xlMeta for a growing number of parts.
encoding/json's Unmarshal, jsoniter and fastjson are compared for unmarshalling speed.
For this purpose, sample xl.json data is created for part counts of 10, 100, 500, 1000, 5000 and 10000,
and then parsed using the current implementation (json.Unmarshal / jsoniter) and the new fastjson
library which is under scrutiny in the current benchmarks.
*/
// objectPartInfo - describes a single object part as serialized in
// `xl.json`: its part number, on-disk name, ETag and size in bytes.
type objectPartInfo struct {
Number int `json:"number"`
Name string `json:"name"`
ETag string `json:"etag"`
Size int64 `json:"size"`
}
// checkSumInfo - carries the bit-rot checksum of one part: the part name,
// the hashing algorithm used and the hex/encoded hash value.
type checkSumInfo struct {
Name string `json:"name"`
Algorithm string `json:"algorithm"`
Hash string `json:"hash"`
}
// Constant indicates current bit-rot algo used when creating objects.
// NOTE(review): unused in this benchmark file; the sample data hard-codes
// "blake2b" directly in getSampleXLMeta.
const (
bitRotAlgo = "blake2b"
)
// statInfo - carries stat information of the object.
type statInfo struct {
Size int64 `json:"size"` // Size of the object `xl.json`.
ModTime time.Time `json:"modTime"` // ModTime of the object `xl.json` (serialized as RFC3339).
}
// erasureInfo - holds the erasure-coding parameters of an object:
// algorithm name, data/parity block counts, block size, this disk's
// index, the block distribution order and per-part checksums.
type erasureInfo struct {
Algorithm string `json:"algorithm"`
DataBlocks int `json:"data"`
ParityBlocks int `json:"parity"`
BlockSize int64 `json:"blockSize"`
Index int `json:"index"`
Distribution []int `json:"distribution"`
Checksum []checkSumInfo `json:"checksum,omitempty"`
}
// A xlMetaV1 represents `xl.json` metadata header.
type xlMetaV1 struct {
Version string `json:"version"` // Version of the current `xl.json`.
Format string `json:"format"` // Format of the current `xl.json`.
Stat statInfo `json:"stat"` // Stat of the current object `xl.json`.
// Erasure coded info for the current object `xl.json`.
Erasure erasureInfo `json:"erasure"`
// Minio release tag for current object `xl.json`.
Minio struct {
Release string `json:"release"`
} `json:"minio"`
// Metadata map for current object `xl.json`.
Meta map[string]string `json:"meta,omitempty"`
// Captures all the individual object parts of `xl.json`.
Parts []objectPartInfo `json:"parts,omitempty"`
}
// newXLMetaV1 - constructs a fresh xlMetaV1 with fixed version/format tags,
// a 5 data + 5 parity Reed-Solomon erasure configuration, the current UTC
// modification time, and two sample metadata entries.
func newXLMetaV1() xlMetaV1 {
	meta := xlMetaV1{
		Version: "1.0.0",
		Format:  "xl",
	}
	meta.Minio.Release = "1.0.0"
	meta.Erasure = erasureInfo{
		Algorithm:    "klauspost/reedsolomon/vandermonde",
		DataBlocks:   5,
		ParityBlocks: 5,
		BlockSize:    10485760,
		Distribution: []int{9, 10, 1, 2, 3, 4, 5, 6, 7, 8},
	}
	meta.Stat = statInfo{
		Size:    int64(20),
		ModTime: time.Now().UTC(),
	}
	// Sample user metadata.
	meta.Meta = map[string]string{
		"testKey1": "val1",
		"testKey2": "val2",
	}
	return meta
}
// AddObjectCheckSum records the checksum for the part at index checkSumNum.
// The Erasure.Checksum slice must already be sized to hold that index.
func (m *xlMetaV1) AddObjectCheckSum(checkSumNum int, name string, hash string, algo string) {
	m.Erasure.Checksum[checkSumNum] = checkSumInfo{
		Name:      name,
		Algorithm: algo,
		Hash:      hash,
	}
}
// AddObjectPart - add a new object part in order.
// The Parts slice must already be sized to hold index partNumber.
func (m *xlMetaV1) AddObjectPart(partNumber int, partName string, partETag string, partSize int64) {
	// Store the new part info at its slot.
	m.Parts[partNumber] = objectPartInfo{
		Number: partNumber,
		Name:   partName,
		ETag:   partETag,
		Size:   partSize,
	}
}
// getXLMetaBytes returns a marshalled sample xl.json document containing
// totalParts parts. Panics on marshal failure (test fixture only).
func getXLMetaBytes(totalParts int) []byte {
	buf, err := json.Marshal(getSampleXLMeta(totalParts))
	if err != nil {
		panic(err)
	}
	return buf
}
// getSampleXLMeta builds an xlMetaV1 populated with totalParts parts and
// matching checksum entries, used as the benchmark fixture.
func getSampleXLMeta(totalParts int) xlMetaV1 {
	meta := newXLMetaV1()
	// One checksum entry per part.
	meta.Erasure.Checksum = make([]checkSumInfo, totalParts)
	meta.Parts = make([]objectPartInfo, totalParts)
	// Hard-coded hash and ETag values: we are benchmarking parsing of
	// xl.json, so only the size of the strings matters, not their content.
	const (
		sampleHash = "a23f5eff248c4372badd9f3b2455a285cd4ca86c3d9a570b091d3fc5cd7ca6d9484bbea3f8c5d8d4f84daae96874419eda578fd736455334afbac2c924b3915a"
		sampleETag = "d3fdd79cc3efd5fe5c068d7be397934b"
	)
	for idx := 0; idx < totalParts; idx++ {
		name := "part." + strconv.Itoa(idx+1)
		meta.AddObjectCheckSum(idx, name, sampleHash, "blake2b")
		meta.AddObjectPart(idx, name, sampleETag, 67108864)
	}
	return meta
}
// parseXLStat extracts the `stat` section (size and modTime) from a parsed
// xl.json document. Returns an error when the stat object is missing, or
// when its modTime field is absent or not valid RFC3339.
func parseXLStat(v *fastjson.Value) (si statInfo, err error) {
	// obtain stat info.
	stat := statInfo{}
	st := v.GetObject("stat")
	// Guard against a missing or non-object "stat" field: fastjson's
	// (*Object).Get panics on a nil receiver, so report corruption instead.
	if st == nil {
		return si, errors.New("corrupted")
	}
	modTimeB := st.Get("modTime").GetStringBytes()
	if modTimeB == nil {
		return si, errors.New("corrupted")
	}
	// fetching modTime.
	stat.ModTime, err = time.Parse(time.RFC3339, string(modTimeB))
	if err != nil {
		return si, err
	}
	// obtain Stat.Size .
	stat.Size = st.Get("size").GetInt64()
	return stat, nil
}
// parseXLVersion returns the top-level "version" field as a string.
func parseXLVersion(v *fastjson.Value) string {
	versionBytes := v.GetStringBytes("version")
	return string(versionBytes)
}
// parseXLFormat returns the top-level "format" field as a string.
func parseXLFormat(v *fastjson.Value) string {
	formatBytes := v.GetStringBytes("format")
	return string(formatBytes)
}
// parseXLRelease returns the nested "minio.release" field as a string.
func parseXLRelease(v *fastjson.Value) string {
	releaseBytes := v.GetStringBytes("minio", "release")
	return string(releaseBytes)
}
// parseXLErasureInfo extracts the `erasure` section — coding parameters,
// block distribution and per-part checksums — from a parsed xl.json
// document. Returns an error when the section is missing, a distribution
// entry is not an integer, or a checksum entry lacks name/algorithm.
func parseXLErasureInfo(v *fastjson.Value) (erasureInfo, error) {
	erasure := erasureInfo{}
	es := v.GetObject("erasure")
	// Guard against a missing or non-object "erasure" field: fastjson's
	// (*Object).Get panics on a nil receiver, so report corruption instead.
	if es == nil {
		return erasure, errors.New("corrupted")
	}
	// parse the xlV1Meta.Erasure.Distribution array.
	disResult := es.Get("distribution").GetArray()
	var err error
	distribution := make([]int, len(disResult))
	for i, dis := range disResult {
		distribution[i], err = dis.Int()
		if err != nil {
			return erasure, err
		}
	}
	erasure.Distribution = distribution
	erasure.Algorithm = string(es.Get("algorithm").GetStringBytes())
	erasure.DataBlocks = es.Get("data").GetInt()
	erasure.ParityBlocks = es.Get("parity").GetInt()
	erasure.BlockSize = es.Get("blockSize").GetInt64()
	erasure.Index = es.Get("index").GetInt()
	checkSumsResult := es.Get("checksum").GetArray()
	// Parse xlMetaV1.Erasure.Checksum array.
	checkSums := make([]checkSumInfo, len(checkSumsResult))
	for i, ck := range checkSumsResult {
		algBytes := ck.GetStringBytes("algorithm")
		if algBytes == nil {
			return erasure, errors.New("bitrot error")
		}
		nameBytes := ck.GetStringBytes("name")
		if nameBytes == nil {
			return erasure, errors.New("corrupted")
		}
		checkSums[i] = checkSumInfo{
			Name:      string(nameBytes),
			Algorithm: string(algBytes),
			Hash:      string(ck.GetStringBytes("hash")),
		}
	}
	erasure.Checksum = checkSums
	return erasure, nil
}
// parseXLParts converts the parsed "parts" array into objectPartInfo
// values. A nil or empty input yields an empty slice.
func parseXLParts(partsResult []*fastjson.Value) []objectPartInfo {
	parts := make([]objectPartInfo, len(partsResult))
	for idx, entry := range partsResult {
		parts[idx] = objectPartInfo{
			Number: entry.GetInt("number"),
			Name:   string(entry.GetStringBytes("name")),
			ETag:   string(entry.GetStringBytes("etag")),
			Size:   entry.GetInt64("size"),
		}
	}
	return parts
}
// parseXLMetaMap collects the key/value pairs of the "meta" object into a
// plain string map. Returns an empty (non-nil) map when "meta" is absent.
func parseXLMetaMap(v *fastjson.Value) map[string]string {
	meta := make(map[string]string)
	v.GetObject("meta").Visit(func(key []byte, val *fastjson.Value) {
		meta[string(key)] = string(val.GetStringBytes())
	})
	return meta
}
// xl.json Parser pool, reused across goroutines to avoid allocating a
// fastjson.Parser per call.
var xlParserPool fastjson.ParserPool
// fastJSONMetaV1UnmarshalJSON constructs an xlMetaV1 from raw xl.json bytes
// using the `fastjson` library, retrieving each field individually.
// Returns an error on malformed JSON or when the checksum count does not
// match the part count.
func fastJSONMetaV1UnmarshalJSON(xlMetaBuf []byte) (xlMeta xlMetaV1, err error) {
	p := xlParserPool.Get()
	defer xlParserPool.Put(p)
	root, err := p.ParseBytes(xlMetaBuf)
	if err != nil {
		return xlMeta, err
	}
	// Top-level scalar fields.
	xlMeta.Version = parseXLVersion(root)
	xlMeta.Format = parseXLFormat(root)
	// Stat section.
	stat, err := parseXLStat(root)
	if err != nil {
		return xlMeta, err
	}
	xlMeta.Stat = stat
	// Erasure section.
	xlMeta.Erasure, err = parseXLErasureInfo(root)
	if err != nil {
		return xlMeta, err
	}
	// Check for scenario where checksum information is missing for some parts.
	partsResult := root.GetArray("parts")
	if len(xlMeta.Erasure.Checksum) != len(partsResult) {
		return xlMeta, errors.New("corrupted")
	}
	xlMeta.Parts = parseXLParts(partsResult)
	// Release tag and user metadata.
	xlMeta.Minio.Release = parseXLRelease(root)
	xlMeta.Meta = parseXLMetaMap(root)
	return xlMeta, nil
}
// benchmarkParseUnmarshalN benchmarks parsing of an xl.json payload with
// totalParts parts. The parser implementation is selected via the PARSER
// environment variable ("fastjson" or "jsoniter"); any other value makes
// the loop a no-op.
func benchmarkParseUnmarshalN(b *testing.B, totalParts int) {
	// Build the fixture once, outside the timed region.
	payload := getXLMetaBytes(totalParts)
	b.SetBytes(int64(len(payload)))
	b.ReportAllocs()
	impl := os.Getenv("PARSER")
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			switch impl {
			case "fastjson":
				// Errors deliberately ignored: the payload is known-good.
				fastJSONMetaV1UnmarshalJSON(payload)
			case "jsoniter":
				var meta xlMetaV1
				jsoniter.Unmarshal(payload, &meta)
			}
		}
	})
}
// BenchmarkParseUnmarshal runs the xl.json parsing benchmark across a range
// of part counts, one sub-benchmark per count.
func BenchmarkParseUnmarshal(b *testing.B) {
	partCounts := []int{10, 100, 500, 1000, 5000, 10000}
	for _, count := range partCounts {
		count := count // capture for the closure
		b.Run(fmt.Sprintf("N%d", count), func(b *testing.B) {
			benchmarkParseUnmarshalN(b, count)
		})
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment