Skip to content

Instantly share code, notes, and snippets.

@vmlinz
Forked from jszmajda/1readme.md
Last active August 29, 2015 14:26
Show Gist options
  • Save vmlinz/1d55e810b3ceb55ded90 to your computer and use it in GitHub Desktop.
Save vmlinz/1d55e810b3ceb55ded90 to your computer and use it in GitHub Desktop.
Data Serialization: JSON, MsgPack, ProtoBufs

Data Serialization Formats

JSON, MessagePack, and Google's Protocol Buffers are all awesome. Here's how they perform in different client environments (Ruby, Python, and Go) and how to serve them from your Rails app.

Some links:

Output from Ruby Client:

Running at 10000 times
               user     system      total        real
msgpack    0.230000   0.010000   0.240000 (  0.242138)
json       0.590000   0.030000   0.620000 (  0.630611)
protobuf   3.170000   0.040000   3.210000 (  3.256336)
 msgpack: 5.20 mb
    json: 9.78 mb
protobuf: 2.59 mb
 msgpack: 694k allocs
    json: 1240k allocs
protobuf: 2608k allocs

Output from Python Client:


Benchmark Report
================

Benchmark Serialization
-----------------------

    name | rank | runs |   mean |       sd | timesBaseline
---------|------|------|--------|----------|--------------
    json |    1 |    5 | 0.3613 | 0.007957 |           1.0
protobuf |    2 |    5 |  1.498 |  0.05966 | 4.14576325546
 msgpack |    3 |    5 |  2.687 |   0.0336 | 7.43663921844

Each of the above 15 runs were run in random, non-consecutive order by
`benchmark` v0.1.5 (http://jspi.es/benchmark) with Python 2.7.5
Darwin-13.1.0-x86_64 on 2014-04-25 18:40:40.

Output from Go Client:

BenchmarkJSONOb   50000     31776 ns/op    1555 B/op      83 allocs/op
BenchmarkJSON     50000     40147 ns/op    3506 B/op     196 allocs/op
BenchmarkPB      500000      5338 ns/op    1413 B/op      48 allocs/op
BenchmarkMPk     100000     22812 ns/op    2518 B/op     155 allocs/op
package main
import (
"testing"
"net/http"
"io/ioutil"
"github.com/vmihailenco/msgpack"
"encoding/json"
"code.google.com/p/goprotobuf/proto"
"userland"
)
// BenchmarkJSONOb measures encoding/json decoding of the /users.json payload
// into typed userland.User structs.
//
// The payload is fetched once from the local server; the timer is reset
// afterwards so that only the b.N decode iterations are measured. Any fetch,
// read or decode error aborts the benchmark instead of being ignored.
func BenchmarkJSONOb(b *testing.B) {
	client := &http.Client{Transport: &http.Transport{}}
	resp, err := client.Get("http://localhost:8080/users.json")
	if err != nil {
		b.Fatal(err)
	}
	defer resp.Body.Close()
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		b.Fatal(err)
	}

	var users []userland.User
	b.ResetTimer() // exclude the HTTP round trip from the measurement
	for i := 0; i < b.N; i++ {
		if err := json.Unmarshal(data, &users); err != nil {
			b.Fatal(err)
		}
		// Sanity check on the decoded data; GetLogin is nil-safe and the
		// length guard avoids an index-out-of-range on an empty payload.
		if len(users) == 0 || users[0].GetLogin() != "someguy" {
			panic("boom")
		}
	}
}
// BenchmarkJSON measures encoding/json decoding of the /users.json payload
// into untyped []map[string]interface{} values.
//
// The payload is fetched once from the local server; the timer is reset
// afterwards so that only the b.N decode iterations are measured. Any fetch,
// read or decode error aborts the benchmark instead of being ignored.
func BenchmarkJSON(b *testing.B) {
	client := &http.Client{Transport: &http.Transport{}}
	resp, err := client.Get("http://localhost:8080/users.json")
	if err != nil {
		b.Fatal(err)
	}
	defer resp.Body.Close()
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		b.Fatal(err)
	}

	var users []map[string]interface{}
	b.ResetTimer() // exclude the HTTP round trip from the measurement
	for i := 0; i < b.N; i++ {
		if err := json.Unmarshal(data, &users); err != nil {
			b.Fatal(err)
		}
		// Sanity check on the decoded data; the length guard avoids an
		// index-out-of-range on an empty payload.
		if len(users) == 0 || users[0]["login"] != "someguy" {
			panic("boom")
		}
	}
}
// BenchmarkPB measures Protocol Buffers decoding of the /users.protobuf
// payload into a userland.Users message.
//
// The payload is fetched once from the local server; the timer is reset
// afterwards so that only the b.N decode iterations are measured. Any fetch,
// read or decode error aborts the benchmark instead of being ignored.
func BenchmarkPB(b *testing.B) {
	client := &http.Client{Transport: &http.Transport{}}
	resp, err := client.Get("http://localhost:8080/users.protobuf")
	if err != nil {
		b.Fatal(err)
	}
	defer resp.Body.Close()
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		b.Fatal(err)
	}

	users := &userland.Users{}
	b.ResetTimer() // exclude the HTTP round trip from the measurement
	for i := 0; i < b.N; i++ {
		if err := proto.Unmarshal(data, users); err != nil {
			b.Fatal(err)
		}
		// Sanity check on the decoded data; GetLogin is nil-safe and the
		// length guard avoids an index-out-of-range on an empty payload.
		if len(users.User) == 0 || users.User[0].GetLogin() != "someguy" {
			panic("boom")
		}
	}
}
// BenchmarkMPk measures MessagePack decoding of the /users.msgpack payload
// into untyped []map[string]interface{} values.
//
// The payload is fetched once from the local server; the timer is reset
// afterwards so that only the b.N decode iterations are measured. Any fetch,
// read or decode error aborts the benchmark instead of being ignored.
func BenchmarkMPk(b *testing.B) {
	client := &http.Client{Transport: &http.Transport{}}
	resp, err := client.Get("http://localhost:8080/users.msgpack")
	if err != nil {
		b.Fatal(err)
	}
	defer resp.Body.Close()
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		b.Fatal(err)
	}

	var users []map[string]interface{}
	b.ResetTimer() // exclude the HTTP round trip from the measurement
	for i := 0; i < b.N; i++ {
		if err := msgpack.Unmarshal(data, &users); err != nil {
			b.Fatal(err)
		}
		// Sanity check on the decoded data; the length guard avoids an
		// index-out-of-range on an empty payload.
		if len(users) == 0 || users[0]["login"] != "someguy" {
			panic("boom")
		}
	}
}
# config/initializers/mime_types.rb
# Registers the two binary serialization formats so that controllers can
# respond to requests for the :msgpack and :protobuf formats.
{ msgpack: "bin/msgpack", protobuf: "bin/protobuf" }.each do |format_symbol, mime_string|
  Mime::Type.register mime_string, format_symbol
end
import benchmark
import httplib
import json
import msgpack
import userland_pb2
class Benchmark_Serialization(benchmark.Benchmark):
    """Deserialization benchmark for the JSON, MessagePack and protobuf endpoints.

    Every test method downloads its payload once from the local server and
    then decodes that same payload ``self.size`` times, checking the first
    user's login on each pass.
    """

    # Presumably the number of runs per test method used by the benchmark
    # runner -- confirm against the `benchmark` library documentation.
    each = 5

    def setUp(self):
        """Open the HTTP connection and set the number of decode iterations."""
        self.con = httplib.HTTPConnection("localhost:8080")
        self.size = 10000

    def test_json(self):
        """Decode the /users.json payload with the json module."""
        self.con.request("GET", "/users.json", headers={"Connection":" keep-alive"})
        payload = self.con.getresponse().read()
        for _ in xrange(self.size):
            decoded = json.loads(payload)
            if decoded[0]['login'] != "someguy":
                raise Exception('boom')

    def test_msgpack(self):
        """Decode the /users.msgpack payload with msgpack.unpackb."""
        self.con.request("GET", "/users.msgpack", headers={"Connection":" keep-alive"})
        payload = self.con.getresponse().read()
        for _ in xrange(self.size):
            decoded = msgpack.unpackb(payload)
            if decoded[0]['login'] != "someguy":
                raise Exception('boom')

    def test_protobuf(self):
        """Parse the /users.protobuf payload into a reused Users message."""
        users_message = userland_pb2.Users()
        self.con.request("GET", "/users.protobuf", headers={"Connection":" keep-alive"})
        payload = self.con.getresponse().read()
        for _ in xrange(self.size):
            users_message.ParseFromString(payload)
            if users_message.user[0].login != "someguy":
                raise Exception('boom')

    def tearDown(self):
        """Close the HTTP connection opened in setUp."""
        self.con.close()
# Script entry point: run the benchmark classes defined in this module and
# print the report in markdown with 4 significant digits.
if __name__ == '__main__':
    benchmark.main(format="markdown", numberFormat="%.4g")
    # NOTE(review): the original remark here read "could have written
    # benchmark.main(each=50) if the first class shouldn't have been run
    # 100 times." It appears to be left over from an example; the only
    # class in this file sets each = 5.
require 'benchmark'
require 'json'
require 'msgpack'
require 'net/http'
require_relative './userland.pb'
# Performs an HTTP GET against +target+ (a URL string) and returns the
# response body as a String.
def get(target)
  Net::HTTP.get(URI(target))
end
# Returns the process-wide count of objects allocated so far.
# The GC.stat key was renamed from :total_allocated_object to
# :total_allocated_objects in Ruby 2.2, so both spellings are tried.
def allocated_object_count
  stats = GC.stat
  stats[:total_allocated_objects] || stats[:total_allocated_object]
end

# Fetches "#{@uri_s}.#{type}" once and calls +block+ with the response body
# +runs+ times, recording the results for the final report.
#
# runs  - Integer number of times to invoke the block.
# type  - Symbol format/extension (e.g. :json); also the key in the result hashes.
# block - receives the raw response body String on every iteration.
#
# Side effects: stores the total bytes handed to the block in @vols[type] and
# the number of objects allocated while the block ran in @objs[type].
# Returns the allocation count.
def test(runs, type, &block)
  uri = "#{@uri_s}.#{type}"
  # Fetch before the allocation snapshot so that HTTP client allocations are
  # not attributed to the deserializer under test.
  res = get(uri)
  GC.start
  allocated_before = allocated_object_count
  runs.times { block.call(res) }
  allocated_after = allocated_object_count
  # bytesize avoids building a throwaway byte Array on every iteration,
  # which previously inflated both the timing and the allocation count.
  @vols[type] = res.bytesize * runs
  @objs[type] = allocated_after - allocated_before
end
# Result tables keyed by format symbol; both are filled in by `test`.
@vols = {}
@objs = {}
# Base URL; `test` appends ".<format>" to it.
@uri_s = "http://localhost:8080/users"
runs = 10000
puts "Running at #{runs} times"

# Time the deserialization of each format; every block also sanity-checks
# the first decoded user.
Benchmark.bm(8) do |bench|
  bench.report('msgpack') do
    test(runs, :msgpack) do |body|
      users = MessagePack.unpack(body)
      raise unless users[0]['login'] == 'someguy'
    end
  end

  bench.report('json') do
    test(runs, :json) do |body|
      users = JSON.parse(body)
      raise unless users[0]['login'] == 'someguy'
    end
  end

  bench.report('protobuf') do
    # A single message instance is reused for every parse.
    @data = Userland::Users.new
    test(runs, :protobuf) do |body|
      @data.parse_from_string(body)
      raise unless @data.user.first.login == 'someguy'
    end
  end
end

# Total payload volume handed to each decoder, in mebibytes.
@vols.each_pair do |format_name, byte_total|
  puts format('%8s: %0.2f mb', format_name, byte_total / 1024.0 / 1024.0)
end

# Objects allocated per format, in units of 1024.
@objs.each_pair do |format_name, alloc_total|
  puts format('%8s: %dk allocs', format_name, alloc_total / 1024)
end
# app/models/user.rb, Server-side user model
# == Schema Information
# Schema version: 20131207163108
#
# Table name: users
#
# id :integer not null, primary key
# login :string(255)
# password :string(255)
# created_at :datetime
# updated_at :datetime
#
require_relative '../../userland.pb'
# Server-side user record with a helper to convert itself into the
# Userland::User protobuf message.
class User < ActiveRecord::Base
  # Returns the first user whose login and password columns both equal the
  # given values, or nil when there is no match.
  #
  # NOTE(review): this compares the supplied password directly against the
  # stored column, which implies passwords are kept in plaintext. Acceptable
  # for a benchmark fixture only; do not reuse for real authentication.
  def self.login(login, password)
    User.where(login: login, password: password).first
  end

  # Builds a Userland::User message from this record's attributes.
  #
  # Attributes whose name ends in "_at" are converted with to_i because the
  # message declares them as int32. Nil attributes are left unset rather than
  # being written (a nil timestamp would otherwise be encoded as 0).
  #
  # Returns the populated Userland::User.
  def to_pb
    pb = Userland::User.new
    attributes.each do |name, value|
      next if value.nil?

      # Anchor on the suffix: the previous /_at/ match also hit names that
      # merely contain "_at" somewhere in the middle.
      value = value.to_i if name.end_with?('_at')
      pb.public_send("#{name}=", value)
    end
    pb
  end
end
// Code generated by protoc-gen-go.
// source: userland.proto
// DO NOT EDIT!
/*
Package userland is a generated protocol buffer package.
It is generated from these files:
userland.proto
It has these top-level messages:
Users
User
*/
package userland
import proto "code.google.com/p/goprotobuf/proto"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
// Users is the generated Go type for the `Users` message in userland.proto.
// Its single field is the repeated `user` field (field number 1 per the
// struct tag).
type Users struct {
User []*User `protobuf:"bytes,1,rep,name=user" json:"user,omitempty"`
// Excluded from JSON output by its tag; presumably used by the proto
// library to retain unknown fields -- confirm against goprotobuf docs.
XXX_unrecognized []byte `json:"-"`
}
// Reset replaces the message with a zero-valued Users.
func (m *Users) Reset() { *m = Users{} }
// String returns the message rendered by proto.CompactTextString.
func (m *Users) String() string { return proto.CompactTextString(m) }
// ProtoMessage is an empty marker method.
func (*Users) ProtoMessage() {}
// GetUser returns the User slice, or nil when the receiver itself is nil.
func (m *Users) GetUser() []*User {
if m != nil {
return m.User
}
return nil
}
// User is the generated Go type for the `User` message in userland.proto.
// Every scalar field is a pointer so that an unset field (nil) can be told
// apart from a zero value; use the Get* accessors for nil-safe reads.
// Per the struct tags, id and login are required ("req") and the remaining
// fields are optional ("opt").
type User struct {
Id *int32 `protobuf:"varint,1,req,name=id" json:"id,omitempty"`
Login *string `protobuf:"bytes,2,req,name=login" json:"login,omitempty"`
Password *string `protobuf:"bytes,3,opt,name=password" json:"password,omitempty"`
CreatedAt *int32 `protobuf:"varint,4,opt,name=created_at" json:"created_at,omitempty"`
UpdatedAt *int32 `protobuf:"varint,5,opt,name=updated_at" json:"updated_at,omitempty"`
// Excluded from JSON output by its tag; presumably used by the proto
// library to retain unknown fields -- confirm against goprotobuf docs.
XXX_unrecognized []byte `json:"-"`
}
// Reset replaces the message with a zero-valued User.
func (m *User) Reset() { *m = User{} }
// String returns the message rendered by proto.CompactTextString.
func (m *User) String() string { return proto.CompactTextString(m) }
// ProtoMessage is an empty marker method.
func (*User) ProtoMessage() {}
// GetId returns the id, or 0 when the receiver or the field is nil.
func (m *User) GetId() int32 {
if m != nil && m.Id != nil {
return *m.Id
}
return 0
}
// GetLogin returns the login, or "" when the receiver or the field is nil.
func (m *User) GetLogin() string {
if m != nil && m.Login != nil {
return *m.Login
}
return ""
}
// GetPassword returns the password, or "" when the receiver or the field is nil.
func (m *User) GetPassword() string {
if m != nil && m.Password != nil {
return *m.Password
}
return ""
}
// GetCreatedAt returns created_at, or 0 when the receiver or the field is nil.
func (m *User) GetCreatedAt() int32 {
if m != nil && m.CreatedAt != nil {
return *m.CreatedAt
}
return 0
}
// GetUpdatedAt returns updated_at, or 0 when the receiver or the field is nil.
func (m *User) GetUpdatedAt() int32 {
if m != nil && m.UpdatedAt != nil {
return *m.UpdatedAt
}
return 0
}
// init is emitted by the generator with an empty body; this package has
// nothing to register at load time.
func init() {
}
### Generated by rprotoc. DO NOT EDIT!
### <proto file: userland.proto>
# package userland;
# message Users {
# repeated User user = 1;
# }
#
# message User {
# required int32 id = 1;
# required string login = 2;
# optional string password = 3;
# optional int32 created_at = 4;
# optional int32 updated_at = 5;
# }
#
require 'protobuf/message/message'
require 'protobuf/message/enum'
require 'protobuf/message/service'
require 'protobuf/message/extend'
# Ruby message classes generated from userland.proto (see the commented
# definition at the top of this file). Field declarations follow the form
# <label> <type>, <name>, <field number>.
module Userland
# Container message: a repeated list of User messages in field 1.
class Users < ::Protobuf::Message
defined_in __FILE__
repeated :User, :user, 1
end
# A single user record; id and login are required, the rest optional.
class User < ::Protobuf::Message
defined_in __FILE__
required :int32, :id, 1
required :string, :login, 2
optional :string, :password, 3
# Timestamps are declared as int32 values.
optional :int32, :created_at, 4
optional :int32, :updated_at, 5
end
end
package userland;
// Users is the top-level payload: a list of User messages.
message Users {
repeated User user = 1;
}
// User describes one user record. id and login must always be present;
// the remaining fields may be omitted.
message User {
required int32 id = 1;
required string login = 2;
optional string password = 3;
// Timestamps are carried as int32 values.
optional int32 created_at = 4;
optional int32 updated_at = 5;
}
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: userland.proto
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
# File-level descriptor for userland.proto. serialized_pb is the generator's
# serialized form of the .proto file; the message descriptors below refer to
# byte ranges via serialized_start/serialized_end (presumably offsets into
# this string -- confirm against the protobuf docs).
DESCRIPTOR = _descriptor.FileDescriptor(
name='userland.proto',
package='userland',
serialized_pb='\n\x0euserland.proto\x12\x08userland\"%\n\x05Users\x12\x1c\n\x04user\x18\x01 \x03(\x0b\x32\x0e.userland.User\"[\n\x04User\x12\n\n\x02id\x18\x01 \x02(\x05\x12\r\n\x05login\x18\x02 \x02(\t\x12\x10\n\x08password\x18\x03 \x01(\t\x12\x12\n\ncreated_at\x18\x04 \x01(\x05\x12\x12\n\nupdated_at\x18\x05 \x01(\x05')
# Descriptor for the `Users` message, which has a single field `user`
# (field number 1), declared as `repeated User user = 1` in userland.proto.
_USERS = _descriptor.Descriptor(
name='Users',
full_name='userland.Users',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='user', full_name='userland.Users.user', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
# message_type is None here and is assigned after _USER has been defined.
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
serialized_start=28,
serialized_end=65,
)
# Descriptor for the `User` message. The five fields below mirror the
# userland.proto definition: id (1) and login (2) are required; password (3),
# created_at (4) and updated_at (5) are optional.
_USER = _descriptor.Descriptor(
name='User',
full_name='userland.User',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='id', full_name='userland.User.id', index=0,
number=1, type=5, cpp_type=1, label=2,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='login', full_name='userland.User.login', index=1,
number=2, type=9, cpp_type=9, label=2,
# unicode() is a Python 2 builtin, so this module targets Python 2.
has_default_value=False, default_value=unicode("", "utf-8"),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='password', full_name='userland.User.password', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=unicode("", "utf-8"),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='created_at', full_name='userland.User.created_at', index=3,
number=4, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='updated_at', full_name='userland.User.updated_at', index=4,
number=5, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
extension_ranges=[],
serialized_start=67,
serialized_end=158,
)
# Resolve the forward reference: Users.user holds User messages.
_USERS.fields_by_name['user'].message_type = _USER
# Register both message descriptors on the file descriptor by name.
DESCRIPTOR.message_types_by_name['Users'] = _USERS
DESCRIPTOR.message_types_by_name['User'] = _USER
# Message class for `Users`. The class body is empty apart from DESCRIPTOR;
# the __metaclass__ attribute (Python 2 syntax) hands class construction to
# GeneratedProtocolMessageType.
class Users(_message.Message):
    __metaclass__ = _reflection.GeneratedProtocolMessageType
    DESCRIPTOR = _USERS
    # @@protoc_insertion_point(class_scope:userland.Users)
# Message class for `User`. The class body is empty apart from DESCRIPTOR;
# the __metaclass__ attribute (Python 2 syntax) hands class construction to
# GeneratedProtocolMessageType.
class User(_message.Message):
    __metaclass__ = _reflection.GeneratedProtocolMessageType
    DESCRIPTOR = _USER
    # @@protoc_insertion_point(class_scope:userland.User)
# @@protoc_insertion_point(module_scope)
# why did I have to monkeypatch this? :P
class Time
  # Serializes this Time as its integer epoch seconds by delegating to
  # Integer#to_msgpack. All arguments are forwarded unchanged.
  def to_msgpack(*packer_args)
    epoch_seconds = to_i
    epoch_seconds.to_msgpack(*packer_args)
  end
end
# Serves the user list in each of the benchmarked serialization formats.
class UsersController < ApplicationController
  # Only formats that are actually handled are declared. The former
  # :marshall entry had no registered MIME type and no handler in #index.
  respond_to :json, :msgpack, :protobuf

  # GET /users.(json|msgpack|protobuf)
  # Renders the user list as JSON, or sends it as a binary MessagePack or
  # Protocol Buffers payload, depending on the requested format.
  def index
    respond_to do |fmt|
      fmt.json { render json: users }
      fmt.msgpack { send_data MessagePack.pack(users.as_json) }
      fmt.protobuf do
        message = Userland::Users.new
        message.user = users.map(&:to_pb)
        send_data message.serialize_to_string
      end
    end
  end

  private

  # Returns every user repeated four times, in the same order as before, to
  # give the benchmark a larger payload. The records are loaded once and the
  # resulting array is repeated, instead of building four identical
  # User.all relations.
  def users
    User.all.to_a * 4
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment