Skip to content

Instantly share code, notes, and snippets.

@kogupta
Last active July 19, 2018 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kogupta/a31bd68f744a667643990d40409bacbb to your computer and use it in GitHub Desktop.
Save kogupta/a31bd68f744a667643990d40409bacbb to your computer and use it in GitHub Desktop.
Exploration in binary json formats
import com.codahale.metrics.Histogram;
import com.codahale.metrics.SlidingTimeWindowArrayReservoir;
import com.codahale.metrics.Snapshot;
import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.dataformat.cbor.CBORFactory;
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
import com.fasterxml.jackson.module.afterburner.AfterburnerModule;
import com.google.common.reflect.TypeToken;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.Inet4Address;
import java.net.InetAddress;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
/**
 * Benchmark helper that round-trips a POJO through several Jackson {@link ObjectMapper}
 * configurations and records, per configuration, the encoded size and the time spent
 * serializing and deserializing.
 *
 * <p>Six configurations are measured: UTF-8 JSON, CBOR and Smile, each once with default
 * settings and once as a "columnar" mapper. Types registered through
 * {@link #enableColumnarJson(Class)} are written by the columnar mappers with
 * {@link JsonFormat.Shape#ARRAY}.
 *
 * <p>All state is held in static fields and this class performs no synchronization of its own.
 */
public final class BinaryJsonHelper {
  private static final ObjectMapper objectMapper = new ObjectMapper();
  private static final ObjectMapper cborMapper = new ObjectMapper(new CBORFactory());
  private static final ObjectMapper smileMapper = new ObjectMapper(new SmileFactory());
  private static final ObjectMapper objectMapper2 = new ObjectMapper();
  private static final ObjectMapper cborMapper2 = new ObjectMapper(new CBORFactory());
  private static final ObjectMapper smileMapper2 = new ObjectMapper(new SmileFactory());

  /** Every mapper that is benchmarked; index-aligned with {@link #labels}. */
  private static final ObjectMapper[] mappers = {
    objectMapper, cborMapper, smileMapper,
    objectMapper2, cborMapper2, smileMapper2
  };

  /** Human-readable name of each entry of {@link #mappers}, used when printing statistics. */
  private static final String[] labels = {
    "utf8 vanilla json", "cbor", "smile",
    "utf8 column", "cbor column", "smile col"
  };

  /** The subset of {@link #mappers} that {@link #enableColumnarJson(Class)} reconfigures. */
  private static final ObjectMapper[] columnarMappers = {objectMapper2, cborMapper2, smileMapper2};

  /** Encoded size in bytes, one histogram per entry of {@link #mappers}. */
  private static final Histogram[] sizeHistograms = new Histogram[mappers.length];

  /** Serialization time in microseconds, one histogram per entry of {@link #mappers}. */
  private static final Histogram[] serTimeHistograms = new Histogram[mappers.length];

  /** Deserialization time in microseconds, one histogram per entry of {@link #mappers}. */
  private static final Histogram[] deserTimeHistograms = new Histogram[mappers.length];

  /** Encoded size in bytes produced by the caller-supplied serializer. */
  private static Histogram otherSize = newHistogram();

  /** Time in microseconds taken by the caller-supplied serializer. */
  private static Histogram otherTime = newHistogram();

  static {
    assert mappers.length == labels.length : "same number of mappers and labels";
    for (ObjectMapper mapper : mappers) {
      mapper.registerModule(new AfterburnerModule());
      mapper.enable(SerializationFeature.WRITE_ENUMS_USING_INDEX);
    }
    resetMapperHistograms();
  }

  /** Static utility holder; not meant to be instantiated. */
  private BinaryJsonHelper() {
  }

  /** Creates an empty histogram backed by a 10-minute sliding time window reservoir. */
  private static Histogram newHistogram() {
    return new Histogram(new SlidingTimeWindowArrayReservoir(10, TimeUnit.MINUTES));
  }

  /** Replaces every per-mapper histogram with a fresh, empty one. */
  private static void resetMapperHistograms() {
    Arrays.setAll(sizeHistograms, i -> newHistogram());
    Arrays.setAll(serTimeHistograms, i -> newHistogram());
    Arrays.setAll(deserTimeHistograms, i -> newHistogram());
  }

  /**
   * Configures the columnar mappers to write {@code clazz} with {@link JsonFormat.Shape#ARRAY}.
   * The three non-columnar mappers are left untouched.
   *
   * @param clazz the type whose format is overridden on the columnar mappers
   * @param <T> the type being configured
   */
  public static <T> void enableColumnarJson(Class<T> clazz) {
    JsonFormat.Value arrayShape = JsonFormat.Value.forShape(JsonFormat.Shape.ARRAY);
    for (ObjectMapper mapper : columnarMappers) {
      mapper.configOverride(clazz).setFormat(arrayShape);
    }
  }

  /**
   * Serializes and deserializes {@code pojo} once with every mapper, recording the encoded
   * size and both timings (truncated to whole microseconds) in the per-mapper histograms.
   *
   * <p>When assertions are enabled, the decoded object is serialized again and must yield the
   * same bytes as the first serialization.
   *
   * @param pojo the object to round-trip; must not be {@code null}
   * @param <T> the type of {@code pojo}
   * @throws IOException if any mapper fails to write or read the value
   */
  public static <T> void pojo2Bytes(T pojo) throws IOException {
    // The runtime class does not change between mappers, so resolve it once, outside the
    // timed sections. getClass() on a T instance is a Class<? extends T>, so the cast is safe
    // for the only use below (readValue's target type).
    @SuppressWarnings("unchecked")
    Class<T> clazz = (Class<T>) pojo.getClass();

    for (int i = 0; i < mappers.length; i++) {
      ObjectMapper mapper = mappers[i];

      long serStart = System.nanoTime();
      byte[] bytes = mapper.writeValueAsBytes(pojo);
      serTimeHistograms[i].update(TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - serStart));
      sizeHistograms[i].update(bytes.length);

      long deserStart = System.nanoTime();
      T decoded = mapper.readValue(bytes, clazz);
      deserTimeHistograms[i].update(TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - deserStart));

      // The re-serialization exists only for this check, so it lives inside the assert and
      // is skipped entirely when assertions are disabled.
      assert Arrays.equals(bytes, mapper.writeValueAsBytes(decoded)) : "byte arrays are not equal";
    }
  }

  /**
   * Verifies that decoding {@code _bytes} as {@code clazz} and encoding the result again with
   * the same mapper reproduces {@code _bytes} exactly.
   *
   * @throws AssertionError if the re-encoded bytes differ
   * @throws UncheckedIOException if the mapper fails to read or write; the original
   *     {@link IOException} is preserved as the cause
   */
  private static <T> void correctness(ObjectMapper _mapper, byte[] _bytes, Class<T> clazz) {
    try {
      T decoded = _mapper.readValue(_bytes, clazz);
      byte[] reEncoded = _mapper.writeValueAsBytes(decoded);
      if (!Arrays.equals(_bytes, reEncoded)) {
        throw new AssertionError("byte arrays are unequal!");
      }
    } catch (IOException e) {
      // Rethrown with its cause attached; printing the stack trace here as well would
      // report the same failure twice.
      throw new UncheckedIOException(e);
    }
  }

  /**
   * Runs {@link #pojo2Bytes(Object)} and additionally measures a caller-supplied serializer,
   * recording its output size and elapsed time (whole microseconds) in the "other" histograms.
   *
   * @param pojo the object to serialize; must not be {@code null}
   * @param serializer hand-rolled serializer to compare against the Jackson mappers
   * @param <T> the type of {@code pojo}
   * @throws IOException if any Jackson mapper fails to write or read the value
   */
  public static <T> void pojo2Bytes(T pojo, Function<? super T, byte[]> serializer)
      throws IOException {
    pojo2Bytes(pojo);
    long start = System.nanoTime();
    byte[] handRolled = serializer.apply(pojo);
    otherTime.update(TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - start));
    otherSize.update(handRolled.length);
  }

  /**
   * Prints, to standard output, the min / max / mean of every per-mapper histogram followed
   * by the "other" histograms. Histograms that never received a value are skipped.
   */
  public static void displayStats() {
    for (int i = 0; i < labels.length; i++) {
      printMinimalHistogram(
          labels[i], sizeHistograms[i], serTimeHistograms[i], deserTimeHistograms[i]);
    }
    System.out.println("---- other ----");
    _print("size", otherSize);
    _print("time", otherTime);
  }

  /**
   * Discards all recorded measurements. The "other" histograms are only replaced when they
   * have received at least one value.
   */
  public static void reset() {
    resetMapperHistograms();
    if (otherSize.getCount() != 0) {
      otherSize = newHistogram();
    }
    if (otherTime.getCount() != 0) {
      otherTime = newHistogram();
    }
  }

  /** Prints a titled section with the size, serialization and deserialization summaries. */
  private static void printMinimalHistogram(
      String message, Histogram size, Histogram time, Histogram time2) {
    System.out.println("---- " + message + " ----");
    _print("size", size);
    _print("serialization time", time);
    _print("deserialization time", time2);
  }

  /** Prints min, max and mean of {@code hist}; prints nothing if it never received a value. */
  private static void _print(String message, Histogram hist) {
    if (hist.getCount() == 0) {
      return;
    }
    Snapshot snapshot = hist.getSnapshot();
    System.out.println(" " + message);
    System.out.println(String.format(" range: [%d ... %d] avg: %2.2f",
        snapshot.getMin(), snapshot.getMax(), snapshot.getMean()));
  }

  /**
   * Verbose variant of {@link #_print(String, Histogram)} that also prints the update count,
   * the median and the 95th percentile.
   */
  private static void _print2(String message, Histogram hist) {
    if (hist.getCount() == 0) {
      return;
    }
    Snapshot snapshot = hist.getSnapshot();
    System.out.println(" " + message);
    System.out.println(String.format(" count = %,d range: [%d ... 50:%2.2f ...95:%2.2f ... %d] avg: %2.2f",
        hist.getCount(),
        snapshot.getMin(), snapshot.getMedian(), snapshot.get95thPercentile(), snapshot.getMax(),
        snapshot.getMean()));
  }
}
regular json [60 bytes]:
{"x":5,"y":50,"xs":["pppp","qqqqq","rrrr"],"name":"aaaaaaa"}
column json [40 bytes]:
[5,50,["pppp","qqqqq","rrrr"],"aaaaaaa"]
binary json [48 bytes]:
00000000 3A 29 0A 01 FA 80 78 CA 80 79 24 01 A4 81 78 73 :)....x..y$...xs
00000010 F8 43 70 70 70 70 44 71 71 71 71 71 43 72 72 72 .CppppDqqqqqCrrr
00000020 72 F9 83 6E 61 6D 65 46 61 61 61 61 61 61 61 FB r..nameFaaaaaaa.
binary column json [36 bytes]:
00000000 3A 29 0A 01 F8 CA 24 01 A4 F8 43 70 70 70 70 44 :)....$...CppppD
00000010 71 71 71 71 71 43 72 72 72 72 F9 46 61 61 61 61 qqqqqCrrrr.Faaaa
00000020 61 61 61 F9 aaa.
----
sample size = 68,650
POJO data json with 73 fields:
- VANILLA pojo [almost all String fields]
private String someGUID;
private String someTimeField;
...
- TYPED pojo [long, int, UUID, etc fields]
private UUID someGUID;
private long someTimeField;
...
- TYPED2 pojo [long, int, UUID, etc fields]
same as `TYPED`, but with hierarchy [separate Mutable and Immutable interfaces for setters and getters]
╔════════════════════════════════════════════════╦═══════════════════════════════╦════════════════════════════════╦═══════════════════════════════╗
║ ║ Vanilla json ║ Typed Json ║ Typed2 json ║
║ ╠══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ ║ Min | Max | Avg. ║ Min | Max | Avg. ║ Min | Max | Avg. ║
╠════════════════╦═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ UTF-8 json ║ Size - bytes ║ 2673 | 4082 | 2859.22 ║ 2439 | 3874 | 2624.37 ║ 2472 | 3908 | 2657.75 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization - micros ║ 8 | 53 | 8.51 ║ 10 | 75 | 11.51 ║ 11 | 10407 | 13.14 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization time - micros ║ 10 | 127 | 11.39 ║ 16 | 156 | 17.94 ║ 17 | 8888 | 19.85 ║
╠════════════════╬═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ CBOR ║ Size ║ 2387 | 3786 | 2577.50 ║ 2060 | 3478 | 2243.70 ║ 2080 | 3498 | 2263.70 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization ║ 7 | 72 | 7.63 ║ 8 | 2516 | 9.68 ║ 9 | 3975 | 11.01 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization ║ 10 | 2380 | 11.05 ║ 14 | 98 | 15.71 ║ 15 | 5223 | 16.69 ║
╠════════════════╬═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ Smile ║ Size ║ 2373 | 3767 | 2565.70 ║ 2068 | 3480 | 2250.02 ║ 2090 | 3502 | 2271.89 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization ║ 8 | 129 | 8.99 ║ 10 | 53 | 10.95 ║ 11 | 3415 | 12.77 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization ║ 9 | 55 | 9.70 ║ 13 | 110 | 14.79 ║ 14 | 4879 | 15.84 ║
╠════════════════╬═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ UTF-8 columnar ║ Size ║ 1404 | 2813 | 1590.22 ║ 1170 | 2605 | 1355.37 ║ 1190 | 2626 | 1375.75 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization ║ 5 | 2092 | 6.24 ║ 7 | 69 | 8.24 ║ 8 | 3648 | 9.35 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization ║ 7 | 156 | 8.26 ║ 12 | 88 | 13.72 ║ 13 | 5430 | 15.02 ║
╠════════════════╬═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ CBOR columnar ║ Size ║ 1256 | 2655 | 1446.50 ║ 929 | 2347 | 1112.70 ║ 938 | 2356 | 1121.70 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization ║ 4 | 39 | 5.19 ║ 6 | 74 | 6.59 ║ 8 | 3486 | 7.44 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization ║ 7 | 65 | 7.29 ║ 10 | 2189 | 11.37 ║ 10 | 4941 | 11.79 ║
╠════════════════╬═══════════════════════════════╬══════════════+══════+═════════╬═══════════════+══════+═════════╬═════════════+═══════+═════════╣
║ Smile columnar ║ Size ║ 1250 | 2644 | 1442.70 ║ 845 | 2357 | 1127.02 ║ 956 | 2368 | 1137.89 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Serialization ║ 4 | 74 | 5.08 ║ 6 | 3106 | 6.31 ║ 6 | 3346 | 7.29 ║
║ ╠-------------------------------╬--------------+------+---------╬---------------+------+---------╬-------------+-------+---------╣
║ ║ Deserialization ║ 6 | 61 | 6.44 ║ 10 | 92 | 11.04 ║ 10 | 4741 | 11.58 ║
╚════════════════╩═══════════════════════════════╩══════════════╩══════╩═════════╩═══════════════╩══════╩═════════╩═════════════╩═══════╩═════════╝
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment