Last active
February 5, 2019 06:50
-
-
Save gom/6b4fd5f4c27de49ab912d9ec251383fa to your computer and use it in GitHub Desktop.
Compare HLL Benchmark: airlift.stats and stream-lib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build script for the HyperLogLog benchmark programs.
plugins {
    id 'java'
    id 'application'
    // Shadow produces the self-contained "-all" jar that bundles the
    // benchmark classes together with their dependencies.
    id "com.github.johnrengelman.shadow" version "4.0.4"
}

group 'com.gomlog'
version '1.0-SNAPSHOT'

// Entry point used by the application plugin and written to the jar manifest.
mainClassName = 'com.gomlog.hll.benchmark.Comparison'

repositories {
    mavenCentral()
}

dependencies {
    // 'implementation' replaces the deprecated 'compile' configuration,
    // which was removed in Gradle 7.
    implementation "com.clearspring.analytics:stream:2.9.6"
    implementation "io.airlift:stats:0.178"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.gomlog.hll.benchmark; | |
import java.util.UUID; | |
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; | |
import io.airlift.slice.Slices; | |
import io.airlift.stats.cardinality.HyperLogLog; | |
public class Comparison { | |
static final int DATA_ROWS = 10_000_000; | |
static final int FIXED_DATA_ROWS = 1; | |
static final int DEFAULT_P = 15; | |
static final int DEFAULT_SP = 25; | |
public static void main(String[] args) throws Exception { | |
for (int i = 0; i < 2; i++) { | |
if (i == 0) { | |
System.out.println("Reharsal"); | |
} else { | |
System.out.println("----------"); | |
} | |
streamHll(); | |
airliftHll(); | |
} | |
} | |
private static String data() { | |
return UUID.randomUUID().toString(); | |
} | |
private static String fixedData() { | |
return "0123abCD-4567-6789-89ab-cdefABCDEF01"; | |
} | |
private static void printResult(String name, Long estimateCount, int size, Long durationMsec) { | |
double error = 1d - estimateCount.doubleValue() / (DATA_ROWS + FIXED_DATA_ROWS); | |
System.out.printf("%s [error: %f, calcTime: %d, estimateCount: %d, dataSize: %d bytes]\n", name, error, | |
durationMsec, estimateCount, size); | |
} | |
private static void streamHll() throws Exception { | |
HyperLogLogPlus hll = new HyperLogLogPlus(DEFAULT_P, DEFAULT_SP); | |
long start = System.currentTimeMillis(); | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.offer(data().getBytes()); | |
} | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.offer(fixedData().getBytes()); | |
} | |
long duration = System.currentTimeMillis() - start; | |
printResult("stream-lib", | |
hll.cardinality(), | |
hll.sizeof(), | |
duration); | |
} | |
private static void airliftHll() { | |
// 2048 | |
//int buckets = Integer.highestOneBit((int) Math.ceil(1.0816 / (0.023 * 0.023)) - 1) << 1; | |
HyperLogLog hll = HyperLogLog.newInstance(4096); | |
long start = System.currentTimeMillis(); | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.add(Slices.utf8Slice(data())); | |
} | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.add(Slices.utf8Slice(fixedData())); | |
} | |
long duration = System.currentTimeMillis() - start; | |
printResult("airlift", | |
hll.cardinality(), | |
hll.estimatedInMemorySize(), | |
duration); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ java -classpath build/libs/hllbenchmark-1.0-SNAPSHOT-all.jar com.gomlog.hll.benchmark.Comparison | |
Reharsal | |
stream-lib [error: 0.006106, calcTime: 25014, estimateCount: 9938938, dataSize: 21858 bytes] | |
airlift [error: -0.013953, calcTime: 19990, estimateCount: 10139534, dataSize: 2182 bytes] | |
---------- | |
stream-lib [error: -0.005583, calcTime: 22240, estimateCount: 10055835, dataSize: 21858 bytes] | |
airlift [error: -0.025534, calcTime: 20773, estimateCount: 10255340, dataSize: 2182 bytes] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.gomlog.hll.benchmark; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.UUID; | |
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus; | |
public class AddAndMerge { | |
private static final int DATA_ROWS = 100_000_000; | |
private static final int FIXED_DATA_ROWS = 1; | |
private static final int DEFAULT_P = 15; | |
private static final int DEFAULT_SP = 25; | |
public static void main(String[] args) throws Exception { | |
for (int i = 0; i < 2; i++) { | |
if (i == 0) { | |
System.out.println("Reharsal"); | |
} else { | |
System.out.println("----------"); | |
} | |
streamHll(); | |
mergeHLL(); | |
} | |
} | |
private static String data() { | |
return UUID.randomUUID().toString(); | |
} | |
private static String fixedData() { | |
return "0123abCD-4567-6789-89ab-cdefABCDEF01"; | |
} | |
private static void printResult(String name, Long estimateCount, int size, Long durationMsec) { | |
double error = 1d - estimateCount.doubleValue() / (DATA_ROWS + FIXED_DATA_ROWS); | |
System.out.printf("%s [error: %f, calcTime: %d, estimateCount: %d, dataSize: %d bytes]\n", name, error, | |
durationMsec, estimateCount, size); | |
} | |
private static void streamHll() throws Exception { | |
HyperLogLogPlus hll = new HyperLogLogPlus(DEFAULT_P, DEFAULT_SP); | |
long start = System.currentTimeMillis(); | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.offer(data().getBytes()); | |
} | |
long duration = System.currentTimeMillis() - start; | |
printResult("stream-lib", | |
hll.cardinality(), | |
hll.sizeof(), | |
duration); | |
} | |
private static final int OBJ_BY_BUCKET = 10; | |
private static void mergeHLL() { | |
List<HyperLogLogPlus> stateList = new ArrayList<>(); | |
int bucketNums = DATA_ROWS / OBJ_BY_BUCKET; | |
for (int i = 0; i < bucketNums; i++) { | |
HyperLogLogPlus hll = new HyperLogLogPlus(DEFAULT_P, DEFAULT_SP); | |
for (int j = 0; j < OBJ_BY_BUCKET; j++) { | |
hll.offer(data().getBytes()); | |
} | |
stateList.add(hll); | |
} | |
long start = System.currentTimeMillis(); | |
stateList.stream().reduce((accum, h) -> { | |
try { | |
accum.addAll(h); | |
return accum; | |
} catch (Exception e) { | |
throw new RuntimeException(e); | |
} | |
}).ifPresent(h -> { | |
long duration = System.currentTimeMillis() - start; | |
printResult("merge", | |
h.cardinality(), | |
h.sizeof(), | |
duration); | |
}); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
❯ java -jar build/libs/hllbenchmark-1.0-SNAPSHOT.jar com.gomlog.hll.benchmark.AddAndMerge | |
Reharsal | |
stream-lib [error: -0.004718, calcTime: 152388, estimateCount: 100471783, dataSize: 21848 bytes] | |
merge [error: 0.004813, calcTime: 7556, estimateCount: 99518687, dataSize: 21848 bytes] | |
---------- | |
stream-lib [error: 0.002818, calcTime: 174693, estimateCount: 99718204, dataSize: 21848 bytes] | |
merge [error: -0.004745, calcTime: 7123, estimateCount: 100474510, dataSize: 21848 bytes] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.gomlog.hll.benchmark; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.UUID; | |
import io.airlift.slice.Slices; | |
import io.airlift.stats.cardinality.HyperLogLog; | |
public class AddAndMerge { | |
private static final int DATA_ROWS = 100_000_000; | |
private static final int FIXED_DATA_ROWS = 1; | |
private static final int DEFAULT_P = 15; | |
private static final int DEFAULT_SP = 25; | |
public static void main(String[] args) throws Exception { | |
for (int i = 0; i < 2; i++) { | |
if (i == 0) { | |
System.out.println("Reharsal"); | |
} else { | |
System.out.println("----------"); | |
} | |
airliftHll(); | |
mergeHLL(); | |
} | |
} | |
private static String data() { | |
return UUID.randomUUID().toString(); | |
} | |
private static String fixedData() { | |
return "0123abCD-4567-6789-89ab-cdefABCDEF01"; | |
} | |
private static void printResult(String name, Long estimateCount, int size, Long durationMsec) { | |
double error = 1d - estimateCount.doubleValue() / (DATA_ROWS + FIXED_DATA_ROWS); | |
System.out.printf("%s [error: %f, calcTime: %d, estimateCount: %d, dataSize: %d bytes]\n", name, error, | |
durationMsec, estimateCount, size); | |
} | |
private static void airliftHll() { | |
HyperLogLog hll = HyperLogLog.newInstance(4096); | |
long start = System.currentTimeMillis(); | |
for (int i = 0; i < DATA_ROWS; i++) { | |
hll.add(Slices.utf8Slice(data())); | |
} | |
long duration = System.currentTimeMillis() - start; | |
printResult("airlift", | |
hll.cardinality(), | |
hll.estimatedInMemorySize(), | |
duration); | |
} | |
private static final int OBJ_BY_BUCKET = 100; | |
private static void mergeHLL() { | |
List<HyperLogLog> stateList = new ArrayList<>(); | |
int bucketNums = DATA_ROWS / OBJ_BY_BUCKET; | |
for (int i = 0; i < bucketNums; i++) { | |
HyperLogLog hll = HyperLogLog.newInstance(4096); | |
for (int j = 0; j < OBJ_BY_BUCKET; j++) { | |
hll.add(Slices.utf8Slice(data())); | |
} | |
stateList.add(hll); | |
} | |
long start = System.currentTimeMillis(); | |
stateList.stream().reduce((accum, h) -> { | |
try { | |
accum.mergeWith(h); | |
return accum; | |
} catch (Exception e) { | |
throw new RuntimeException(e); | |
} | |
}).ifPresent(h -> { | |
long duration = System.currentTimeMillis() - start; | |
printResult("merge", | |
h.cardinality(), | |
h.estimatedInMemorySize(), | |
duration); | |
}); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
❯ java -jar build/libs/hllbenchmark-1.0-SNAPSHOT.jar com.gomlog.hll.benchmark.AddAndMerge | |
Reharsal | |
airlift [error: 0.001307, calcTime: 151422, estimateCount: 99869317, dataSize: 2182 bytes] | |
merge [error: -0.026703, calcTime: 26533, estimateCount: 102670315, dataSize: 2177 bytes] | |
---------- | |
airlift [error: -0.010419, calcTime: 151927, estimateCount: 101041874, dataSize: 2182 bytes] | |
merge [error: -0.019530, calcTime: 26263, estimateCount: 101953007, dataSize: 2177 bytes] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment