Skip to content

Instantly share code, notes, and snippets.

@benwtrent
Created November 10, 2020 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benwtrent/f19b82d9187a720b1bc367c75fcfee99 to your computer and use it in GitHub Desktop.
Save benwtrent/f19b82d9187a720b1bc367c75fcfee99 to your computer and use it in GitHub Desktop.
Benchmark for calculating the entropy for a string.
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.util.HashMap;
import java.util.Map;
import java.util.PrimitiveIterator;
import java.util.concurrent.TimeUnit;
@Fork(3)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@SuppressWarnings("unused") // invoked by benchmarking framework
public class EntropyBenchmark {
static String[] strings = new String[] {
/* Multi lengual strings of various sizes */
};
@Param({ "127", "256", "512" })
private String sizes;
private static int size;
@Setup(Level.Iteration)
public void setup() {
size = Integer.parseInt(sizes);
}
@Benchmark
public void unoptimized(Blackhole bh) {
for (String str : strings) {
bh.consume(entropyCalcUnOptimized(str));
}
}
@Benchmark
public void optimized(Blackhole bh) {
for (String str : strings) {
bh.consume(entropyCalcOptimized(str, size));
}
}
@Benchmark
public void hybrid(Blackhole bh) {
for (String str : strings) {
bh.consume(entropyCalcHybrid(str, size));
}
}
static double entropyCalcUnOptimized(String stringValue) {
final double len = stringValue.length();
Map<Integer, Integer> cpCount = new HashMap<>(stringValue.length());
for (PrimitiveIterator.OfInt it = stringValue.codePoints().iterator(); it.hasNext(); ) {
int cp = it.next();
cpCount.compute(cp, (_k, v) -> v == null ? 1 : v + 1);
}
double entropy = 0.0;
for (Integer count : cpCount.values()) {
double probability = count / len;
entropy -= probability * (Math.log(probability) / Math.log(2));
}
return entropy;
}
static double entropyCalcOptimized(String stringValue, int optSize) {
if (stringValue.codePoints().anyMatch(i -> i > optSize)) {
return entropyCalcUnOptimized(stringValue);
}
final double len = stringValue.length();
int[] vals = new int[optSize];
for (PrimitiveIterator.OfInt it = stringValue.codePoints().iterator(); it.hasNext(); ) {
int cp = it.next();
vals[cp]++;
}
double entropy = 0.0;
for (int occurrances : vals) {
if (occurrances > 0) {
double probability = occurrances / len;
entropy -= probability * (Math.log(probability) / Math.log(2));
}
}
return entropy;
}
static double entropyCalcHybrid(String stringValue, int optSize) {
final double len = stringValue.length();
int[] vals = new int[optSize];
Map<Integer, Integer> biggerCounts = new HashMap<>();
for (PrimitiveIterator.OfInt it = stringValue.codePoints().iterator(); it.hasNext(); ) {
int cp = it.next();
if (cp < optSize) {
vals[cp]++;
} else {
biggerCounts.compute(cp, (_k, v) -> v == null ? 1 : v + 1);
}
}
double entropy = 0.0;
for (int occurrances : vals) {
if (occurrances > 0) {
double probability = occurrances / len;
entropy -= probability * (Math.log(probability) / Math.log(2));
}
}
for (Integer count : biggerCounts.values()) {
double probability = count / len;
entropy -= probability * (Math.log(probability) / Math.log(2));
}
return entropy;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment