Last active
November 13, 2025 08:53
-
-
Save coderodde/643ab799e92f6ad84b1d32c54a2a3427 to your computer and use it in GitHub Desktop.
The Huffman encoder in Java.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package io.github.coderodde.compression; | |
| import java.util.BitSet; | |
| /** | |
| * This class implements a <b>binary</b> code word in data compression | |
| * scenarios. | |
| * | |
| * @author Rodion "rodde" Efremov | |
| * @version 1.0.0 (Oct 28, 2025) | |
| * @since 1.0.0 (Oct 28, 2025) | |
| */ | |
| public class CodeWord { | |
| private final int length; | |
| private final BitSet bits; | |
| public CodeWord(final int length) { | |
| checkLength(length); | |
| this.length = length; | |
| this.bits = new BitSet(length); | |
| } | |
| public CodeWord prependBit(final boolean bit) { | |
| final CodeWord nextCodeWord = new CodeWord(length + 1); | |
| if (bit) { | |
| nextCodeWord.set(length); | |
| } | |
| for (int i = 0; i < length; ++i) { | |
| if (get(i)) { | |
| nextCodeWord.set(i); | |
| } | |
| } | |
| return nextCodeWord; | |
| } | |
| public int length() { | |
| return length; | |
| } | |
| public boolean get(final int index) { | |
| checkIndex(index); | |
| return bits.get(index); | |
| } | |
| public void set(final int index) { | |
| checkIndex(index); | |
| bits.set(index); | |
| } | |
| public void unset(final int index) { | |
| checkIndex(index); | |
| bits.set(index, false); | |
| } | |
| @Override | |
| public String toString() { | |
| final StringBuilder sb = new StringBuilder(length); | |
| for (int i = length - 1; i >= 0; --i) { | |
| sb.append(get(i) ? "1" : "0"); | |
| } | |
| return sb.toString(); | |
| } | |
| private void checkIndex(final int index) { | |
| if (index < 0) { | |
| throw new IndexOutOfBoundsException( | |
| String.format("index(%d) < 0", index)); | |
| } | |
| if (index >= this.length) { | |
| throw new IndexOutOfBoundsException( | |
| String.format("index(%d) >= length(%d)", | |
| index, | |
| length)); | |
| } | |
| } | |
| private static void checkLength(final int length) { | |
| if (length < 0) { | |
| throw new IllegalArgumentException( | |
| String.format("length(%d) < 1", length)); | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package io.github.coderodde.encoding.demo; | |
| import io.github.coderodde.compression.CodeWord; | |
| import io.github.coderodde.encoding.HuffmannEncoder; | |
| import java.util.HashMap; | |
| import java.util.Map; | |
| /** | |
| * This class demonstrates the Huffmann encoding. | |
| * | |
| * @author Rodion "rodde" Efremov | |
| * @version 1.0.0 (Nov 12, 2025) | |
| * @since 1.0.0 (Now 12, 2025) | |
| */ | |
| public final class Demo { | |
| public static void main(String[] args) { | |
| demo1(); | |
| dctWeek2Task1(); | |
| } | |
| private static void demo1() { | |
| System.out.println("--- demo1 ---"); | |
| Map<Character, Double> weightDistribution = new HashMap<>(); | |
| weightDistribution.put('a', 5.0); | |
| weightDistribution.put('b', 9.0); | |
| weightDistribution.put('c', 12.); | |
| weightDistribution.put('d', 13.0); | |
| weightDistribution.put('e', 16.0); | |
| weightDistribution.put('f', 45.0); | |
| Map<Character, CodeWord> code = | |
| HuffmannEncoder.encode(weightDistribution); | |
| System.out.println(code); | |
| } | |
| private static void dctWeek2Task1() { | |
| System.out.println("--- dctWeek2Task1 ---"); | |
| Map<Character, Double> weightDistribution = new HashMap<>(); | |
| weightDistribution.put('a', 1.0 / 6.0); | |
| weightDistribution.put('b', 1.0 / 5.0); | |
| weightDistribution.put('c', 1.0 / 20.0); | |
| weightDistribution.put('d', 1.0 / 5.0); | |
| weightDistribution.put('e', 1.0 / 120.0); | |
| weightDistribution.put('f', 1.0 / 8.0); | |
| weightDistribution.put('g', 1.0 / 4.0); | |
| Map<Character, CodeWord> code = | |
| HuffmannEncoder.encode(weightDistribution); | |
| System.out.println(code); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package io.github.coderodde.encoding; | |
| import io.github.coderodde.compression.CodeWord; | |
| import io.github.coderodde.compression.SymbolEntry; | |
| import java.util.HashMap; | |
| import java.util.HashSet; | |
| import java.util.Map; | |
| import java.util.Objects; | |
| import java.util.PriorityQueue; | |
| import java.util.Queue; | |
| import java.util.Set; | |
| /** | |
| * This class implements the Huffman encoding over probability distributions. | |
| * | |
| * @author Rodion "rodde" Efremov | |
| * @version 1.0.0 (Nov 12, 2025) | |
| * @since 1.0.0 (Nov 12, 2025) | |
| */ | |
| public final class HuffmannEncoder { | |
| private HuffmannEncoder() { | |
| // Hide constructor. | |
| } | |
| public static <S extends Comparable<? super S>> Map<S, CodeWord> | |
| encode(final Map<S, Double> weightDistribution) { | |
| Objects.requireNonNull(weightDistribution, | |
| "The input probability distribution is null"); | |
| if (weightDistribution.isEmpty()) { | |
| return Map.of(); | |
| } | |
| final Queue<PriorityQueueEntry<S>> queue = new PriorityQueue<>(); | |
| for (final Map.Entry<S, Double> entry : weightDistribution.entrySet()) { | |
| final double weight = entry.getValue(); | |
| checkWeight(weight); | |
| final S symbol = entry.getKey(); | |
| final Set<SymbolEntry<S>> set = new HashSet<>(); | |
| set.add(new SymbolEntry<>(symbol, weight)); | |
| queue.add(new PriorityQueueEntry<>(set, weight)); | |
| } | |
| final Map<S, CodeWord> codewordMap = | |
| new HashMap<>(weightDistribution.size()); | |
| for (final S symbol : weightDistribution.keySet()) { | |
| codewordMap.put(symbol, new CodeWord(0)); | |
| } | |
| while (queue.size() > 1) { | |
| final PriorityQueueEntry<S> entry1 = queue.remove(); | |
| final PriorityQueueEntry<S> entry2 = queue.remove(); | |
| for (final SymbolEntry<S> symbolEntry : entry1.set) { | |
| final S symbol = symbolEntry.symbol; | |
| codewordMap.put(symbol, | |
| codewordMap | |
| .get(symbol) | |
| .prependBit(true)); | |
| } | |
| for (final SymbolEntry<S> symbolEntry : entry2.set) { | |
| final S symbol = symbolEntry.symbol; | |
| codewordMap.put(symbol, | |
| codewordMap | |
| .get(symbol) | |
| .prependBit(false)); | |
| } | |
| entry1.set.addAll(entry2.set); | |
| queue.add(new PriorityQueueEntry<>( | |
| entry1.set, | |
| entry1.totalSetWeight + entry2.totalSetWeight)); | |
| } | |
| return codewordMap; | |
| } | |
| private static void checkWeight(final double weight) { | |
| if (Double.isNaN(weight)) { | |
| throw new IllegalArgumentException("weight is NaN"); | |
| } | |
| if (weight <= 0.0) { | |
| throw new IllegalArgumentException( | |
| String.format("weight(%f) <= 0.0", weight)); | |
| } | |
| if (Double.isInfinite(weight)) { | |
| throw new IllegalArgumentException("weight is Infinity"); | |
| } | |
| } | |
| private static final | |
| class PriorityQueueEntry<T extends Comparable<? super T>> | |
| implements Comparable<PriorityQueueEntry<T>> { | |
| Set<SymbolEntry<T>> set; | |
| double totalSetWeight; | |
| PriorityQueueEntry(final Set<SymbolEntry<T>> set, | |
| final double totalSetWeight) { | |
| this.set = set; | |
| this.totalSetWeight = totalSetWeight; | |
| } | |
| @Override | |
| public int compareTo(final PriorityQueueEntry<T> o) { | |
| return Double.compare(totalSetWeight, o.totalSetWeight); | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package io.github.coderodde.compression; | |
| import java.util.Objects; | |
| public final class SymbolEntry<S extends Comparable<? super S>> | |
| implements Comparable<SymbolEntry<S>> { | |
| public final S symbol; | |
| public final double weight; | |
| public SymbolEntry(final S symbol, final double weight) { | |
| this.symbol = Objects.requireNonNull(symbol); | |
| this.weight = validateWeight(weight); | |
| } | |
| private static double validateWeight(final double weight) { | |
| if (Double.isNaN(weight)) { | |
| throw new IllegalArgumentException( | |
| "The input weight is NaN"); | |
| } | |
| if (weight <= 0.0) { | |
| throw new IllegalArgumentException( | |
| String.format("weight(%f) <= 0.0", weight)); | |
| } | |
| if (Double.isInfinite(weight)) { | |
| throw new IllegalArgumentException("weight is +Infinity"); | |
| } | |
| return weight; | |
| } | |
| @Override | |
| public String toString() { | |
| return String.format("[%s: %f]", symbol.toString(), weight); | |
| } | |
| @Override | |
| public int compareTo(final SymbolEntry<S> o) { | |
| final int cmp = Double.compare(o.weight, | |
| weight); | |
| if (cmp != 0) { | |
| return cmp; | |
| } | |
| return symbol.compareTo(o.symbol); | |
| } | |
| @Override | |
| public boolean equals(final Object object) { | |
| final SymbolEntry<S> other = (SymbolEntry<S>) object; | |
| return symbol.equals(other.symbol); | |
| } | |
| @Override | |
| public int hashCode() { | |
| return symbol.hashCode(); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment