Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
%% Huffman Code for nêhiyawêwin %%
%% Note: Cree strings must be written in "askiy":
%% - short vowels and consonants: written in ASCII
%% - long vowels: ê is written as e; "âîô" written as "AIO" respectively.
%% - it's a very grounded character encoding 😉
%%
%% I haven't done extensive testing, but this usually uses half the bits to
%% represent Cree word forms. Maybe just gzip it instead?
-module(huff).
-export([encode/1, decode/1, tryit/1]).
%% Definitions for huffman code.
%%
%% Each macro expands to a `Value:Size` bit-syntax segment, so it is only
%% meaningful inside a binary expression or pattern, e.g.
%% `<<?K, Rest/bitstring>>`. The trailing comment on each line names the
%% "askiy" character that encode/decode associate with the code.
%%
%% The table is ordered from the shortest code (3 bits) to the longest
%% (8 bits), and no code is a prefix of another, which is what allows decode
%% to split a bitstring without separators.
%% NOTE(review): presumably the shorter codes belong to the more frequent
%% letters -- the frequency data is not part of this file.
-define(K, 2#100 :3). % $k
-define(A, 2#010 :3). % $a
-define(I, 2#001 :3). % $i
-define(EE, 2#1100 :4). % $e (long vowel ê)
-define(W, 2#1011 :4). % $w
-define(AA, 2#1010 :4). % $A (long vowel â)
-define(N, 2#0111 :4). % $n
-define(T, 2#0110 :4). % $t
-define(M, 2#0001 :4). % $m
-define(S, 2#11111 :5). % $s
-define(H, 2#11110 :5). % $h
-define(HYPHEN, 2#11101 :5). % $-
-define(Y, 2#11100 :5). % $y
-define(O, 2#11010 :5). % $o
-define(II, 2#00001 :5). % $I (long vowel î)
-define(P, 2#00000 :5). % $p
-define(OO, 2#110111 :6). % $O (long vowel ô)
-define(C, 2#1101101 :7). % $c
-define(L, 2#11011001:8). % $l
-define(R, 2#11011000:8). % $r
%% Encodes an SRO string written in "askiy" as a Huffman-coded bitstring.
%%
%% Layout of the result: the code of the LAST character of String comes
%% first and the code of the first character comes last. decode/1 reads the
%% bitstring front to back while pushing characters onto the head of its
%% result, so it depends on exactly this layout -- do not change it.
%%
%% The input is reversed once up front so that every code can be appended to
%% the accumulator. Appending lets the runtime grow the bitstring in place,
%% whereas prepending (`<<Code, Acc/bitstring>>`) copies the whole
%% accumulator for every character, i.e. quadratic work in the word length.
%% The produced bits are identical either way.
%%
%% Fails with a function_clause error if String contains a character that
%% has no code in the table.
-spec encode(string()) -> bitstring().
encode(String) ->
    encode(lists:reverse(String), <<>>).

%% encode(ReversedChars, Acc): appends the code of each character in turn.
encode([], Acc) ->
    Acc;
encode([$k | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?K>>);
encode([$a | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?A>>);
encode([$i | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?I>>);
encode([$e | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?EE>>);
encode([$w | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?W>>);
encode([$A | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?AA>>);
encode([$n | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?N>>);
encode([$t | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?T>>);
encode([$m | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?M>>);
encode([$s | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?S>>);
encode([$h | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?H>>);
encode([$- | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?HYPHEN>>);
encode([$y | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?Y>>);
encode([$o | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?O>>);
encode([$I | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?II>>);
encode([$p | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?P>>);
encode([$O | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?OO>>);
encode([$c | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?C>>);
encode([$l | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?L>>);
encode([$r | Rest], Acc) ->
    encode(Rest, <<Acc/bitstring, ?R>>).
%% Decodes a Huffman-coded bitstring back into an "askiy" string.
%%
%% Codes are consumed from the front of the bitstring and every decoded
%% character is pushed onto the head of the result, so the character whose
%% code appears first in the input ends up last in the returned string.
%%
%% Fails with a function_clause error if the remaining bits do not start
%% with any code from the table.
-spec decode(bitstring()) -> string().
decode(Binary) ->
    decode(Binary, []).

%% decode(RemainingBits, Chars): Chars collects the characters decoded so far.
decode(<<>>, Chars) ->
    Chars;
decode(<<?K, Tail/bitstring>>, Chars) ->
    decode(Tail, [$k | Chars]);
decode(<<?A, Tail/bitstring>>, Chars) ->
    decode(Tail, [$a | Chars]);
decode(<<?I, Tail/bitstring>>, Chars) ->
    decode(Tail, [$i | Chars]);
decode(<<?EE, Tail/bitstring>>, Chars) ->
    decode(Tail, [$e | Chars]);
decode(<<?W, Tail/bitstring>>, Chars) ->
    decode(Tail, [$w | Chars]);
decode(<<?AA, Tail/bitstring>>, Chars) ->
    decode(Tail, [$A | Chars]);
decode(<<?N, Tail/bitstring>>, Chars) ->
    decode(Tail, [$n | Chars]);
decode(<<?T, Tail/bitstring>>, Chars) ->
    decode(Tail, [$t | Chars]);
decode(<<?M, Tail/bitstring>>, Chars) ->
    decode(Tail, [$m | Chars]);
decode(<<?S, Tail/bitstring>>, Chars) ->
    decode(Tail, [$s | Chars]);
decode(<<?H, Tail/bitstring>>, Chars) ->
    decode(Tail, [$h | Chars]);
decode(<<?HYPHEN, Tail/bitstring>>, Chars) ->
    decode(Tail, [$- | Chars]);
decode(<<?Y, Tail/bitstring>>, Chars) ->
    decode(Tail, [$y | Chars]);
decode(<<?O, Tail/bitstring>>, Chars) ->
    decode(Tail, [$o | Chars]);
decode(<<?II, Tail/bitstring>>, Chars) ->
    decode(Tail, [$I | Chars]);
decode(<<?P, Tail/bitstring>>, Chars) ->
    decode(Tail, [$p | Chars]);
decode(<<?OO, Tail/bitstring>>, Chars) ->
    decode(Tail, [$O | Chars]);
decode(<<?C, Tail/bitstring>>, Chars) ->
    decode(Tail, [$c | Chars]);
decode(<<?L, Tail/bitstring>>, Chars) ->
    decode(Tail, [$l | Chars]);
decode(<<?R, Tail/bitstring>>, Chars) ->
    decode(Tail, [$r | Chars]).
%% Compresses a word and reports how the encoded size compares to the
%% original.
%%
%% Returns {Encoded, Decoded, SizeInBits, Percentage} where:
%%   Encoded    - the bitstring returned by encode/1,
%%   Decoded    - the result of decode/1 on Encoded (a round-trip check),
%%   SizeInBits - the number of bits in Encoded,
%%   Percentage - SizeInBits as a percentage of the original size, counting
%%                8 bits per character of Word. Smaller is better; this is
%%                NOT the percentage saved.
%%
%% An empty Word has an original size of 0 bits, which would make the ratio
%% a division by zero; it is reported as 100.0 (encoded size equals original
%% size, nothing saved) instead of crashing with badarith.
-spec tryit(string()) -> {bitstring(), string(), non_neg_integer(), float()}.
tryit(Word) ->
    Encoded = encode(Word),
    OriginalSize = 8 * length(Word),
    SizeInBits = erlang:bit_size(Encoded),
    Percentage =
        case OriginalSize of
            0 -> 100.0;
            _ -> 100.0 * SizeInBits / OriginalSize
        end,
    {Encoded, decode(Encoded), SizeInBits, Percentage}.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.