Skip to content

Instantly share code, notes, and snippets.

@dribnet
Last active October 1, 2017 05:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dribnet/163656a1c57150def7909cbebf4f07d9 to your computer and use it in GitHub Desktop.
Save dribnet/163656a1c57150def7909cbebf4f07d9 to your computer and use it in GitHub Desktop.
k-hot encoding (like 1-hot, but with k active bits)
# khot.py:
# an experimental generialization of 1-hot encoding, encodes a sparse binary vector with multiple bits
#
# currently the API is to call combins_table(n, k) to build an two way mapping from
# integers to n-choose-k binary vectors. returns an array and reverse lookup table.
import scipy.misc
# inspiration from https://www.reddit.com/r/algorithms/comments/4o5a9x/unique_mapping_from_integer_to_nk_combination/
#input: n, k, m
#output: 0-based m-th combination with k element from n elemetns
def combins(n, k, m):
s = []
for i in range(1,n+1):
c = scipy.misc.comb(n-i, k)
if m >= c:
s.append(1)
m -= c
k = k - 1
else:
s.append(0)
return tuple(s)
def combins_table(n, k, map_max=None):
table = []
rev_table = {}
table_top = scipy.misc.comb(n, k)
for m in range(int(table_top)):
# forward mapping
c = combins(n, k, m)
if map_max is None or m < map_max:
table.append(c)
rev_table[c] = m
else:
rev_table[c] = m % map_max
return table, rev_table
# poor man's unit test: verify that a small table works and check reverse mapping
if __name__ == '__main__':
# test a 5 choose 2 table suitable for MNIST
word_size = 5
bits_hot = 2
table, rev_table = combins_table(word_size, bits_hot)
print("Checking table lengths: ", len(table), len(rev_table))
print("Verifying tables for a {}-hot encoding with a word size of {}.".format(bits_hot, word_size))
for i in range(len(table)):
k = table[i]
rev_lookup = rev_table[k]
print("Entry {} --> Encoded {} --> Reversed {}".format(i, k, rev_lookup))
# OUTPUT BELOW
# ------------
# Checking table lengths: 10 10
# Verifying tables for a 2-hot encoding with a word size of 5.
# Entry 0 --> Encoded (0, 0, 0, 1, 1) --> Reversed 0
# Entry 1 --> Encoded (0, 0, 1, 0, 1) --> Reversed 1
# Entry 2 --> Encoded (0, 0, 1, 1, 0) --> Reversed 2
# Entry 3 --> Encoded (0, 1, 0, 0, 1) --> Reversed 3
# Entry 4 --> Encoded (0, 1, 0, 1, 0) --> Reversed 4
# Entry 5 --> Encoded (0, 1, 1, 0, 0) --> Reversed 5
# Entry 6 --> Encoded (1, 0, 0, 0, 1) --> Reversed 6
# Entry 7 --> Encoded (1, 0, 0, 1, 0) --> Reversed 7
# Entry 8 --> Encoded (1, 0, 1, 0, 0) --> Reversed 8
# Entry 9 --> Encoded (1, 1, 0, 0, 0) --> Reversed 9
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment