ijkilchenko/RainbowTable.py

## RainbowTable.py
#!/usr/bin/python3
# author: @ijkilchenko
# MIT License

import math
import hashlib
import random
from collections import defaultdict

def base10_to_baseB(num, B):
    digits = []
    while num > 0:
        p = math.floor(math.log2(num) / math.log2(B))
        d = math.floor(num / (B ** p))
        num = num - d * B ** p
        if not digits:
            digits = [0] * (p + 1)
        digits[len(digits) - 1 - p] = d
    return digits

class RainbowTable():
    def __init__(self, vocab, word_length, chain_length, test_password='test_pass'):
        self.chains = defaultdict(lambda : [])
        self.vocab = vocab
        self.P = len(self.vocab) ** word_length
        self.n = word_length
        self.chain_length = chain_length
        # each of the words become our salts for the reduction functions
        self.salts = 'the quick brown fox'.split()
        self.salts = [self.string_2_hashval(self.salts[i]) for i in range(len(self.salts))]

        self.test_word = test_password
        self.is_test_word_generated = False

        self.compute_chains()

    def get_ith_word_over_vocab(self, i):
        assert i < self.P
        choices = base10_to_baseB(i, len(self.vocab))
        word = [self.vocab[j] for j in [0 for _ in range(0, self.n - len(choices))] + choices]
        word = ''.join(word)
        return word

    def hashval_2_string(self, hashval, salt_index):
        '''
        Here is an example of a reduction function, a function that maps from the
        hashvalues into the set of all possible strings (which can be derived from
        the vocabulary string). We simply take the hexadecimal value and one of the
        salts (thus we have as many unique reduction functions as there are salts),
        convert that to decimal, mod by the vocabulary size, and call the result i.

        Finally, since we know the vocabulary, we can derive the i-th possible word
        in the list of all lexicographically ordered words over the vocabulary.

        We do these steps because the reduction function should be uniform and we
        want to be able to map each hashvalue back into the set of all possible
        passwords.

        References:
        https://en.wikipedia.org/wiki/Rainbow_table#Precomputed_hash_chains
        '''
        i = (int(hashval, 16) + int(self.salts[salt_index % len(self.salts)], 16)) % self.P
        return self.get_ith_word_over_vocab(i)

    def string_2_hashval(self, string):
        d = hashlib.sha256()
        d.update(bytes(string, encoding='utf-8'))
        return d.hexdigest()

    def compute_chains(self):
        print('Building our rainbow table', end="")
        num_of_chains = math.floor(self.P / self.chain_length)
        start_words = [self.get_ith_word_over_vocab(i) for i in random.sample(range(0, self.P), num_of_chains)]

        for count, word in enumerate(start_words):
            if count % (math.floor(len(start_words) / 10)) == 0:
                print('.', end="")
            chain_start_word = word
            if self.test_word == chain_start_word:
                self.is_test_word_generated = True
            for salt_index in range(self.chain_length):
                word, _ = self.forward(word, salt_index)
                if self.test_word == word and salt_index != self.chain_length - 1:
                    self.is_test_word_generated = True
            chain_end_word = word
            self.chains[chain_end_word].append(chain_start_word)
        print('Done')
        num_of_words = 0
        for word in self.chains:
            num_of_words += len(self.chains[word]) + 1
        print('%i\t Size (number of cells with some chains having merged ends) in our rainbow table' % num_of_words)
        print('%i\t Size (number of cells, half of which are hashes) of a table without the hash chaining trick' % (self.P * 2))
        print('%.2f\t Compression ratio' % (self.P * 2 / num_of_words))

    def forward(self, word, salt_index):
        hashval = self.string_2_hashval(word)
        word = self.hashval_2_string(hashval, salt_index)
        return (word, hashval)

    def find_inverse(self, password_hashval):
        def forward_from_start(chain_start_word):
            word = chain_start_word
            for i in range(self.chain_length + 1):
                possible_password = word
                word, hashval = r.forward(word, i)
                if hashval == password_hashval:
                    return possible_password
        for salt_index in range(len(self.salts)):
            word = self.hashval_2_string(password_hashval, salt_index)
            salt_index += 1
            for _ in range(self.chain_length):
                if word in self.chains:
                    chain_start_words = self.chains[word]
                    for chain_start_word in chain_start_words:
                        password = forward_from_start(chain_start_word)
                        if password is not None:
                            return password
                word, _ = self.forward(word, salt_index)
                salt_index += 1

if __name__ == '__main__':
    assert base10_to_baseB(11, 2) == [int(i) for i in '1011']
    assert base10_to_baseB(11, 3) == [int(i) for i in '102']
    assert base10_to_baseB(1001, 10) == [int(i) for i in '1001']

    random.seed(123456)

    test_password = 'aababa'
    r = RainbowTable(vocab='abcd1234', word_length=6, chain_length=10, test_password=test_password)
    password_hashval = r.string_2_hashval(test_password)
    if r.is_test_word_generated == True:
        guessed_password = r.find_inverse(password_hashval)
        assert guessed_password == test_password
        print('Test password is %s. Test attack succeeded. ' % guessed_password)
    else:
        print('The reduction function did not map to test password. Attack failed. ')

    print()

    pass1 = '3baba2'
    h1 = r.string_2_hashval(pass1)
    h1_inv = r.find_inverse(h1)
    print('The password for %s is %s ' % (h1, h1_inv))
    if (pass1 == h1_inv):
        print('Attack succeeded.')
    else:
        print('Attack failed.')

    print()

    pass2 = 'bada33'
    h2 = r.string_2_hashval(pass2)
    h2_inv = r.find_inverse(h2)
    print('The password for %s is %s ' % (h2, h2_inv))
    if (pass2 == h2_inv):
        print('Attack succeeded.')
    else:
        print('Attack failed.')
	#!/usr/bin/python3
	# author: @ijkilchenko
	# MIT License

	import math
	import hashlib
	import random
	from collections import defaultdict

	def base10_to_baseB(num, B):
	digits = []
	while num > 0:
	p = math.floor(math.log2(num) / math.log2(B))
	d = math.floor(num / (B ** p))
	num = num - d * B ** p
	if not digits:
	digits = [0] * (p + 1)
	digits[len(digits) - 1 - p] = d
	return digits

	class RainbowTable():
	def __init__(self, vocab, word_length, chain_length, test_password='test_pass'):
	self.chains = defaultdict(lambda : [])
	self.vocab = vocab
	self.P = len(self.vocab) ** word_length
	self.n = word_length
	self.chain_length = chain_length
	# each of the words become our salts for the reduction functions
	self.salts = 'the quick brown fox'.split()
	self.salts = [self.string_2_hashval(self.salts[i]) for i in range(len(self.salts))]

	self.test_word = test_password
	self.is_test_word_generated = False

	self.compute_chains()

	def get_ith_word_over_vocab(self, i):
	assert i < self.P
	choices = base10_to_baseB(i, len(self.vocab))
	word = [self.vocab[j] for j in [0 for _ in range(0, self.n - len(choices))] + choices]
	word = ''.join(word)
	return word

	def hashval_2_string(self, hashval, salt_index):
	'''
	Here is an example of a reduction function, a function that maps from the
	hashvalues into the set of all possible strings (which can be derived from
	the vocabulary string). We simply take the hexadecimal value and one of the
	salts (thus we have as many unique reduction functions as there are salts),
	convert that to decimal, mod by the vocabulary size, and call the result i.

	Finally, since we know the vocabulary, we can derive the i-th possible word
	in the list of all lexicographically ordered words over the vocabulary.

	We do these steps because the reduction function should be uniform and we
	want to be able to map each hashvalue back into the set of all possible
	passwords.

	References:
	https://en.wikipedia.org/wiki/Rainbow_table#Precomputed_hash_chains
	'''
	i = (int(hashval, 16) + int(self.salts[salt_index % len(self.salts)], 16)) % self.P
	return self.get_ith_word_over_vocab(i)

	def string_2_hashval(self, string):
	d = hashlib.sha256()
	d.update(bytes(string, encoding='utf-8'))
	return d.hexdigest()

	def compute_chains(self):
	print('Building our rainbow table', end="")
	num_of_chains = math.floor(self.P / self.chain_length)
	start_words = [self.get_ith_word_over_vocab(i) for i in random.sample(range(0, self.P), num_of_chains)]

	for count, word in enumerate(start_words):
	if count % (math.floor(len(start_words) / 10)) == 0:
	print('.', end="")
	chain_start_word = word
	if self.test_word == chain_start_word:
	self.is_test_word_generated = True
	for salt_index in range(self.chain_length):
	word, _ = self.forward(word, salt_index)
	if self.test_word == word and salt_index != self.chain_length - 1:
	self.is_test_word_generated = True
	chain_end_word = word
	self.chains[chain_end_word].append(chain_start_word)
	print('Done')
	num_of_words = 0
	for word in self.chains:
	num_of_words += len(self.chains[word]) + 1
	print('%i\t Size (number of cells with some chains having merged ends) in our rainbow table' % num_of_words)
	print('%i\t Size (number of cells, half of which are hashes) of a table without the hash chaining trick' % (self.P * 2))
	print('%.2f\t Compression ratio' % (self.P * 2 / num_of_words))

	def forward(self, word, salt_index):
	hashval = self.string_2_hashval(word)
	word = self.hashval_2_string(hashval, salt_index)
	return (word, hashval)

	def find_inverse(self, password_hashval):
	def forward_from_start(chain_start_word):
	word = chain_start_word
	for i in range(self.chain_length + 1):
	possible_password = word
	word, hashval = r.forward(word, i)
	if hashval == password_hashval:
	return possible_password
	for salt_index in range(len(self.salts)):
	word = self.hashval_2_string(password_hashval, salt_index)
	salt_index += 1
	for _ in range(self.chain_length):
	if word in self.chains:
	chain_start_words = self.chains[word]
	for chain_start_word in chain_start_words:
	password = forward_from_start(chain_start_word)
	if password is not None:
	return password
	word, _ = self.forward(word, salt_index)
	salt_index += 1

	if __name__ == '__main__':
	assert base10_to_baseB(11, 2) == [int(i) for i in '1011']
	assert base10_to_baseB(11, 3) == [int(i) for i in '102']
	assert base10_to_baseB(1001, 10) == [int(i) for i in '1001']

	random.seed(123456)

	test_password = 'aababa'
	r = RainbowTable(vocab='abcd1234', word_length=6, chain_length=10, test_password=test_password)
	password_hashval = r.string_2_hashval(test_password)
	if r.is_test_word_generated == True:
	guessed_password = r.find_inverse(password_hashval)
	assert guessed_password == test_password
	print('Test password is %s. Test attack succeeded. ' % guessed_password)
	else:
	print('The reduction function did not map to test password. Attack failed. ')

	print()

	pass1 = '3baba2'
	h1 = r.string_2_hashval(pass1)
	h1_inv = r.find_inverse(h1)
	print('The password for %s is %s ' % (h1, h1_inv))
	if (pass1 == h1_inv):
	print('Attack succeeded.')
	else:
	print('Attack failed.')

	print()

	pass2 = 'bada33'
	h2 = r.string_2_hashval(pass2)
	h2_inv = r.find_inverse(h2)
	print('The password for %s is %s ' % (h2, h2_inv))
	if (pass2 == h2_inv):
	print('Attack succeeded.')
	else:
	print('Attack failed.')