malfaux/brutecounter.txt

## brutecounter.txt
#!/usr/bin/python

"""
for user1796738@stackoverflow.com
in reply to the stackoverflow thread: http://stackoverflow.com/questions/13210943/md5-brute-forcing
which was closed by some weirdos


=>> problem description <snip from the above url>, posted by user1796738:
...     In someone's puzzle game, I reached this puzzle. I tried searching for an answer or a way
...     to find the answer to it for atl east 2 hours... I got an MD5 Hash and a regular
...     expression and he told me to find the text behind it all.. I know I can bruteforce it but I
...     don't know any programs to brute force an MD5 and surely don't know how to use the Regular Expression
...     to crack it faster... So if anyone could tell me a way to bruteforce the MD5 hash using the
...     Regular Expression or tell me the answer, it will be helpful.
<<=


=>> solution:

this bruteforce md5 cracker is adapted from my own source code.
copyright: adrian ilarion ciobanu <cia@mud.ro> aka malfaux. all rights reserved.

license: I release this piece of crap to public domain, under the terms of WTFPL license.
for more information, see http://sam.zoy.org/wtfpl/COPYING

[kill'em all, machine]


the answer is: not|caod
source=not|caod, target=b89e49cab317f2681be60fb3d1c0f8f8, numtries=67145004

real    7m2.701s
user    7m2.340s
sys     0m0.014s


as you will notice, I am using no regular expressions whatsoever.
instead, i'm translating the regular expression into an alphabet which has 11 letters.
the regular expression limits the length of the word to 8 which saves my ass because I
will have max 214,358,881 iterations to find the crack.

so what i'm doing is i'm counting in base 11 over that alphabet, from 0 to 214,358,881
for each generated number i'm testing if it's representation in base 11 over that alphabet matches
the string we are looking for.


example:
alphabet=('0','1','2')
counting in base 3 using 2 digits for representation:

('N3','N10','BASE')
===================
('00', 0,        3)
('01', 1,        3)
('02', 2,        3)
('10', 3,        3)
('11', 4,        3)
('12', 5,        3)
('20', 6,        3)
('21', 7,        3)
('22', 8,        3)
"""

"""
the alphabet used to produce words
"""
alphabase = (
    'a', 'c', 'd', 'n', 'o', 'p', 'q', 'r', 's', 't', '|'
)

base = len(alphabase)

class _runes(tuple):
    """
    runes allows accesing tuple members by using alphabet
    letters as indexes
    this allows us to easily lookup letter indexes in the shuffled
    alphabet in O(1) instead of using tuple.index()
    """
    def __new__(cls,it):
        return tuple.__new__(cls,it)

    def __getitem__(self,index):
        return tuple.__getitem__(self,self.ordinal(index))
        #if isinstance(index,int):
        #    return tuple.__getitem__(self,index)
        #return tuple.__getitem__(self,_runes.ordinal(ord(index)))

    @staticmethod
    def ordinal(ch):
        if isinstance(ch,int): return ch
        try:
            ordcode = ord(ch)
        except TypeError:
            raise IndexError
        if ordcode < 58 and ordcode > 47: return ordcode-48
        if ordcode < 91 and ordcode > 64: return ordcode-55
        if ordcode < 123 and ordcode > 96: return ordcode-61
        raise IndexError


    def count(self,val):
        """ always returns 1 if rune found """
        try:
            #force True to 1
            return 0 + self.ordinal(val) < len(self)
        except IndexError:
            return 0
    def index(self,value,start=None,stop=None):
        #may throw
        try:
            return self.ordinal(value)
        except IndexError:
            raise ValueError
    def __contains__(self,val):
        try:
            return self.ordinal(val) < len(self)
        except IndexError:
            return False

import random
#al  = list(alphabase)
#random.shuffle(al)
#alpha = tuple(al)
alpha = alphabase
alpos = _runes([alpha.index(alphabase[i]) for i in xrange(0,len(alpha))])


def base10(digits,base):
    """
    transform from base len(alphabet) in base10
    alphabet=('C','A','T')
    N3 = TT
    N10= 8
    """
    num = 0
    power = len(digits)-1
    sz = len(digits)
    i = 0
    while i < sz:
        try:
            num += alpos[digits[i]]*base**power
        except ValueError:
            return None
        i+=1
        power-=1
    return num

def base_transform(num,base,digits):
    """
    write the counter-generated number into specified base
    with the specified number of digits
    """
    l = [ alpha[0] for i in xrange(0,digits)]
    pos = digits - 1
    while (pos >= 0):
        l[pos] = alpha[num%base]
        pos-=1
        num = num/base
    return l

class baseTcounter:
    """
    counter generator.
    given the start value counter, the base into which the numbers are
    computed, the number of digits
    and the max number of digits, generate up to len(alphabet)**digits numbers.

    when the maximum counter value is reached for a number of digits, the
    number of digits is incremented by 1, up to max_digits
    """
    def __init__(self,start,base,digits,max_digits):
        self.start = start
        self.base = base
        self.digits = digits
        self.stop = base**digits
        self.val = start
        self.max_digits = max_digits
    def __call__(self):
        while True:
            while self.val < self.stop:
                yield (self.val,self.base,self.digits)
                self.val+=1
            if self.digits == self.max_digits:
                raise StopIteration
            self.digits+=1
            self.start = 0
            self.val = self.start
            self.stop = self.base**self.digits

import sys
import hashlib

if __name__ == '__main__':

    #gencount = baseTcounter(startfrom,base,num_digits,max_digits)
    gencount = baseTcounter(0, base, 8, 8)
    counter = gencount()
    target = 'b89e49cab317f2681be60fb3d1c0f8f8'
    numtries = 0
    while True:
        numtries+=1
        (val,base,num_digits) = counter.next()
        digits = base_transform(val,base,num_digits)
        source = ''.join(digits)
        md5hex = hashlib.md5(source).hexdigest()
        if md5hex == target:
            print "source=%s, target=%s, numtries=%d" % (source, md5hex, numtries)
            sys.exit(0)
        if numtries % 10000 == 0:
            print "numtries = %d at %s" % (numtries, source)
    sys.exit(0)
	#!/usr/bin/python

	"""
	for user1796738@stackoverflow.com
	in reply to the stackoverflow thread: http://stackoverflow.com/questions/13210943/md5-brute-forcing
	which was closed by some weirdos


	=>> problem description <snip from the above url>, posted by user1796738:
	... In someone's puzzle game, I reached this puzzle. I tried searching for an answer or a way
	... to find the answer to it for atl east 2 hours... I got an MD5 Hash and a regular
	... expression and he told me to find the text behind it all.. I know I can bruteforce it but I
	... don't know any programs to brute force an MD5 and surely don't know how to use the Regular Expression
	... to crack it faster... So if anyone could tell me a way to bruteforce the MD5 hash using the
	... Regular Expression or tell me the answer, it will be helpful.
	<<=


	=>> solution:

	this bruteforce md5 cracker is adapted from my own source code.
	copyright: adrian ilarion ciobanu <cia@mud.ro> aka malfaux. all rights reserved.

	license: I release this piece of crap to public domain, under the terms of WTFPL license.
	for more information, see http://sam.zoy.org/wtfpl/COPYING

	[kill'em all, machine]


	the answer is: not\|caod
	source=not\|caod, target=b89e49cab317f2681be60fb3d1c0f8f8, numtries=67145004

	real 7m2.701s
	user 7m2.340s
	sys 0m0.014s


	as you will notice, I am using no regular expressions whatsoever.
	instead, i'm translating the regular expression into an alphabet which has 11 letters.
	the regular expression limits the length of the word to 8 which saves my ass because I
	will have max 214,358,881 iterations to find the crack.

	so what i'm doing is i'm counting in base 11 over that alphabet, from 0 to 214,358,881
	for each generated number i'm testing if it's representation in base 11 over that alphabet matches
	the string we are looking for.


	example:
	alphabet=('0','1','2')
	counting in base 3 using 2 digits for representation:

	('N3','N10','BASE')
	===================
	('00', 0, 3)
	('01', 1, 3)
	('02', 2, 3)
	('10', 3, 3)
	('11', 4, 3)
	('12', 5, 3)
	('20', 6, 3)
	('21', 7, 3)
	('22', 8, 3)
	"""

	"""
	the alphabet used to produce words
	"""
	alphabase = (
	'a', 'c', 'd', 'n', 'o', 'p', 'q', 'r', 's', 't', '\|'
	)

	base = len(alphabase)

	class _runes(tuple):
	"""
	runes allows accesing tuple members by using alphabet
	letters as indexes
	this allows us to easily lookup letter indexes in the shuffled
	alphabet in O(1) instead of using tuple.index()
	"""
	def __new__(cls,it):
	return tuple.__new__(cls,it)

	def __getitem__(self,index):
	return tuple.__getitem__(self,self.ordinal(index))
	#if isinstance(index,int):
	# return tuple.__getitem__(self,index)
	#return tuple.__getitem__(self,_runes.ordinal(ord(index)))

	@staticmethod
	def ordinal(ch):
	if isinstance(ch,int): return ch
	try:
	ordcode = ord(ch)
	except TypeError:
	raise IndexError
	if ordcode < 58 and ordcode > 47: return ordcode-48
	if ordcode < 91 and ordcode > 64: return ordcode-55
	if ordcode < 123 and ordcode > 96: return ordcode-61
	raise IndexError


	def count(self,val):
	""" always returns 1 if rune found """
	try:
	#force True to 1
	return 0 + self.ordinal(val) < len(self)
	except IndexError:
	return 0
	def index(self,value,start=None,stop=None):
	#may throw
	try:
	return self.ordinal(value)
	except IndexError:
	raise ValueError
	def __contains__(self,val):
	try:
	return self.ordinal(val) < len(self)
	except IndexError:
	return False

	import random
	#al = list(alphabase)
	#random.shuffle(al)
	#alpha = tuple(al)
	alpha = alphabase
	alpos = _runes([alpha.index(alphabase[i]) for i in xrange(0,len(alpha))])


	def base10(digits,base):
	"""
	transform from base len(alphabet) in base10
	alphabet=('C','A','T')
	N3 = TT
	N10= 8
	"""
	num = 0
	power = len(digits)-1
	sz = len(digits)
	i = 0
	while i < sz:
	try:
	num += alpos[digits[i]]base*power
	except ValueError:
	return None
	i+=1
	power-=1
	return num

	def base_transform(num,base,digits):
	"""
	write the counter-generated number into specified base
	with the specified number of digits
	"""
	l = [ alpha[0] for i in xrange(0,digits)]
	pos = digits - 1
	while (pos >= 0):
	l[pos] = alpha[num%base]
	pos-=1
	num = num/base
	return l

	class baseTcounter:
	"""
	counter generator.
	given the start value counter, the base into which the numbers are
	computed, the number of digits
	and the max number of digits, generate up to len(alphabet)**digits numbers.

	when the maximum counter value is reached for a number of digits, the
	number of digits is incremented by 1, up to max_digits
	"""
	def __init__(self,start,base,digits,max_digits):
	self.start = start
	self.base = base
	self.digits = digits
	self.stop = base**digits
	self.val = start
	self.max_digits = max_digits
	def __call__(self):
	while True:
	while self.val < self.stop:
	yield (self.val,self.base,self.digits)
	self.val+=1
	if self.digits == self.max_digits:
	raise StopIteration
	self.digits+=1
	self.start = 0
	self.val = self.start
	self.stop = self.base**self.digits

	import sys
	import hashlib

	if __name__ == '__main__':

	#gencount = baseTcounter(startfrom,base,num_digits,max_digits)
	gencount = baseTcounter(0, base, 8, 8)
	counter = gencount()
	target = 'b89e49cab317f2681be60fb3d1c0f8f8'
	numtries = 0
	while True:
	numtries+=1
	(val,base,num_digits) = counter.next()
	digits = base_transform(val,base,num_digits)
	source = ''.join(digits)
	md5hex = hashlib.md5(source).hexdigest()
	if md5hex == target:
	print "source=%s, target=%s, numtries=%d" % (source, md5hex, numtries)
	sys.exit(0)
	if numtries % 10000 == 0:
	print "numtries = %d at %s" % (numtries, source)
	sys.exit(0)