Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Evaluation of password security for sequences of random characters and words
import random, math
import re
def log_bin(value):
    """Return the base-2 logarithm of value (its information content in bits)."""
    return math.log(value, 2)
def cumprod(values):
    """Multiply every item of values together.

    Returns the single overall product (not a running list of partial
    products); an empty iterable yields 1.
    """
    product = 1
    for factor in values:
        product *= factor
    return product
def gen_password(charset, length):
    """Build a password out of `length` random picks from `charset`.

    charset may be a string (each pick is one character) or a sequence of
    words. Picks longer than one character are passed through
    str.capitalize() (first letter upper case, the rest lower case), so an
    attacker cannot assume the whole password is lower case.

    Picks are drawn from random.SystemRandom, which uses the operating
    system's entropy source, instead of the default Mersenne Twister
    generator whose output is predictable and therefore unsuitable for
    secrets.

    Returns the concatenated picks ("" when length is 0). An empty charset
    with a positive length raises IndexError from the underlying choice().
    """
    secure_rng = random.SystemRandom()
    pieces = []
    for _ in range(length):
        piece = secure_rng.choice(charset)
        if len(piece) > 1:
            # Multi-character pick, i.e. a word: capitalize it.
            piece = piece.capitalize()
        pieces.append(piece)
    # Join once at the end rather than growing a string inside the loop.
    return "".join(pieces)
def format_size(size):
    """Format a byte count as a human-readable string with binary prefixes.

    size is a number of bytes (int or float). The result has two decimals
    and a unit: "B" for values below 1024, then "KiB", "MiB", ... up to
    "YiB". Values of 1024 YiB or more (including infinity) are reported as
    the string "much".

    Fixes over the previous version: plain byte values used to be labelled
    "YiB" (the prefix index -1 wrapped around to the end of the prefix
    list) and KiB-range values used to be labelled "B"; an infinite size
    looped forever.
    """
    size = size * 1.0  # Convert to float
    prefix_list = "KMGTPEZY"  # Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta
    prefix = -1  # -1 means "no prefix", i.e. plain bytes
    while size >= 1024:
        size /= 1024
        prefix += 1
        if prefix >= len(prefix_list):
            # Ran out of prefixes; also guarantees termination for infinity.
            return "much"
    if prefix < 0:
        return "%.2fB" % size
    return "%.2f%siB" % (size, prefix_list[prefix])
def num_separators(number):
    """Return number as an integer string with "," between digit groups.

    number is truncated to an int first, so 1234.9 becomes "1,234".

    The hand-written grouping loop this replaces compared the digit index
    with len(number), which is never equal, so any value whose digit count
    is a multiple of three got a leading separator (",123,456") and short
    negative values came out as "-,123". The built-in "," format option
    handles both cases.
    """
    return "{:,}".format(int(number))
def acronym(text):
    """Return the upper-case ASCII letters (A-Z) of text, in order, as one string."""
    return ''.join(letter for letter in text if 'A' <= letter <= 'Z')
# Load the system word list. The with-block closes the file handle as soon as
# the words have been read (it used to stay open for the whole run).
with open("/usr/share/dict/words", "r") as dictfile:
    # NOTE(review): the stated goal is to drop possessives ending in 's, but
    # this pattern also rejects every word whose last letter is "s" and every
    # single-character word -- confirm whether that is intended.
    pattern = re.compile("^.*[^'][^s]$") #Remove words ending with 's
    # Strip only the trailing newline instead of blindly cutting the last
    # character, so a final line without "\n" keeps its last letter.
    stripped_lines = (line.rstrip("\n") for line in dictfile)
    #Generate list of words:
    word_list = [word for word in stripped_lines if pattern.match(word)]
#Count them and their average length
word_count = len(word_list)
average_word_length = sum(len(word) for word in word_list) / (1.0 * word_count)
# Bits of entropy contributed per character when a whole word is picked at random
word_entropy_per_char = log_bin(word_count) / average_word_length
# A SHA512 digest is 512 bits long; expressed here in bytes.
hash_digest_size = 512 / 8

# Only ASCII letters, digits and the underscore are used -- a character set
# commonly allowed for passwords.
char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
char_count = len(char_list)
char_entropy_per_char = log_bin(char_count)

# Summary of the dictionary ...
print("%i words in our dictionary" % word_count)
print(" Of average length %.2f" % average_word_length)
print(" With entropy content of %.2f bits per character\n" % word_entropy_per_char)

# ... and of the random-character alphabet, compared against the dictionary.
print("We're using %i characters" % char_count)
print(" With entropy content of %.2f bits each" % char_entropy_per_char)
print(
    " Containing %.2f%% the entropy content of single character in a random word\n"
    % (char_entropy_per_char / word_entropy_per_char * 100)
)
# Each entry is a tuple: (combinations, text, disk usage of all hashes).
# The tuple literals below were previously left unclosed (a syntax error);
# they are completed with the disk usage as third element, matching the
# format described above.
results = []

# Random characters first: password lengths 4 up to and including 21.
for i in range(4, 22):
    combinations = char_count**i
    # Disk usage of hashes for all possibilities:
    # digest + the password itself + 1 for a delimiter, presumably newline
    disk_usage = combinations * (hash_digest_size + i + 1)
    results.append((
        combinations,
        "%2i chars: %.2e permutations, %s disk space"
        % (i, combinations, format_size(disk_usage)),
        disk_usage,
    ))

# Now for words, we assume that attacker has a complete dictionary available
for i in range(1, 8):
    combinations = word_count**i
    # Same estimate, using the average word length for the stored password.
    disk_usage = combinations * (hash_digest_size + i*average_word_length + 1)
    results.append((
        combinations,
        "%i words(%2ich): %.2e permutations, %s disk space"
        % (i, math.ceil(i*average_word_length), combinations, format_size(disk_usage)),
        disk_usage,
    ))
# Disclaimer: I have no idea how much this would take on what hardware,
# so please adjust yourself.
hashing_speed = 9001.0 * 1000**2  # 9001 million hashes per second
print("With %s hashes per second: " % num_separators(hashing_speed))

# marks[k] is how many of unit marks_text[k] make up one marks_text[k + 1].
marks = [
    60,         # seconds   -> minutes
    60,         # minutes   -> hours
    24,         # hours     -> days
    30,         # days      -> months
    12,         # months    -> years
    100,        # years     -> centuries
    137000000,  # centuries -> ages of universe
]
marks_text = [
    "seconds",
    "minutes",
    "hours",
    "days",
    "months",
    "years",
    "centuries",
    "ages of universe",
]
# Index into marks_text of the unit currently used for reporting.
mark = 0
# Show five sample lines for each kind of password.
print("\nExample passwords:")

print(" 12 random characters:")
for _ in range(5):
    left = gen_password(char_list, 12)
    right = gen_password(char_list, 12)
    print(" %s %s" % (left, right))

print(" 4 random words:")
for _ in range(5):
    passphrase = gen_password(word_list, 4)
    # The acronym is made of the capitals that start each word.
    print(" %s: %s (%ich)" % (acronym(passphrase), passphrase, len(passphrase)))

print(" 5 random words:")
for _ in range(5):
    passphrase = gen_password(word_list, 5)
    print(" %s: %s (%ich)" % (acronym(passphrase), passphrase, len(passphrase)))

print(" 15 random characters:")
for _ in range(5):
    left = gen_password(char_list, 15)
    right = gen_password(char_list, 15)
    print(" %s %s" % (left, right))
print("==Starting from seconds==")
# Walk the results from fewest to most permutations, switching to a larger
# time unit whenever the brute-force time outgrows the current one.
for entry in sorted(results):
    seconds_needed = entry[0] / hashing_speed
    # More than one unit boundary may be crossed in a single step.
    while seconds_needed > cumprod(marks[:mark + 1]) and mark <= len(marks) - 1:
        mark += 1
        print("==Going into %s==" % marks_text[mark])
    # Express the time in the current unit: divide by seconds per unit.
    print(
        "%s (%.2f %s to brute force)"
        % (entry[1], seconds_needed / cumprod(marks[:mark]), marks_text[mark])
    )
# Closing remarks on how to read the table above.
print("Disk usage is amount of space to store all hashes")
print("Brute force assumes worst case, averages to half of the value")

print("\nNotes:")
print(" With disk usage, I am assuming no compression is used on words.")
print(" Zipping my words file yielded decrease from 931 to 252 kilobytes")
print(" Compressing would probably decrease size by ~2/3 of that,")
print(" since hashes are pretty much incompressible")
print(" Compressing random characters makes no significant difference")
print(" I am not accounting for technological progress, but I assume")
print(" that by time it becomes easy enough to crack these,")
print(" most currently used algorithms will be irrelevant anyway")

print("\nVerdict:")
print(" While random characters contain way more entropy per character,")
print(" a set of words is much easier to remember")
# Presumably: (characters in 4 average words, as insert positions) * 10 digits.
num_increase = average_word_length * 4 * 10
print(" Inserting randomly a single number into 4 words increases")
print(
    " amount of permutations %i times (%i orders of magnitude)"
    % (num_increase, math.floor(math.log10(num_increase)))
)
print(
    " %.2f%% of how much adding a single char to a random sequence does"
    % (num_increase * 100.0 / char_count)
)
print("As such, I declare that using a sequence of 5 or even 4 random words")
print(" is a superior alternative to random characters,")
print(" since it is much easier for human brain to remember sequences")
print(" that have a meaning, and acronyms help further")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment