# tylerkerr/cracksinglebytexor.py

## cracksinglebytexor.py
#!/usr/bin/env python3

import sys, re
from operator import itemgetter

"""

takes a hex string that is potentially 'encrypted' via single-byte xor
(each hex byte has been xor'd with the same byte),
creates an output for each potential key (256 total),
then scores each output by its likelihood of being english
via single letter frequency analysis.

ex.:
./cracksinglebytexor.py 1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736
result 88 with score 120.088 : "Cooking MC's like a pound of bacon"

"""

# Scoring table used to judge how "English-like" a candidate plaintext is:
#   * lowercase letters score their percentage frequency in English text
#     (values taken from Wikipedia's letter-frequency table),
#   * uppercase letters score half of the matching lowercase value,
#   * a space scores 1,
#   * every other character is penalised (see `penalty`).
scoredict = {
    'e': 12.702,
    't': 9.056,
    'a': 8.167,
    'o': 7.507,
    'i': 6.966,
    'n': 6.749,
    's': 6.327,
    'h': 6.094,
    'r': 5.987,
    'd': 4.253,
    'l': 4.025,
    'c': 2.782,
    'u': 2.758,
    'm': 2.406,
    'w': 2.361,
    'f': 2.228,
    'g': 2.015,
    'y': 1.974,
    'p': 1.929,
    'b': 1.492,
    'v': 0.978,
    'k': 0.772,
    'j': 0.153,
    'x': 0.150,
    'q': 0.095,
    'z': 0.074,
    ' ': 1,
    'E': 6.351,
    'T': 4.528,
    'A': 4.084,
    'O': 3.754,   # half of 'o' (was mistakenly the full lowercase value)
    'I': 3.483,   # half of 'i' (was mistakenly a copy of O's half value)
    'N': 3.375,
    'S': 3.164,
    'H': 3.047,
    'R': 2.994,
    'D': 2.127,
    'L': 2.013,
    'C': 1.391,
    'U': 1.379,
    'M': 1.203,
    'W': 1.181,
    'F': 1.114,
    'G': 1.008,
    'Y': 0.987,
    'P': 0.965,
    'B': 0.746,
    'V': 0.489,
    'K': 0.386,
    'J': 0.077,
    'X': 0.075,
    'Q': 0.048,
    'Z': 0.037,
}

# Only results scoring at or above this value are printed. 1 seems good.
threshold = 1

# Score added for every character that is not in `scoredict`.
penalty = -20


def decode_hex(hexin: str) -> bytes:
    """Convert a hex string to raw bytes.

    An odd-length string is left-padded with a single "0" so that it can be
    split into whole bytes.

    Raises:
        ValueError: if the string contains non-hexadecimal characters.
    """
    if len(hexin) % 2 == 1:
        hexin = "0" + hexin
    return bytes.fromhex(hexin)


def xor_with_key(data: bytes, key: int) -> str:
    """XOR every byte of *data* with *key* and return the result as text.

    Each resulting byte value is mapped to the character with the same code
    point (latin-1), which is exactly what ``chr(byte ^ key)`` produces.
    """
    return bytes(b ^ key for b in data).decode("latin-1")


def score_text(text: str) -> float:
    """Return the English-likeness score of *text* according to `scoredict`."""
    rating = 0.0
    # Plain left-to-right accumulation keeps the printed scores stable across
    # Python versions (the builtin sum() uses compensated summation on 3.12+).
    for ch in text:
        rating += scoredict.get(ch, penalty)
    return rating


def crack(hexin: str) -> list:
    """Try all 256 single-byte keys against the hex string *hexin*.

    Returns:
        A list of ``(key, score, plaintext)`` tuples, one per key, sorted by
        score from best to worst. Keys with equal scores stay in ascending
        key order because the sort is stable.

    Raises:
        ValueError: if *hexin* is not valid hexadecimal.
    """
    data = decode_hex(hexin)
    candidates = []
    for key in range(256):
        text = xor_with_key(data, key)
        candidates.append((key, score_text(text), text))
    return sorted(candidates, key=itemgetter(1), reverse=True)


def main(argv=None) -> int:
    """Command-line entry point.

    Args:
        argv: argument vector (program name first); defaults to ``sys.argv``.
            Arguments after the hex string are ignored.

    Returns:
        Process exit status: 0 on success, 1 on invalid hex input,
        2 when the hex string argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "cracksinglebytexor.py"
        print("usage: %s <hex string>" % prog, file=sys.stderr)
        return 2

    try:
        results = crack(argv[1])
    except ValueError as err:
        print("invalid hex input: %s" % err, file=sys.stderr)
        return 1

    for key, rating, text in results:
        if rating >= threshold:
            print("result %s with score %s : \"%s\"" % (key, rating, text))
    return 0


if __name__ == "__main__":
    sys.exit(main())