Skip to content

Instantly share code, notes, and snippets.

@tylerkerr
Last active December 18, 2015 19:49
Show Gist options
  • Save tylerkerr/298b7e44ebbd36bd80e7 to your computer and use it in GitHub Desktop.
Save tylerkerr/298b7e44ebbd36bd80e7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys, re
from operator import itemgetter
"""
Takes a hex string that may have been 'encrypted' with single-byte XOR
(every byte of the plaintext was XORed with the same key byte),
produces a candidate decryption for each possible key (256 total),
then scores each candidate by its likelihood of being English
via single-letter frequency analysis.
Example:
./cracksinglebytexor.py 1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736
result 88 with score 120.088 : "Cooking MC's like a pound of bacon"
"""
# --- configuration ---------------------------------------------------------
threshold = 1  # only output results scoring at or above this. 1 seems good

# Amount subtracted for every character that has no entry in scoredict
# (digits, punctuation, control characters, non-ASCII, ...).
_UNSCORED_PENALTY = 20

# Per-character scores used to judge how English-like a candidate text is:
#   * lowercase letters score their percentage frequency in English text
#     (figures from wikipedia's english letter-frequency table)
#   * uppercase letters score half of the matching lowercase value
#   * space scores 1
#   * every character not listed here costs _UNSCORED_PENALTY
scoredict = {
    'e': 12.702,
    't': 9.056,
    'a': 8.167,
    'o': 7.507,
    'i': 6.966,
    'n': 6.749,
    's': 6.327,
    'h': 6.094,
    'r': 5.987,
    'd': 4.253,
    'l': 4.025,
    'c': 2.782,
    'u': 2.758,
    'm': 2.406,
    'w': 2.361,
    'f': 2.228,
    'g': 2.015,
    'y': 1.974,
    'p': 1.929,
    'b': 1.492,
    'v': 0.978,
    'k': 0.772,
    'j': 0.153,
    'x': 0.150,
    'q': 0.095,
    'z': 0.074,
    ' ': 1,
    'E': 6.351,
    'T': 4.528,
    'A': 4.084,
    'O': 3.754,  # was 7.507 (the full lowercase value, not half of it)
    'I': 3.483,  # was 3.754 (half of 'o' instead of half of 'i')
    'N': 3.375,
    'S': 3.164,
    'H': 3.047,
    'R': 2.994,
    'D': 2.127,
    'L': 2.013,
    'C': 1.391,
    'U': 1.379,
    'M': 1.203,
    'W': 1.181,
    'F': 1.114,
    'G': 1.008,
    'Y': 0.987,
    'P': 0.965,
    'B': 0.746,
    'V': 0.489,
    'K': 0.386,
    'J': 0.077,
    'X': 0.075,
    'Q': 0.048,
    'Z': 0.037,
}


def _decode_hex(hexin: str) -> bytes:
    """Convert the hex string *hexin* into the bytes it encodes.

    An odd-length string is left-padded with a single "0" so that it
    splits into whole bytes.

    Raises:
        ValueError: if *hexin* is not valid hexadecimal.
    """
    if len(hexin) % 2 == 1:
        hexin = "0" + hexin
    return bytes.fromhex(hexin)


def _score_text(text: str) -> float:
    """Return the score of *text*: the sum of its per-character scores.

    Characters found in ``scoredict`` add their table value; every other
    character subtracts ``_UNSCORED_PENALTY``. Empty text scores 0.0.
    """
    # Accumulate in a plain left-to-right loop so the floating-point result
    # (and therefore the printed score) does not depend on how the built-in
    # sum() chooses to add floats.
    rating = 0.0
    for char in text:
        rating += scoredict.get(char, -_UNSCORED_PENALTY)
    return rating


def rank_keys(hexin: str) -> list:
    """Try every single-byte XOR key against *hexin* and rank the results.

    Args:
        hexin: the hex-encoded input (odd lengths are zero-padded on the
            left).

    Returns:
        A list of 256 ``(key, score, text)`` tuples sorted by score, best
        first. Keys with equal scores stay in ascending key order.

    Raises:
        ValueError: if *hexin* is not valid hexadecimal.
    """
    data = _decode_hex(hexin)
    candidates = []
    for key in range(256):
        # latin-1 maps byte value n to code point n, so this is the same
        # text as joining chr(byte ^ key) for every byte.
        text = bytes(byte ^ key for byte in data).decode("latin-1")
        candidates.append((key, _score_text(text), text))
    # list.sort is stable even with reverse=True, so ties keep key order.
    candidates.sort(key=itemgetter(1), reverse=True)
    return candidates


def main(argv: list) -> int:
    """Command-line entry point.

    Args:
        argv: the full argument vector; ``argv[1]`` is the hex input and
            any further arguments are ignored.

    Prints one line per key whose score is at least ``threshold``, best
    score first.

    Returns:
        The process exit status: 0 on success, 1 if the input is not valid
        hex, 2 if no input was given.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "cracksinglebytexor.py"
        print("usage: %s <hex string>" % prog, file=sys.stderr)
        return 2
    try:
        candidates = rank_keys(argv[1])
    except ValueError as err:
        print("invalid hex input: %s" % err, file=sys.stderr)
        return 1
    for key, score, text in candidates:
        if score >= threshold:
            print("result %s with score %s : \"%s\"" % (key, score, text))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment