Last active
December 18, 2015 19:49
-
-
Save tylerkerr/298b7e44ebbd36bd80e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import sys, re | |
from operator import itemgetter | |
""" | |
Takes a hex string that is potentially 'encrypted' via single-byte XOR
(each byte has been XOR'd with the same key byte),
creates an output for each potential key (256 total),
then scores each output by its likelihood of being English
via single-letter frequency analysis.
ex.:
./cracksinglebytexor.py 1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736
result 88 with score 120.088 : "Cooking MC's like a pound of bacon"
"""
# Brute-force a single-byte XOR "cipher" over a hex string: try all 256 keys
# and rank each candidate plaintext by how English-like its characters are.

threshold = 1  # only output results scoring at least this. 1 seems good

# Score given to any character that is not in scoredict (digits, punctuation,
# control characters, non-ASCII, ...).
penalty = -20

# Lowercase letters score their percentage in wikipedia's English letter
# frequency table, uppercase letters score half of that (rounded to three
# places), and a space scores 1.
scoredict = {
    'e': 12.702,
    't': 9.056,
    'a': 8.167,
    'o': 7.507,
    'i': 6.966,
    'n': 6.749,
    's': 6.327,
    'h': 6.094,
    'r': 5.987,
    'd': 4.253,
    'l': 4.025,
    'c': 2.782,
    'u': 2.758,
    'm': 2.406,
    'w': 2.361,
    'f': 2.228,
    'g': 2.015,
    'y': 1.974,
    'p': 1.929,
    'b': 1.492,
    'v': 0.978,
    'k': 0.772,
    'j': 0.153,
    'x': 0.150,
    'q': 0.095,
    'z': 0.074,
    ' ': 1,
    'E': 6.351,
    'T': 4.528,
    'A': 4.084,
    'O': 3.754,  # half of 'o' (was mistakenly the full lowercase value)
    'I': 3.483,  # half of 'i' (was mistakenly half of 'o')
    'N': 3.375,
    'S': 3.164,
    'H': 3.047,
    'R': 2.994,
    'D': 2.127,
    'L': 2.013,
    'C': 1.391,
    'U': 1.379,
    'M': 1.203,
    'W': 1.181,
    'F': 1.114,
    'G': 1.008,
    'Y': 0.987,
    'P': 0.965,
    'B': 0.746,
    'V': 0.489,
    'K': 0.386,
    'J': 0.077,
    'X': 0.075,
    'Q': 0.048,
    'Z': 0.037,
}


def decode_hex(hexin):
    """Return the bytes encoded by the hex string *hexin*.

    An odd-length string is left-padded with a single "0" so that it splits
    into whole bytes.

    Raises:
        ValueError: if *hexin* contains non-hexadecimal characters.
    """
    if len(hexin) % 2 == 1:
        hexin = "0" + hexin
    return bytes.fromhex(hexin)


def xor_decrypt(data, key):
    """Return *data* XOR'd byte-by-byte with *key* as a string.

    Each resulting byte value (0-255) is mapped to the character with that
    code point, so the result always has ``len(data)`` characters.
    """
    return "".join(chr(byte ^ key) for byte in data)


def score_text(text):
    """Return the English-likeness score of *text*.

    Every character adds its ``scoredict`` value; characters missing from the
    table add ``penalty`` instead.
    """
    # Plain left-to-right accumulation keeps the printed float identical
    # across Python versions (the built-in sum() changed its float algorithm).
    rating = 0.0
    for char in text:
        rating += scoredict.get(char, penalty)
    return rating


def crack_single_byte_xor(hexin, min_score=threshold):
    """Try all 256 single-byte keys against *hexin* and rank the results.

    Args:
        hexin: hex string of the XOR'd message.
        min_score: candidates scoring below this are dropped.

    Returns:
        A list of ``(key, score, plaintext)`` tuples, best score first.
        Candidates with equal scores stay in ascending key order.

    Raises:
        ValueError: if *hexin* is not valid hexadecimal.
    """
    data = decode_hex(hexin)
    candidates = []
    for key in range(256):
        text = xor_decrypt(data, key)
        candidates.append((key, score_text(text), text))
    # list.sort is stable, also with reverse=True, so ties keep key order.
    candidates.sort(key=itemgetter(1), reverse=True)
    return [cand for cand in candidates if cand[1] >= min_score]


def main(argv=None):
    """Run the command-line tool and return the process exit status.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv``. Arguments after the hex string are ignored.

    Returns:
        0 on success, 1 for invalid hex input, 2 when the hex argument is
        missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "cracksinglebytexor.py"
        print("usage: %s HEXSTRING" % prog, file=sys.stderr)
        return 2
    try:
        results = crack_single_byte_xor(argv[1])
    except ValueError as err:
        print("invalid hex input: %s" % err, file=sys.stderr)
        return 1
    for key, score, text in results:
        print("result %s with score %s : \"%s\"" % (key, score, text))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment