aaronyoo/vigenere.py

## vigenere.py
import base64
from collections import Counter


# English language letter frequencies from:
# https://gist.githueb.com/pozhidaevak/0dca594d6f0de367f232909fe21cdb2f
letterFrequency = {'E' : 12.0, 'T' : 9.10, 'A' : 8.12, 'O' : 7.68,
                   'I' : 7.31, 'N' : 6.95, 'S' : 6.28, 'R' : 6.02,
                   'H' : 5.92, 'D' : 4.32, 'L' : 3.98, 'U' : 2.88,
                   'C' : 2.71, 'M' : 2.61, 'F' : 2.30, 'Y' : 2.11,
                   'W' : 2.09, 'G' : 2.03, 'P' : 1.82, 'B' : 1.49,
                   'V' : 1.11, 'K' : 0.69, 'X' : 0.17, 'Q' : 0.11,
                   'J' : 0.10, 'Z' : 0.07 , ' ' : 13.00}


def breakSingleByteRepeatingXOR(ct):
    """Breaks single byte repeating XOR by trying all possible bytes"""
    l = []

    for key in range(256):
        # get the plaintext string
        pt = bytearray()
        for byte in ct:
            pt.append(byte ^ key)

        # try to convert the plaintext byte object into string form
        # if there is a converstion error this isn't the right key so continue
        try:
            pt_str = str(pt, 'ascii').upper()
        except UnicodeDecodeError:
            continue

        # use Counter to create a frequency dictionary from the
        # uppercase version of the plaintext string
        pt_freq = Counter(pt_str)

        # score the string
        score = 0
        for k, v in pt_freq.items():
            if k in letterFrequency:
                score += letterFrequency[k]

        # append result to l
        l.append((key, pt, score))

    # sort based on highest score
    l.sort(key=lambda x: x[2], reverse=True)

    # return the tuple associated with the highest score
    if len(l) >= 1:
        return l[0]
    else:
        return None


def applyMultiByteRepeatingXOR(t, k):
    """Applies multibyte repeating XOR to the text using the key"""
    result = bytearray()
    for i in range(len(t)):
        result.append(t[i] ^ k[i % len(k)])
    return result


def getKeySize(ct):
    """Returns the most likely key size"""

    def hammingDistance(bs1, bs2):
        """Computes the Hamming distance between 2 byte-like objects"""
        result = 0
        for b1, b2 in zip(bs1, bs2):
            result += sum(b == '1' for b in bin(b1 ^ b2))
        return result

    l = []
    for key_size in range(2, 41):
        # divide into chunks of key_size
        chunks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]

        distances = []
        for c1, c2 in zip(chunks[0::2], chunks[1::2]):
            distances.append(hammingDistance(c1, c2) / key_size)
        avg_distance = sum(distances) / len(distances)
        l.append((key_size, avg_distance))

    # sort by the smallest hamming distance
    l.sort(key=lambda x: x[1])
    return l[0][0]


def getTransposedBlocks(key_size, ct):
    """Produces transposed blocks from key and ciphertext"""
    # divide into chuncks of key_size
    ct_blocks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]

    # transpose blocks
    ct_blocks_transposed = [bytearray() for i in range(key_size)]
    for i in range(key_size):
        for block in ct_blocks:
            if (i < len(block)):
                ct_blocks_transposed[i].append(block[i])

    return ct_blocks_transposed


def main():
    with open('6.txt', 'r') as infile:
        ciphertext = base64.b64decode(infile.read())

    key_size = getKeySize(ciphertext)
    key = bytearray()
    transposed_blocks = getTransposedBlocks(key_size, ciphertext)
    for block in transposed_blocks:
        # get the key from each single byte XOR
        key.append(breakSingleByteRepeatingXOR(block)[0])

    # decrypt the ciphertext using the key
    plaintext = applyMultiByteRepeatingXOR(ciphertext, key)
    print(str(key, 'ascii'))
    print(str(plaintext, 'ascii'))


if __name__ == '__main__':
    main()
	import base64
	from collections import Counter


	# English language letter frequencies from:
	# https://gist.githueb.com/pozhidaevak/0dca594d6f0de367f232909fe21cdb2f
	letterFrequency = {'E' : 12.0, 'T' : 9.10, 'A' : 8.12, 'O' : 7.68,
	'I' : 7.31, 'N' : 6.95, 'S' : 6.28, 'R' : 6.02,
	'H' : 5.92, 'D' : 4.32, 'L' : 3.98, 'U' : 2.88,
	'C' : 2.71, 'M' : 2.61, 'F' : 2.30, 'Y' : 2.11,
	'W' : 2.09, 'G' : 2.03, 'P' : 1.82, 'B' : 1.49,
	'V' : 1.11, 'K' : 0.69, 'X' : 0.17, 'Q' : 0.11,
	'J' : 0.10, 'Z' : 0.07 , ' ' : 13.00}


	def breakSingleByteRepeatingXOR(ct):
	"""Breaks single byte repeating XOR by trying all possible bytes"""
	l = []

	for key in range(256):
	# get the plaintext string
	pt = bytearray()
	for byte in ct:
	pt.append(byte ^ key)

	# try to convert the plaintext byte object into string form
	# if there is a converstion error this isn't the right key so continue
	try:
	pt_str = str(pt, 'ascii').upper()
	except UnicodeDecodeError:
	continue

	# use Counter to create a frequency dictionary from the
	# uppercase version of the plaintext string
	pt_freq = Counter(pt_str)

	# score the string
	score = 0
	for k, v in pt_freq.items():
	if k in letterFrequency:
	score += letterFrequency[k]

	# append result to l
	l.append((key, pt, score))

	# sort based on highest score
	l.sort(key=lambda x: x[2], reverse=True)

	# return the tuple associated with the highest score
	if len(l) >= 1:
	return l[0]
	else:
	return None


	def applyMultiByteRepeatingXOR(t, k):
	"""Applies multibyte repeating XOR to the text using the key"""
	result = bytearray()
	for i in range(len(t)):
	result.append(t[i] ^ k[i % len(k)])
	return result


	def getKeySize(ct):
	"""Returns the most likely key size"""

	def hammingDistance(bs1, bs2):
	"""Computes the Hamming distance between 2 byte-like objects"""
	result = 0
	for b1, b2 in zip(bs1, bs2):
	result += sum(b == '1' for b in bin(b1 ^ b2))
	return result

	l = []
	for key_size in range(2, 41):
	# divide into chunks of key_size
	chunks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]

	distances = []
	for c1, c2 in zip(chunks[0::2], chunks[1::2]):
	distances.append(hammingDistance(c1, c2) / key_size)
	avg_distance = sum(distances) / len(distances)
	l.append((key_size, avg_distance))

	# sort by the smallest hamming distance
	l.sort(key=lambda x: x[1])
	return l[0][0]


	def getTransposedBlocks(key_size, ct):
	"""Produces transposed blocks from key and ciphertext"""
	# divide into chuncks of key_size
	ct_blocks = [ct[i:i+key_size] for i in range(0, len(ct), key_size)]

	# transpose blocks
	ct_blocks_transposed = [bytearray() for i in range(key_size)]
	for i in range(key_size):
	for block in ct_blocks:
	if (i < len(block)):
	ct_blocks_transposed[i].append(block[i])

	return ct_blocks_transposed


	def main():
	with open('6.txt', 'r') as infile:
	ciphertext = base64.b64decode(infile.read())

	key_size = getKeySize(ciphertext)
	key = bytearray()
	transposed_blocks = getTransposedBlocks(key_size, ciphertext)
	for block in transposed_blocks:
	# get the key from each single byte XOR
	key.append(breakSingleByteRepeatingXOR(block)[0])

	# decrypt the ciphertext using the key
	plaintext = applyMultiByteRepeatingXOR(ciphertext, key)
	print(str(key, 'ascii'))
	print(str(plaintext, 'ascii'))


	if __name__ == '__main__':
	main()