Created
October 22, 2012 17:29
-
-
Save pmiron/3932796 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Kasiski-Babbage cryptanalysis to get the text encripted with Vigenere | |
import fractions | |
import functools | |
import re #Import regular expresions | |
from collections import Counter | |
#---------------------------------------------------------------------------------------------- | |
#Search for the words with the given frecuency and length | |
# @originalString [string]: the original text | |
# @long [int]: the size of the word to search | |
# @ocurences[int]: ocurences of the string to search | |
def searchForOftenLongRepetitions(originalString,long,ocurences): | |
lenOriginalString = len(originalString) | |
dictStringPos = {} | |
distList = [] | |
for n in range(1,int(lenOriginalString/ocurences)+1): | |
substrings=[originalString[i:i+n] for i in range(lenOriginalString-n+1)] | |
freqs=Counter(substrings) | |
if freqs.most_common(1)[0][1]<ocurences: | |
n-=1 | |
break | |
else: | |
seq=freqs.most_common(1)[0][0] | |
#If the sequence is at least big as long parameter value is | |
#We store the word and the position | |
if(n>=long): | |
posOcurenceList = [m.start() for m in re.finditer(seq, originalString)] | |
dictStringPos[seq] = posOcurenceList | |
#First we search for the the word with the max length | |
max = 0 | |
keyToConsult = '' | |
for a in dictStringPos.keys(): | |
if len(a) > max: | |
max = len(a) | |
keyToConsult = a | |
#Search for the distances between founded words | |
values = dictStringPos[keyToConsult] | |
for i in range(len(values)-1,-1,-1): | |
for j in range(i-1,-1,-1): | |
valueToAdd = values[i]-(values[j]+len(keyToConsult)) | |
distList.append(valueToAdd) | |
return distList | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Get the GCD of the list of keys | |
# @a [int]: the first number to search search GCD | |
# @b [int] the second number to search search GCD | |
def GCD(a,b): | |
return abs(a) if b==0 else GCD(b, a%b) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Get the GCD of the elements of a list | |
# @list [int list]: list with numbers to search GCD | |
def gcdList(list): | |
return functools.reduce(GCD, list) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Break the chiper in #of gcd | |
# @cTextToDivide[string]: given text to be divided in blocks | |
# @numBlocks[int]: number of blocks the text will be divided in | |
def divideInBlocks(cTextToDivide,numBlocks): | |
listOfBlocks = [] | |
for newBlock in range(numBlocks): | |
listOfBlocks.insert(newBlock,'') | |
for charToSort in range(len(cTextToDivide)): | |
listOfBlocks[charToSort%numBlocks] += cTextToDivide[charToSort] | |
return listOfBlocks | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Calculate frecuencies | |
# @listOfBlocks [string list]: list with divided text in # key leng size | |
# @numBlocks [int]: number of blocks the text was divided in | |
def getBlocksFrecuencies(listOfBlocks, numBlocks): | |
frecuenciesTotals = [] | |
frecuenciesPerBlock = [] | |
alphbet ='ABCDEFGHIJKLMNOPQRSTUVWXYZ' | |
for block in range(numBlocks): | |
frecuenciesPerBlock = [] | |
for charABC in alphbet: | |
frecuenciesPerBlock.append((charABC,listOfBlocks[block].count(charABC))) | |
frecuenciesTotals.append(frecuenciesPerBlock) | |
return frecuenciesTotals | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
# Get the element with the biggest ocurence | |
# @lst[object]: listwith the objects to count the most repetitive | |
def most_common(lst): | |
return max(set(lst), key=lst.count) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Calculate Caesar per blocks, returns the full key | |
# @frecuenciesTotals[int list[list]]: list with the frecuencies of every char in every block | |
# @numBlocks[int]: key length | |
def getCaesarPerBlock(frecuenciesTotals,numBlocks): | |
alphbet='ABCDEFGHIJKLMNOPQRSTUVWXYZ' | |
listSorted = [] | |
keyZ ='' | |
#First we search for the greates frecuencies | |
for a in frecuenciesTotals: | |
a.sort(key=lambda k:(k[1]),reverse=True) | |
for a in frecuenciesTotals: | |
#Miramos solo los 3 mas frecuentes | |
posAlfabetE = alphbet.find('E') | |
posAlfabetT = alphbet.find('T') | |
posAlfabetA = alphbet.find('A') | |
list1= [posAlfabetA,posAlfabetE,posAlfabetT] | |
mostFrec1 = alphbet.find(a[0][0]) | |
mostFrec2 = alphbet.find(a[1][0]) | |
mostFrec3 = alphbet.find(a[2][0]) | |
list2 = [mostFrec1,mostFrec2,mostFrec3] | |
listFrec = [] | |
listFrecFinal = [[],[],[]] | |
for a in range(len(list1)): | |
for b in range(len(list2)): | |
listFrec.append((list2[b]-list1[a])%26) | |
lett = most_common(listFrec) | |
keyZ += alphbet[lett] | |
return (keyZ) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
#Decript the cripted text | |
# @chiperText[string]: encripted text | |
# @keyWord[string]: key to get the original text | |
# @blockSize[int]: key size | |
def decript(chiperText, keyWord,blockSize): | |
textOriginal = '' | |
alphbet ='ABCDEFGHIJKLMNOPQRSTUVWXYZ' | |
for a in range(len(chiperText)): | |
textOriginal+= alphbet[(alphbet.find(chiperText[a])-alphbet.find(keyWord[a%blockSize])) %26] | |
print (textOriginal) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
def kasiskiCryptanalysis(ciphertext,wordSize,ocurency): | |
distList = searchForOftenLongRepetitions(ciphertext,wordSize,ocurency) | |
keySize = gcdList(distList) | |
dividedBlocks = divideInBlocks(ciphertext,keySize) | |
frecsBlocks = getBlocksFrecuencies(dividedBlocks,keySize) | |
keyWord = getCaesarPerBlock(frecsBlocks,keySize) | |
decript(ciphertext,keyWord,keySize) | |
#---------------------------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------------------------- | |
def main(): | |
ciphertext = "QZYYHFPIYXFGNIIHLJNAXAJSWKGBQPWWECHVEIWDNOMOWFNTXVHVTTRBRUYSEGAEEESOARQZYXSBNVNIXWVMLTRYHRNRTXJDFMEEIXGBOICRREAMEOQNTGXHELFTGAIFRNTRWWEEYEFTROMXRNXPHIWEVGHPIITUXEFXMOELSQXMEFMYIRJTJHVXESDEXPLXJDZTPHEWEGASDIYWBVSFRYRVXWSEILBGKMIJNFNWAIHTRWSQGWENMMYKYHRVSOIYHNMACIHKRWGPRYRVYYRIXAGBVLRXNNMEYDZRNGMFQJNEBGSQJNGYENMQIGRFFXSEIXVMIKOEXLLHFGBOICRREAMGZQJSBVPZWJTBVPLMRIAZVPWUOALMMMQIGRJZVFCLUICEYTNVOELJOEBKTRXOSMLPQTSGLSALNSGBGLXJDPRFPVFTGTGVWJVRKYYHJRGTOPRRALGSHFJCYXECFZTRQENXQYJAICIXUPAEEXFCXLJTXNNGAIFRNVRKWPSKWNKEYHKOEXMRRUOYBGJESDJAEEXMEVGXPVSAGBSYEQCBFQFRNTLPSFPICBGWTHJRNIVZTJREXWASSSRMSELJMVLWEMQLGAIDYGJRVXZJIEOTXPEUAEMMNYQAEECTQUOEMEYXVURLXTSSIFPLLXXOEMSQGDBRKEEXFCXBWELJEDNMGEQEAMSQEYRNWMEMTNNEECQJDNMXLGPESYSCXXTBTRDAJRGAEEUZEFMMZRMAIXGFPRIATXPHNNGAIXESUNESYMSTRKRLXNOATPWEBACIPTGFBYXXZGDBRKALVKAEXEWWTKAHAYEXTUXXLPQIAGQLRZAYPLTGMWVEPMIUUOEMDLJDYTXPVYHVLCPEWTUXXLPQIAGQLRZAYBWLRTNOBROMSGLXXLYYHBKMEEYIIXVPWYAGXQPRYOSMLPPFWBYECQJDPHRQPNCGTWTXWEYTXPWYOPRFPVBAEBXZJKEELEEXFCXXVDHJFRGHPVXAAWPPKFLRQTPVYSTNMOESCRHRSSBCLUICEYTNVODGFNOXGWEXSVYMPHFSNVXTSSSPHZPVJDHGHPVYHREEHWZCUTWLVREQTXEEHKFMLPXJRZTVXIIAGMENOMAFTTCIHIFXQPESIAZMYMSTRKRLXNOATPWEBNBMEWPHYOXVLXYAPDWCMXEGHXSIQEIXPZJFNNKQPHFTGTGVWFYFUVPXRIPAEPPFPEHJPWXOEHJNSRPHMICWHIRGGPESDREINXWIPTPPRLIAXICMSGNMXSIZSATZLPUOFMKCEIUNMIDGMOBEASSMAFUIPRXEEOMYKFSNMINLSIPTPPBUEEMXZXMETKSFTIRNYXTRLTUXXLPQIAGQLRZAYWIDTNTRMLTWURBZVPWXTUXMYXJRATXTSSAYVSXQZNVMCTWOUFMEEXMEOXKTRSIAZSQAMAGVSFPIBRTPZRLPEHGPWXSNRWNLFRYXWMEWRLTWPRNOEKIDIFRPAJPPQOJTXELJNNMMZRFLQXJPRXEHGMGIWSVMCDMSSGBXFXJFBKRLXNOATPDXWAGXKTGXTHWMPWNNJTWSMSGGHROGMECKIOMHTFMLLXNTJBPWXFKRTRZXMEEMSJIFRFMSRIYTETGEMTNBGGJFJRENPPWBHNMMDGJRGTMYWFYBUWPVAEELMDXMAGZSTRLFBKALVICBGZPRYIBGEWAFRSTVPANLYTPXSXTNEALCXBRVSXTQEZXREIIBLVCMIWWNKJLVJAVFIOEYKAHGVMSGBNXLRTPCHRPRYSPHQXYSIPTXTSSSNGHTRYEYEMRISCRZEELJRVGKNEUAOBPTXNEFTGEYFLYRXSEYSNEVPEIYOXMYKIOAXWLCXMVVLLIQCLUICEYTNVODGFNNBHTRRIYBXLVDCNFTLMLNFUYEGFNGAIELWENMSQERIYBXLVDRRLTZRXEFXVGIFSNVCMIWDRMICVJNGMLLXXDBPRCMLHGLMWPDBRVEFWJIGLHTJKIPNPEFTRQXVTRLOABQASXSVUPPXTIQXREMKYNVCMIWAGMENOJROXCZRIAFAEOSBOSTHZYGTFTCDPFRERGZRXTNGXTRJACKSQIXSBKMYXMEZTXSIRAGBGDESDRGKTRJEEBRRHJPNKXXISTNMXSIZNVOICWNTLHJXEIEVKETRUOEMYREQHBPIGIWIQXREMKIPTXTSSBRRSYHFSUTHZATFNWSFFYMVZLERTTEXEWPDBRGIPHJDGHIDGFLNMILGDBRKEEXFCXBRESFNNKQPHHOAYPTGYIACYYIFTPRGZRFNNMSDTTNFHVPHHYOXVNSSFYBGEGTNSXVPRHEVGXLPQIAGIDXTNVTYDENRSHVNIQTPHPQSWRRLXSEWEGHPOEYTRGHPIXTUTXLXYRVUYEMTNVLEASQIGBGLPSOGTPPKFLPHRNIUTGAIELWERLXLRIAEWWZJURBHJFWJDVGGCMRIATPWEBBRRSYHFRRTWZRFBYXHZYGTPEILVFNQVSXTJLYBRRESDCKIASSDRKEYGJOSMLPIAIQXRNIIOAMEATQYGHQTPNTNKCLRIIAMIWPNGRGGPSUEETXTSSSZBGSEJLNWHDXMAGMLPHNFSBGFPYYBYVPPNAOECEVFCVGKLRFTGTGVXTIGLWZYWCRWSPWSOGIVPGQUQXXSIZSRHJZXMEELSFVHEFMSHIFVRMSRIYHRKASEYHRVEWPXAPEILVROFTMNSKRRLTZRXIOBPTXDSUHATRLWUHJFRIEQMLPEHTVOMECTRCKSGMIEQMLPEHTBKWHMYHTNMOESCRFEJFJEAHYRLFNQMLPVJIFTPCIFDLTHPXJREXREMSTUXJZVROSMLPPFWBYECQJDPHRQPNCGLEJWRIPAEPPNTUHPOWRIYBXLVDCBFQLRIEELSCXMEVKGTZNLVTRDYUEEBSCWBHBHVOIWAGMENOXTUTXLQTUAMXZEBAEVVTQJAFVVTQNNNEPJVJSCHRDMGLRBRELJMRTREMRETHZPVSMRGXDGFNGKCESYAXXLPEWTVGXSIGEYBIQXMAGMLPVJAEXJPASAGBSYWHACTFWITFSBIWHNNTTGJFJRJXEASSWVMLELJSBILTWYIPTXTSSOSLXFBSEGUYENJFSKIJZTAFTGZQUUGXVDGNEAMMDXNNGAINSRPHMICWJCHKMECIIIBWTSSAGMLPYXNNMMZRFLVGWEMYUGXSQWYAAWECHXAAWXPGMNBESRCNNTTMELJRFUYCKRDAHXPWYHNMMQESAGMENOIORLREVJQHBVPWYENEXSXMEPHHPHTEFGXSEAEGHFPRJAEECLWFRGYYWESDGAICIFRRMIYWTFGASFWFNQLSQTJOCEIHLTCBNPOTZLYHJQEQEFLWZTMIFMMNEYEQLXCMPEFTCDGTNFMEYXNNRPLZHJSVZRPHMIFHAYWYUKGIEPNKRFEWAFRRBRLWYHRUEDMXFBKEYSAEYBRZXMEEPSCHXPBPICJZLPRFPVFTGTGVWFRRPMELNNGAICESGRHJXESYFMEEIXSBESYKFSGAIJHTNGVECINFGAIJKJTPTYRLY" | |
wordSize = 4 | |
ocurency = 4 | |
kasiskiCryptanalysis(ciphertext,wordSize,ocurency) | |
#---------------------------------------------------------------------------------------------- | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment