Skip to content

Instantly share code, notes, and snippets.

@GiovanniMounir
Last active June 4, 2021 14:01
Show Gist options
  • Save GiovanniMounir/393defa7cb10f8134a6aec9709e5c223 to your computer and use it in GitHub Desktop.
Save GiovanniMounir/393defa7cb10f8134a6aec9709e5c223 to your computer and use it in GitHub Desktop.
Crack Vigenere using index of coincidence and Chi-Square (X2) in PHP
<?php
/* Methods based on "Introduction to Modern Cryptography" by Jonathan Katz/Yehuda Lindell and as explained by Dr. Ching-Kuang Shene: https://pages.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC.html */
$frequency =
[
"A" => 8.2,
"B" => 1.5,
"C" => 2.8,
"D" => 4.3,
"E" => 12.7,
"F" => 2.2,
"G" => 2.0,
"H" => 6.1,
"I" => 7.0,
"J" => 0.2,
"K" => 0.8,
"L" => 4.0,
"M" => 2.4,
"N" => 6.7,
"O" => 7.5,
"P" => 1.9,
"Q" => 0.1,
"R" => 6.0,
"S" => 6.3,
"T" => 9.1,
"U" => 2.8,
"V" => 1.0,
"W" => 2.4,
"X" => 0.2,
"Y" => 2.0,
"Z" => 0.1,
]; /*Source: Introduction to Modern Cryptography, Figure 1.3: The frequency distribution of individual letters in English-language text.*/
function shift_bruteforce($input, $frequency = null)
{
$freq_length = 26;
if (is_array($frequency)) $freq_length = count($frequency);
$output = array();
for ($i = 0; $i < $freq_length; $i++)
{
$plaintext = "";
foreach (str_split($input) as $character)
{
$plaintext .= chr((((ord($character)-65)+$i) % $freq_length) + 65);
}
$output[$plaintext] = $freq_length-$i;
}
return $output;
}
function ic($text, $frequency = null)
{
$freq_length = 26;
if (is_array($frequency)) $freq_length = count($frequency);
$numerator = 0;
for ($i= 0; $i < $freq_length; $i++)
{
$ni = substr_count($text, chr($i+65));
$numerator += $ni * ($ni-1);
}
return $numerator / (strlen($text)*(strlen($text)-1));
}
function split_cosets($text, $length, $callback)
{
for ($j = 0; $j < $length; $j++)
{
$reconstructed_text = "";
for ($k = $j; $k < strlen($text); $k+=$length)
{
$reconstructed_text .= $text[$k];
}
$callback($reconstructed_text);
}
}
function guess_keylength($text, $start = 1, $end = 10 /*, $natural_ic = 0.068*/)
{
$ic_array = array();
if ($end > strlen($text)) $end = strlen($text);
$closest = null;
$keyword_length = 0;
for ($i = $start; $i <= $end; $i++)
{
$ic_sum = 0;
split_cosets($text, $i, function($reconstructed_text) use (&$ic_sum) { $ic_sum += ic($reconstructed_text); });
$ic = $ic_sum/$i;
$ic_array[strval($i)] = $ic;
if ($closest === null || $closest < $ic) {
$closest = $ic;
$keyword_length = $i;
}
/*
if ($closest === null || abs($natural_ic - $closest) > abs($ic - $natural_ic)) {
$closest = $ic;
$keyword_length = $i;
}*/
}
return array($keyword_length, $closest);
}
function crack_vigenere($text, $key_length, $frequency)
{
$key = array();
$cosets = array();
split_cosets($text, $key_length, function($reconstructed_text) use (&$cosets) {
$cosets[] = $reconstructed_text;
});
foreach ($cosets as $coset)
{
$shifted_texts = shift_bruteforce($coset);
$smallest_x2 = null;
$smallest_x2_shift = null;
$occurrences = array();
foreach ($shifted_texts as $shifted_text => $shift)
{
$occurrences = array();
$x2 = 0;
for ($i = 0; $i < strlen($shifted_text); $i++)
{
$occurrences[$shifted_text[$i]] = isset($occurrences[$shifted_text[$i]])? $occurrences[$shifted_text[$i]]+1 : 1;
}
foreach ($frequency as $alphabet => $value)
{
$fi = ($occurrences[$alphabet]?? 0)/strlen($shifted_text);
$Fi = $value/100;
$x2 += pow($fi - $Fi, 2) / $Fi;
}
if ($smallest_x2 === null || $smallest_x2 > $x2)
{
$smallest_x2 = $x2;
$smallest_x2_shift = chr($shift+65);
}
}
$key[] = $smallest_x2_shift;
}
return $key;
}
function input_filter($input)
{
return strtoupper(preg_replace('#\s+#', '', $input));
}
/*
//Some examples:
$ciphertext1 = "LVWYXMPVYYLGLWWBXCVWYAICGXPNQCUCBRJDRGSUXRXFSRPBWITNVRQIJNWQOSPBSKNVCJWMWQYWCLXRCGTCAXQRRQRWRXRBNWGPRGWKLNACWGPHTRRSLBGFNQCBXFJXQNIKCSRQIKCSZNQMAIQNGSAIRQELJRWXXFNVQLLCVIMWIYAXFCLCDRDXVRDRYCIRAYRQLMFITNVGBXFJXQDGFBGFNQCBEPNYQDEJUCRAMTREJCSZAIYT";
$ciphertext2 = "VVQGYTVVVKALURWFHQACMMVLEHUCATWFHHIPLXHVUWSCIGINCMUHNHQRMSUIMHWZODXTNAEKVVQGYTVVQPHXINWCABASYYMTKSZRCXWRPRFWYHXYGFIPSBWKQAMZYBXJQQABJEMTCHQSNAEKVVQGYTVVPCAQPBSLURQUCVMVPQUTMMLVHWDHNFIKJCPXMYEIOCDTXBJWKQGAN";
$ciphertext3 = "NWAIWEBBRFQFOCJPUGDOJVBGWSPTWRZ";
*/
print_r("Input: ");
$input = input_filter(readline());
$key_length = guess_keylength($input)[0];
print_r($key_length);
$key_data = crack_vigenere($input, $key_length, $frequency);
print_r("---\r\n");
print_r("Key: " . implode("", $key_data));
?>
@GiovanniMounir
Copy link
Author

Example keys for verification:

$ciphertext1: JEY
$ciphertext2: COMPUTER
$ciphertext3: BOY

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment