regularfry/codebreaking.rb

## codebreaking.rb
# The cipher text exactly as transcribed: 20 rows of 48 bits each.
bitstring = <<-BITS
110101001110100001100101101000000101010111101110
011010011111011001100101011100101111001101101001
011101001111100110100000011011110110011010100000
110001011110010001101001111011101110001011110101
011100101110011111101000101000000111011101101111
111101010110110011100100101000000110110001101001
111010110110010110100000011101000110111110100000
011101001110100011100001111011101110101110100000
011010010111010011110011101000001111001111110101
111100001111000001101111011100100111010001100101
011100101111001110100000011001100110111101110010
101000000111010011101000011001010110100101110010
101000001110011101100101111011100110010101110010
011011111111010111110011101000000110001101101111
111011100111010001110010011010011110001011110101
011101000110100101101111111011101111001110100000
011101000110111110100000011101001110100001100101
101000001100100111101110011001100110111101110010
111011011110000101110100011010010110001111110011
101000001100011001101111011100101111010111101101
BITS

# Collapse the rows into one continuous run of bits.
bitstring.delete!("\n")
# Disabled experiment: swap every 1 and 0 before decoding.
#bitstring.tr!("10", "01")

# Working assumption: every symbol is 8 bits wide.
bytelength = 8
# Cut the bit stream into fixed-width symbols.
byte_pattern = /.{#{bytelength}}/
splitstring = bitstring.scan(byte_pattern)

# Seed an identity table (symbol => symbol); it is replaced by the
# hand-built substitution table further down.
keys = {}
splitstring.each { |symbol| keys[symbol] = symbol }

# Tally how often each distinct symbol appears.
occurrences = Hash.new(0)
splitstring.each { |symbol| occurrences[symbol] += 1 }

# Symbols ranked from most to least common.
most_frequent = occurrences.sort_by { |_symbol, tally| tally }.reverse
  # => [["10100000", 17],
  #     ["01110100", 11],
  #     ["01101111", 11],
  #     ["01110010", 10],
  #     ["01101001", 9],
  #     ["01100101", 8],
  #     ["11101110", 7],
  #     ["11110011", 7],
  #     ["11110101", 6],
  #     ["11101000", 5],
  #     ["01100110", 3],
  #     ["01100011", 2],
  #     ["11100111", 2],
  #     ["11100100", 2],
  #     ["11100001", 2],
  #     ["11110000", 2],
  #     ["11101011", 2],
  #     ["01101100", 2],
  #     ["11101101", 2],
  #     ["11100010", 2],
  #     ["11111001", 1],
  #     ["01010101", 1],
  #     ["11110110", 1],
  #     ["11000101", 1],
  #     ["11010100", 1],
  #     ["11000110", 1],
  #     ["11001001", 1],
  #     ["01110111", 1]]

# Substitution table worked out by hand: take the most common symbol to be a
# space, then look for common word lengths (like "to the" or "would like"),
# especially where they involve repeated-symbol patterns ("t_ t__").
keys = {
  "10100000" => ' ',
  "01110100" => 't',
  "01101111" => 'o',
  "01110010" => 'r',
  "01101001" => 'i',
  "01100101" => 'e',
  "11101110" => 'n',
  "11110011" => 's',
  "11110101" => 'u',
  "11101000" => 'h',
  "01100110" => 'f',
  "01100011" => 'c',
  "11100111" => 'g',
  "11100100" => 'd',
  "11100001" => 'a',
  "11110000" => 'p',
  "11101011" => 'k',
  "01101100" => 'l',
  "11101101" => 'm',
  "11100010" => 'b',
  "11111001" => 'y',
  "01010101" => 'U',
  "11110110" => 'v',
  "11000101" => 'E',
  "11010100" => 'T',
  "11000110" => 'F',
  "11001001" => 'I',
  "01110111" => 'w',
}

# Not quite ASCII: the low 7 bits of every symbol match the ASCII code of its
# letter, but the top bit does not. There may be a pattern in the top bits;
# that has not been worked out yet.

# The table ordered by bit pattern, for eyeballing the top-bit behaviour.
keys.sort_by { |bits, _letter| bits }
  # => [["01010101", "U"],
  #     ["01100011", "c"],
  #     ["01100101", "e"],
  #     ["01100110", "f"],
  #     ["01101001", "i"],
  #     ["01101100", "l"],
  #     ["01101111", "o"],
  #     ["01110010", "r"],
  #     ["01110100", "t"],
  #     ["01110111", "w"],
  #     ["10100000", " "],
  #     ["11000101", "E"],
  #     ["11000110", "F"],
  #     ["11001001", "I"],
  #     ["11010100", "T"],
  #     ["11100001", "a"],
  #     ["11100010", "b"],
  #     ["11100100", "d"],
  #     ["11100111", "g"],
  #     ["11101000", "h"],
  #     ["11101011", "k"],
  #     ["11101101", "m"],
  #     ["11101110", "n"],
  #     ["11110000", "p"],
  #     ["11110011", "s"],
  #     ["11110101", "u"],
  #     ["11110110", "v"],
  #     ["11111001", "y"]]

# Decode: look every symbol up in the table and glue the letters together.
result = keys.values_at(*splitstring).join
  # => "The University of Edinburgh would like to thank its supporters for their generous contributions to the Informatics Forum"

# Print the message six characters per row, matching the original grid.
grid_rows = result.scan(/.{6}/)
puts grid_rows

# >> The Un
# >> iversi
# >> ty of
# >> Edinbu
# >> rgh wo
# >> uld li
# >> ke to
# >> thank
# >> its su
# >> pporte
# >> rs for
# >>  their
# >>  gener
# >> ous co
# >> ntribu
# >> tions
# >> to the
# >>  Infor
# >> matics
# >>  Forum
	# The cipher text exactly as transcribed: 20 rows of 48 bits each.
	# NOTE: `<<-` only allows the terminator to be indented; the leading tabs on
	# each row remain part of the string and are stripped below.
	bitstring = <<-BITS
	110101001110100001100101101000000101010111101110
	011010011111011001100101011100101111001101101001
	011101001111100110100000011011110110011010100000
	110001011110010001101001111011101110001011110101
	011100101110011111101000101000000111011101101111
	111101010110110011100100101000000110110001101001
	111010110110010110100000011101000110111110100000
	011101001110100011100001111011101110101110100000
	011010010111010011110011101000001111001111110101
	111100001111000001101111011100100111010001100101
	011100101111001110100000011001100110111101110010
	101000000111010011101000011001010110100101110010
	101000001110011101100101111011100110010101110010
	011011111111010111110011101000000110001101101111
	111011100111010001110010011010011110001011110101
	011101000110100101101111111011101111001110100000
	011101000110111110100000011101001110100001100101
	101000001100100111101110011001100110111101110010
	111011011110000101110100011010010110001111110011
	101000001100011001101111011100101111010111101101
	BITS

	# Join it back together. Strip ALL whitespace, not just newlines: the rows
	# above are indented, and a stray tab would shift every 8-bit boundary.
	bitstring.gsub!(/\s+/,'')
	#bitstring.tr!("10", "01")
	bytelength = 8 # reasonable assumption
	# Now get an array of bytes
	splitstring = bitstring.scan(/.{#{bytelength}}/)
	# Make a keys lookup table for later (identity mapping; replaced by the
	# hand-built table below)
	keys = splitstring.inject({}){|m,r| m[r]=r;m}

	# Count how many times we see each one
	occurrences = splitstring.inject(Hash.new(0)){|m,r| m[r] += 1; m}

	# Rank the symbols from most to least frequent
	most_frequent = occurrences.to_a.sort_by{|sym,count| count}.reverse
	# => [["10100000", 17],
	# ["01110100", 11],
	# ["01101111", 11],
	# ["01110010", 10],
	# ["01101001", 9],
	# ["01100101", 8],
	# ["11101110", 7],
	# ["11110011", 7],
	# ["11110101", 6],
	# ["11101000", 5],
	# ["01100110", 3],
	# ["01100011", 2],
	# ["11100111", 2],
	# ["11100100", 2],
	# ["11100001", 2],
	# ["11110000", 2],
	# ["11101011", 2],
	# ["01101100", 2],
	# ["11101101", 2],
	# ["11100010", 2],
	# ["11111001", 1],
	# ["01010101", 1],
	# ["11110110", 1],
	# ["11000101", 1],
	# ["11010100", 1],
	# ["11000110", 1],
	# ["11001001", 1],
	# ["01110111", 1]]

	# Guesswork and bodgery - presume that the most common symbol is a space, then
	# look for common word lengths (like "to the" or "would like"), especially where
	# they involve symbol patterns ("t_ t__")
	keys = {
	"10100000" => ' ',
	"01110100" => 't',
	"01101111" => 'o',
	"01110010" => 'r',
	"01101001" => 'i',
	"01100101" => 'e',
	"11101110" => 'n',
	"11110011" => 's',
	"11110101" => 'u',
	"11101000" => 'h',
	"01100110" => 'f',
	"01100011" => 'c',
	"11100111" => 'g',
	"11100100" => 'd',
	"11100001" => 'a',
	"11110000" => 'p',
	"11101011" => 'k',
	"01101100" => 'l',
	"11101101" => 'm',
	"11100010" => 'b',
	"11111001" => 'y',
	"01010101" => 'U',
	"11110110" => 'v',
	"11000101" => 'E',
	"11010100" => 'T',
	"11000110" => 'F',
	"11001001" => 'I',
	"01110111" => 'w'
	}

	# This isn't quite ASCII - the top bit is wrong. The lower 7 are right, though.
	# There may be a pattern in the high bits, but it hasn't been figured out yet.

	# The table sorted by bit pattern, to make the high-bit behaviour easier to see
	keys.to_a.sort_by{|a,b| a}
	# => [["01010101", "U"],
	# ["01100011", "c"],
	# ["01100101", "e"],
	# ["01100110", "f"],
	# ["01101001", "i"],
	# ["01101100", "l"],
	# ["01101111", "o"],
	# ["01110010", "r"],
	# ["01110100", "t"],
	# ["01110111", "w"],
	# ["10100000", " "],
	# ["11000101", "E"],
	# ["11000110", "F"],
	# ["11001001", "I"],
	# ["11010100", "T"],
	# ["11100001", "a"],
	# ["11100010", "b"],
	# ["11100100", "d"],
	# ["11100111", "g"],
	# ["11101000", "h"],
	# ["11101011", "k"],
	# ["11101101", "m"],
	# ["11101110", "n"],
	# ["11110000", "p"],
	# ["11110011", "s"],
	# ["11110101", "u"],
	# ["11110110", "v"],
	# ["11111001", "y"]]

	# This is what the words come out as
	result = splitstring.map{|k| keys[k]}.join
	# => "The University of Edinburgh would like to thank its supporters for their generous contributions to the Informatics Forum"

	# And this is their actual layout in the grid (six characters per row).
	puts result.scan(/.{6}/)

	# >> The Un
	# >> iversi
	# >> ty of
	# >> Edinbu
	# >> rgh wo
	# >> uld li
	# >> ke to
	# >> thank
	# >> its su
	# >> pporte
	# >> rs for
	# >>  their
	# >>  gener
	# >> ous co
	# >> ntribu
	# >> tions
	# >> to the
	# >>  Infor
	# >> matics
	# >>  Forum