Created
August 24, 2009 23:02
-
-
Save regularfry/174286 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bitstring = <<-BITS | |
110101001110100001100101101000000101010111101110 | |
011010011111011001100101011100101111001101101001 | |
011101001111100110100000011011110110011010100000 | |
110001011110010001101001111011101110001011110101 | |
011100101110011111101000101000000111011101101111 | |
111101010110110011100100101000000110110001101001 | |
111010110110010110100000011101000110111110100000 | |
011101001110100011100001111011101110101110100000 | |
011010010111010011110011101000001111001111110101 | |
111100001111000001101111011100100111010001100101 | |
011100101111001110100000011001100110111101110010 | |
101000000111010011101000011001010110100101110010 | |
101000001110011101100101111011100110010101110010 | |
011011111111010111110011101000000110001101101111 | |
111011100111010001110010011010011110001011110101 | |
011101000110100101101111111011101111001110100000 | |
011101000110111110100000011101001110100001100101 | |
101000001100100111101110011001100110111101110010 | |
111011011110000101110100011010010110001111110011 | |
101000001100011001101111011100101111010111101101 | |
BITS | |
bitstring.gsub!(/\n/,'') # Join it back together | |
#bitstring.tr!("10", "01") | |
bytelength = 8 # reasonable assumption | |
# Now get an array of bytes | |
splitstring = bitstring.scan(/.{#{bytelength}}/) | |
# Make a keys lookup table for later. At this point it is only a placeholder
# that maps every distinct symbol to itself; it is reassigned with the guessed
# letters further down the script.
keys = splitstring.inject({}) { |table, symbol| table[symbol] = symbol; table }
# Count how many times we see each one (Hash.new(0) makes unseen symbols
# start from zero).
occurrences = splitstring.inject(Hash.new(0)) { |counts, symbol| counts[symbol] += 1; counts }
# [symbol, count] pairs, most frequent first. The relative order of symbols
# with equal counts is whatever sort_by happened to produce.
most_frequent = occurrences.to_a.sort_by { |_symbol, count| count }.reverse
# => [["10100000", 17],
# ["01110100", 11],
# ["01101111", 11],
# ["01110010", 10],
# ["01101001", 9],
# ["01100101", 8],
# ["11101110", 7],
# ["11110011", 7],
# ["11110101", 6],
# ["11101000", 5],
# ["01100110", 3],
# ["01100011", 2],
# ["11100111", 2],
# ["11100100", 2],
# ["11100001", 2],
# ["11110000", 2],
# ["11101011", 2],
# ["01101100", 2],
# ["11101101", 2],
# ["11100010", 2],
# ["11111001", 1],
# ["01010101", 1],
# ["11110110", 1],
# ["11000101", 1],
# ["11010100", 1],
# ["11000110", 1],
# ["11001001", 1],
# ["01110111", 1]]
# Guesswork and bodgery - presume that the most common symbol is a space, then | |
# look for common world lengths (like "to the" or "would like" especially where | |
# they involve symbol patterns ("t_ t__") | |
keys = { | |
"10100000" => ' ', | |
"01110100" => 't', | |
"01101111" => 'o', | |
"01110010" => 'r', | |
"01101001" => 'i', | |
"01100101" => 'e', | |
"11101110" => 'n', | |
"11110011" => 's', | |
"11110101" => 'u', | |
"11101000" => 'h', | |
"01100110" => 'f', | |
"01100011" => 'c', | |
"11100111" => 'g', | |
"11100100" => 'd', | |
"11100001" => 'a', | |
"11110000" => 'p', | |
"11101011" => 'k', | |
"01101100" => 'l', | |
"11101101" => 'm', | |
"11100010" => 'b', | |
"11111001" => 'y', | |
"01010101" => 'U', | |
"11110110" => 'v', | |
"11000101" => 'E', | |
"11010100" => 'T', | |
"11000110" => 'F', | |
"11001001" => 'I', | |
"01110111" => 'w' | |
} | |
# This isn't quite ascii - the top bit is made of wrong. The lower 7 are right, though. | |
# I think there's a pattern in the high bits, but haven't got round to figuring it out yet. | |
keys.to_a.sort_by{|a,b| a} | |
# => [["01010101", "U"], | |
# ["01100011", "c"], | |
# ["01100101", "e"], | |
# ["01100110", "f"], | |
# ["01101001", "i"], | |
# ["01101100", "l"], | |
# ["01101111", "o"], | |
# ["01110010", "r"], | |
# ["01110100", "t"], | |
# ["01110111", "w"], | |
# ["10100000", " "], | |
# ["11000101", "E"], | |
# ["11000110", "F"], | |
# ["11001001", "I"], | |
# ["11010100", "T"], | |
# ["11100001", "a"], | |
# ["11100010", "b"], | |
# ["11100100", "d"], | |
# ["11100111", "g"], | |
# ["11101000", "h"], | |
# ["11101011", "k"], | |
# ["11101101", "m"], | |
# ["11101110", "n"], | |
# ["11110000", "p"], | |
# ["11110011", "s"], | |
# ["11110101", "u"], | |
# ["11110110", "v"], | |
# ["11111001", "y"]] | |
# This is what the words come out as. A symbol missing from the lookup table
# is rendered as '?' instead of silently vanishing, so an incomplete table
# cannot shift the grid layout below.
result = splitstring.map { |symbol| keys.fetch(symbol, '?') }.join
# => "The University of Edinburgh would like to thank its supporters for their generous contributions to the Informatics Forum"
# And this is their actual layout in the grid: six characters per row
# (.{1,6} keeps a shorter final row if the text is not a multiple of six).
puts result.scan(/.{1,6}/)
# >> The Un
# >> iversi
# >> ty of
# >> Edinbu
# >> rgh wo
# >> uld li
# >> ke to
# >> thank
# >> its su
# >> pporte
# >> rs for
# >>  their
# >>  gener
# >> ous co
# >> ntribu
# >> tions
# >> to the
# >>  Infor
# >> matics
# >>  Forum
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment