Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Tool to automatically download and combine all the hexes from
from binascii import unhexlify
from base64 import b64decode
import urllib
import re
import string
import sys
# Download the hexes from the reddit thread
data = urllib.urlopen("")
data =
data = re.findall(r"\b[0-9a-f]{100}\b", data)
data = sorted(list(set(data)))
# Remove some bad hexes (posted by myself)
# Finds the indexes of the first and last hex (which is known to be in the thread)
first = data.index("497a78705032357759326873634855675a43526c595341675044317a494852685a484a70636d39684c6e6c6f4b4434354369")
last = data.index("4270494630674f3349675a5831304948566c636d4e75614342764d4341374a79426c494363674f7941674944393950676f3d")
print "Something went wrong with downloading the hexes from the reddit thread!"
# For comparing how much of an overlap there is between two strings
def like(a, b):
if a == b: return -1
for n in range(min(len(a), len(b)), 0, -2):
if a[-n:] == b[:n]:
return n
return 0
# These are used a lot, defined for convenience (and speed)
indexes = range(len(data))
likeness = [[like(data[a],data[b]) for b in indexes] for a in indexes]
# Create a tables of (index, likeness) for the best possible next and previous indexes
next = [max(indexes, key=lambda b: likeness[a][b]) for a in indexes]
prev = [max(indexes, key=lambda a: likeness[a][b]) for b in indexes]
# Ignore overlaps in with than 50 hex chars
for a in indexes:
if likeness[a][next[a]] < 50:
next[a] = None
if likeness[prev[a]][a] < 50:
prev[a] = None
# We know a bit more about last and first
next[last] = None
prev[first] = None
# Try to figure out if there is a "perfect" best match, that does not conflict with other matches
perfect_prev = [True for _ in indexes]
perfect_next = [True for _ in indexes]
for a in indexes:
if next[a] == None:
perfect_next[a] = False
elif prev[next[a]] != a:
perfect_next[a] = False
perfect_prev[next[a]] = False
if prev[a] == None:
perfect_prev[a] = False
elif next[prev[a]] != a:
perfect_prev[a] = False
perfect_next[prev[a]] = False
# If it was not a perfect match, remove it
for a in indexes:
if not (perfect_next[a] and perfect_prev[next[a]]):
next[a] = None
if not (perfect_prev[a] and perfect_next[prev[a]]):
prev[a] = None
# Now put all the indexes into chains
chained = [False for _ in indexes]
def get_chain(i, direction):
if i == None:
return []
chained[i] = True
if direction == FORWARD:
return [i] + get_chain(next[i], direction)
return get_chain(prev[i], direction) + [i]
def get_full_chain(i):
return get_chain(i, BACKWARDS) + get_chain(next[i], FORWARD)
# Make sure that the known first chain comes first
chains = [get_full_chain(first)]
# And the the known last chain comes last
if not chained[last]:
for i in indexes:
if not chained[i]:
chains.insert(1, get_full_chain(i))
combined_chains = []
# Loop over the chains and print the result
for chain in chains:
# Create the full hex value of the chain
current = data[chain[0]]
for (a, b) in zip(chain, chain[1:]):
shared = likeness[a][b]
current += data[b][shared:]
# Combine the chains using a greedy algorithm
while len(combined_chains) > 1:
a,b = max([(a,b) for a in range(len(combined_chains)) for b in range(len(combined_chains))], key=lambda (a,b): like(combined_chains[a],combined_chains[b]))
if like(combined_chains[a], combined_chains[b]) < 8:
minval = min(a,b)
maxval = max(a,b)
adata = combined_chains[a]
bdata = combined_chains[b]
combined_chains[minval] = adata + bdata[like(adata, bdata):]
for chain in combined_chains:
# Unhex it
unhexed = unhexlify(chain)
# base64 only works, when the data is aligned correctly
# to a 4 byte boundary. It also needs the entire string
# to be of this length, though this can be fixed by
# appending "="
def fix(n):
padded1 = "A" * n + unhexed
padded2 = padded1 + "=" * ((4 - len(padded1) % 4) % 4)
return b64decode(padded2)[n:]
return "\xff" * len(unhexed)
# Try all possible alignments
possible = [fix(n) for n in range(4)]
# And find the one with the least number of printable characters
possible.sort(key = lambda s: sum(c not in string.printable for c in s))
print ""
print chain
print "Stream 1: " + possible[0][::2]
print "Stream 2: " + possible[0][1::2]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment