Last active
March 29, 2019 17:55
-
-
Save priestc/62e07e9234088c476ee286e9016b71b2 to your computer and use it in GitHub Desktop.
xthinner_fail.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json, cStringIO, traceback, sys, os, urllib2, random, time | |
blockfilename = '000000000000000001c37467f0843dd9e09536c21938c5c20551191788a70541' | |
mempoolfilenames = ['00000000000000000008658cdaba34569f00748085df3923cb5287e55a2fe27c', | |
'000000000000000000328e4b6f34904c9525e4815c1a32a9e55a3ec6b0e5afa6', | |
'00000000000000000175b9186c4004b77ea9dc61976f4d7aed140d10919d0c05'] | |
debug = '--debug' in sys.argv or '-d' in sys.argv | |
def maybedownload(fn): | |
if not os.path.exists(fn): | |
print "downloading block %s" % fn | |
response = urllib2.urlopen('http://toom.im/files/%s' % fn) | |
html = response.read() | |
with open(fn, 'w') as f: | |
f.write(html) | |
def fetchtxsfromfile(fn): | |
maybedownload(fn) | |
with open(fn, 'r') as f: | |
raw = f.read() | |
block = json.loads(raw) | |
return [tx.decode('hex') for tx in block['tx']] | |
def buildmempool(fns): | |
mempool = [] | |
for fn in fns: | |
mempool.extend(fetchtxsfromfile(fn)) | |
#start modifications | |
import hashlib | |
random.shuffle(mempool) | |
for i in range(5): | |
mempool.pop() | |
txid = hashlib.sha256(str(random.random())).digest() | |
mempool.append(txid) | |
# end modifications | |
mempool.sort() | |
return mempool | |
def make_bitmap(bitlist): | |
""" | |
Generates a bitmap for which values in counts equal 1, plus a list of the residuals | |
""" | |
bits = [1<<(i%8) if bitlist[i] == 1 else 0 for i in range(len(bitlist))] | |
byts = [chr(sum(bits[8*i:8*(i+1)])) for i in range((len(bits)+7)/8)] | |
''.join(byts) | |
return ''.join(byts) | |
def unmake_bitmap(bitmap): | |
n_vals = [] | |
for byte in map(ord, bitmap): | |
for n in range(8): | |
n_vals.append(byte & 1<<n) | |
return n_vals | |
def encode_xthinner(block, mempool, checksumpositions=[], checksumintervals=[]): | |
# The encoding scheme is basically encoding the transactions as a prefix tree | |
# except that we omit all bytes that are not necessary to disambiguate a tx | |
# from the recipient's mempool. | |
# The algorithm for encoding our tree uses a stack which we interact with in four | |
# possible operations per transaction: | |
# 1. We can pop 1 or more bytes off the stack. | |
# 2. We can push 1 or more bytes onto the stack | |
# 3. We can commit to a transaction that uses the prefix encoded in the stack. | |
# 4. We can accumulate into zero or more checksum bytes | |
# For the pop stage, we encode a 1 for every pop we do after the first, and a 0 when done | |
# for the push stage, we encode a 1 for every push we do after the first, and a 0 when done | |
# For the commit stage, we encode nothing, as it's implicit as the 0 from the push stage. | |
# Finally, we encode 1-byte error detection checksums at a few different levels | |
# E.g. sum a byte from every 8 tx into a 1st-order checksum | |
# and sum a byte from every 32 tx into a 2nd-order checksum | |
# and sum a byte from every 128 tx into a 3rd-order checksum | |
stack = [] | |
blockposition = 0 | |
mempoolposition = 0 | |
pops = [] | |
pushes = [] | |
pushbytes = [] | |
assert len(checksumpositions) == len(checksumintervals) | |
checksums = [0 for i in range(len(checksumpositions))] | |
donechecksums = [[] for i in range(len(checksumpositions))] | |
mempool.append(chr(0)*32) # OOB-protection dummy hash | |
for tx in block: | |
# 1. We pop bytes off the stack that don't match our current tx | |
# First pop is a freebie (unless we're just getting started) | |
if debug: print "%s is next tx" % tx.encode('hex') | |
if stack: stack.pop() | |
try: | |
for i in range(len(stack)): | |
if stack[i] != tx[i]: | |
for j in range(len(stack)-i): | |
stack.pop() | |
pops.append(1) | |
if debug: print "%s popped %i bytes" % (''.join([s.encode('hex') for s in stack]), i) | |
break | |
except: | |
traceback.print_exc() | |
print "i=%i, stack=%s, tx=%s" % (i, ''.join(stack).encode('hex'), tx.encode('hex')) | |
sys.exit(1) | |
if debug and pops and pops[-1] == 0: # missed debug message from 0 bytes popped | |
print "%s popped 1 byte" % (''.join([s.encode('hex') for s in stack])) | |
pops.append(0) # end of pop stage | |
# 2. We push bytes onto the stack in order to disambiguate between neighboring | |
# mempool transactions (assuming mempool is sorted) | |
# 2(a). Where is this transaction in the mempool? | |
#mempoolposition = mempool.find(tx) # fixme: O(n); we should do a linear search from previous position instead | |
while mempool[mempoolposition] != tx: | |
mempoolposition += 1 | |
# 2(b) First push is a freebie | |
stack.append(tx[len(stack)]) | |
pushbytes.append(stack[-1]) | |
# 2(c). Push enough bytes so that we can disambiguate between mempool neighbors | |
i = 0 # for debug only | |
if debug: print "%s is mempoolprev" % mempool[mempoolposition-1].encode('hex') | |
if debug: print "%s is mempoolnext" % mempool[mempoolposition+1].encode('hex') | |
while ''.join(stack) in (mempool[mempoolposition-1][:len(stack)], | |
mempool[mempoolposition+1][:len(stack)]): | |
pushes.append(1) | |
stack.append(tx[len(stack)]) | |
pushbytes.append(stack[-1]) | |
i += 1 # for debug only | |
pushes.append(0) | |
if debug: print "%s pushed %i bytes: %s" % (''.join([s.encode('hex') for s in stack]), i+1, ''.join([s.encode('hex') for s in stack[-i:]])) | |
# 3. Commit the transaction | |
# This page is intentionally left blank | |
# 4. Calculate checksums | |
for i in range(len(checksumpositions)): | |
checksums[i] ^= ord(tx[checksumpositions[i]]) | |
if blockposition % checksumintervals[i] == 0 or blockposition == len(block): | |
donechecksums[i].append(checksums[i]) | |
checksums[i] = 0 | |
blockposition += 1 | |
mempoolposition += 1 | |
mempool.pop() # get rid of OOB-prevention dummy tx | |
bitpops = make_bitmap(pops) | |
bitpushes = make_bitmap(pushes) | |
if debug: | |
print pops | |
print pushes | |
print [s.encode('hex') for s in pushbytes] | |
for s in [s.encode('hex') for s in block[:10]]: | |
print s | |
print "Encoding finished! %i pops, %i pushes, %i pushbytes, %i checksum bytes" % (len(pops), len(pushes), len(pushbytes), sum(map(len, donechecksums))) | |
return bitpops, bitpushes, pushbytes, donechecksums, len(block), checksumpositions, checksumintervals | |
def decode_xthinner(encoding, mempool): | |
print "Beginning decode" | |
pops = unmake_bitmap(encoding[0]) | |
pushes = unmake_bitmap(encoding[1]) | |
pushbytes = encoding[2] | |
popi = 0 # pop index | |
pushi = 0 | |
pushbi = 0 | |
donechecksums = encoding[3] | |
txcount = encoding[4] | |
checksumpositions = encoding[5] | |
checksumintervals = encoding[6] | |
checksums = [0 for i in range(len(checksumpositions))] | |
badchecksums = [[] for i in range(len(checksumpositions))] | |
expectbadchecksums = [0 for i in range(len(checksumpositions))] | |
block = [] | |
stack = [] | |
mempoolposition = 0 | |
rerequests = [] | |
mempool.append(chr(255)*32) # OOB-protection dummy hash | |
for blockposition in range(txcount): | |
# 1. Pop bytes off the stack | |
if stack: stack.pop() | |
pop = pops[popi] | |
popi += 1 | |
while pop: | |
stack.pop() | |
pop = pops[popi] | |
popi += 1 | |
if debug: print "%s after pops" % (''.join([s.encode('hex') for s in stack])) | |
# 2. Push bytes onto the stack | |
stack.append(pushbytes[pushbi]) | |
pushbi += 1 | |
push = pushes[pushi] | |
pushi += 1 | |
while push: | |
stack.append(pushbytes[pushbi]) | |
pushbi += 1 | |
push = pushes[pushi] | |
pushi += 1 | |
if debug: print "%s after pushes" % (''.join([s.encode('hex') for s in stack])) | |
# 3. Commit the transaction to the block | |
while mempoolposition < len(mempool)-1 and ''.join(stack) > mempool[mempoolposition]: | |
mempoolposition += 1 | |
# Check for ambiguities (multiple possible matches to stack) | |
if mempoolposition < len(mempool)-2 and mempool[mempoolposition+1][:len(stack)] == ''.join(stack): | |
block.append(''.join(stack)) # We can't get the whole TXID, so we'll just insert this placeholder | |
rerequests.append(len(block)) # We'll have to ask the block sender to tell us the full TXID at this pos | |
if debug: print "%s is ambiguous with %s" % (''.join([s.encode('hex') for s in stack]), mempool[mempoolposition+1].encode('hex')) | |
else: | |
block.append(mempool[mempoolposition]) | |
if debug: print "%s uniquely matches with %s (pos=%i)" % (''.join([s.encode('hex') for s in stack]), mempool[mempoolposition].encode('hex'), mempoolposition) | |
for i in range(len(checksumpositions)): | |
checksums[i] ^= ord(block[-1][checksumpositions[i]]) | |
# 4. Check checksums | |
for i in range(len(checksumpositions)): | |
if blockposition % checksumintervals[i] == 0: | |
if not donechecksums[i][blockposition//checksumintervals[i]] == checksums[i]: | |
if debug: print "Checksum error at blockposition %i, checksumposition %i" % (blockposition, i) | |
badchecksums[i].append(blockposition) | |
checksums[i] = 0 | |
if debug: print "Remaining: %i pops, %i pushes, %i pushbytes, %i checksum bytes" % (len(pops), len(pushes), len(pushbytes), sum(map(len, donechecksums))) | |
# 4. Check checksums | |
for i in range(len(checksumpositions)): | |
if blockposition % checksumintervals[i] == 0 or blockposition == txcount-1: | |
if not donechecksums[i][blockposition//checksumintervals[i]] == checksums[i]: | |
if debug: print "Checksum error at blockposition %i, checksumposition %i" % (blockposition, i) | |
badchecksums[i].append(blockposition) | |
checksums[i] = 0 | |
mempool.pop() # get rid of OOB-prevention dummy tx | |
assert len(pushbytes) == pushbi | |
if debug: print "Recovered %i of %i transactions in block" % (len(block) - len(rerequests), len(block)) | |
return block, rerequests | |
if __name__ == '__main__': | |
if '-h' in sys.argv or '--help' in sys.argv: | |
print """Usage: | |
bitcoin-cli getblock somehash > someblock | |
# edit the filenames into the blockfilenames or mempoolfilenames variables in the code, then3 | |
python xthinner.py | |
""" | |
sys.exit() | |
block = fetchtxsfromfile(blockfilename) | |
coinbase = block.pop(0) | |
mempool = buildmempool(mempoolfilenames + [blockfilename]) | |
print "%i tx in block, %i tx in mempool (%2.0f%%)" % (len(block), len(mempool), 100.*len(block)/len(mempool)) | |
presort = time.time() | |
block.sort() | |
print "Sorting into LTOR took %1.6f seconds" % (time.time() - presort) | |
if debug: | |
print 'first 10 tx in block:' | |
for i in range(10): | |
print block[i].encode('hex') | |
print 'first 20 tx in mempool:' | |
for i in range(20): | |
print mempool[i].encode('hex') | |
start = time.time() | |
encoding = encode_xthinner(block, mempool, [random.randint(6, 32) for i in range(4)], [8, 64, 256, 1024]) | |
print "Encoding took %1.6f seconds" % (time.time() - start) | |
encodedsize = (sum(map(len, encoding[:3] + tuple(encoding[3]) + encoding[6:])) + 4) | |
print "Encoding is %i bytes total, or %2.5f bits per tx" % (encodedsize, 8.*encodedsize/len(block)) | |
start2 = time.time() | |
decoded, rerequests = decode_xthinner(encoding, mempool) | |
print "Decoding took %1.6f seconds" % (time.time() - start2) | |
print "Does decoded match original block?\t", decoded == block |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment