Skip to content

Instantly share code, notes, and snippets.

@warner
Created September 2, 2010 20:56
Show Gist options
  • Save warner/562934 to your computer and use it in GitHub Desktop.
Save warner/562934 to your computer and use it in GitHub Desktop.
#! /usr/bin/python
# connect to a BitCoin node and interpret the data it emits. first steps
# towards building a pure-python bitcoin client. Uses code and ideas from
# http://github.com/gavinandresen/bitcointools .
import struct, random, time, base64, socket, sys
from twisted.internet import reactor, protocol
from hashlib import sha256
from StringIO import StringIO
# Sanity check: with an explicit byte order, struct's "L" is exactly four
# bytes wide and ">" really means big-endian.
assert struct.unpack(">L", b"\x00\x00\x00\x01") == (1,)

# Message header sizes in bytes: magic(4) + command(12) + payload length(4).
# The very first message in each direction carries no checksum; later ones
# add a 4-byte checksum after the length.
FIRSTHEADER = 4 + 12 + 4
RESTHEADER = 4 + 12 + 4 + 4

# Four-byte network magic that starts every message.
TESTMAGIC = b"\xfa\xbf\xb5\xda"
# NOTE: this used to be written "\xf9\xbe\b4\xd9"; "\b4" is a backspace
# followed by the character "4", which made the value five bytes long.
REALMAGIC = b"\xf9\xbe\xb4\xd9"

# The network this client talks to.
MAGIC = TESTMAGIC
# this comes from bitcointools. You probably want to add bitcointools to your
# PYTHONPATH to get it.
from deserialize import extract_public_key, decode_script
def short_hex(bytes):
    """Return *bytes* as lowercase hex, abbreviated when it is long.

    A hex string shorter than 11 characters is returned whole.  Anything
    longer is cut down to its first four and last four hex digits with
    "..." in between.
    """
    hexed = base64.b16encode(bytes).lower()
    if len(hexed) >= 11:
        return hexed[:4] + "..." + hexed[-4:]
    return hexed
class CAddress:
    """A peer's network address as it appears inside Bitcoin messages.

    Attributes filled in by from_stream():
      nVersion, nTime -- only present when from_disk is true
      nServices       -- 64-bit service flags
      ipv4            -- dotted-quad string, or "???" when the 12 bytes in
                         front of the address are not the IPv4 marker
      port            -- port number
    """

    # 12-byte prefix meaning "the next four bytes are an IPv4 address".
    _IPV4_MARKER = b"\x00" * 10 + b"\xff" * 2

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse one address from stream *s* and return a new instance.

        s         -- object offering read_uint32/read_int64/read_uint64 and
                     read_bytes(n)
        from_disk -- when true, a uint32 nVersion and an int64 nTime are
                     read before the common fields
        """
        self = klass()
        if from_disk:
            self.nVersion = s.read_uint32()
            self.nTime = s.read_int64()
        self.nServices = s.read_uint64()
        ipv6_reserved = s.read_bytes(12)
        ipv4 = s.read_bytes(4)
        self.ipv4 = "???"
        if ipv6_reserved == klass._IPV4_MARKER:
            # this is usually the case
            self.ipv4 = socket.inet_ntoa(ipv4)
        else:
            print("ipv6_reserved %s" % (repr(ipv6_reserved),))
            # this is not yet defined. Clearly the intention is that
            # ipv6_reserved+ipv4 is the 16-byte v6 address
            #self.ipv6 = ...
        # Unlike every other integer in the message, the port is sent in
        # network (big-endian) byte order: the bytes A9 A1 mean port 43425.
        # It used to be read little-endian, which swapped the two bytes.
        (self.port,) = struct.unpack(">H", s.read_bytes(2))
        return self

    def __str__(self):
        """Return "host:port"."""
        return "%s:%d" % (self.ipv4, self.port)
class CScript:
    """A serialized script together with its decoded form.

    script_s -- the script bytes exactly as passed in
    decoded  -- the result of decode_script() (imported from the
                deserialize module) applied to those bytes
    """

    def __init__(self, script_s):
        decoded = decode_script(script_s)
        self.script_s = script_s
        self.decoded = decoded

    def __str__(self):
        # Show the decoded form rather than a hex dump of the raw script.
        return self.decoded
class CTxIn:
    """One input of a transaction.

    Attributes: prevout_hash (32 bytes), prevout_n, scriptSig (a CScript)
    and sequence.
    """

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse one input from stream *s*; from_disk is accepted but unused."""
        txin = klass()
        txin.prevout_hash = s.read_hash256()
        txin.prevout_n = s.read_uint32()
        txin.scriptSig = CScript(s.read_string())
        txin.sequence = s.read_uint32()
        return txin

    def __str__(self):
        """Describe the input on one line.

        An all-zero prevout hash is reported as a coin generation with the
        script shown in hex; any other input shows the abbreviated previous
        output, the extracted public key and the decoded script.  The
        sequence number is appended only when it is below 0xffffffff.
        """
        script = self.scriptSig
        if self.prevout_hash == "\x00"*32:
            pieces = ["TxIn: COIN GENERATED",
                      " coinbase:", script.script_s.encode("hex")]
        else:
            pieces = ["TxIn: prev(", short_hex(self.prevout_hash),
                      ":", str(self.prevout_n), ")",
                      " pubkey: ", extract_public_key(script.script_s),
                      " sig: ", script.decoded]
        if self.sequence < 0xffffffff:
            pieces.extend([" sequence: ", hex(self.sequence)])
        return "".join(pieces)
class CTxOut:
    """One output of a transaction: an integer value and a scriptPubKey."""

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse one output from stream *s*; from_disk is accepted but unused."""
        txout = klass()
        txout.value = s.read_int64()
        txout.scriptPubKey = CScript(s.read_string())
        return txout

    def __str__(self):
        """Describe the output: value / 1e8 to two decimals, key, script."""
        script = self.scriptPubKey
        amount = "TxOut: value: %.2f"%(self.value/1.0e8,)
        pubkey = extract_public_key(script.script_s)
        return amount + " pubkey: " + pubkey + " Script: " + script.decoded
class CTransaction:
    """A transaction: version, list of inputs, list of outputs, lock time."""

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse one transaction from stream *s*.

        The inputs and outputs are each a counted vector, read through the
        stream's read_vector() with read_CTxIn / read_CTxOut as the element
        reader.  from_disk is accepted but unused.
        """
        txn = klass()
        txn.version = s.read_int32()
        inputs = s.read_vector(s.read_CTxIn)
        txn.txIn = list(inputs)
        outputs = s.read_vector(s.read_CTxOut)
        txn.txOut = list(outputs)
        txn.lockTime = s.read_uint32()
        return txn

    def __str__(self):
        """Return a short summary with the input and output counts."""
        counts = (len(self.txIn), len(self.txOut))
        return "<CTransaction, %d-ins, %d-outs>" % counts
class CBlock:
    """A block: the header fields, the header's hash, and the transactions."""

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse one block from stream *s*.

        The header fields are read first, then the same byte range is
        re-read raw so that block_hash can be computed as the double
        SHA-256 of exactly those bytes.  The transactions that follow are
        not part of the hashed range.  from_disk is accepted but unused.
        """
        block = klass()
        header_start = s.tell()
        block.version = s.read_int32()
        block.hashPrev = s.read_hash256()
        block.hashMerkleRoot = s.read_hash256()
        block.nTime = s.read_uint32()
        block.nBits = s.read_uint32()
        block.nNonce = s.read_uint32()
        # transactions are covered in hashMerkleRoot
        header_end = s.tell()

        # Rewind, grab the raw header, and return to where parsing stopped.
        s.seek(header_start)
        raw_header = s.read(header_end - header_start)
        s.seek(header_end)

        inner_digest = sha256(raw_header).digest()
        block.block_hash = sha256(inner_digest).digest()
        block.transactions = list(s.read_vector(s.read_CTransaction))
        return block
class CVersion:
    """The payload of a 'version' message.

    Always set:  nVersion, nServices, nTime, addrMe.
    Optional:    addrFrom, nNonce, subVer, nStartingHeight.  These are
                 None when the peer's version is too old to send them or
                 the message ends before they appear.
    """

    @classmethod
    def from_stream(klass, s, from_disk=False):
        """Parse a version payload from stream *s* and return a new instance.

        s must offer read_int32/read_int64/read_uint64, read_CAddress,
        read_string and remaining().  from_disk is accepted but unused.
        """
        self = klass()
        # Give the optional trailing fields a value up front so callers can
        # read them without risking AttributeError on short messages.
        self.addrFrom = None
        self.nNonce = None
        self.subVer = None
        self.nStartingHeight = None

        self.nVersion = s.read_int32()
        self.nServices = s.read_uint64()
        self.nTime = s.read_int64()
        self.addrMe = s.read_CAddress()
        # copied from main.cpp
        if self.nVersion == 10300:
            self.nVersion = 300
        if self.nVersion >= 106 and s.remaining():
            self.addrFrom = s.read_CAddress()
            self.nNonce = s.read_uint64()
            if s.remaining():
                self.subVer = s.read_string()
                if self.nVersion >= 209 and s.remaining():
                    self.nStartingHeight = s.read_int32()
        return self
class Stream(StringIO):
    """In-memory byte stream with readers for Bitcoin's wire encodings.

    Every fixed-width integer reader here is little-endian.  The read_C*()
    helpers pass the stream to the from_stream() constructor of the class
    of the same name.
    """

    def remaining(self):
        """Return True if an unread byte is left; the position is kept."""
        mark = self.tell()
        if not self.read(1):
            return False
        self.seek(mark)
        return True

    def read_string(self):
        """Read a byte string prefixed with its compact-size length.

        Strings are encoded depending on length:
          0 to 252                : 1-byte length followed by bytes (if any)
          253 to 65,535           : byte 253, 2-byte length, then the bytes
          65,536 to 4,294,967,295 : byte 254, 4-byte length, then the bytes
        ... and the Bitcoin client is coded to understand:
          above 4,294,967,295     : byte 255, 8-byte length, then the bytes
        ... but I don't think it actually handles any strings that big.
        """
        length = self.read_compact_size()
        return self.read_bytes(length)

    def read_bytes(self, length):
        """Read up to *length* raw bytes."""
        return self.read(length)

    def read_boolean(self):
        """Read one byte and return True unless it is zero.

        This used to compare the one-character string with the integer 0,
        which is never equal, so every byte read as True.
        """
        return self._read_num('<?')

    def read_int16(self): return self._read_num('<h')
    def read_uint16(self): return self._read_num('<H')
    def read_int32(self): return self._read_num('<i')
    def read_uint32(self): return self._read_num('<I')
    def read_int64(self): return self._read_num('<q')
    def read_uint64(self): return self._read_num('<Q')
    def read_hash256(self): return self.read_bytes(32)

    def _read_num(self, format):
        """Read struct.calcsize(format) bytes and unpack a single value.

        Raises struct.error if the stream holds fewer bytes than needed.
        """
        data = self.read(struct.calcsize(format))
        return struct.unpack(format, data)[0]

    def read_compact_size(self):
        """Read a variable-length count.

        A first byte below 253 is the value itself; 253, 254 and 255 mean
        the value follows as a 2-, 4- or 8-byte little-endian integer.
        """
        size = ord(self.read(1))
        if size == 253:
            size = self._read_num('<H')
        elif size == 254:
            size = self._read_num('<I')
        elif size == 255:
            size = self._read_num('<Q')
        return size

    def read_vector(self, unpacker):
        """Yield the elements of a counted vector.

        The count is a compact size; *unpacker* is then called with no
        arguments once per element.  If your unpacker needs access to the
        stream, curry it in with a lambda.
        """
        # Count down instead of building range(count): the count comes off
        # the wire and can be as large as 2**64-1.
        left = self.read_compact_size()
        while left > 0:
            left -= 1
            yield unpacker()

    def read_CAddress(self): return CAddress.from_stream(self)
    def read_COutPoint(self): return (self.read_hash256(), self.read_uint32())
    def read_CTransaction(self): return CTransaction.from_stream(self)
    def read_CTxIn(self): return CTxIn.from_stream(self)
    def read_CTxOut(self): return CTxOut.from_stream(self)
    def read_CBlock(self): return CBlock.from_stream(self)
    def read_CVersion(self): return CVersion.from_stream(self)
class BitCoinProtocol(protocol.Protocol):
    """Twisted protocol that frames and unframes Bitcoin peer messages.

    Incoming bytes are buffered and cut into messages; each message is
    passed to commandReceived(), which calls the method named
    "command_<name>" if this object has one.  Outgoing messages are built
    with send_command() from payloads assembled by the pack_*() helpers.

    State:
      buffer          -- received bytes not yet consumed
      their_version   -- starts as None; not changed by this class
      expect_checksum -- whether incoming headers carry a checksum
      send_checksum   -- whether outgoing headers carry a checksum
    """

    def __init__(self):
        self.buffer = ""
        self.their_version = None
        self.expect_checksum = False  # first message has no checksum
        self.send_checksum = False  # ditto

    def connectionMade(self):
        """Choose a random 64-bit nonce and send our 'version' message."""
        self.my_nonce = random.randint(0, 2**64-1)
        print("connected")
        self.send_version()

    def dataReceived(self, data):
        """Buffer *data* and dispatch every complete message it completes.

        The connection is dropped on a wrong magic value or, when a
        checksum is expected, on a checksum mismatch.
        """
        self.buffer += data
        # this is inefficient for lots of small messages
        while True:
            if self.expect_checksum:
                header_len = RESTHEADER
            else:
                header_len = FIRSTHEADER
            if len(self.buffer) < header_len:
                # still waiting for header
                return
            header = self.buffer[:header_len]
            checksum = None
            if self.expect_checksum:
                (magic, command, size,
                 checksum) = struct.unpack("<4s12sL4s", header)
            else:
                (magic, command, size) = struct.unpack("<4s12sL", header)
            if magic != MAGIC:
                print("bad header %s %s"
                      % (repr(magic), repr(self.buffer[:100])))
                # The hint is only useful when the peer speaks the *other*
                # known network.  This used to test (MAGIC, TESTMAGIC),
                # which can never match once magic != MAGIC == TESTMAGIC.
                if magic in (REALMAGIC, TESTMAGIC):
                    print("(probably using the wrong grid, edit line 13)")
                self.transport.loseConnection()
                return
            if len(self.buffer) < header_len + size:
                # still waiting for data
                print("waiting %s %s %s"
                      % (header_len, size, len(self.buffer)))
                return
            payload = self.buffer[header_len:header_len+size]
            assert len(payload) == size
            self.buffer = self.buffer[header_len+size:]
            h = sha256(sha256(payload).digest()).digest()
            if checksum and checksum != h[:4]:
                print("bad checksum %s %s"
                      % (base64.b16encode(checksum), base64.b16encode(h)))
                self.transport.loseConnection()
                return
            # think "command" is null-terminated with leftover junk
            # afterwards.  partition() also copes with a 12-character
            # command that has no NUL at all, where index() used to raise
            # ValueError.
            name, _nul, trailer = command.partition("\x00")
            if trailer.strip("\x00"):
                print("oh look, secret garbage %s" % (repr(trailer),))
            self.commandReceived(name, payload)

    def dumphex(self, data):
        """Return *data* as uppercase hex in space-separated 4-byte groups."""
        groups = [base64.b16encode(data[i:i+4])
                  for i in range(0, len(data), 4)]
        return " ".join(groups)

    def commandReceived(self, command, data):
        """Call self.command_<command>(Stream(data)), if such a method exists."""
        print("")
        print("command_"+command)  #, self.dumphex(data)
        fn = getattr(self, "command_"+command, None)
        if not fn:
            print("unhandled command '%s', datalen=%d" % (command, len(data)))
            return
        fn(Stream(data))

    def send_version(self):
        """Send our 'version' message."""
        print("sending version")
        us = self.transport.getHost()
        them = self.transport.getPeer()
        # The receiver's address goes first and the sender's second: in the
        # sample message a peer sent us, the first address was ours and the
        # second was the peer's own.  These two used to be the wrong way
        # round.
        data = "".join([self.pack_int32(310),  # my nVersion
                        self.pack_uint64(1),  # nServices
                        self.pack_int64(int(time.time())),
                        self.pack_CAddress(them.host, them.port),
                        self.pack_CAddress(us.host, us.port),
                        self.pack_uint64(self.my_nonce),
                        self.pack_string(".123"),
                        self.pack_int32(7171),
                        ])
        self.send_command("version", data)

    def send_command(self, command, data):
        """Frame *data* as a *command* message and write it to the transport.

        command -- command name, at most 11 characters
        data    -- payload bytes, shorter than 2**32
        A 4-byte checksum (start of the double SHA-256 of the payload) is
        included only when self.send_checksum is set.
        """
        assert len(command) < 12
        # "12s" pads the name with NUL bytes up to 12 characters
        command = struct.pack("12s", command)
        assert len(data) < 2**32
        size = struct.pack("<L", len(data))
        if self.send_checksum:
            checksum = sha256(sha256(data).digest()).digest()[:4]
        else:
            checksum = ""
        message = "".join([MAGIC, command, size, checksum, data])
        self.transport.write(message)

    def pack_compact_size(self, size):
        """Encode *size* in the form Stream.read_compact_size() decodes.

        Values up to 252 are a single byte.  Larger values are a marker
        byte (253, 254 or 255) followed by a 2-, 4- or 8-byte little-endian
        integer.  The marker byte used to be left out, so anything above
        252 could not be decoded again.

        Raises ValueError for sizes of 2**64 or more.
        """
        if size <= 252:
            return chr(size)
        elif size < 2**16:
            return "\xfd" + struct.pack("<H", size)
        elif size < 2**32:
            return "\xfe" + struct.pack("<I", size)
        elif size < 2**64:
            return "\xff" + struct.pack("<Q", size)
        else:
            raise ValueError("go back to the future, you troublemaker")

    def pack_string(self, s):
        """Encode *s* with a compact-size length prefix."""
        return self.pack_compact_size(len(s)) + s

    def pack_boolean(self, s):
        return chr(s)

    def pack_int16(self, s):
        return struct.pack("<h", s)

    def pack_uint16(self, s):
        return struct.pack("<H", s)

    def pack_int32(self, s):
        return struct.pack("<i", s)

    def pack_uint32(self, s):
        return struct.pack("<I", s)

    def pack_int64(self, s):
        return struct.pack("<q", s)

    def pack_uint64(self, s):
        return struct.pack("<Q", s)

    def pack_hash256(self, s):
        """Turn an uppercase-hex hash string back into raw bytes."""
        return base64.b16decode(s)

    def pack_vector(self, items, packer):
        """Encode *items* as a compact-size count followed by packer(item) for each."""
        return (self.pack_compact_size(len(items))
                + "".join([packer(item) for item in items]))

    def pack_inv(self, t, hash):
        """Encode one inventory entry: int32 type *t*, then the hex *hash*.

        The type is four bytes wide, matching how inventory entries are
        parsed elsewhere in this file (read_int32); it used to be written
        as a single byte.
        """
        return self.pack_int32(t) + base64.b16decode(hash)

    def pack_CAddress(self, ipv4, port):
        """Encode an address: nServices=1, IPv4 marker, address, port.

        ipv4 is a dotted-quad string.  The port is written in network
        (big-endian) byte order, as in the sample message where the bytes
        A9 A1 mean port 43425; it used to be written little-endian.
        """
        return "".join([self.pack_uint64(1),
                        "\x00"*10+"\xff"*2,
                        socket.inet_aton(ipv4),
                        struct.pack(">H", port),
                        ])
class Watcher(BitCoinProtocol):
    """Protocol that prints what the peer sends and asks for announced items.

    Each command_<name>() method is called with a Stream over the payload
    of the message of that name.
    """

    def command_version(self, s):
        """Print the peer's 'version' message and answer with 'verack'.

        Only the first 'version' message is accepted; a second one drops
        the connection.  'verack' is sent only to peers of version >= 209.
        """
        # Annotated sample message:
        #   FABFB5DA: magic
        #   76657273 696F6E00 00000000: command "version"
        #   57000000: size (0x57)
        #   (no checksum, first packet)
        #   36010000: nVersion
        #   01000000 00000000: nServices
        #   0AFD794C 00000000: nTime
        #   CAddress addrMe:
        #     (on disk only): uint32 nVersion, ? nTime
        #     01000000 00000000: nServices
        #     00000000 00000000 0000FFFF: reserved-for-IPv6, this means IPv4
        #     0A000001: IPv4 10.0.0.1
        #     A9A1: port 43425
        #   CAddress addrFrom
        #     01000000 00000000: nServices
        #     0000 00000000 00000000 FFFF: reserved
        #     01020304 479D: IPv4 + port: 1.2.3.4:18333
        #   B8DE3AA7 4E4D151F: nNonce
        #   022E33: strSubVer ".3"
        #   E0 1B0000: nStartingHeight 7136
        #
        # Field layout:
        #   int nVersion          # 36010000 little-endian
        #   uint64 nServices
        #   int64 nTime
        #   CAddress addrMe
        #   CAddress addrFrom     # if nVersion >= 106
        #   uint64 nNonce
        #   string strSubVer      # if nVersion >= 106
        #   int nStartingHeight   # if nVersion >= 209
        if self.their_version is not None:
            # accept exactly one 'version' msg.  This check is on data from
            # the peer, so it must not be an assert (asserts vanish under -O).
            print("unexpected extra 'version' message, dropping connection")
            self.transport.loseConnection()
            return
        v = s.read_CVersion()
        self.their_version = v.nVersion
        # Older peers and short messages leave the optional fields unset;
        # show None for those rather than failing with AttributeError.
        print(" version: %s %s" % (v.nVersion, getattr(v, "subVer", None)))
        print(" nServices: %s" % (v.nServices,))
        print(" nTime: %s" % (v.nTime,))
        print(" addrMe: %s" % (v.addrMe,))
        print(" addrFrom: %s" % (getattr(v, "addrFrom", None),))
        print(" nNonce: %s" % (getattr(v, "nNonce", None),))
        print(" nStartingHeight: %s"
              % (getattr(v, "nStartingHeight", None),))
        if v.nVersion >= 209:
            self.send_command("verack", "")

    def command_verack(self, s):
        """Switch on checksums for peers >= 209, then start asking for data.

        Sends 'getaddr', then feeds a canned one-entry inventory (a block
        hash) to command_inv(), which requests that block with 'getdata'.
        """
        if self.their_version >= 209:
            self.expect_checksum = True
            self.send_checksum = True
        self.send_command("getaddr", "")
        sin = Stream(base64.b16decode("01020000 000CDB39 1F757F64 D7122F7B 4612234C 5D2F5AAC 661FFD04 5A671693 47000000 00".replace(" ","")))
        self.command_inv(sin)

    def command_addr(self, s):
        """Print every address in an 'addr' message."""
        for addr in s.read_vector(s.read_CAddress):
            print("  %s" % (addr,))

    def command_ping(self, s):
        """Ignore 'ping'."""
        pass

    def command_inv(self, s):
        """Print each announced item and request it with 'getdata'.

        sent when the other end hears about a new block or txn

        Raises ValueError for an inventory type other than 1 (tx) or
        2 (block).
        """
        type_to_name = {1: "tx", 2: "block"}
        name_to_type = {"tx": 1, "block": 2}

        def parse_inv():
            # -> (kind, uppercase hex of the 32-byte hash)
            inv_type = s.read_int32()
            if inv_type not in type_to_name:
                raise ValueError("inv_type=%d" % inv_type)
            return (type_to_name[inv_type], base64.b16encode(s.read_hash256()))

        def pack_one(inv):
            kind, hexhash = inv
            return self.pack_int32(name_to_type[kind])+self.pack_hash256(hexhash)

        for inv in s.read_vector(parse_inv):
            print(inv)
            self.send_command("getdata", self.pack_vector([inv], pack_one))

    def command_block(self, s):
        """Print a received block: header fields, then every transaction."""
        # Annotated sample block:
        #   01000000  version
        #   0727ADF1 050B6228 E210049A 7339CF27 0E1389A7 0CA1105A DDDA7C19 00000000
        #   F558FE33 522A2124 E63CF437 AEB98272 673D47BD 2313873F D6B9A23E 4EDFE1C1
        #   959B7C4C nTime: also acts as extra nonce bits
        #   349A001D nBits: compressed representation of work factor
        #   12201ADE nNonce: incremented to achieve collision
        #   01010000 00010000 00000000 00000000 00000000 00000000 00000000 00000000
        #   00000000 0000FFFF FFFF0704 349A001D 010AFFFF FFFF0100 F2052A01 00000043
        #   4104A26A 45E370AC C3C69AF0 4C6A1EFB 0F984EEA 7B6FF773 565CB008 05822F66
        #   2DD783CA 2F7CD94B FBEA8A16 5A373FE7 70067C00 521EEBAD F8F3EE55 8DA7A293
        #   C915AC00 000000
        # block ID is sha256(sha256()) up through nNonce.
        b = s.read_CBlock()
        print(" block_hash: %s" % (base64.b16encode(b.block_hash),))
        print(" version: %s" % (b.version,))
        print(" hashPrev: %s" % (base64.b16encode(b.hashPrev),))
        print(" hashMerkleRoot: %s" % (base64.b16encode(b.hashMerkleRoot),))
        print(" nTime: %s %s %s" % (b.nTime, b.nBits, b.nNonce))
        for txn in b.transactions:
            print(" txn %s" % (txn,))
            for txin in txn.txIn:
                print(" in %s" % (txin,))
            for txout in txn.txOut:
                print(" out %s" % (txout,))
## def parse_script(s):
## def parse_byte(): return s.read_bytes(1)
## return list(s.read_vector(parse_byte))
## def parse_txin():
## #return (s.read_COutPoint(), parse_script(s), s.read_uint32())
## return deserialize_TxIn(s)
## def parse_txout():
## #return (s.read_int64(), parse_script(s))
## return deserialize_TxOut(s)
## def parse_txn():
## nVersion = s.read_int32()
## txins = list(s.read_vector(parse_txin))
## txouts = list(s.read_vector(parse_txout))
## nLockTime = s.read_uint32()
## return (nVersion, txins, txouts, nLockTime)
# Usage: <this script> HOST PORT
# Announce the target, open one outgoing TCP connection that speaks the
# Watcher protocol, and start the reactor.
target_host = sys.argv[1]
print("connecting to %s.." % target_host)
# To see Twisted's log output on stderr, uncomment:
#from twisted.python import log
#import sys
#log.startLogging(sys.stderr)
c = protocol.ClientCreator(reactor, Watcher)
d = c.connectTCP(target_host, int(sys.argv[2]))  # d is ignored
reactor.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment