Skip to content

Instantly share code, notes, and snippets.

Created December 13, 2017 21:51
Show Gist options
  • Save gurnec/8e675c291191dbfb14d5ff164aaeb3e9 to your computer and use it in GitHub Desktop.
Save gurnec/8e675c291191dbfb14d5ff164aaeb3e9 to your computer and use it in GitHub Desktop.
Extracts unique P2PKH addresses from Bitcoin block files (blk*.dat).
#!/usr/bin/env python
from __future__ import print_function
import argparse, sys, atexit, hashlib, base64, itertools, struct
from os import path
from btcrecover.addressset import AddressSet, varint
def add(self, address):
    """Insert *address* into the address set *self*; report whether it was new.

    This is a module-level function that receives the set as its first
    argument (it is called as ``add(address_set, ...)``) and works directly
    on the set's private fields.

    Returns:
        True if the address was stored; False if ``self._find`` returned
        ``True`` for it (treated here as "already present") or if the portion
        that would be stored ends with ``self._null_addr``.

    Raises:
        ValueError: if the set already holds ``self._max_len`` entries.
    """
    slot = self._find(address)
    if slot is True:
        return False

    width = self._bytes_per_addr
    # Only a window of the address is kept: `width` bytes that end just
    # before the trailing `_hash_bytes` bytes of the address.
    stored = address[-(width + self._hash_bytes):-self._hash_bytes]
    if stored.endswith(self._null_addr):
        return False  # ignore these invalid addresses

    if self._len >= self._max_len:
        raise ValueError("addition to AddressSet exceeds load factor")

    self._data[slot:slot + width] = stored
    self._len += 1
    return True
# Base58 alphabet indexed by digit value; it omits the characters 0, O, I and l
dec_digit_to_base58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"


def bytes_to_base58(bytes_rep):
    """Return the base58 digits of the big-endian integer stored in *bytes_rep*.

    Leading zero bytes have no numeric value and therefore produce no output
    here (an all-zero or empty input yields ``''``); a caller that needs them
    represented must add its own prefix.

    Works on both Python 2 and Python 3: ``int`` is used instead of the
    Python-2-only ``long`` (on Python 2 ``int`` promotes to ``long``
    automatically when needed).
    """
    if not bytes_rep:
        return ''  # int('', 16) would raise ValueError
    int_rep = int(base64.b16encode(bytes_rep), 16)
    # Digits come out least-significant first; collect then reverse rather
    # than repeatedly prepending to a string.
    digits = []
    while int_rep:
        int_rep, remainder = divmod(int_rep, 58)
        digits.append(dec_digit_to_base58[remainder])
    digits.reverse()
    return ''.join(digits)
def hash160_to_base58check(hash160_bytes, version_byte):
    """Encode a 20-byte hash plus a 1-byte version prefix as a Base58Check string.

    The payload is ``version_byte + hash160_bytes`` followed by the first four
    bytes of its double SHA-256 digest. Each leading zero byte of the payload
    is emitted as a literal ``'1'`` in front of the base58 digits.

    Args:
        hash160_bytes: the 20 hash bytes.
        version_byte: a single byte; a 1-character text string is also
            accepted so existing callers passing ``'\\0'`` keep working on
            Python 3.

    Returns:
        The encoded address as a string.
    """
    assert len(hash160_bytes) == 20
    assert len(version_byte) == 1
    if not isinstance(version_byte, (bytes, bytearray)):
        version_byte = version_byte.encode("latin-1")  # text -> the same single byte
    # Build a real byte string; str() on bytes would yield its repr on Python 3
    all_bytes = bytes(bytearray(version_byte) + bytearray(hash160_bytes))
    all_bytes += hashlib.sha256(hashlib.sha256(all_bytes).digest()).digest()[:4]
    # Counting via lstrip behaves the same for str (Py2) and bytes (Py3), and
    # cannot raise StopIteration when every byte is zero.
    zero_count = len(all_bytes) - len(all_bytes.lstrip(b"\0"))
    return '1' * zero_count + bytes_to_base58(all_bytes)
# ---- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument(
    "--datadir",
    metavar="DIRECTORY",
    help="the Bitcoin data directory (default: auto)",
)
parser.add_argument(
    "--force",
    action="store_true",
    help="overwrite any existing addresses file",
)
# Pausing defaults to "on" only when the script was started without arguments
parser.add_argument(
    "--no-pause",
    action="store_true",
    default=len(sys.argv) > 1,
    help="never pause before exiting (default: auto)",
)
# The progress bar defaults to "off" when stdout is not a terminal
parser.add_argument(
    "--no-progress",
    action="store_true",
    default=not sys.stdout.isatty(),
    help="disable the progress bar (shows cur. blockfile instead)",
)
parser.add_argument(
    "addrfilename",
    nargs="?",
    default="addresses.txt",
    help="the name of the addresses file (default: addresses.txt)",
)
args = parser.parse_args()

if not args.no_pause:
    def _pause_before_exit():
        # Registered with atexit, so it also runs after a sys.exit() below
        raw_input("\nPress Enter to exit ...")
    atexit.register(_pause_before_exit)

# Refuse to clobber an existing output file unless explicitly asked to
if not args.force and path.exists(args.addrfilename):
    sys.exit("Addresses file already exists (use --force to overwrite)")
# Resolve the Bitcoin data directory: an explicit --datadir wins, otherwise
# fall back to the per-platform default location.
if args.datadir:
    blockdir = args.datadir
elif sys.platform == "win32":
    blockdir = path.expandvars(r"%APPDATA%\Bitcoin")
elif sys.platform.startswith("linux"):
    blockdir = path.expanduser("~/.bitcoin")
elif sys.platform == "darwin":
    blockdir = path.expanduser("~/Library/Application Support/Bitcoin")
else:
    # Only give up when no branch above matched; without this `else` the exit
    # would not be limited to unrecognised platforms.
    sys.exit("Can't automatically determine Bitcoin data directory (use --datadir)")

# Block files live in the "blocks" subdirectory of the data directory
blockdir = path.join(blockdir, "blocks")
if not path.isfile(path.join(blockdir, "blk00000.dat")):
    raise ValueError("first block file 'blk00000.dat' doesn't exist in blocks directory '{}'".format(blockdir))
# Walks every blkNNNNN.dat file in `blockdir`, parses each block's
# transactions, and writes each newly seen P2PKH address (one per line) to the
# addresses file.
#
# NOTE(review): the reviewed copy of this section had lost its indentation and
# several statements. Lines tagged "reconstructed" below were rebuilt from the
# surrounding code and comments -- confirm them against the upstream script.
address_set = AddressSet(1 << 29)
with open(args.addrfilename, "w") as addrfile:

    # ---- Progress reporting setup ----
    if args.no_progress:
        progress_bar = None
    else:
        try:
            import progressbar
        except ImportError:
            # Optional dependency: fall back to the plain per-file listing
            progress_bar = None
        else:
            print("Parsing block files ...")
            # Count the block files so the bar knows its maximum value
            for filenum in itertools.count(0):
                filename = path.join(blockdir, "blk{:05}.dat".format(filenum))
                if not path.isfile(filename):
                    break  # reconstructed: itertools.count() never ends on its own
            progress_label = progressbar.FormatLabel(" {:11,} addrs. %(elapsed)s, ".format(len(address_set)))
            progress_bar = progressbar.ProgressBar(maxval=filenum, widgets=[
                progressbar.SimpleProgress(), " ",
                progressbar.Bar(left="[", fill="-", right="]"),
                progress_label,      # reconstructed: its .format is updated after each file
                progressbar.ETA(),   # reconstructed: popped at the end ("remove the ETA")
            ])
            progress_bar.start()     # reconstructed

    if not progress_bar:
        print("Block file Address count")
        print("------------ -------------")
        # e.g. blk00943.dat 255,212,706

    # ---- Main pass over the block files ----
    for filenum in itertools.count(0):
        filename = path.join(blockdir, "blk{:05}.dat".format(filenum))
        if not path.isfile(filename):
            break  # reconstructed: stop at the first missing block file
        address_set.last_filenum = filenum

        with open(filename, "rb") as blockfile:
            if not progress_bar:
                print(path.basename(filename), end=" ")

            # reconstructed read: the original statement was truncated to its comment
            header = blockfile.read(8)  # read in the magic and remaining (after these 8 bytes) block length
            while len(header) == 8 and header[4:] != b"\0\0\0\0":
                assert header[:4] == b"\xf9\xbe\xb4\xd9"  # magic
                # reconstructed read: the length is the little-endian uint32 at header[4:8]
                block = blockfile.read(struct.unpack_from("<I", header, 4)[0])  # read in the rest of the block
                tx_count, offset = varint(block, 80)  # skips 80 bytes of header
                for tx_num in range(tx_count):
                    offset += 4  # skips 4-byte tx version
                    # Slice (not index) so the comparison also holds on Python 3,
                    # where indexing bytes yields an int
                    is_bip144 = block[offset:offset + 1] == b"\0"  # bip-144 marker
                    if is_bip144:
                        offset += 2  # skips 1-byte marker & 1-byte flag
                    txin_count, offset = varint(block, offset)
                    for txin_num in range(txin_count):
                        sigscript_len, offset = varint(block, offset + 36)  # skips 32-byte tx id & 4-byte tx index
                        offset += sigscript_len + 4  # skips sequence number & sigscript
                    txout_count, offset = varint(block, offset)
                    for txout_num in range(txout_count):
                        pkscript_len, offset = varint(block, offset + 8)  # skips 8-byte satoshi count
                        # If this is a P2PKH script (OP_DUP OP_HASH160 PUSH(20) <20 address bytes> OP_EQUALVERIFY OP_CHECKSIG)
                        if pkscript_len == 25 and block[offset:offset+3] == b"\x76\xa9\x14" and block[offset+23:offset+25] == b"\x88\xac":
                            hash160 = block[offset+3:offset+23]
                            # Add the discovered address to the address set and print it if it's new
                            if add(address_set, hash160):
                                print(hash160_to_base58check(hash160, b"\0"), file=addrfile)
                        offset += pkscript_len  # advances past the pubkey script
                    if is_bip144:
                        # One witness stack per input follows the outputs
                        for txin_num in range(txin_count):
                            stackitem_count, offset = varint(block, offset)
                            for stackitem_num in range(stackitem_count):
                                stackitem_len, offset = varint(block, offset)
                                offset += stackitem_len  # skips this stack item
                    offset += 4  # skips the 4-byte locktime
                # reconstructed read: the original statement was truncated to its comment
                header = blockfile.read(8)  # read in the next magic and remaining block length

        if progress_bar:
            progress_label.format = " {:11,} addrs. %(elapsed)s, ".format(len(address_set))  # updates address count
            nextval = progress_bar.currval + 1
            if nextval > progress_bar.maxval:  # can happen if the bitcoin client is left running
                progress_bar.maxval = nextval
            progress_bar.update(nextval)  # reconstructed
        else:
            # reconstructed: completes the "<file name> <count>" row started above;
            # the 13-character width matches the dashes in the header -- confirm
            print("{:13,}".format(len(address_set)))

    if progress_bar:
        progress_bar.widgets.pop()  # remove the ETA
        progress_bar.finish()       # reconstructed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment