# holgere/blocksync.py

## blocksync.py
#!/usr/bin/env python
"""
Synchronise block devices over the network

Copyright 2006-2008 Justin Azoff <justin@bouncybouncy.net>
Copyright 2011 Robert Coup <robert@coup.net.nz>
Copyright 2012 Holger Ernst <info@ernstdatenmedien.de>
License: GPL

This version is optimized for script usage - no human input needed.
Tested with Python 2.5.2 (Debian 5) and Python 2.6.6 (Debian 6).
Should run on Python 2.x.

Getting started:

* Copy blocksync.py to the home directory on the remote host + make executable
* Make sure your remote user is appropriately privileged.
* Make sure your local user can ssh to the remote host via publickey
* Invoke:
    python blocksync.py /dev/source user@remotehost /dev/dest -i pubkeyfile
"""

import sys
from zlib import adler32
import subprocess
import time

# Reply lines the client writes to the server after comparing a block
# checksum: SAME when the checksums match, DIFF when block data follows.
SAME = "same\n"
DIFF = "diff\n"


def do_open(f, mode):
    """Open the path *f* with *mode* and measure its size.

    The size is taken by seeking to the end of the file and reading the
    resulting offset; the file is rewound to the start before returning.

    Returns a tuple (file object, size in bytes).
    """
    handle = open(f, mode)
    # whence=2: position relative to the end of the file
    handle.seek(0, 2)
    nbytes = handle.tell()
    # whence=0: back to the very beginning
    handle.seek(0, 0)
    return handle, nbytes


def getblocks(f, blocksize):
    """Generate consecutive chunks read from the file object *f*.

    Each chunk is whatever ``f.read(blocksize)`` returns; iteration stops
    at the first empty read.
    """
    chunk = f.read(blocksize)
    while chunk:
        yield chunk
        chunk = f.read(blocksize)


def server(dev, blocksize):
    """Run the receiving side of the sync protocol on stdin/stdout.

    Output, one item per line: first "<dev> <blocksize>", then the size of
    *dev* in bytes, then an adler32 checksum (8 hex digits) for every block
    of *dev*.  After each checksum one reply line is read from stdin; any
    reply other than SAME is followed by replacement data for that block,
    which is written over the block that was just read.

    Parameters:
        dev       -- path of the device to update in place (opened 'r+')
        blocksize -- block size in bytes
    """
    out = sys.stdout
    out.write("%s %s\n" % (dev, blocksize))
    f, size = do_open(dev, 'r+')
    try:
        out.write("%s\n" % size)
        out.flush()

        for block in getblocks(f, blocksize):
            out.write("%08x\n" % (adler32(block) & 0xFFFFFFFF))
            out.flush()
            res = sys.stdin.readline()
            if not res:
                # EOF: the client has gone away, nothing more will arrive.
                break
            if res != SAME:
                newblock = sys.stdin.read(blocksize)
                # Rewind to the start of the block just read.  The length
                # of the local block is used on purpose: the client's last
                # block may be shorter than ours when this device is the
                # larger one, and rewinding by the received length would
                # put the data at the wrong offset.
                f.seek(-len(block), 1)
                f.write(newblock)
                # Push the data out before the next read on the same handle.
                f.flush()
    finally:
        f.close()

def sync(pause, srcdev, dsthost, dstdev=None, blocksize=1024 * 1024, keyfile=None):
    """Send the blocks of *srcdev* that differ to *dstdev* on *dsthost*.

    Starts ``./blocksync.py server`` on the remote host through ssh and
    walks the local device block by block.  The remote side reports an
    adler32 checksum for each of its blocks; only blocks whose checksum
    differs from the local one are transmitted.

    Parameters:
        pause     -- milliseconds to sleep per block, None or 0 for no delay
        srcdev    -- path of the local source device
        dsthost   -- ssh destination, e.g. user@remotehost
        dstdev    -- path of the remote device, defaults to srcdev
        blocksize -- block size in bytes
        keyfile   -- identity file handed to ``ssh -i``, or None

    Returns a tuple (same_blocks, diff_blocks).  Setup and protocol
    failures print a message and terminate through sys.exit(1).
    """
    say = sys.stdout.write

    if blocksize <= 0:
        say("Block size must be a positive number of bytes!\n")
        sys.exit(1)

    if not dstdev:
        dstdev = srcdev

    say("Block size is %0.1f MB\n" % (float(blocksize) / (1024 * 1024)))

    pause_sec = 0.0
    if pause:
        # time.sleep() takes seconds, the option is given in milliseconds.
        pause_sec = float(pause) / 1000
        say("Slowing down for %d ms/block (%0.4f sec/block)\n" % (pause, pause_sec))

    # ssh options have to precede the host name; anything after it is part
    # of the remote command line.
    # NOTE(review): '-c blowfish' is kept from the original; newer OpenSSH
    # builds may not offer this cipher - confirm for the hosts in use.
    cmd = ['ssh', '-c', 'blowfish']
    if keyfile:
        cmd.extend(['-i', keyfile])
    # I removed sudo here
    cmd.extend([dsthost, './blocksync.py', 'server', dstdev, '-b', str(blocksize)])
    say("Running: %s\n" % " ".join(cmd))

    p = subprocess.Popen(cmd, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
    p_in, p_out = p.stdin, p.stdout

    # The server first echoes the device path and block size it was given.
    fields = p_out.readline().split()
    if len(fields) != 2 or p.poll() is not None:
        say("Error connecting to or invoking blocksync on the remote host!\n")
        sys.exit(1)

    a, b = fields
    if a != dstdev:
        say("Dest device (%s) doesn't match with the remote host (%s)!\n" % (dstdev, a))
        sys.exit(1)
    # Changed that. Sometimes its tricky to produce the exact size. A larger destination dev does well!
    if int(b) < blocksize:
        say("Source block size (%d) doesn't fit into the remote host (%d)!\n" % (blocksize, int(b)))
        sys.exit(1)

    try:
        f, size = do_open(srcdev, 'r')
    except Exception:
        # sys.exc_info() keeps this working on every Python 2.x release.
        e = sys.exc_info()[1]
        say("Error accessing source device! %s\n" % e)
        sys.exit(1)

    same_blocks = diff_blocks = 0
    try:
        # Second line from the server: size of the remote device in bytes.
        line = p_out.readline()
        if not line.strip() or p.poll() is not None:
            say("Error accessing device on remote host!\n")
            sys.exit(1)
        remote_size = int(line)
        # Changed that.
        if size > remote_size:
            say("Source device size (%d) doesn't fit into remote device size (%d)!\n" % (size, remote_size))
            sys.exit(1)

        say("Starting sync...\n")
        t0 = time.time()
        for l_block in getblocks(f, blocksize):
            l_sum = "%08x" % (adler32(l_block) & 0xFFFFFFFF)
            r_line = p_out.readline()
            if not r_line:
                # EOF instead of a checksum: the remote side is gone.
                say("Remote host stopped responding during sync!\n")
                sys.exit(1)
            r_sum = r_line.strip()
            if pause:
                time.sleep(pause_sec)

            if l_sum == r_sum:
                p_in.write(SAME)
                p_in.flush()
                same_blocks += 1
            else:
                p_in.write(DIFF)
                p_in.flush()
                p_in.write(l_block)
                p_in.flush()
                diff_blocks += 1
    finally:
        f.close()

    # Closing stdin signals end of data to the server; stdout is closed as
    # well so a server that keeps reporting checksums cannot block on a
    # full pipe while we wait for it to finish.
    p_in.close()
    p_out.close()
    p.wait()

    #Viewing progress makes no sense in batch operations, so only the result is shown
    elapsed = time.time() - t0
    done_blocks = same_blocks + diff_blocks
    if elapsed > 0:
        rate = done_blocks * blocksize / (1024.0 * 1024.0) / elapsed
    else:
        rate = 0.0
    # Round up so that a trailing partial block is counted.
    total_blocks = (size + blocksize - 1) // blocksize
    say("Completed in %d seconds\n" % elapsed)
    say("same: %d, diff: %d, %d/%d, %5.1f MB/s\n####################################\n" % (same_blocks, diff_blocks, done_blocks, total_blocks, rate))

    return same_blocks, diff_blocks

if __name__ == "__main__":
    from optparse import OptionParser

    # Command line front end: "server <dev>" runs the receiving side,
    # anything else is treated as "<source> <host> [<dest>]".
    parser = OptionParser(usage="[nice] %prog [options] /dev/source user@remotehost [/dev/dest]")
    parser.add_option(
        "-b", "--blocksize",
        dest="blocksize", action="store", type="int", default=1024 * 1024,
        help="block size (bytes), defaults to 1MB")
    parser.add_option(
        "-i", "--id",
        dest="keyfile",
        help="ssh publickey file")
    parser.add_option(
        "-p", "--pause",
        dest="pause", type="int", default=None,
        help="pause [ms] between processing blocks, reduces system load, defaults to 0")
    options, args = parser.parse_args()

    # Both modes need at least two positional arguments.
    if len(args) < 2:
        parser.print_help()
        sys.stdout.write("%s\n" % __doc__)
        sys.exit(1)

    if args[0] == 'server':
        dstdev = args[1]
        server(dstdev, options.blocksize)
    else:
        srcdev, dsthost = args[0], args[1]
        # The destination path is optional; sync() falls back to srcdev.
        dstdev = None
        if len(args) > 2:
            dstdev = args[2]
        sync(options.pause, srcdev, dsthost, dstdev, options.blocksize, options.keyfile)
	#!/usr/bin/env python
	"""
	Synchronise block devices over the network

	Copyright 2006-2008 Justin Azoff <justin@bouncybouncy.net>
	Copyright 2011 Robert Coup <robert@coup.net.nz>
	Copyright 2012 Holger Ernst <info@ernstdatenmedien.de>
	License: GPL

	This version is optimized for script usage - no human input needed.
	Tested with Python 2.5.2 (Debian 5) and Python 2.6.6 (Debian 6).
	Should run on Python 2.x.

	Getting started:

	* Copy blocksync.py to the home directory on the remote host + make executable
	* Make sure your remote user is appropriately privileged.
	* Make sure your local user can ssh to the remote host via publickey
	* Invoke:
	python blocksync.py /dev/source user@remotehost /dev/dest -i pubkeyfile
	"""

	import sys
	from zlib import adler32
	import subprocess
	import time

	# Reply lines the client writes to the server after comparing a block
	# checksum: SAME when the checksums match, DIFF when block data follows.
	SAME = "same\n"
	DIFF = "diff\n"


	def do_open(f, mode):
		"""Open the path *f* with *mode* and measure its size.

		The size is taken by seeking to the end of the file and reading the
		resulting offset; the file is rewound to the start before returning.

		Returns a tuple (file object, size in bytes).
		"""
		handle = open(f, mode)
		# whence=2: position relative to the end of the file
		handle.seek(0, 2)
		nbytes = handle.tell()
		# whence=0: back to the very beginning
		handle.seek(0, 0)
		return handle, nbytes


	def getblocks(f, blocksize):
		"""Generate consecutive chunks read from the file object *f*.

		Each chunk is whatever ``f.read(blocksize)`` returns; iteration stops
		at the first empty read.
		"""
		chunk = f.read(blocksize)
		while chunk:
			yield chunk
			chunk = f.read(blocksize)


	def server(dev, blocksize):
		"""Run the receiving side of the sync protocol on stdin/stdout.

		Output, one item per line: first "<dev> <blocksize>", then the size of
		*dev* in bytes, then an adler32 checksum (8 hex digits) for every block
		of *dev*.  After each checksum one reply line is read from stdin; any
		reply other than SAME is followed by replacement data for that block,
		which is written over the block that was just read.

		Parameters:
			dev       -- path of the device to update in place (opened 'r+')
			blocksize -- block size in bytes
		"""
		out = sys.stdout
		out.write("%s %s\n" % (dev, blocksize))
		f, size = do_open(dev, 'r+')
		try:
			out.write("%s\n" % size)
			out.flush()

			for block in getblocks(f, blocksize):
				out.write("%08x\n" % (adler32(block) & 0xFFFFFFFF))
				out.flush()
				res = sys.stdin.readline()
				if not res:
					# EOF: the client has gone away, nothing more will arrive.
					break
				if res != SAME:
					newblock = sys.stdin.read(blocksize)
					# Rewind to the start of the block just read.  The length
					# of the local block is used on purpose: the client's last
					# block may be shorter than ours when this device is the
					# larger one, and rewinding by the received length would
					# put the data at the wrong offset.
					f.seek(-len(block), 1)
					f.write(newblock)
					# Push the data out before the next read on the same handle.
					f.flush()
		finally:
			f.close()

	def sync(pause, srcdev, dsthost, dstdev=None, blocksize=1024 * 1024, keyfile=None):
		"""Send the blocks of *srcdev* that differ to *dstdev* on *dsthost*.

		Starts ``./blocksync.py server`` on the remote host through ssh and
		walks the local device block by block.  The remote side reports an
		adler32 checksum for each of its blocks; only blocks whose checksum
		differs from the local one are transmitted.

		Parameters:
			pause     -- milliseconds to sleep per block, None or 0 for no delay
			srcdev    -- path of the local source device
			dsthost   -- ssh destination, e.g. user@remotehost
			dstdev    -- path of the remote device, defaults to srcdev
			blocksize -- block size in bytes
			keyfile   -- identity file handed to ``ssh -i``, or None

		Returns a tuple (same_blocks, diff_blocks).  Setup and protocol
		failures print a message and terminate through sys.exit(1).
		"""
		say = sys.stdout.write

		if blocksize <= 0:
			say("Block size must be a positive number of bytes!\n")
			sys.exit(1)

		if not dstdev:
			dstdev = srcdev

		say("Block size is %0.1f MB\n" % (float(blocksize) / (1024 * 1024)))

		pause_sec = 0.0
		if pause:
			# time.sleep() takes seconds, the option is given in milliseconds.
			pause_sec = float(pause) / 1000
			say("Slowing down for %d ms/block (%0.4f sec/block)\n" % (pause, pause_sec))

		# ssh options have to precede the host name; anything after it is part
		# of the remote command line.
		# NOTE(review): '-c blowfish' is kept from the original; newer OpenSSH
		# builds may not offer this cipher - confirm for the hosts in use.
		cmd = ['ssh', '-c', 'blowfish']
		if keyfile:
			cmd.extend(['-i', keyfile])
		# I removed sudo here
		cmd.extend([dsthost, './blocksync.py', 'server', dstdev, '-b', str(blocksize)])
		say("Running: %s\n" % " ".join(cmd))

		p = subprocess.Popen(cmd, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
		p_in, p_out = p.stdin, p.stdout

		# The server first echoes the device path and block size it was given.
		fields = p_out.readline().split()
		if len(fields) != 2 or p.poll() is not None:
			say("Error connecting to or invoking blocksync on the remote host!\n")
			sys.exit(1)

		a, b = fields
		if a != dstdev:
			say("Dest device (%s) doesn't match with the remote host (%s)!\n" % (dstdev, a))
			sys.exit(1)
		# Changed that. Sometimes its tricky to produce the exact size. A larger destination dev does well!
		if int(b) < blocksize:
			say("Source block size (%d) doesn't fit into the remote host (%d)!\n" % (blocksize, int(b)))
			sys.exit(1)

		try:
			f, size = do_open(srcdev, 'r')
		except Exception:
			# sys.exc_info() keeps this working on every Python 2.x release.
			e = sys.exc_info()[1]
			say("Error accessing source device! %s\n" % e)
			sys.exit(1)

		same_blocks = diff_blocks = 0
		try:
			# Second line from the server: size of the remote device in bytes.
			line = p_out.readline()
			if not line.strip() or p.poll() is not None:
				say("Error accessing device on remote host!\n")
				sys.exit(1)
			remote_size = int(line)
			# Changed that.
			if size > remote_size:
				say("Source device size (%d) doesn't fit into remote device size (%d)!\n" % (size, remote_size))
				sys.exit(1)

			say("Starting sync...\n")
			t0 = time.time()
			for l_block in getblocks(f, blocksize):
				l_sum = "%08x" % (adler32(l_block) & 0xFFFFFFFF)
				r_line = p_out.readline()
				if not r_line:
					# EOF instead of a checksum: the remote side is gone.
					say("Remote host stopped responding during sync!\n")
					sys.exit(1)
				r_sum = r_line.strip()
				if pause:
					time.sleep(pause_sec)

				if l_sum == r_sum:
					p_in.write(SAME)
					p_in.flush()
					same_blocks += 1
				else:
					p_in.write(DIFF)
					p_in.flush()
					p_in.write(l_block)
					p_in.flush()
					diff_blocks += 1
		finally:
			f.close()

		# Closing stdin signals end of data to the server; stdout is closed as
		# well so a server that keeps reporting checksums cannot block on a
		# full pipe while we wait for it to finish.
		p_in.close()
		p_out.close()
		p.wait()

		#Viewing progress makes no sense in batch operations, so only the result is shown
		elapsed = time.time() - t0
		done_blocks = same_blocks + diff_blocks
		if elapsed > 0:
			rate = done_blocks * blocksize / (1024.0 * 1024.0) / elapsed
		else:
			rate = 0.0
		# Round up so that a trailing partial block is counted.
		total_blocks = (size + blocksize - 1) // blocksize
		say("Completed in %d seconds\n" % elapsed)
		say("same: %d, diff: %d, %d/%d, %5.1f MB/s\n####################################\n" % (same_blocks, diff_blocks, done_blocks, total_blocks, rate))

		return same_blocks, diff_blocks

	if __name__ == "__main__":
		from optparse import OptionParser

		# Command line front end: "server <dev>" runs the receiving side,
		# anything else is treated as "<source> <host> [<dest>]".
		parser = OptionParser(usage="[nice] %prog [options] /dev/source user@remotehost [/dev/dest]")
		parser.add_option("-b", "--blocksize", dest="blocksize", action="store", type="int", help="block size (bytes), defaults to 1MB", default=1024 * 1024)
		parser.add_option("-i", "--id", dest="keyfile", help="ssh publickey file")
		parser.add_option("-p", "--pause", dest="pause", type="int", help="pause [ms] between processing blocks, reduces system load, defaults to 0", default=None)
		(options, args) = parser.parse_args()

		# Both modes need at least two positional arguments.
		if len(args) < 2:
			parser.print_help()
			sys.stdout.write("%s\n" % __doc__)
			sys.exit(1)

		if args[0] == 'server':
			dstdev = args[1]
			server(dstdev, options.blocksize)
		else:
			srcdev = args[0]
			dsthost = args[1]
			# The destination path is optional; sync() falls back to srcdev.
			if len(args) > 2:
				dstdev = args[2]
			else:
				dstdev = None
			sync(options.pause, srcdev, dsthost, dstdev, options.blocksize, options.keyfile)