Skip to content

Instantly share code, notes, and snippets.

@holgere
Forked from rcoup/blocksync.py
Last active October 19, 2017 16:26
Show Gist options
  • Save holgere/1843764 to your computer and use it in GitHub Desktop.
Save holgere/1843764 to your computer and use it in GitHub Desktop.
Block device sync between remote hosts. Based off http://www.bouncybouncy.net/programs/blocksync.py
#!/usr/bin/env python
"""
Synchronise block devices over the network
Copyright 2006-2008 Justin Azoff <justin@bouncybouncy.net>
Copyright 2011 Robert Coup <robert@coup.net.nz>
Copyright 2012 Holger Ernst <info@ernstdatenmedien.de>
License: GPL
This version is optimized for script usage - no human input needed.
Tested with Python 2.5.2 (Debian 5) and Python 2.6.6 (Debian 6).
Should run on Python 2.x.
Getting started:
* Copy blocksync.py to the home directory on the remote host + make executable
* Make sure your remote user is appropriately privileged.
* Make sure your local user can ssh to the remote host via publickey
* Invoke:
python blocksync.py /dev/source user@remotehost /dev/dest -i pubkeyfile
"""
import sys
from zlib import adler32
import subprocess
import time
# Reply lines sent by sync() to the remote server() after each block checksum.
# SAME leaves the remote block untouched; any other reply (sync() sends DIFF)
# is followed by the raw replacement block.
SAME = "same\n"
DIFF = "diff\n"
def do_open(f, mode):
    """Open *f* and determine its size by seeking to the end.

    Arguments:
      f    -- path of the device or file to open
      mode -- mode string handed to the built-in open()

    Returns a (file object, size) tuple; the file object is rewound to
    offset 0 before it is returned.

    Any error raised by open(), seek() or tell() is propagated to the
    caller; the file is closed first if it had already been opened.
    """
    handle = open(f, mode)
    try:
        handle.seek(0, 2)  # whence 2: relative to the end of the file
        size = handle.tell()
        handle.seek(0)
    except Exception:
        # Do not leak the descriptor when the target turns out not to be
        # seekable; close it and let the caller see the original error.
        handle.close()
        raise
    return handle, size
def getblocks(f, blocksize):
    """Generate successive chunks of at most *blocksize* read from *f*.

    Iteration stops at the first empty read; the last chunk yielded may be
    shorter than *blocksize*.
    """
    chunk = f.read(blocksize)
    while chunk:
        yield chunk
        chunk = f.read(blocksize)
def server(dev, blocksize):
    """Run the receiving end of the sync protocol on stdin/stdout.

    Protocol, as driven by sync():
      1. write "<dev> <blocksize>" and then the device size, one per line;
      2. for every block of the device, write its adler32 checksum as eight
         hex digits and read one reply line from stdin;
      3. on SAME do nothing; on any other reply read the replacement block
         from stdin and write it over the block that was just checksummed.
    The loop ends at the end of the device or when stdin reaches EOF.

    Arguments:
      dev       -- path of the destination device or file (opened 'r+')
      blocksize -- number of bytes per block
    """
    out = sys.stdout
    out.write("%s %s\n" % (dev, blocksize))
    f, size = do_open(dev, 'r+')
    try:
        out.write("%s\n" % size)
        out.flush()
        for block in getblocks(f, blocksize):
            out.write("%08x\n" % (adler32(block) & 0xFFFFFFFF))
            out.flush()
            res = sys.stdin.readline()
            if not res:
                # EOF on the control stream: the client has no more blocks
                # (its source is shorter than this device) or it went away.
                break
            if res != SAME:
                newblock = sys.stdin.read(len(block))
                # Rewind by the size of the block we read locally. The
                # replacement may be shorter (last block of a smaller
                # source); rewinding by its length would write it at the
                # wrong offset.
                f.seek(-len(block), 1)
                f.write(newblock)
                # Reposition to the end of the local block. This also puts
                # the required positioning call between the write and the
                # next read on an update-mode stream.
                f.seek(len(block) - len(newblock), 1)
    finally:
        f.close()
def sync(pause, srcdev, dsthost, dstdev=None, blocksize=1024 * 1024, keyfile=None):
    """Copy the differing blocks of *srcdev* to *dstdev* on *dsthost*.

    Starts "./blocksync.py server" on the remote host through ssh, compares
    the adler32 checksum of every local block with the one reported by the
    remote side, and sends only the blocks whose checksums differ.

    Arguments:
      pause     -- milliseconds to sleep per block (None or 0: no pause)
      srcdev    -- path of the local source device or file
      dsthost   -- ssh destination, e.g. user@remotehost
      dstdev    -- remote device path; defaults to *srcdev*
      blocksize -- bytes per block
      keyfile   -- identity file handed to ssh with -i, or None

    Returns a (same_blocks, diff_blocks) tuple.

    Calls sys.exit(1) after printing a message when the remote side cannot
    be started, the handshake does not match, the source cannot be opened,
    the source is larger than the destination, or the remote side fails.
    """
    if not dstdev:
        dstdev = srcdev

    print("Block size is %0.1f MB" % (float(blocksize) / (1024 * 1024)))
    pause_sec = 0.0
    if pause:
        # time.sleep() takes seconds; the option is given in milliseconds.
        pause_sec = float(pause) / 1000
        print("Slowing down for %d ms/block (%0.4f sec/block)" % (pause, pause_sec))

    # ssh options must precede the destination: everything after the host
    # is sent to the remote side as part of the command line.
    # NOTE(review): '-c blowfish' is kept from the original; confirm that
    # the installed ssh still accepts this cipher name.
    cmd = ['ssh', '-c', 'blowfish']
    if keyfile:
        cmd += ['-i', keyfile]
    # No sudo here: the remote user must be able to access the device.
    cmd += [dsthost, './blocksync.py', 'server', dstdev, '-b', str(blocksize)]
    print("Running: %s" % " ".join(cmd))

    p = subprocess.Popen(cmd, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True)
    p_in, p_out = p.stdin, p.stdout

    # Handshake line 1: "<device> <blocksize>" echoed by the remote server.
    fields = p_out.readline().split()
    p.poll()
    if p.returncode is not None or len(fields) != 2 or not fields[1].isdigit():
        print("Error connecting to or invoking blocksync on the remote host!")
        sys.exit(1)

    remote_dev, remote_blocksize = fields[0], int(fields[1])
    if remote_dev != dstdev:
        print("Dest device (%s) doesn't match with the remote host (%s)!" % (dstdev, remote_dev))
        sys.exit(1)
    # Both ends must cut the devices into identical blocks, otherwise the
    # checksums and the block payloads no longer line up.
    if remote_blocksize != blocksize:
        print("Source block size (%d) doesn't match with the remote host (%d)!" % (blocksize, remote_blocksize))
        sys.exit(1)

    try:
        f, size = do_open(srcdev, 'r')
    except Exception:
        # sys.exc_info() keeps this valid on old and new interpreters alike.
        print("Error accessing source device! %s" % sys.exc_info()[1])
        sys.exit(1)

    try:
        # Handshake line 2: size of the remote device.
        line = p_out.readline().strip()
        p.poll()
        if p.returncode is not None or not line.isdigit():
            print("Error accessing device on remote host!")
            sys.exit(1)

        remote_size = int(line)
        # The sizes need not be identical; a larger destination is accepted.
        if size > remote_size:
            print("Source device size (%d) doesn't fit into remote device size (%d)!" % (size, remote_size))
            sys.exit(1)

        same_blocks = diff_blocks = 0

        print("Starting sync...")
        t0 = time.time()
        for l_block in getblocks(f, blocksize):
            l_sum = "%08x" % (adler32(l_block) & 0xFFFFFFFF)
            r_line = p_out.readline()
            if not r_line:
                # EOF instead of a checksum: the remote side is gone.
                print("Remote host closed the connection during sync!")
                sys.exit(1)
            r_sum = r_line.strip()

            if pause:
                time.sleep(pause_sec)

            if l_sum == r_sum:
                p_in.write(SAME)
                p_in.flush()
                same_blocks += 1
            else:
                p_in.write(DIFF)
                p_in.flush()
                p_in.write(l_block)
                p_in.flush()
                diff_blocks += 1
    finally:
        f.close()

    # Signal end of input, drain any remaining output so the remote side
    # cannot block on a full pipe, then wait for it to finish.
    p_in.close()
    p_out.read()
    p.wait()
    if p.returncode:
        print("Remote blocksync exited with status %d!" % p.returncode)
        sys.exit(1)

    # Progress output makes no sense in batch operation; only the result
    # is shown.
    elapsed = time.time() - t0
    done_blocks = same_blocks + diff_blocks
    if elapsed > 0:
        rate = done_blocks * blocksize / (1024.0 * 1024.0) / elapsed
    else:
        rate = 0.0
    # Round up so a trailing partial block is counted in the total.
    total_blocks = (size + blocksize - 1) // blocksize

    print("Completed in %d seconds" % elapsed)
    sys.stdout.write("same: %d, diff: %d, %d/%d, %5.1f MB/s\n####################################\n" % (same_blocks, diff_blocks, done_blocks, total_blocks, rate))

    return same_blocks, diff_blocks
if __name__ == "__main__":
    # Command-line entry point: "server <dev>" runs the receiving end,
    # anything else is treated as "<source> <host> [<dest>]".
    from optparse import OptionParser

    parser = OptionParser(usage="[nice] %prog [options] /dev/source user@remotehost [/dev/dest]")
    parser.add_option(
        "-b", "--blocksize",
        dest="blocksize", action="store", type="int", default=1024 * 1024,
        help="block size (bytes), defaults to 1MB")
    parser.add_option(
        "-i", "--id",
        dest="keyfile",
        help="ssh publickey file")
    parser.add_option(
        "-p", "--pause",
        dest="pause", type="int", default=None,
        help="pause [ms] between processing blocks, reduces system load, defaults to 0")
    opts, positional = parser.parse_args()

    # Both modes need at least two positional arguments.
    if len(positional) < 2:
        parser.print_help()
        print(__doc__)
        sys.exit(1)

    if positional[0] == 'server':
        server(positional[1], opts.blocksize)
    else:
        # The destination device is optional; sync() falls back to the
        # source path when it is None.
        remote_dev = positional[2] if len(positional) > 2 else None
        sync(opts.pause, positional[0], positional[1], remote_dev, opts.blocksize, opts.keyfile)
@holgere
Copy link
Author

holgere commented Feb 16, 2012

This version is designed for scripted operation (e.g. a cron job), so a key file is needed to establish the ssh connection without human input. Pass it with the -i parameter, as with ssh itself.
You can reduce the load this script puts on the system (disk I/O and the like) with the -p parameter. A value in milliseconds makes it wait before processing the next block, leaving resources available to other processes. Try 20 to 50 for starters...
Just to clarify: This script works on files as well.

@holgere
Copy link
Author

holgere commented Nov 12, 2013

@ramcq
Copy link

ramcq commented Jun 16, 2014

Hi there, I've merged in most of your changes into a new version which can detect interactive vs non-interactive use by itself, and cleaned up some code/bugs - https://gist.github.com/ramcq/0dc76d494598eb09740f/revisions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment