shaitan/eblob_scan.py

## eblob_scan.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

# =============================================================================
# 2013+ Copyright (c) Kirill Smorodinnikov <shaitkir@gmail.com>
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# =============================================================================
import elliptics

def print_flags(flags):
    """Render a flags bitmask as padded hex plus the names of its set bits.

    Bit ``n`` of ``flags`` corresponds to the n-th entry of the name table
    below; bits past the end of the table only show up in the hex part.

    Returns a string such as ``'   0x3 [remove|no_csum]'``.
    """
    bit_names = ('remove', 'no_csum', 'compress', 'write_return', 'append',
                 'overwrite', 'exthdr', 'uncommitted', 'chunked_csum')
    active = []
    for bit, name in enumerate(bit_names):
        if flags & (1 << bit):
            active.append(name)
    return '{0:#6x} [{1}]'.format(flags, '|'.join(active))


class disk_constrol:
    """A single fixed-size record header unpacked from raw bytes.

    The header is decoded with the ``struct`` format ``64B4Q`` (native byte
    order): 64 id bytes followed by four unsigned 64-bit values, stored as
    ``flags``, ``data_size``, ``disk_size`` and ``position`` in that order.
    """
    # Number of bytes occupied by one packed header.
    size = 96

    def __init__(self, data, offset):
        """Decode ``data`` (exactly ``size`` bytes) found at file ``offset``."""
        import struct
        assert len(data) == disk_constrol.size
        fields = struct.unpack("64B4Q", data)
        # First 64 values are the id bytes, the remaining four are counters.
        self.id = elliptics.Id(list(fields[:64]), 0)
        self.flags, self.data_size, self.disk_size, self.position = fields[64:]
        self.file_offset = offset

    def __str__(self):
        """Return a one-line, human-readable dump of the header."""
        template = "{0}: data_size: {1}\tdisk_size: {2}\tposition: {3} file_offset: {4} flags: {5:x} {6}"
        return template.format(
            str(self.id),
            self.data_size,
            self.disk_size,
            self.position,
            self.file_offset,
            self.flags,
            print_flags(self.flags),
        )

    def __iter__(self):
        """Iterate over the stored fields (``file_offset`` is excluded)."""
        stored = (self.id, self.flags, self.data_size, self.disk_size, self.position)
        return iter(stored)

def scan_index(path, out=True):
    """Read every record header from an index file.

    The file is consumed as back-to-back ``disk_constrol.size``-byte records;
    a short read (end of file or a trailing partial record) ends the scan.

    path -- index file to read
    out  -- when true, print each record as it is parsed

    Returns the parsed ``disk_constrol`` objects sorted by ``(id, position)``.
    """
    dcs = []
    # Binary mode: the records are packed structs, so text mode would corrupt
    # them wherever newlines are translated (and would yield str on Python 3).
    with open(path, 'rb') as index_f:
        while True:
            offset = index_f.tell()
            rdata = index_f.read(disk_constrol.size)
            if len(rdata) != disk_constrol.size:
                break
            dc = disk_constrol(rdata, offset)
            if out:
                print(dc)
            dcs.append(dc)
    dcs.sort(key=lambda x: (x.id, x.position))
    return dcs

def scan_blob(path, out=True):
    """Read every record header from a blob file, skipping the payloads.

    Each record starts with a ``disk_constrol.size``-byte header whose
    ``disk_size`` field is used as the distance to the next header.

    path -- blob file to read
    out  -- when true, print each header as it is parsed

    Returns the parsed ``disk_constrol`` objects sorted by ``id``. The scan
    stops at a short read, or at a header whose ``disk_size`` is smaller than
    the header itself (that header is still included in the result).
    """
    import sys

    dcs = []
    # Binary mode: the headers are packed structs, so text mode would corrupt
    # them wherever newlines are translated (and would yield str on Python 3).
    with open(path, 'rb') as blob_f:
        while True:
            offset = blob_f.tell()
            rdata = blob_f.read(disk_constrol.size)
            if len(rdata) != disk_constrol.size:
                break
            dc = disk_constrol(rdata, offset)
            if out:
                print(dc)
            dcs.append(dc)
            if dc.disk_size < disk_constrol.size:
                # Seeking by a negative amount would move backwards; with
                # disk_size == 0 the same header would be re-read forever.
                sys.stderr.write(
                    '{0}: record at offset {1} has disk_size {2} < header size {3}; '
                    'stopping scan\n'.format(path, offset, dc.disk_size,
                                             disk_constrol.size))
                break
            # Jump over the payload to the start of the next header.
            blob_f.seek(dc.disk_size - disk_constrol.size, 1)
    dcs.sort(key=lambda x: x.id)
    return dcs

def filter_files(t, files):
    """Lazily select the entries of ``files`` that belong to scan mode ``t``.

    'si'       -> names ending in '.index.sorted'
    'i' or 'd' -> names ending in '.index'
    'b'        -> names ending in none of '.index.sorted', '.index', '.lock'

    Returns a generator over the matching names; for any other ``t`` the
    function returns ``None``.
    """
    if t == 'b':
        non_blob = ('.index.sorted', '.index', '.lock')
        return (name for name in files if not name.endswith(non_blob))
    if t == 'si':
        wanted = '.index.sorted'
    elif t in ['i', 'd']:
        wanted = '.index'
    else:
        return None
    return (name for name in files if name.endswith(wanted))


def diff(path, path2):
    """Compare the records of two index files pairwise and print differences.

    Both files are loaded with ``scan_index`` (so each list is ordered by
    ``(id, position)``). The two record counts are printed first; then, for
    every pair of records whose fields differ, a per-field equality report
    and both records are printed.

    path  -- first index file (its records are labelled 'nsorted')
    path2 -- second index file (its records are labelled 'sorted')

    If the files hold different numbers of records, that is reported and only
    the common leading part is compared.
    """
    index = scan_index(path, False)
    print(len(index))
    sort = scan_index(path2, False)
    print(len(sort))
    if len(index) != len(sort):
        # Indexing the shorter list with the longer one's positions would
        # raise IndexError, so report the mismatch and compare the overlap.
        print('record count mismatch: {0} != {1}; comparing first {2} records'.format(
            len(index), len(sort), min(len(index), len(sort))))
    for unsorted_dc, sorted_dc in zip(index, sort):
        if tuple(unsorted_dc) != tuple(sorted_dc):
            print('id {0}'.format(unsorted_dc.id == sorted_dc.id))
            print('flags {0}'.format(unsorted_dc.flags == sorted_dc.flags))
            print('data_size {0}'.format(unsorted_dc.data_size == sorted_dc.data_size))
            print('disk_size {0}'.format(unsorted_dc.disk_size == sorted_dc.disk_size))
            print('position {0}'.format(unsorted_dc.position == sorted_dc.position))
            print('__dict__ {0}'.format(unsorted_dc.__dict__ == sorted_dc.__dict__))
            print('nsorted:  {0}'.format(str(unsorted_dc)))
            print('sorted:   {0} \n'.format(str(sorted_dc)))

def main(argv):
    """Run the scanner for command line ``argv`` and return an exit code.

    ``argv[1]`` selects the mode and ``argv[2:]`` are candidate file paths
    (they are narrowed down by ``filter_files``):

      si -- scan files ending in '.index.sorted'
      i  -- scan files ending in '.index'
      b  -- scan blob files
      d  -- diff each '.index' file against the same path plus '.sorted'

    Returns 0 on success, 2 when the mode is missing or unknown.
    """
    import sys

    modes = ('si', 'i', 'b', 'd')
    if len(argv) < 2 or argv[1] not in modes:
        # Without this check a missing mode raises IndexError and an unknown
        # one fails later with a TypeError from iterating None.
        prog = argv[0] if argv else 'eblob_scan.py'
        sys.stderr.write('usage: {0} {{si|i|b|d}} FILE...\n'.format(prog))
        return 2

    mode = argv[1]
    for path in filter_files(mode, argv[2:]):
        print('File:  {0}'.format(path))
        if mode == 'b':
            scan_blob(path)
        elif mode == 'd':
            diff(path, path + '.sorted')
        else:
            # 'si' and 'i' differ only in which files were selected.
            scan_index(path)
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	# =============================================================================
	# 2013+ Copyright (c) Kirill Smorodinnikov <shaitkir@gmail.com>
	# All rights reserved.
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	# =============================================================================
	import elliptics

	def print_flags(flags):
	    """Render a flags bitmask as padded hex plus the names of its set bits.

	    Bit ``n`` of ``flags`` corresponds to the n-th entry of the name table
	    below; bits past the end of the table only show up in the hex part.

	    Returns a string such as ``'   0x3 [remove|no_csum]'``.
	    """
	    sflags = ['remove', 'no_csum', 'compress', 'write_return', 'append',
	              'overwrite', 'exthdr', 'uncommitted', 'chunked_csum']
	    # Plain '|' separator: the backslash in this copy was an escaping
	    # artifact and would have been emitted literally.
	    lflags = '|'.join(x for n, x in enumerate(sflags) if flags & (1 << n))
	    return '{0:#6x} [{1}]'.format(flags, lflags)


	class disk_constrol:
	    """A single fixed-size record header unpacked from raw bytes.

	    The header is decoded with the ``struct`` format ``64B4Q`` (native byte
	    order): 64 id bytes followed by four unsigned 64-bit values, stored as
	    ``flags``, ``data_size``, ``disk_size`` and ``position`` in that order.
	    """
	    # Number of bytes occupied by one packed header.
	    size = 96

	    def __init__(self, data, offset):
	        """Decode ``data`` (exactly ``size`` bytes) found at file ``offset``."""
	        import struct
	        assert len(data) == disk_constrol.size
	        unp = struct.unpack("64B4Q", data)
	        self.id = elliptics.Id(list(unp[:64]), 0)
	        self.flags = unp[64]
	        self.data_size = unp[65]
	        self.disk_size = unp[66]
	        self.position = unp[67]
	        self.file_offset = offset

	    def __str__(self):
	        """Return a one-line, human-readable dump of the header."""
	        return "{0}: data_size: {1}\tdisk_size: {2}\tposition: {3} file_offset: {4} flags: {5:x} {6}".format(str(self.id), self.data_size, self.disk_size, self.position, self.file_offset, self.flags, print_flags(self.flags))

	    def __iter__(self):
	        """Iterate over the stored fields (``file_offset`` is excluded)."""
	        return iter((self.id, self.flags, self.data_size, self.disk_size, self.position))

	def scan_index(path, out=True):
	    """Read every record header from an index file.

	    The file is consumed as back-to-back ``disk_constrol.size``-byte records;
	    a short read (end of file or a trailing partial record) ends the scan.

	    path -- index file to read
	    out  -- when true, print each record as it is parsed

	    Returns the parsed ``disk_constrol`` objects sorted by ``(id, position)``.
	    """
	    dcs = []
	    # Binary mode: the records are packed structs, so text mode would corrupt
	    # them wherever newlines are translated (and would yield str on Python 3).
	    with open(path, 'rb') as index_f:
	        while True:
	            offset = index_f.tell()
	            rdata = index_f.read(disk_constrol.size)
	            if len(rdata) != disk_constrol.size:
	                break
	            dc = disk_constrol(rdata, offset)
	            if out:
	                print(dc)
	            dcs.append(dc)
	    dcs.sort(key=lambda x: (x.id, x.position))
	    return dcs

	def scan_blob(path, out=True):
	    """Read every record header from a blob file, skipping the payloads.

	    Each record starts with a ``disk_constrol.size``-byte header whose
	    ``disk_size`` field is used as the distance to the next header.

	    path -- blob file to read
	    out  -- when true, print each header as it is parsed

	    Returns the parsed ``disk_constrol`` objects sorted by ``id``. The scan
	    stops at a short read, or at a header whose ``disk_size`` is smaller than
	    the header itself (that header is still included in the result).
	    """
	    import sys

	    dcs = []
	    # Binary mode: the headers are packed structs, so text mode would corrupt
	    # them wherever newlines are translated (and would yield str on Python 3).
	    with open(path, 'rb') as blob_f:
	        while True:
	            offset = blob_f.tell()
	            rdata = blob_f.read(disk_constrol.size)
	            if len(rdata) != disk_constrol.size:
	                break
	            dc = disk_constrol(rdata, offset)
	            if out:
	                print(dc)
	            dcs.append(dc)
	            if dc.disk_size < disk_constrol.size:
	                # Seeking by a negative amount would move backwards; with
	                # disk_size == 0 the same header would be re-read forever.
	                sys.stderr.write(
	                    '{0}: record at offset {1} has disk_size {2} < header size {3}; '
	                    'stopping scan\n'.format(path, offset, dc.disk_size,
	                                             disk_constrol.size))
	                break
	            # Jump over the payload to the start of the next header.
	            blob_f.seek(dc.disk_size - disk_constrol.size, 1)
	    dcs.sort(key=lambda x: x.id)
	    return dcs

	def filter_files(t, files):
	    """Lazily select the entries of ``files`` that belong to scan mode ``t``.

	    'si'       -> names ending in '.index.sorted'
	    'i' or 'd' -> names ending in '.index'
	    'b'        -> names ending in none of '.index.sorted', '.index', '.lock'

	    Returns a generator over the matching names; for any other ``t`` the
	    function returns ``None``.
	    """
	    if t == 'si':
	        return (f for f in files if f.endswith('.index.sorted'))
	    elif t in ['i', 'd']:
	        return (f for f in files if f.endswith('.index'))
	    elif t == 'b':
	        return (f for f in files if not f.endswith('.index.sorted') and
	                                      not f.endswith('.index') and
	                                      not f.endswith('.lock'))


	def diff(path, path2):
	    """Compare the records of two index files pairwise and print differences.

	    Both files are loaded with ``scan_index`` (so each list is ordered by
	    ``(id, position)``). The two record counts are printed first; then, for
	    every pair of records whose fields differ, a per-field equality report
	    and both records are printed.

	    path  -- first index file (its records are labelled 'nsorted')
	    path2 -- second index file (its records are labelled 'sorted')

	    If the files hold different numbers of records, that is reported and only
	    the common leading part is compared.
	    """
	    index = scan_index(path, False)
	    print(len(index))
	    sort = scan_index(path2, False)
	    print(len(sort))
	    if len(index) != len(sort):
	        # Indexing the shorter list with the longer one's positions would
	        # raise IndexError, so report the mismatch and compare the overlap.
	        print('record count mismatch: {0} != {1}; comparing first {2} records'.format(
	            len(index), len(sort), min(len(index), len(sort))))
	    for unsorted_dc, sorted_dc in zip(index, sort):
	        if tuple(unsorted_dc) != tuple(sorted_dc):
	            print('id {0}'.format(unsorted_dc.id == sorted_dc.id))
	            print('flags {0}'.format(unsorted_dc.flags == sorted_dc.flags))
	            print('data_size {0}'.format(unsorted_dc.data_size == sorted_dc.data_size))
	            print('disk_size {0}'.format(unsorted_dc.disk_size == sorted_dc.disk_size))
	            print('position {0}'.format(unsorted_dc.position == sorted_dc.position))
	            print('__dict__ {0}'.format(unsorted_dc.__dict__ == sorted_dc.__dict__))
	            print('nsorted:  {0}'.format(str(unsorted_dc)))
	            print('sorted:  {0} \n'.format(str(sorted_dc)))

	def main(argv):
	    """Run the scanner for command line ``argv`` and return an exit code.

	    ``argv[1]`` selects the mode and ``argv[2:]`` are candidate file paths
	    (they are narrowed down by ``filter_files``):

	      si -- scan files ending in '.index.sorted'
	      i  -- scan files ending in '.index'
	      b  -- scan blob files
	      d  -- diff each '.index' file against the same path plus '.sorted'

	    Returns 0 on success, 2 when the mode is missing or unknown.
	    """
	    import sys

	    modes = ('si', 'i', 'b', 'd')
	    if len(argv) < 2 or argv[1] not in modes:
	        # Without this check a missing mode raises IndexError and an unknown
	        # one fails later with a TypeError from iterating None.
	        prog = argv[0] if argv else 'eblob_scan.py'
	        sys.stderr.write('usage: {0} {{si|i|b|d}} FILE...\n'.format(prog))
	        return 2

	    mode = argv[1]
	    for path in filter_files(mode, argv[2:]):
	        print('File:  {0}'.format(path))
	        if mode == 'b':
	            scan_blob(path)
	        elif mode == 'd':
	            diff(path, path + '.sorted')
	        else:
	            # 'si' and 'i' differ only in which files were selected.
	            scan_index(path)
	    return 0


	if __name__ == '__main__':
	    import sys
	    sys.exit(main(sys.argv))