Skip to content

Instantly share code, notes, and snippets.

@shaitan
Last active October 20, 2015 11:13
Show Gist options
  • Save shaitan/8ce876e7791557156594 to your computer and use it in GitHub Desktop.
Save shaitan/8ce876e7791557156594 to your computer and use it in GitHub Desktop.
Script for scanning an eblob blob file and its sorted and unsorted indexes.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# =============================================================================
# 2013+ Copyright (c) Kirill Smorodinnikov <shaitkir@gmail.com>
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# =============================================================================
import elliptics
def print_flags(flags):
    """Format a flags bitmask as its hex value followed by the set flag names.

    Bit ``n`` of ``flags`` corresponds to the ``n``-th entry of the name
    table below; bits past the end of the table are shown only in the hex
    value. Despite its name, this function returns the string and prints
    nothing.

    Example: ``print_flags(3)`` returns ``'   0x3 [remove|no_csum]'``.
    """
    bit_names = (
        'remove', 'no_csum', 'compress', 'write_return', 'append',
        'overwrite', 'exthdr', 'uncommitted', 'chunked_csum',
    )
    active = []
    for bit, name in enumerate(bit_names):
        if flags & (1 << bit):
            active.append(name)
    return '{0:#6x} [{1}]'.format(flags, '|'.join(active))
class disk_constrol:
    """A single fixed-size record header parsed from raw bytes.

    A header is ``size`` bytes long: 64 one-byte values forming the record
    id, followed by four unsigned 64-bit integers stored as ``flags``,
    ``data_size``, ``disk_size`` and ``position``. ``file_offset`` remembers
    where in the scanned file the header was read from.
    """
    # Packed length of one header: 64 * 1 byte + 4 * 8 bytes.
    size = 96

    def __init__(self, data, offset):
        """Unpack ``data`` (exactly ``size`` bytes) that was read at ``offset``."""
        import struct
        assert len(data) == disk_constrol.size
        # No byte-order prefix: struct uses the machine's native order and
        # alignment. NOTE(review): presumably matches the on-disk layout on
        # the hosts this runs on -- confirm before using on other platforms.
        fields = struct.unpack("64B4Q", data)
        key_bytes, numbers = fields[:64], fields[64:]
        self.id = elliptics.Id(list(key_bytes), 0)
        self.flags, self.data_size, self.disk_size, self.position = numbers
        self.file_offset = offset

    def __str__(self):
        """Return a one-line human-readable summary of the header."""
        layout = ("{0}: data_size: {1}\tdisk_size: {2}\tposition: {3} "
                  "file_offset: {4} flags: {5:x} {6}")
        return layout.format(
            str(self.id),
            self.data_size,
            self.disk_size,
            self.position,
            self.file_offset,
            self.flags,
            print_flags(self.flags),
        )

    def __iter__(self):
        """Iterate over the stored header fields (``file_offset`` excluded)."""
        stored = (self.id, self.flags, self.data_size, self.disk_size,
                  self.position)
        return iter(stored)
def scan_index(path, out=True):
    """Read every record header stored back-to-back in an index file.

    Args:
        path: Path of the index file to scan.
        out: When true, print each header as it is read.

    Returns:
        A list of ``disk_constrol`` objects sorted by ``(id, position)``.
        A trailing chunk shorter than one header ends the scan and is
        ignored.
    """
    dcs = []
    # The file holds packed binary headers, so open it in binary mode: text
    # mode may translate line endings on some platforms and yields str
    # instead of bytes on Python 3, either of which breaks struct.unpack.
    with open(path, 'rb') as index_f:
        while True:
            offset = index_f.tell()
            rdata = index_f.read(disk_constrol.size)
            if len(rdata) != disk_constrol.size:
                # End of file, or a truncated final header.
                break
            dc = disk_constrol(rdata, offset)
            if out:
                # Single-argument print() emits the same text on Python 2 and 3.
                print(dc)
            dcs.append(dc)
    dcs.sort(key=lambda x: (x.id, x.position))
    return dcs
def scan_blob(path, out=True):
    """Walk a blob file header by header, skipping over each record's body.

    Each header's ``disk_size`` is used as the full on-disk length of the
    record (header included), so after reading a header the scan jumps
    ``disk_size - header size`` bytes forward to reach the next one.

    Args:
        path: Path of the blob file to scan.
        out: When true, print each header as it is read.

    Returns:
        A list of ``disk_constrol`` objects sorted by ``id``. The scan stops
        at end of file, at a truncated header, or at the first header whose
        ``disk_size`` is smaller than a header (that header is still
        included in the result).
    """
    import sys

    dcs = []
    header_size = disk_constrol.size
    # Binary mode: the blob is raw bytes; text mode may translate line
    # endings on some platforms and yields str instead of bytes on Python 3.
    with open(path, 'rb') as blob_f:
        while True:
            offset = blob_f.tell()
            rdata = blob_f.read(header_size)
            if len(rdata) != header_size:
                # End of file, or a truncated final header.
                break
            dc = disk_constrol(rdata, offset)
            if out:
                # Single-argument print() emits the same text on Python 2 and 3.
                print(dc)
            dcs.append(dc)
            if dc.disk_size < header_size:
                # A record cannot be shorter than its own header. Seeking by
                # a negative (or zero-progress) amount would re-read the same
                # bytes forever, so report the problem and stop here.
                sys.stderr.write(
                    '{0}: record at offset {1} has disk_size {2}, smaller '
                    'than the {3}-byte header; stopping scan\n'.format(
                        path, offset, dc.disk_size, header_size))
                break
            # Skip the rest of the record, relative to the current position.
            blob_f.seek(dc.disk_size - header_size, 1)
    dcs.sort(key=lambda x: x.id)
    return dcs
def filter_files(t, files):
    """Select the file names relevant to scan mode ``t``.

    Modes:
        ``'si'``       -- names ending in ``.index.sorted``
        ``'i'``, ``'d'`` -- names ending in ``.index``
        ``'b'``        -- names ending in none of ``.index.sorted``,
                          ``.index`` or ``.lock``

    Returns a lazy generator over the matching names, or ``None`` when the
    mode is not one of the above.
    """
    if t == 'b':
        skipped = ('.index.sorted', '.index', '.lock')
        return (name for name in files if not name.endswith(skipped))

    if t == 'si':
        wanted = '.index.sorted'
    elif t in ('i', 'd'):
        wanted = '.index'
    else:
        # Unknown mode: nothing to select.
        return None
    return (name for name in files if name.endswith(wanted))
def diff(path, path2):
    """Compare two index files record by record and print the mismatches.

    Both files are loaded with ``scan_index`` (which sorts the records) and
    their record counts are printed first. Records are then compared pairwise
    in that order; for every pair whose stored fields differ, a per-field
    equality report and both records are printed.

    Only the records both files have in common (by position in the sorted
    lists) are compared, so a sorted index holding more records than the
    unsorted one no longer raises IndexError; the two counts printed up
    front reveal any length mismatch.

    Args:
        path: Path of the first (unsorted) index file.
        path2: Path of the second (sorted) index file.
    """
    index = scan_index(path, False)
    print(len(index))
    sort = scan_index(path2, False)
    print(len(sort))
    # Single-argument print() calls keep the output byte-identical on
    # Python 2 and Python 3.
    for unsorted_dc, sorted_dc in zip(index, sort):
        if tuple(unsorted_dc) == tuple(sorted_dc):
            continue
        print('id {0}'.format(unsorted_dc.id == sorted_dc.id))
        print('flags {0}'.format(unsorted_dc.flags == sorted_dc.flags))
        print('data_size {0}'.format(
            unsorted_dc.data_size == sorted_dc.data_size))
        print('disk_size {0}'.format(
            unsorted_dc.disk_size == sorted_dc.disk_size))
        print('position {0}'.format(
            unsorted_dc.position == sorted_dc.position))
        print('__dict__ {0}'.format(
            unsorted_dc.__dict__ == sorted_dc.__dict__))
        print('nsorted:  {0}'.format(unsorted_dc))
        print('sorted:  {0} \n'.format(sorted_dc))
if __name__ == '__main__':
    # Usage: <script> MODE FILE [FILE ...]
    #   si -- scan sorted indexes      (files ending in .index.sorted)
    #   i  -- scan unsorted indexes    (files ending in .index)
    #   b  -- scan blobs               (everything else except .lock files)
    #   d  -- diff each FILE.index against FILE.index.sorted
    import sys

    # One action per mode; each takes the path of a file to process.
    actions = {
        'si': scan_index,
        'i': scan_index,
        'b': scan_blob,
        'd': lambda p: diff(p, p + '.sorted'),
    }

    # Without this check a missing mode raises IndexError and an unknown
    # mode raises TypeError (filter_files returns None for it).
    if len(sys.argv) < 2 or sys.argv[1] not in actions:
        sys.stderr.write(
            'usage: {0} si|i|b|d FILE [FILE ...]\n'.format(sys.argv[0]))
        sys.exit(1)

    mode = sys.argv[1]
    handler = actions[mode]
    for path in filter_files(mode, sys.argv[2:]):
        # Single-argument print() emits the same text on Python 2 and 3.
        print('File:  {0}'.format(path))
        handler(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment