Created
July 19, 2013 02:19
-
-
Save chaozh/6034608 to your computer and use it in GitHub Desktop.
A script to analyze InnoDB table (.ibd) files; it has some known problems with record sizes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
#encoding=utf-8
import os | |
import sys | |
import getopt | |
import struct | |
def printf(format, *args):
    """Print ``format`` interpolated with ``args`` (C-style ``printf``).

    ``format`` is a %-style format string and ``args`` are the values
    substituted into it. ``print`` appends a newline after the result.

    The call form ``print(...)`` with a single argument behaves the same on
    Python 2 and Python 3, unlike the ``print`` statement.
    """
    print(format % args)
# Size in bytes of one page of the tablespace file.
PAGE_SIZE = 16 * 1024
# Page-number value used in the prev/next links to mean "no such page".
INVALID_PAGE_NO = 0xFFFFFFFF

# Byte offsets of the fields at the start of every page.
FIL_PAGE_SPACE_OR_CHKSUM = 0
FIL_PAGE_OFFSET = 4
FIL_PAGE_PREV = 8
FIL_PAGE_NEXT = 12
FIL_PAGE_LSN = 16
FIL_PAGE_TYPE = 24
# Page type of index (0x45BF)
FIL_PAGE_INDEX = 17855
# Offset of the data on the page
FIL_PAGE_DATA = 38

# On a page of any file segment, data may be put starting from this offset
FSEG_PAGE_DATA = FIL_PAGE_DATA
FSEG_HEADER_SIZE = 10

# Offset of page header
PAGE_HEADER = FIL_PAGE_DATA
# Offset of data on the page
PAGE_DATA = PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE
# Records in the page (offset relative to PAGE_HEADER)
PAGE_N_RECS = 16
# Level of the node in the index tree; the leaf level is the level 0
# (offset relative to PAGE_HEADER)
PAGE_LEVEL = 26

# Number of extra bytes stored in front of each record's origin.
REC_N_NEW_EXTRA_BYTES = 5
# Origins of the two fixed boundary records on an index page.
PAGE_NEW_INFIMUM = PAGE_DATA + REC_N_NEW_EXTRA_BYTES
PAGE_NEW_SUPREMUM = PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8

# Distances, counted backwards from a record's origin, of its
# next-record pointer and of its info-bits byte.
REC_NEXT = 2
REC_NEW_INFO_BITS = 5
REC_INFO_BITS_MASK = 0xF0
REC_INFO_MIN_REC_FLAG = 0x10
REC_INFO_DELETED_FLAG = 0x20
class IBD_page:
    """One page of an .ibd file, decoded lazily from a raw byte buffer.

    ``buf`` holds the page's bytes and ``page_no`` is its index within the
    file. All multi-byte integers are read in big-endian order. Record
    positions ("rec") are byte offsets inside the page; ``0`` is used as the
    "no more records" marker by ``first_rec``/``next_rec``.
    """

    # struct formats for the integer widths get_int_n() supports.
    _INT_FORMATS = {1: ">B", 2: ">H", 4: ">I", 8: ">Q"}

    def __init__(self, buf, page_no):
        self.buf = buf
        self.page_no = page_no

    def type(self):
        """Return the 2-byte page type stored at FIL_PAGE_TYPE."""
        return self.get_int_n(FIL_PAGE_TYPE, 2)

    def in_btr(self):
        """Return True when the page type is FIL_PAGE_INDEX."""
        return self.type() == FIL_PAGE_INDEX

    def is_btr_root(self):
        """Return True for an index page that has no previous and no next page."""
        return (self.in_btr()
                and self.prev_page() == INVALID_PAGE_NO
                and self.next_page() == INVALID_PAGE_NO)

    def level(self):
        """Return the B-tree level of the page (0 is the leaf level)."""
        return self.get_int_n(PAGE_HEADER + PAGE_LEVEL, 2)

    def record_count(self):
        """Return the number of records recorded in the page header."""
        # PAGE_N_RECS is a 2-byte field; a 4-byte read would also pull in
        # the first two bytes of the header field that follows it.
        return self.get_int_n(PAGE_HEADER + PAGE_N_RECS, 2)

    def prev_page(self):
        """Return the page number stored in the FIL_PAGE_PREV link."""
        return self.get_int_n(FIL_PAGE_PREV, 4)

    def next_page(self):
        """Return the page number stored in the FIL_PAGE_NEXT link."""
        return self.get_int_n(FIL_PAGE_NEXT, 4)

    def first_rec(self):
        """Return the offset of the record after the infimum, or 0 if none."""
        return self.next_rec(PAGE_NEW_INFIMUM)

    def next_rec(self, rec):
        """Return the offset of the record following ``rec``, or 0 at the end.

        The 2-byte pointer stored REC_NEXT bytes before ``rec`` is relative
        to ``rec`` itself; the sum is wrapped to the page size so that a
        backward (negative, two's-complement) delta lands inside the page.
        """
        delta = self.get_int_n(rec - REC_NEXT, 2)
        if delta == 0:
            # A zero pointer would make the record its own successor and
            # callers would loop forever; treat it as the end of the list.
            return 0
        offset = (rec + delta) & (PAGE_SIZE - 1)
        if offset == PAGE_NEW_SUPREMUM:
            return 0
        return offset

    def rec_key_str(self, rec, len):
        """Return the first ``len`` bytes of the record starting at ``rec``."""
        return self.get_str(rec, len)

    def rec_pointer(self, rec, key_len):
        """Return the child page number of a node-pointer record.

        Leaf pages (level 0) hold no child pointers, so 0 is returned there.
        """
        if self.level() == 0:
            return 0
        # Pointer is the last field of index row
        return self.get_int_n(rec + key_len, 4)

    def rec_flags(self, rec):
        """Return the record's flags as text: "D" deleted, "M" minimum record."""
        bits = self.get_int_n(rec - REC_NEW_INFO_BITS, 1)
        flags = ""
        if bits & REC_INFO_DELETED_FLAG:
            flags += "D"
        if bits & REC_INFO_MIN_REC_FLAG:
            flags += "M"
        return flags

    def get_int_n(self, offset, len):
        """Read a ``len``-byte unsigned big-endian integer at ``offset``.

        ``len`` must be 1, 2, 4 or 8; any other width raises ValueError.
        ``struct.error`` is raised if the buffer is too short.
        """
        try:
            fmt = self._INT_FORMATS[len]
        except KeyError:
            raise ValueError("unsupported integer width: %r" % (len,))
        return struct.unpack_from(fmt, self.buf, offset)[0]

    def get_str(self, offset, len):
        """Return exactly ``len`` bytes of the page buffer starting at ``offset``."""
        return self.buf[offset:offset + len]
class IBD_file:
    """Page-oriented reader for an .ibd file.

    The file is opened once in the constructor and kept open until
    ``close()`` is called or the object is garbage collected.
    """

    def __init__(self, filename):
        # Binary mode: the file holds raw page data, and "rb" behaves the
        # same on Python 2 and Python 3 (the ``file`` builtin is Python 2 only).
        self.file = open(filename, "rb")
        self.size = os.path.getsize(filename)

    def close(self):
        """Close the underlying file; safe to call more than once."""
        handle = getattr(self, "file", None)
        if handle is not None:
            handle.close()
            self.file = None

    def __del__(self):
        # ``file`` may be missing if open() raised in __init__.
        self.close()

    def read_page(self, page_no):
        """Return page ``page_no`` as an IBD_page, or None if out of range.

        A page is out of range when it does not lie completely inside the
        file; a warning is printed in that case.
        """
        if (page_no + 1) * PAGE_SIZE > self.size:
            printf("Page No.(%d) is too large", page_no)
            return None
        self.file.seek(page_no * PAGE_SIZE, 0)
        return IBD_page(self.file.read(PAGE_SIZE), page_no)

    def page_count(self):
        """Return the number of whole pages in the file."""
        return self.size // PAGE_SIZE

    def btr_roots(self):
        """Return the page numbers of all pages for which is_btr_root() holds."""
        return [page_no for page_no in range(self.page_count())
                if self.read_page(page_no).is_btr_root()]

    def print_btr_roots(self, print_key_len):
        """Print every root page and the first ``print_key_len`` bytes of its keys."""
        for page_no in self.btr_roots():
            page = self.read_page(page_no)
            printf("B-Tree Root Page: %d, Level: %d, Records: %d", page_no,
                   page.level(), page.record_count())
            rec = page.first_rec()
            while rec != 0:
                printf(" Key(%s)", page.rec_key_str(rec, print_key_len))
                rec = page.next_rec(rec)

    def print_btr(self, root, key_len, print_key_len):
        """Print the B-tree rooted at page ``root``, level by level.

        ``key_len`` is the full key length in bytes (used to locate the child
        pointer in non-leaf records); ``print_key_len`` is how many key bytes
        to print. Each level is walked left to right through the next-page
        links, then the walk drops to the first child seen on that level.
        Exits the process with status 1 if a page cannot be read.
        """
        page_no = root
        next_level_page_no = -1
        while page_no > 0:
            page = self.read_page(page_no)
            if page is None:
                print("Reading page failed. You probable set a wrong key length")
                sys.exit(1)
            level = page.level()
            printf("Page: %d, Level: %d, Records: %d\n", page_no, level,
                   page.record_count())
            rec = page.first_rec()
            while rec != 0:
                flags = page.rec_flags(rec)
                key = page.rec_key_str(rec, print_key_len)
                if level == 0:
                    printf(" Flags(%s), Key(%s)\n", flags, key)
                else:
                    pointer = page.rec_pointer(rec, key_len)
                    printf(" Flags(%s), Key(%s), Pointer(%d)\n", flags, key,
                           pointer)
                    # Remember the leftmost child of this level so the walk
                    # can continue there once the level is exhausted.
                    if next_level_page_no == -1:
                        next_level_page_no = pointer
                rec = page.next_rec(rec)
            page_no = page.next_page()
            if page_no == INVALID_PAGE_NO:
                # End of this level: descend, or stop (-1) below the leaves.
                page_no = next_level_page_no
                next_level_page_no = -1
def print_help():
    """Print the command-line usage text to standard output."""
    # The text no longer ends with the stray "EOF" line (a heredoc
    # terminator that had ended up inside the string literal).
    print('''ibd-analizer useage:
  ./ibd-analizer.py [OPTIONS] <ibdfile>

OPTIONS
=======
  -h, --help:
      show help.
  -R, --roots:
      print all root pages of b-trees.
  -r n, --root-page n:
      print the b-tree that starts at 'n' page. it will be ignored if '-R' is set.
  -L n, --key-len n:
      the key of the b-tree is 'n' bytes long. It has to be set with '-r' together.
      In a clustered index b-tree, it is the sum of all primary key fields(length).
      In a secondary index b-tree, it is the sum of all the secondary key fields and
      primary key fields. E.g.
        CREATE TABLE t1(c1 char(4) PRIMARY KEY, c2 char(5), INDEX(c2));
      In clustered index b-tree, the key length is 4.
      In secondary index b-tree, the key length is 9.
  -l n, --print-key-len n:
      print only the first 'n' bytes of the key. The first 4 bytes will be printed,
      if it is not set.

OUTPUT FIELDS
=============
  Page
      Page No. of current page.
  Level
      B-Tree level of current page.
  Records
      How many records(include deleted records) in the page.
  Flags
      M - It is the minimum key record in the same level of the b-tree.
      D - The record was marked as 'deleted'. It has been delete by user.
  Key
      Value of the key.
  Pointer
      Page No. of the next level page belongs to the key.
''')
def main():
    """Parse the command line and print either the root pages or one b-tree.

    Root pages are listed when '-R' is given or no root page is selected;
    otherwise the b-tree starting at the '-r' page is printed, which also
    requires '-L'. Usage errors print a message and exit with status 1.
    """
    roots = False
    root_page = 0
    key_len = 0
    print_key_len = 4  # Print only the first 4 bytes of keys, by default

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hRr:L:l:",
            ["help", "roots", "root-page=", "key-len=", "print-key-len="])
    except getopt.GetoptError as err:
        # Report what getopt actually rejected.
        print(str(err))
        print_help()
        sys.exit(1)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print_help()
            sys.exit(0)
        if opt in ('-R', '--roots'):
            # Must be a real tuple: without the comma the two literals
            # concatenate into one string and '-r' matches it as a substring.
            roots = True
            continue
        # Every remaining option takes an integer argument.
        try:
            number = int(arg)
        except ValueError:
            printf("Option %s expects a number, got '%s'", opt, arg)
            sys.exit(1)
        if opt in ('-r', '--root-page'):
            root_page = number
        elif opt in ('-L', '--key-len'):
            key_len = number
        elif opt in ('-l', '--print-key-len'):
            print_key_len = number

    # The file name is a positional argument, so it is ``args`` (not
    # ``opts``) that must be non-empty.
    if not args:
        print("Missing ibdfile argument!")
        print_help()
        sys.exit(1)

    f = IBD_file(args[0])
    if roots or not root_page:
        f.print_btr_roots(print_key_len)
    else:
        if not key_len:
            print("--key-len is not set. It has to be set when you print a b-tree.")
            sys.exit(1)
        f.print_btr(root_page, key_len, print_key_len)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment