Skip to content

Instantly share code, notes, and snippets.

@chaozh
Created July 19, 2013 02:19
Show Gist options
  • Save chaozh/6034608 to your computer and use it in GitHub Desktop.
Save chaozh/6034608 to your computer and use it in GitHub Desktop.
A script to analyze InnoDB table (.ibd) files; it still has some known problems with record sizes.
#! /usr/bin/env python
#encoding=utf-8
import os
import sys
import getopt
import struct
def printf(format, *args):
    """Print ``format % args`` followed by a newline on standard output.

    ``format`` is a %-style format string and ``args`` supplies its values.
    A newline is always appended, so format strings should not end with one.
    """
    # A single parenthesised expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(format % args)
# ---------------------------------------------------------------------------
# Layout constants used by the page parser.  Unless noted otherwise, values
# are byte offsets or byte counts.
# ---------------------------------------------------------------------------

# Size of one page: 16 KiB.
PAGE_SIZE = 1 << 14
# Sentinel compared against the prev/next page numbers to detect "no page".
INVALID_PAGE_NO = 0xFFFFFFFF

# Offsets of the fields at the start of a page.
FIL_PAGE_SPACE_OR_CHKSUM = 0x00
FIL_PAGE_OFFSET = 0x04
FIL_PAGE_PREV = 0x08
FIL_PAGE_NEXT = 0x0C
FIL_PAGE_LSN = 0x10
FIL_PAGE_TYPE = 0x18

# Page type value that marks an index page.
FIL_PAGE_INDEX = 0x45BF

# Offset at which the data of a page starts.
FIL_PAGE_DATA = 38

# On a page of any file segment, data may be placed from this offset onwards.
FSEG_PAGE_DATA = FIL_PAGE_DATA
FSEG_HEADER_SIZE = 10

# Offset of the page header.
PAGE_HEADER = FIL_PAGE_DATA
# Offset of the data area: the page header is followed by 36 bytes and two
# file segment headers.
PAGE_DATA = PAGE_HEADER + 36 + FSEG_HEADER_SIZE * 2

# Offsets relative to PAGE_HEADER.
# Number of records in the page.
PAGE_N_RECS = 16
# Level of the node in the index tree; the leaf level is level 0.
PAGE_LEVEL = 26

# Number of extra bytes placed in front of each record.
REC_N_NEW_EXTRA_BYTES = 5
# Positions of the two fixed records; the record walk starts after the
# infimum and stops when it reaches the supremum.
PAGE_NEW_INFIMUM = PAGE_DATA + REC_N_NEW_EXTRA_BYTES
PAGE_NEW_SUPREMUM = PAGE_DATA + REC_N_NEW_EXTRA_BYTES * 2 + 8

# Distances counted backwards from a record's position: the next-record link
# is read at rec - REC_NEXT, the info-bits byte at rec - REC_NEW_INFO_BITS.
REC_NEXT = 2
REC_NEW_INFO_BITS = 5

# Bit masks applied to the info-bits byte.
REC_INFO_BITS_MASK = 0xF0
REC_INFO_MIN_REC_FLAG = 1 << 4   # reported as "M"
REC_INFO_DELETED_FLAG = 1 << 5   # reported as "D"
class IBD_page:
    """Read-only view over the raw bytes of one page of an InnoDB .ibd file.

    ``buf`` holds the page contents and ``page_no`` is the page's number
    within the file.  A "rec" argument is a record position expressed as a
    byte offset into ``buf``; a position of 0 means "no more records".
    Multi-byte integers are decoded as big-endian unsigned values.
    """

    # struct format for each supported integer width, in bytes.
    _INT_FORMATS = {1: ">B", 2: ">H", 4: ">I", 8: ">Q"}

    def __init__(self, buf, page_no):
        self.buf = buf
        self.page_no = page_no

    def type(self):
        """Return the 2-byte page type stored at FIL_PAGE_TYPE."""
        return self.get_int_n(FIL_PAGE_TYPE, 2)

    def in_btr(self):
        """Return True when the page type is FIL_PAGE_INDEX."""
        return self.type() == FIL_PAGE_INDEX

    def is_btr_root(self):
        """Return True for an index page that has neither a previous nor a
        next page."""
        return (self.in_btr()
                and self.prev_page() == INVALID_PAGE_NO
                and self.next_page() == INVALID_PAGE_NO)

    def level(self):
        """Return the B-tree level of the page; leaf pages are level 0."""
        return self.get_int_n(PAGE_HEADER + PAGE_LEVEL, 2)

    def record_count(self):
        """Return the number of records stored in the page."""
        # PAGE_N_RECS is a 2-byte field; reading 4 bytes would also pull in
        # the start of the following header field and inflate the count.
        return self.get_int_n(PAGE_HEADER + PAGE_N_RECS, 2)

    def prev_page(self):
        """Return the page number stored at FIL_PAGE_PREV."""
        return self.get_int_n(FIL_PAGE_PREV, 4)

    def next_page(self):
        """Return the page number stored at FIL_PAGE_NEXT."""
        return self.get_int_n(FIL_PAGE_NEXT, 4)

    def first_rec(self):
        """Return the position of the record that follows the infimum
        record, or 0 when the page has no further records."""
        return self.next_rec(PAGE_NEW_INFIMUM)

    def next_rec(self, rec):
        """Return the position of the record after ``rec``.

        Returns 0 when the next record is the supremum record, i.e. when
        ``rec`` is the last record of the page.
        """
        # The stored link is relative to the current record's position.
        # It is kept in 16 bits, so a backward link shows up as a large
        # unsigned value; because 65536 is a multiple of PAGE_SIZE, reducing
        # the sum modulo PAGE_SIZE yields the position inside the page.
        offset = (self.get_int_n(rec - REC_NEXT, 2) + rec) & (PAGE_SIZE - 1)
        if offset == PAGE_NEW_SUPREMUM:
            return 0
        return offset

    def rec_key_str(self, rec, len):
        """Return the first ``len`` bytes of the record at ``rec``."""
        return self.get_str(rec, len)

    def rec_pointer(self, rec, key_len):
        """Return the child page number stored in the record at ``rec``.

        ``key_len`` is the length in bytes of the key that precedes the
        pointer.  Records of leaf pages (level 0) carry no pointer, so 0 is
        returned for them.
        """
        if self.level() == 0:
            return 0
        # The pointer is the last field of an index row, right after the key.
        return self.get_int_n(rec + key_len, 4)

    def rec_flags(self, rec):
        """Return the flag letters of the record at ``rec``.

        "D" is included when the deleted flag is set and "M" when the
        minimum-record flag is set, in that order; the result is "" when
        neither is set.
        """
        info_bits = self.get_int_n(rec - REC_NEW_INFO_BITS, 1)
        flags = ""
        if info_bits & REC_INFO_DELETED_FLAG:
            flags += "D"
        if info_bits & REC_INFO_MIN_REC_FLAG:
            flags += "M"
        return flags

    def get_int_n(self, offset, len):
        """Decode a big-endian unsigned integer of ``len`` bytes at ``offset``.

        ``len`` must be 1, 2, 4 or 8; any other value raises ValueError.
        struct.error is raised when the buffer holds fewer than ``len``
        bytes at ``offset``.
        """
        try:
            fmt = self._INT_FORMATS[len]
        except KeyError:
            raise ValueError("unsupported integer length: %r" % (len,))
        return struct.unpack(fmt, self.buf[offset:offset + len])[0]

    def get_str(self, offset, len):
        """Return the ``len`` bytes of the page buffer starting at ``offset``."""
        return self.buf[offset:offset + len]
class IBD_file:
    """An InnoDB .ibd file opened for page-by-page reading.

    The file is treated as a sequence of PAGE_SIZE-byte pages numbered from
    0.  ``size`` is the file size in bytes taken when the object is created.
    """

    def __init__(self, filename):
        # Binary mode: page contents are raw bytes and must not go through
        # newline translation or text decoding.
        self.file = open(filename, "rb")
        self.size = os.path.getsize(filename)

    def __del__(self):
        # __init__ may have failed before self.file was assigned.
        handle = getattr(self, "file", None)
        if handle is not None:
            handle.close()

    def read_page(self, page_no):
        """Return page ``page_no`` as an IBD_page.

        Prints a message and returns None when ``page_no`` lies beyond the
        last complete page of the file.
        """
        if page_no >= self.page_count():
            printf("Page No.(%d) is too large", page_no)
            return None
        self.file.seek(page_no * PAGE_SIZE, 0)
        return IBD_page(self.file.read(PAGE_SIZE), page_no)

    def page_count(self):
        """Return the number of complete pages in the file."""
        # Floor division keeps the result an int on Python 3 as well.
        return self.size // PAGE_SIZE

    def btr_roots(self):
        """Return the page numbers of all B-tree root pages, ascending."""
        roots = []
        for page_no in range(self.page_count()):
            page = self.read_page(page_no)
            if page is not None and page.is_btr_root():
                roots.append(page_no)
        return roots

    def print_btr_roots(self, print_key_len):
        """Print every B-tree root page and the keys of its records.

        ``print_key_len`` is the number of key bytes shown per record.
        """
        for page_no in self.btr_roots():
            page = self.read_page(page_no)
            if page is None:
                continue
            printf("B-Tree Root Page: %d, Level: %d, Records: %d", page_no,
                   page.level(), page.record_count())
            rec = page.first_rec()
            while rec != 0:
                printf(" Key(%s)", page.rec_key_str(rec, print_key_len))
                rec = page.next_rec(rec)

    def print_btr(self, root, key_len, print_key_len):
        """Print the B-tree that starts at page ``root``, level by level.

        ``key_len`` is the full key length in bytes, needed to locate the
        child pointer in non-leaf records; ``print_key_len`` is the number
        of key bytes shown.  Each level is walked from ``root``'s side to
        the right through the next-page links, then the walk descends to
        the child of the first record seen on that level.  Exits the
        process with status 1 when a page cannot be read.
        """
        page_no = root
        next_level_page_no = -1
        while page_no > 0:
            page = self.read_page(page_no)
            if page is None:
                print("Reading page failed. You probable set a wrong key length")
                sys.exit(1)
            level = page.level()
            # printf already ends the line, so the formats carry no "\n".
            printf("Page: %d, Level: %d, Records: %d", page_no, level,
                   page.record_count())
            rec = page.first_rec()
            while rec != 0:
                if level == 0:
                    printf(" Flags(%s), Key(%s)", page.rec_flags(rec),
                           page.rec_key_str(rec, print_key_len))
                else:
                    pointer = page.rec_pointer(rec, key_len)
                    printf(" Flags(%s), Key(%s), Pointer(%d)",
                           page.rec_flags(rec),
                           page.rec_key_str(rec, print_key_len),
                           pointer)
                    # Remember where the next level down begins.
                    if next_level_page_no == -1:
                        next_level_page_no = pointer
                rec = page.next_rec(rec)
            page_no = page.next_page()
            if page_no == INVALID_PAGE_NO:
                # End of this level: descend, or stop (-1) after the leaves.
                page_no = next_level_page_no
                next_level_page_no = -1
def print_help():
    """Print the command-line usage text to standard output."""
    # The text is a single literal; print() with one argument behaves the
    # same on Python 2 and Python 3.
    print('''ibd-analizer useage:
./ibd-analizer.py [OPTIONS] <ibdfile>
OPTIONS
=======
-h, --help:
show help.
-R, --roots:
print all root pages of b-trees.
-r n, --root-page n:
print the b-tree that starts at 'n' page. it will be ignored if '-R' is set.
-L n, --key-len n:
the key of the b-tree is 'n' bytes long. It has to be set with '-r' together.
In a clustered index b-tree, it is the sum of all primary key fields(length).
In a secondary index b-tree, it is the sum of all the secondary key fields and
primary key fields. E.g.
CREATE TABLE t1(c1 char(4) PRIMARY KEY, c2 char(5), INDEX(c2));
In clustered index b-tree, the key length is 4.
In secondary index b-tree, the key length is 9.
-l n, --print-key-len n:
print only the first 'n' bytes of the key. The first 4 bytes will be printed,
if it is not set.
OUTPUT FIELDS
=============
Page
Page No. of current page.
Level
B-Tree level of current page.
Records
How many records(include deleted records) in the page.
Flags
M - It is the minimum key record in the same level of the b-tree.
D - The record was marked as 'deleted'. It has been delete by user.
Key
Value of the key.
Pointer
Page No. of the next level page belongs to the key.
''')
def main():
    """Parse the command line and print B-tree roots or one B-tree.

    Reads options from ``sys.argv``.  With '-R', or without '-r', the root
    pages of all B-trees in the given file are printed; with '-r n' and
    '-L n' the B-tree starting at page n is printed.  Exits with status 2
    on a usage error, 1 when '-r' is given without '-L', and 0 after '-h'.
    """
    roots = False
    root_page = 0
    key_len = 0
    print_key_len = 4  # Print only the first 4 bytes of keys, by default

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hRr:L:l:",
            ["help", "roots", "root-page=", "key-len=", "print-key-len="])
    except getopt.GetoptError as err:
        # Report what was actually wrong with the options.
        print(str(err))
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print_help()
            sys.exit(0)
        elif opt in ('-R', '--roots'):
            roots = True
        elif opt in ('-r', '--root-page'):
            root_page = int(arg)
        elif opt in ('-L', '--key-len'):
            key_len = int(arg)
        elif opt in ('-l', '--print-key-len'):
            print_key_len = int(arg)

    # The file name is a positional argument, so it shows up in args, not
    # in opts; running with only a file name is valid and prints the roots.
    if not args:
        print("Missing ibdfile argument!")
        print_help()
        sys.exit(2)

    f = IBD_file(args[0])
    if roots or not root_page:
        f.print_btr_roots(print_key_len)
    else:
        if not key_len:
            print("--key-len is not set. It has to be set when you print a b-tree.")
            sys.exit(1)
        f.print_btr(root_page, key_len, print_key_len)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment