Created
December 21, 2012 16:20
-
-
Save lichengwu/4353790 to your computer and use it in GitHub Desktop.
Garbage First Log Analyse Util
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
__author__ = 'lichengwu' | |
import datetime | |
import re | |
''' | |
Garbage First Log Analyse Util | |
''' | |
class G1LogUtil: | |
__path = '' | |
# some regular expression pattern | |
# like this '2012-12-19T10:25:19' | |
__START_LINE_PATTERN = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:') | |
# like this '112M(112M)->0B(112M)' | |
__MEMORY_INFO_PATTERN = re.compile('(\d+)([M|B|K|G])\((\d+)([M|B|K|G])\)->(\d+)([M|B|K|G])\((\d+)([M|B|K|G])\)') | |
# like this '16M->16M' | |
__SIMPLE_MEMORY_INFO_PATTERN = re.compile('(\d+)([M|B|K|G])->(\d+)([M|B|K|G])') | |
# constructor | |
def __init__(self, path): | |
self.__path = path | |
""" | |
analyse G1 log for java | |
""" | |
def analyse(self): | |
# get log reader | |
reader = self.__reader(self.__path) | |
# number of GC times | |
gc_count = 0 | |
gc_count_young = 0 | |
gc_count_mixed = 0 | |
# total stop the world time | |
total_stop_time = 0.000000 | |
# max heap size | |
max_heap_size = 0 | |
# min heap size | |
min_heap_size = 0xEFFFFFFFFFFFFFFFF | |
# max eden size | |
max_eden_size = 0 | |
# min eden size | |
min_eden_size = 0xEFFFFFFFFFFFFFFFF | |
# survivor size | |
survivor_size = None | |
# total eden size for statistics | |
total_eden_size = 0 | |
# map store memory info | |
memory_usage_map = {'young_garbage_percent': [], 'heap_garbage_percent': [], 'young_usage_percent': [], | |
'heap_usage_percent': []} | |
#log start time | |
start_time = None | |
# log end time | |
finish_time = None | |
# gc work thread count | |
gc_work_thread_number = None | |
each_line = reader.next() | |
while each_line: | |
if self.__is_start_line(each_line): | |
token = each_line.split(' ') | |
total_stop_time += float(token[5]) | |
if start_time is None: | |
start_time = self.__get_datetime(token[0]) | |
finish_time = token[0] | |
gc_count += 1 | |
gc_type = token[4][1:-2] | |
if gc_type == 'young': | |
gc_count_young += 1 | |
elif gc_type == 'mixed': | |
gc_count_mixed += 1 | |
elif each_line.find(' [Eden:') == 0: | |
''' | |
parse memory info | |
''' | |
memory_info = each_line.split(' ') | |
eden_info = self.__parse_memory_info(memory_info[4]) | |
survivor_info = self.__parse_memory_info(memory_info[6]) | |
if survivor_size is None: | |
survivor_size = survivor_info[1] | |
heap_info = self.__parse_memory_info(memory_info[8]) | |
max_heap_size = max(max_heap_size, heap_info[1]) | |
min_heap_size = min(heap_info[1], min_heap_size) | |
# garbage (heap) / before gc (heap) | |
memory_usage_map['heap_garbage_percent'].append(float(heap_info[0] - heap_info[2]) / heap_info[0]) | |
# before gc (heap) / heap size | |
memory_usage_map['heap_usage_percent'].append(float(heap_info[0]) / heap_info[1]) | |
max_eden_size = max(max_eden_size, eden_info[1]) | |
min_eden_size = min(eden_info[1], min_eden_size) | |
# garbage (eden+survivor) / before gc (eden+survivor) | |
memory_usage_map['young_garbage_percent'].append( | |
float(eden_info[0] + survivor_info[0] - eden_info[2] - survivor_info[1]) / ( | |
eden_info[0] + survivor_info[0])) | |
# before gc(eden+survivor) / eden+survivor*2 | |
memory_usage_map['young_usage_percent'].append( | |
float(eden_info[0] + survivor_info[0]) / (eden_info[1] + survivor_info[1] * 2)) | |
total_eden_size += eden_info[1] | |
elif gc_work_thread_number is None and each_line.find(' [GC Worker Start') == 0: | |
gc_work_thread_number = len(each_line.strip().split(' ')) - 1 | |
each_line = reader.next() | |
finish_time = self.__get_datetime(finish_time) | |
reader.close() | |
print '''G1 log Time: | |
[%s] - [%s]''' % ( | |
start_time.strftime('%Y-%m-%d %H:%M:%S'), finish_time.strftime('%Y-%m-%d %H:%M:%S')) | |
summary = '''Memory Info: | |
Min Heap Size\t= %sM | |
Max Heap Size\t= %sM | |
Min Eden Size\t= %sM | |
Max Eden Size\t= %sM | |
Avg Eden Size\t= %sM | |
Survivor Size\t= %sM''' % ( | |
(max_heap_size / 1024), (min_heap_size / 1024), (max_eden_size / 1024), (min_eden_size / 1023), | |
(total_eden_size / gc_count / 1024), survivor_size / 1024) | |
print summary | |
gc_info = '''GC Info: | |
GC Work Threads\t= %s | |
Avg Stop Time\t= %.2fms | |
GC Throughput\t= %.2f%% | |
''' % (gc_work_thread_number, (total_stop_time * 1000 / gc_count), | |
total_stop_time * 100 / (finish_time.second - start_time.second)) | |
gc_info += '''GC(yong) Times\t= %s | |
GC(mixed) Times\t= %s | |
Total GC Times\t= %s | |
''' % (gc_count_young, gc_count_mixed, gc_count) | |
gc_info += '''Avg Yong Generation Garbage Rate\t= %.2f%% | |
Avg Heap Garbage rate\t= %.2f%% | |
''' % (sum(memory_usage_map['young_garbage_percent']) * 100 / len(memory_usage_map['young_garbage_percent']), | |
sum(memory_usage_map['heap_garbage_percent']) * 100 / len(memory_usage_map['heap_garbage_percent'])) | |
gc_info += '''Avg Max Young Generation Usage Rate\t=%.2f%% | |
Avg Max Heap Usage Rate\t=%.2f%% | |
''' % (sum(memory_usage_map['young_usage_percent']) * 100 / len(memory_usage_map['young_garbage_percent']), | |
sum(memory_usage_map['heap_usage_percent']) * 100 / len(memory_usage_map['heap_usage_percent'])) | |
print gc_info | |
# get datetime from header line | |
def __get_datetime(self, str): | |
# time like this '2012-12-12T19:01:28.610' | |
datetime_string = str | |
if len(str) > 23: | |
datetime_string = str[0:23] | |
return datetime.datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%S.%f') | |
# test if the line is g1 log start line | |
def __is_start_line(self, line): | |
#pattern = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:') | |
return self.__START_LINE_PATTERN.match(line) is not None | |
# private reader for read each line | |
def __reader(self, path): | |
log_file = open(path, 'r') | |
line = log_file.readline() | |
while line: | |
yield line | |
line = log_file.readline() | |
log_file.close() | |
yield None | |
''' | |
parse memory info to a tuple in kilobyte | |
eg: 1M->1M parse to (1024,1024) | |
2M(2M)->0B(1M) parse to (2028,2048,0,1024) | |
''' | |
def __parse_memory_info(self, info): | |
match = self.__MEMORY_INFO_PATTERN.match(info) | |
if match: | |
cell = match.groups() | |
return int(cell[0]) * self.__unit2kb(cell[1]), int(cell[2]) * self.__unit2kb(cell[3]), int( | |
cell[4]) * self.__unit2kb(cell[5]), int(cell[6]) * self.__unit2kb(cell[7]) | |
match = self.__SIMPLE_MEMORY_INFO_PATTERN.match(info) | |
if match: | |
cell = match.groups() | |
return int(cell[0]) * self.__unit2kb(cell[1]), int(cell[2]) * self.__unit2kb(cell[3]) | |
return None | |
# covert unit to KB | |
# M = 1024K | |
# G = 1024K = 1024K*1024 | |
def __unit2kb(self, unit): | |
if unit == 'M': | |
return 1024 | |
elif unit == 'K': | |
return 1 | |
elif unit == "G": | |
return 1048576 | |
else: | |
return 1 | |
if __name__ == '__main__': | |
analyseG1Log = G1LogUtil('/Users/lichengwu/tmp/mtcms-debug.gc.log.2012-12-20') | |
analyseG1Log.analyse() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
G1 log Time: | |
[2012-12-19 10:25:12] - [2012-12-20 17:22:16] | |
Memory Info: | |
Min Heap Size = 1024M | |
Max Heap Size = 1024M | |
Min Eden Size = 128M | |
Max Eden Size = 112M | |
Avg Eden Size = 117M | |
Survivor Size = 16M | |
GC Info: | |
GC Work Threads = 6 | |
Avg Stop Time = 61.31ms | |
GC Throughput = 91.97% | |
GC(yong) Times = 60 | |
GC(mixed) Times = 0 | |
Total GC Times = 60 | |
Avg Yong Generation Garbage Rate = 91.38% | |
Avg Heap Garbage rate = 39.49% | |
Avg Max Young Generation Usage Rate =92.06% | |
Avg Max Heap Usage Rate =29.38% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment