Skip to content

Instantly share code, notes, and snippets.

@lichengwu
Created December 21, 2012 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save lichengwu/4353790 to your computer and use it in GitHub Desktop.
Save lichengwu/4353790 to your computer and use it in GitHub Desktop.
Garbage First Log Analyse Util
# coding=utf-8
__author__ = 'lichengwu'
import datetime
import re
'''
Garbage First Log Analyse Util
'''
class G1LogUtil:
__path = ''
# some regular expression pattern
# like this '2012-12-19T10:25:19'
__START_LINE_PATTERN = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:')
# like this '112M(112M)->0B(112M)'
__MEMORY_INFO_PATTERN = re.compile('(\d+)([M|B|K|G])\((\d+)([M|B|K|G])\)->(\d+)([M|B|K|G])\((\d+)([M|B|K|G])\)')
# like this '16M->16M'
__SIMPLE_MEMORY_INFO_PATTERN = re.compile('(\d+)([M|B|K|G])->(\d+)([M|B|K|G])')
# constructor
def __init__(self, path):
self.__path = path
"""
analyse G1 log for java
"""
def analyse(self):
# get log reader
reader = self.__reader(self.__path)
# number of GC times
gc_count = 0
gc_count_young = 0
gc_count_mixed = 0
# total stop the world time
total_stop_time = 0.000000
# max heap size
max_heap_size = 0
# min heap size
min_heap_size = 0xEFFFFFFFFFFFFFFFF
# max eden size
max_eden_size = 0
# min eden size
min_eden_size = 0xEFFFFFFFFFFFFFFFF
# survivor size
survivor_size = None
# total eden size for statistics
total_eden_size = 0
# map store memory info
memory_usage_map = {'young_garbage_percent': [], 'heap_garbage_percent': [], 'young_usage_percent': [],
'heap_usage_percent': []}
#log start time
start_time = None
# log end time
finish_time = None
# gc work thread count
gc_work_thread_number = None
each_line = reader.next()
while each_line:
if self.__is_start_line(each_line):
token = each_line.split(' ')
total_stop_time += float(token[5])
if start_time is None:
start_time = self.__get_datetime(token[0])
finish_time = token[0]
gc_count += 1
gc_type = token[4][1:-2]
if gc_type == 'young':
gc_count_young += 1
elif gc_type == 'mixed':
gc_count_mixed += 1
elif each_line.find(' [Eden:') == 0:
'''
parse memory info
'''
memory_info = each_line.split(' ')
eden_info = self.__parse_memory_info(memory_info[4])
survivor_info = self.__parse_memory_info(memory_info[6])
if survivor_size is None:
survivor_size = survivor_info[1]
heap_info = self.__parse_memory_info(memory_info[8])
max_heap_size = max(max_heap_size, heap_info[1])
min_heap_size = min(heap_info[1], min_heap_size)
# garbage (heap) / before gc (heap)
memory_usage_map['heap_garbage_percent'].append(float(heap_info[0] - heap_info[2]) / heap_info[0])
# before gc (heap) / heap size
memory_usage_map['heap_usage_percent'].append(float(heap_info[0]) / heap_info[1])
max_eden_size = max(max_eden_size, eden_info[1])
min_eden_size = min(eden_info[1], min_eden_size)
# garbage (eden+survivor) / before gc (eden+survivor)
memory_usage_map['young_garbage_percent'].append(
float(eden_info[0] + survivor_info[0] - eden_info[2] - survivor_info[1]) / (
eden_info[0] + survivor_info[0]))
# before gc(eden+survivor) / eden+survivor*2
memory_usage_map['young_usage_percent'].append(
float(eden_info[0] + survivor_info[0]) / (eden_info[1] + survivor_info[1] * 2))
total_eden_size += eden_info[1]
elif gc_work_thread_number is None and each_line.find(' [GC Worker Start') == 0:
gc_work_thread_number = len(each_line.strip().split(' ')) - 1
each_line = reader.next()
finish_time = self.__get_datetime(finish_time)
reader.close()
print '''G1 log Time:
[%s] - [%s]''' % (
start_time.strftime('%Y-%m-%d %H:%M:%S'), finish_time.strftime('%Y-%m-%d %H:%M:%S'))
summary = '''Memory Info:
Min Heap Size\t= %sM
Max Heap Size\t= %sM
Min Eden Size\t= %sM
Max Eden Size\t= %sM
Avg Eden Size\t= %sM
Survivor Size\t= %sM''' % (
(max_heap_size / 1024), (min_heap_size / 1024), (max_eden_size / 1024), (min_eden_size / 1023),
(total_eden_size / gc_count / 1024), survivor_size / 1024)
print summary
gc_info = '''GC Info:
GC Work Threads\t= %s
Avg Stop Time\t= %.2fms
GC Throughput\t= %.2f%%
''' % (gc_work_thread_number, (total_stop_time * 1000 / gc_count),
total_stop_time * 100 / (finish_time.second - start_time.second))
gc_info += '''GC(yong) Times\t= %s
GC(mixed) Times\t= %s
Total GC Times\t= %s
''' % (gc_count_young, gc_count_mixed, gc_count)
gc_info += '''Avg Yong Generation Garbage Rate\t= %.2f%%
Avg Heap Garbage rate\t= %.2f%%
''' % (sum(memory_usage_map['young_garbage_percent']) * 100 / len(memory_usage_map['young_garbage_percent']),
sum(memory_usage_map['heap_garbage_percent']) * 100 / len(memory_usage_map['heap_garbage_percent']))
gc_info += '''Avg Max Young Generation Usage Rate\t=%.2f%%
Avg Max Heap Usage Rate\t=%.2f%%
''' % (sum(memory_usage_map['young_usage_percent']) * 100 / len(memory_usage_map['young_garbage_percent']),
sum(memory_usage_map['heap_usage_percent']) * 100 / len(memory_usage_map['heap_usage_percent']))
print gc_info
# get datetime from header line
def __get_datetime(self, str):
# time like this '2012-12-12T19:01:28.610'
datetime_string = str
if len(str) > 23:
datetime_string = str[0:23]
return datetime.datetime.strptime(datetime_string, '%Y-%m-%dT%H:%M:%S.%f')
# test if the line is g1 log start line
def __is_start_line(self, line):
#pattern = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:')
return self.__START_LINE_PATTERN.match(line) is not None
# private reader for read each line
def __reader(self, path):
log_file = open(path, 'r')
line = log_file.readline()
while line:
yield line
line = log_file.readline()
log_file.close()
yield None
'''
parse memory info to a tuple in kilobyte
eg: 1M->1M parse to (1024,1024)
2M(2M)->0B(1M) parse to (2028,2048,0,1024)
'''
def __parse_memory_info(self, info):
match = self.__MEMORY_INFO_PATTERN.match(info)
if match:
cell = match.groups()
return int(cell[0]) * self.__unit2kb(cell[1]), int(cell[2]) * self.__unit2kb(cell[3]), int(
cell[4]) * self.__unit2kb(cell[5]), int(cell[6]) * self.__unit2kb(cell[7])
match = self.__SIMPLE_MEMORY_INFO_PATTERN.match(info)
if match:
cell = match.groups()
return int(cell[0]) * self.__unit2kb(cell[1]), int(cell[2]) * self.__unit2kb(cell[3])
return None
# covert unit to KB
# M = 1024K
# G = 1024K = 1024K*1024
def __unit2kb(self, unit):
if unit == 'M':
return 1024
elif unit == 'K':
return 1
elif unit == "G":
return 1048576
else:
return 1
if __name__ == '__main__':
analyseG1Log = G1LogUtil('/Users/lichengwu/tmp/mtcms-debug.gc.log.2012-12-20')
analyseG1Log.analyse()
G1 log Time:
[2012-12-19 10:25:12] - [2012-12-20 17:22:16]
Memory Info:
Min Heap Size = 1024M
Max Heap Size = 1024M
Min Eden Size = 128M
Max Eden Size = 112M
Avg Eden Size = 117M
Survivor Size = 16M
GC Info:
GC Work Threads = 6
Avg Stop Time = 61.31ms
GC Throughput = 91.97%
GC(yong) Times = 60
GC(mixed) Times = 0
Total GC Times = 60
Avg Yong Generation Garbage Rate = 91.38%
Avg Heap Garbage rate = 39.49%
Avg Max Young Generation Usage Rate =92.06%
Avg Max Heap Usage Rate =29.38%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment