Skip to content

Instantly share code, notes, and snippets.

@kwbr
Forked from roymontecutli/ g1_log_parser.sh
Created April 30, 2019 13:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kwbr/ac77cd84f49d36c23786e6d4ec26aec6 to your computer and use it in GitHub Desktop.
Save kwbr/ac77cd84f49d36c23786e6d4ec26aec6 to your computer and use it in GitHub Desktop.
#!/usr/bin/env awk -f
# Based on the idea from https://blogs.oracle.com/taylor22/entry/using_r_to_analyze_g1gc, the
# script is updated to use the format of the gc logs as received with the parameters:
# -XX:+UseThreadPriorities
# -XX:ThreadPriorityPolicy=42
# -Xms1995M -Xmx1995M
# -Xss256k -XX:StringTableSize=1000003
# -XX:SurvivorRatio=8
# -XX:MaxTenuringThreshold=1
# -XX:+UseTLAB
# -XX:+ResizeTLAB
# -XX:CompileCommandFile=/hotspot_compiler
# -XX:MaxGCPauseMillis=500
# -XX:InitiatingHeapOccupancyPercent=25
# -XX:+UseG1GC
# -XX:G1RSetUpdatingPauseTimePercent=5
# -XX:+ParallelRefProcEnabled
# -XX:ParallelGCThreads=2
# -XX:ConcGCThreads=2
# -XX:+PrintGCDetails
# -XX:+PrintGCDateStamps
# -XX:+PrintHeapAtGC
# -XX:+PrintTenuringDistribution
# -XX:+PrintGCApplicationStoppedTime
# -XX:+PrintPromotionFailure
# -XX:PrintFLSStatistics=1
# -Xloggc:/<valid path>/gc.log
# -XX:+UseGCLogFileRotation
# -XX:NumberOfGCLogFiles=10
# -XX:GCLogFileSize=10M
function parse_size(val)
{
######################
# Standardizes all the values of memory sizes to GB, gc log will have the scalar value and the
# measurement unit ("B", "K", "M") referring to Bytes
# @param val - string with the value as received on the log. (i.e "9437184K")
# @returns float with the size in GB
######################
if ( split(val, b, "B") > 1 ) {result=b[1]/(1024.0^3);}
if ( split(val, b, "K") > 1 ) {result=b[1]/(1024.0^2);}
if ( split(val, b, "M") > 1 ) {result=b[1]/1024.0;}
if ( split(val, b, "G") > 1 ) {result=b[1];}
return result;
}
BEGIN {
printf("Timestamp SecondsSinceLaunch IncrementalCount UserTime SysTime RealTime EdenBeforeSize EdenAfterSize SurvivorBefore SurvivorAfter HeapBeforeSize HeapAfterSize ParallelTime ERSAvg ERSSum URSAvg URSSum SRSAvg SRSSum CRTAvg CRTSum GCObjCopyAvg GCObjCopySum GCTerminationAvg GCTerminationSum GCOtherAvg GCOtherSum GCTotalAvg GCTotalSum\n")
}
######################
# Save count data from lines that are at the start of each G1GC trace.
# Each trace starts out like this:
# {Heap before GC invocations=14 (full 0):
# garbage-first heap total 9437184K, used 325496K [0xfffffffd00000000, 0xffffffff40000000, 0xffffffff40000000)
######################
/{Heap.*full/{
gsub ( "\\)" , "" );
nf=split($0,a,"=");
split(a[2],b," ");
getline;
if ( match($0, "first") ) {
G1GC=1;
IncrementalCount=b[1];
FullCount=substr( b[3], 1, length(b[3])-1 );
} else {
G1GC=0;
}
}
######################
# Retrieves information of the actions and where the GC is spending more time.
# It considers the following format
# [Parallel Time: 50.6 ms, GC Workers: 2]
# [GC Worker Start (ms): Min: 2645.0, Avg: 2645.0, Max: 2645.0, Diff: 0.0]
# [Ext Root Scanning (ms): Min: 2.3, Avg: 2.5, Max: 2.6, Diff: 0.3, Sum: 4.9]
# [Update RS (ms): Min: 4.9, Avg: 4.9, Max: 5.0, Diff: 0.1, Sum: 9.8]
# [Processed Buffers: Min: 2, Avg: 2.5, Max: 3, Diff: 1, Sum: 5]
# [Scan RS (ms): Min: 6.8, Avg: 7.0, Max: 7.2, Diff: 0.3, Sum: 14.0]
# [Code Root Scanning (ms): Min: 0.0, Avg: 0.7, Max: 1.3, Diff: 1.3, Sum: 1.4]
# [Object Copy (ms): Min: 34.9, Avg: 35.5, Max: 36.1, Diff: 1.2, Sum: 70.9]
# [Termination (ms): Min: 0.0, Avg: 0.0, Max: 0.0, Diff: 0.0, Sum: 0.0]
# [Termination Attempts: Min: 1, Avg: 1.0, Max: 1, Diff: 0, Sum: 2]
# [GC Worker Other (ms): Min: 0.0, Avg: 0.0, Max: 0.0, Diff: 0.0, Sum: 0.0]
# [GC Worker Total (ms): Min: 50.5, Avg: 50.5, Max: 50.5, Diff: 0.0, Sum: 101.0]
# [GC Worker End (ms): Min: 2695.5, Avg: 2695.5, Max: 2695.5, Diff: 0.0]
######################
/\[Parallel Time:.*]$/{
split($0,pt," ");
ParallelTime=pt[3];
}
/Ext Root Scanning/{
gsub( ",|]", " ");
split($0,ers," ");
ERS_Min=ers[6];
ERS_Avg=ers[8];
ERS_Max=ers[10];
ERS_Sum=ers[14];
}
/Update RS \(ms\)\:/{
gsub( ",|]", " ");
split($0,urs," ");
URS_Min=urs[5];
URS_Avg=urs[7];
URS_Max=urs[9];
URS_Sum=urs[13];
}
/Scan RS \(ms\)\:/{
gsub( ",|]", " ");
split($0,srs," ");
SRS_Min=srs[5];
SRS_Avg=srs[7];
SRS_Max=srs[9];
SRS_Sum=srs[13];
}
/Code Root Scanning \(ms\)\:/{
gsub( ",|]", " ");
split($0,crt," ");
CRT_Min=crt[6];
CRT_Avg=crt[8];
CRT_Max=crt[10];
CRT_Sum=crt[14];
}
/Object Copy \(ms\)\:/{
gsub( ",|]", " ");
split($0,oc," ");
GC_obj_copy_Min=oc[5];
GC_obj_copy_Avg=oc[7];
GC_obj_copy_Max=oc[9];
GC_obj_copy_Sum=oc[13];
}
/Termination \(ms\)\:/{
gsub( ",|]", " ");
split($0,term," ");
GC_Termination_Min=term[4];
GC_Termination_Avg=term[6];
GC_Termination_Max=term[8];
GC_Termination_Sum=term[12];
}
/GC Worker Other \(ms\)\:/{
gsub( ",|]", " ");
split($0,oth," ");
GC_worker_other_Min=oth[6];
GC_worker_other_Avg=oth[8];
GC_worker_other_Max=oth[10];
GC_worker_other_Sum=oth[14];
}
/GC Worker Total \(ms\)\:/{
gsub( ",|]", " ");
split($0,tot," ");
GC_worker_total_Min=tot[6];
GC_worker_total_Avg=tot[8];
GC_worker_total_Max=tot[10];
GC_worker_total_Sum=tot[14];
}
######################
# Pull out time stamps that are in lines with this format:
# 2014-05-12T14:02:06.025-0700: 94.312: [GC pause (young), 0.08870154 secs]
######################
/GC pause/ {
DateTime=substr($1, 0, length($1)-1);
split($2, a, ":")
SecondsSinceLaunch=a[1]
}
######################
# Heap sizes are in lines that look like this:
# [ 4842M(9216M)->4838M(9216M)]
######################
/\[Eden:.*]$/ {
gsub ( "\\[" , "" );
gsub ( "\\]" , "" );
gsub ( "->" , " " );
gsub ( "\\(" , " " );
gsub ( "\\)" , " " );
gsub ( "Survivors:", "" );
gsub ( "Heap:", "" );
split($0,a," ");
EdenBeforeSize=parse_size(a[2]);
EdenBeforeCap=parse_size(a[3]);
EdenAfterSize=parse_size(a[4]);
EdenAfterCap=parse_size(a[5]);
SurvBeforeSize=parse_size(a[6]);
SurvAfterSize=parse_size(a[7]);
HeapBeforeSize=parse_size(a[8]);
HeapBeforeCap=parse_size(a[9]);
HeapAfterSize=parse_size(a[10]);
HeapAfterCap=parse_size(a[11]);
}
######################
# Emit an output line when you find input that looks like this:
# [Times: user=1.41 sys=0.08, real=0.24 secs]
######################
/\[Times/ {
if (G1GC==1) {
gsub ( "," , "" );
split($2,a,"=");
UserTime=a[2];
split($3,a,"=");
SysTime=a[2];
split($4,a,"=");
RealTime=a[2];
print DateTime,SecondsSinceLaunch,IncrementalCount,UserTime,SysTime,RealTime,EdenBeforeSize,EdenAfterSize,SurvBeforeSize,SurvAfterSize,HeapBeforeSize,HeapAfterSize,ParallelTime,ERS_Avg,ERS_Sum,URS_Avg,URS_Sum,SRS_Avg,SRS_Sum,CRT_Avg,CRT_Sum,GC_obj_copy_Avg,GC_obj_copy_Sum,GC_Termination_Avg,GC_Termination_Sum,GC_worker_other_Avg,GC_worker_other_Sum,GC_worker_total_Avg,GC_worker_total_Sum,IPAddress;
}
}
Before analysing, the recording of the gc log information should be enabled with the JVM arguments
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
-XX:PrintFLSStatistics=1
-Xloggc:/<valid path>/gc.log
# The following are optional
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
There are 2 main components:
1) g1_log_parser.sh which parses the logs (the path for the logs was defined with the -Xloggc JVM argument)
example
./g1_log_parser.sh /<valid path>/gc.log > data/gc_log_parsed
2) Execute R scripts
- R framework can be downloaded from https://www.r-project.org/;
- it is suggested to use RStudio as an IDE for the scripts https://www.rstudio.com/products/rstudio/download/, optionally there are also plugins for IntelliJ (R4Intellij) and Eclipse (StatET) that are able to interpret and execute R scripts.
- it is required to install ggplot2 executing on R
install.packages('ggplot2')
library(ggplot2)
# Presents an histogram for the time required to complete a garbage collection action
# Assumes that g1gc has been already initialized, as on g1_gc_analysis.R
source('multiplot.R')
border_color <- 'black'
label_x <- 'Time (seconds)'
label_y <- 'Frequency'
# Real time is the total time taken to execute the garbage collection
title_hist_1 = paste(variable_study,"Real Time")
freq_hist_real <- ggplot(data=g1gc, aes(g1gc$RealTime)) +
geom_histogram(col=border_color, aes(fill=..count..), bins = 15) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_hist_1, subtitle))
# User time considers only the time when operations occurred on the User space
title_hist_2 = paste(variable_study,"User Time")
freq_hist_user <- ggplot(data=g1gc, aes(g1gc$UserTime)) +
geom_histogram(col=border_color, aes(fill=..count..), bins = 15) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_hist_2, subtitle))
# Sys (System time) considers the time when operations occurred on the system space (i.e.: kernel
# operations)
title_hist_3 = paste(variable_study,"Sys Time")
freq_hist_sys <- ggplot(data=g1gc, aes(g1gc$SysTime)) +
geom_histogram(col=border_color, aes(fill=..count..), bins = 15) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_hist_3, subtitle))
multiplot(freq_hist_real, freq_hist_user, freq_hist_sys, cols = 1)
# work based on
# https://blogs.oracle.com/taylor22/entry/using_r_to_analyze_g1gc
# For more information regarding G1
# https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
# Initializes the variable that contains the data, and calls the different scripts that contains the
# graphs to display
# Required for compatibility with the R4IntelliJ plugin
setwd('~/repo/db/tools/g1_analysis')
datetime_format <- "%Y-%m-%dT%H:%M:%OS%z"
# What is the variable that we are studying? This label will be appended on the title of all the
# graphs obtained, this will help to make easier the research
# Define the title that will appear on the plots, we usually set it to the environment and Cassandra cluster where the data was originated
variable_study <- "<Environment> <stack> -"
# Time range to apply for the test. Note that the times on date series will still be presented in UTC
lower_date <- "2016-05-12T00:00:00.000-0400"
upper_date <- "2016-06-18T00:00:00.000-0400"
timezone <- "America/New_York"
subtitle = paste("\n", lower_date, " - ", upper_date)
# Update the path to the location of the output from the logger
data_source <- "data/<input_parsed>.log"
g1gc_all <- read.csv(data_source, row.names = NULL, stringsAsFactors=FALSE, sep=" ")
g1gc <- subset(g1gc_all, as.POSIXlt(strptime(g1gc_all$Timestamp, datetime_format)) >= as.POSIXlt(strptime(lower_date, datetime_format)) &
as.POSIXlt(strptime(g1gc_all$Timestamp, datetime_format)) <= as.POSIXlt(strptime(upper_date, datetime_format)))
source('duration_histogram.R')
source('memory_composition.R')
source('gc_phases.R')
library(ggplot2)
# Presents the comparison of before and after the garbage collection is executed on the different
# sections of the memory.
# Assumes that g1gc has been already initialized, as on g1_gc_analysis.R
# More information of the meaning of the values is available at
# https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
# https://blogs.oracle.com/g1gc/entry/g1gc_logs_how_to_print
source('multiplot.R')
# Constants
datetime_format <- "%Y-%m-%dT%H:%M:%S"
timeline_comp_fields <- c('Timestamp', 'GCTotalAvg', 'ERSAvg', 'URSAvg', 'SRSAvg', 'CRTAvg',
'GCObjCopyAvg', 'GCTerminationAvg', 'GCOtherAvg')
pie_phases_fields <- c('ERSSum', 'URSSum', 'SRSSum', 'CRTSum', 'GCObjCopySum', 'GCTerminationSum',
'GCOtherSum')
pie_phases_desc <- c('External Root Scanning', 'Remembered Sets - Update',
'Remembered Sets - Scan', 'Code Root Scanning', 'Objects Copy', 'Termination', 'Other')
timeline_comp_values <- g1gc[timeline_comp_fields]
grandtotal <- colSums(g1gc["GCTotalSum"])
pie_phases_aggr_values <- colSums(g1gc[pie_phases_fields]) * 100 / grandtotal
pie_phases_values <- data.frame(pie_phases_desc, pie_phases_aggr_values)
colnames(pie_phases_values) <- c("name", "sum")
# Models the distribution of GC phases across the studies time
title_gc_1 <- paste(variable_study, "GC phases composition")
comp <- ggplot() +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$GCTotalAvg, color = "Total")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$ERSAvg, color = "External Root Scanning")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$URSAvg, color = "Remembered Sets - Update")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$SRSAvg, color = "Remembered Sets - Scan")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$CRTAvg, color = "Code Root Scanning")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$GCObjCopyAvg, color = "Objects copy")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$GCTerminationAvg, color = "Termination")) +
geom_line(data = timeline_comp_values,
aes(x=as.POSIXlt(strptime(timeline_comp_values$Timestamp, datetime_format)),
y = timeline_comp_values$GCOtherAvg, color = "Other")) +
labs(x = "Date time (UTC)", y = "Time (ms.)") +
ggtitle(paste(title_gc_1, subtitle))
# Pie chart to show the distribution of the time spent in the different phases (actions)
# that are taken by Garbage Collection.
title_gc_2 <- paste(variable_study, "GC phase composition")
pie_phase <- ggplot(pie_phases_values, aes (x="", y=pie_phases_values$sum, fill=name)) +
geom_bar(width = 1, stat = "identity", color = "black") +
coord_polar(theta = "y") +
labs(x = "", y = "Percentage (%)") +
ggtitle(paste(title_gc_2, subtitle))
multiplot(comp, pie_phase, cols = 1)
library(ggplot2)
# Presents the comparison of begore and after the garbage collection is executed on the different
# sections of the memory.
# Assumes that g1gc has been already initialized, as on g1_gc_analysis.R
source('multiplot.R')
# Constants
datetime_format <- "%Y-%m-%dT%H:%M:%S"
label_x <- 'Date time (UTC)'
# Note that log_parser.h standardizes to GB
label_y <- 'Size (GB)'
# Heap is the total memory size used
title_mem_1 = paste(variable_study,"Heap size")
heap <- ggplot() +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$HeapBeforeSize, color = "before")) +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$HeapAfterSize, color = "after")) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_mem_1, subtitle))
# Eden refers to the memory pool where most of the new objects are created. After a garbage
# collection, it is expected that it will decrease (as objects are removed or promoted to
# "survivor" state.
title_mem_2 = paste(variable_study,"Eden size")
eden <- ggplot() +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$EdenBeforeSize, color = "before")) +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$EdenAfterSize, color = "after")) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_mem_2, subtitle))
# Survivor, as the name proposes, refers to the memory space where objects that survive gc
# operations that were previously on Eden. This is expected to increase after a GC execution.
title_mem_3 = paste(variable_study,"Survivor size")
survivor <- ggplot() +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$SurvivorBefore, color = "before")) +
geom_line(data = g1gc, aes(x=as.POSIXlt(strptime(g1gc$Timestamp, datetime_format)),
y = g1gc$SurvivorAfter, color = "after")) +
labs(x = label_x, y = label_y) +
ggtitle(paste(title_mem_3, subtitle))
multiplot(heap, eden, survivor, cols = 1)
# Allows to present multiple graphs on the same canvas. Please note that it won't
# modify the logic of the plots received, but the size and proportions of the charts
# will be updated to fit them (with their legend boxes if present). Also this may
# depend on the resolution of the output device used. Most of the times
# with the default value of 1 for the cols will be fine.
# from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
# Allows present on the canvas of the output device, different charts (plots) on
# the same canvas.
# @param ... plots to be included on the canvas
# @param plotlist optionally, the plots can be defined as a data frame that contains
# the graphs to show, this will be merged with the graphs received above
# @param cols number of columns to used on the template
# @param optionally the layout can be precalculated and applied, this option is useful
# if themes will be used
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment