theprogrammerin/mysql_slow_log_parser

## mysql_slow_log_parser
#!/usr/bin/perl
#
# Ashutosh Agrawal
# http://blog.theprogrammer.in
#
# 2014-11-20 v1.0
#
# This script is modified version of SQL parser written by
#
# Nathanial Hendler
# http://retards.org/
#
# This perl script parses a MySQL slow_queries log file
# ignoring all queries less than $min_time and prints
# the required data in a signle line, for easy parsing of it
# by any log parser.
#
#
# Usage: mysql_slow_log_parser logfile
#
# ------------------------
# SOMETHING TO THINK ABOUT (aka: how to read output)
# ------------------------
#
# Also, it does to regex substitutions to normalize
# the queries...
#
#   $query_string =~ s/\d+/XXX/g;
#   $query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g;
#
# These replace numbers with XXX and strings found in
# quotes with XXX so that the same select statement
# with different WHERE clauses will be considered
# as the same query.
#
# so these...
#
#   SELECT * FROM offices WHERE office_id = 3;
#   SELECT * FROM offices WHERE office_id = 19;
#
# become...
#
#   SELECT * FROM offices WHERE office_id = XXX;
#
#
# And these...
#
#   SELECT * FROM photos WHERE camera_model LIKE 'Nikon%';
#   SELECT * FROM photos WHERE camera_model LIKE '%Olympus';
#
# become...
#
#   SELECT * FROM photos WHERE camera_model LIKE 'XXX';
#
#
# ---------------------
# THIS MAY BE IMPORTANT (aka: Probably Not)
# ---------------------
#
# *SO* if you use numbers in your table names, or column
# names, you might get some oddities, but I doubt it.
# I mean, how different should the following queries be
# considered?
#
#   SELECT car1 FROM autos_10;
#   SELECT car54 FROM autos_11;
#
# I don't think so.
#


$min_time       = 0;	# Skip queries less than $min_time
$min_rows	= 0;
$max_display    = 10;	# Truncate display if more than $max_display occurances of a query

print "\n Starting... \n";

$query_string   = '';
$time           = 0;
$new_sql        = 0;
$user 					= '';
$host 					= '';

##############################################
# Loop Through The Logfile
##############################################

while (<>) {

	# Skip Bogus Lines

	next if ( m|/.*mysqld, Version:.+ started with:| );
	next if ( m|Tcp port: \d+  Unix socket: .*mysql.sock| );
	next if ( m|Time\s+Id\s+Command\s+Argument| );
	next if ( m|administrator\s+command:| );

	# if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s/ ) {
	#if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s+Rows_examined:\s+(\d+)/ ) {

	if ( /User\SHost:\s+([\S]*)\s+\S\s+\S*\s+\[+(.*)\][\s+\S]*/ ) {

  # Try This if the above doesn't work
  # if ( /User\SHost:\s+(.*)\s\S\s+\[+(.*)\][\s+\S]*/ ) {

  	$user = $1;
		$host	= $2;
		next;

	}

	if ( /Query_time:\s+(.*)\s+Lock_time:\s+(.*)\s+Rows_examined:\s+(.*)/ ) {

		$time    = $1;
		$rows	 = $3;
		$new_sql = 1;
		# print "found $1 $3\n";
		next;

	}


	if ( /^\#/ && $query_string ) {

			if (($time > $min_time) && ($rows >= $min_rows)) {
				$orig_query = $query_string;

				$query_string =~ s/\d+/XXX/g;
				$query_string =~ s/'([^'\\]*(\\.[^'\\]*)*)'/'XXX'/g;
				$query_string =~ s/"([^"\\]*(\\.[^"\\]*)*)"/"XXX"/g;
				#$query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g;
				#$query_string =~ s/\s+/ /g;
				#$query_string =~ s/\n+/\n/g;

				$users{$query_string} = $user;
				$hosts{$query_string} = $host;
				push @{$queries{$query_string}}, $time;
				push @{$queries_rows{$query_string}}, $rows;
				$queries_tot{$query_string} += $time;
				$queries_orig{$query_string} = $orig_query;
				$query_string = '';

			}

	} else {

		if ($new_sql) {
			$query_string = $_;
			$new_sql = 0;
		} else {
			$query_string .= $_;
		}
	}

}


##############################################
# Display Output
##############################################

foreach my $query ( sort { $queries_tot{$b} <=> $queries_tot{$a} } keys %queries_tot )  {
	my $total = 0;
	my $cnt = 0;
	my @seconds = sort { $a <=> $b } @{$queries{$query}};
	my @rows    = sort { $a <=> $b } @{$queries_rows{$query}};
	($total+=$_) for @seconds;
	($cnt++) for @seconds;

	print "Database: $ENV{DATABASE}";
	print " | User: ".$users{$query};
	print " | Host: ".$hosts{$query};
	print " | NoQuery: ".@{$queries{$query}};
	print " | TotalTime: " . $total;
	print " | AverageTime: ".($total/$cnt);
	print " | TimeTaken: ";
	print @seconds > $max_display ? "$seconds[0] to $seconds[-1]" : sec_joiner(\@seconds);
	print " seconds to complete ";
	print " | RowsAnalyzed: ";
  print @rows > $max_display ? "$rows[0] - $rows[-1]": sec_joiner(\@rows);
  $orignal = $query;
	$query =~ s/\n/ /g;
	$queries_orig{$orignal} =~ s/\n/ /g;
	print " | Pattern: $query";
	print " | Orignal: ";
	print $queries_orig{$orignal}."\n";
}


sub sec_joiner {
	my ($seconds) = @_;
	$string = join(", ", @{$seconds});
	$string =~ s/, (\d+)$/ and $1/;
	return $string;
}

exit(0);
	#!/usr/bin/perl
	#
	# Ashutosh Agrawal
	# http://blog.theprogrammer.in
	#
	# 2014-11-20 v1.0
	#
	# This script is modified version of SQL parser written by
	#
	# Nathanial Hendler
	# http://retards.org/
	#
	# This perl script parses a MySQL slow_queries log file
	# ignoring all queries less than $min_time and prints
	# the required data in a signle line, for easy parsing of it
	# by any log parser.
	#
	#
	# Usage: mysql_slow_log_parser logfile
	#
	# ------------------------
	# SOMETHING TO THINK ABOUT (aka: how to read output)
	# ------------------------
	#
	# Also, it does to regex substitutions to normalize
	# the queries...
	#
	# $query_string =~ s/\d+/XXX/g;
	# $query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g;
	#
	# These replace numbers with XXX and strings found in
	# quotes with XXX so that the same select statement
	# with different WHERE clauses will be considered
	# as the same query.
	#
	# so these...
	#
	# SELECT * FROM offices WHERE office_id = 3;
	# SELECT * FROM offices WHERE office_id = 19;
	#
	# become...
	#
	# SELECT * FROM offices WHERE office_id = XXX;
	#
	#
	# And these...
	#
	# SELECT * FROM photos WHERE camera_model LIKE 'Nikon%';
	# SELECT * FROM photos WHERE camera_model LIKE '%Olympus';
	#
	# become...
	#
	# SELECT * FROM photos WHERE camera_model LIKE 'XXX';
	#
	#
	# ---------------------
	# THIS MAY BE IMPORTANT (aka: Probably Not)
	# ---------------------
	#
	# SO if you use numbers in your table names, or column
	# names, you might get some oddities, but I doubt it.
	# I mean, how different should the following queries be
	# considered?
	#
	# SELECT car1 FROM autos_10;
	# SELECT car54 FROM autos_11;
	#
	# I don't think so.
	#


	$min_time = 0; # Skip queries less than $min_time
	$min_rows = 0;
	$max_display = 10; # Truncate display if more than $max_display occurances of a query

	print "\n Starting... \n";

	$query_string = '';
	$time = 0;
	$new_sql = 0;
	$user = '';
	$host = '';

	##############################################
	# Loop Through The Logfile
	##############################################

	while (<>) {

	# Skip Bogus Lines

	next if ( m\|/.*mysqld, Version:.+ started with:\| );
	next if ( m\|Tcp port: \d+ Unix socket: .*mysql.sock\| );
	next if ( m\|Time\s+Id\s+Command\s+Argument\| );
	next if ( m\|administrator\s+command:\| );

	# if ( /Query_time:\s+(.)\s+Lock_time:\s+(.)\s/ ) {
	#if ( /Query_time:\s+(.)\s+Lock_time:\s+(.)\s+Rows_examined:\s+(\d+)/ ) {

	if ( /User\SHost:\s+([\S])\s+\S\s+\S\s+\[+(.)\][\s+\S]/ ) {

	# Try This if the above doesn't work
	# if ( /User\SHost:\s+(.)\s\S\s+\[+(.)\][\s+\S]*/ ) {

	$user = $1;
	$host = $2;
	next;

	}

	if ( /Query_time:\s+(.)\s+Lock_time:\s+(.)\s+Rows_examined:\s+(.*)/ ) {

	$time = $1;
	$rows = $3;
	$new_sql = 1;
	# print "found $1 $3\n";
	next;

	}


	if ( /^\#/ && $query_string ) {

	if (($time > $min_time) && ($rows >= $min_rows)) {
	$orig_query = $query_string;

	$query_string =~ s/\d+/XXX/g;
	$query_string =~ s/'([^'\\](\\.[^'\\])*)'/'XXX'/g;
	$query_string =~ s/"([^"\\](\\.[^"\\])*)"/"XXX"/g;
	#$query_string =~ s/([\'\"]).+?([\'\"])/$1XXX$2/g;
	#$query_string =~ s/\s+/ /g;
	#$query_string =~ s/\n+/\n/g;

	$users{$query_string} = $user;
	$hosts{$query_string} = $host;
	push @{$queries{$query_string}}, $time;
	push @{$queries_rows{$query_string}}, $rows;
	$queries_tot{$query_string} += $time;
	$queries_orig{$query_string} = $orig_query;
	$query_string = '';

	}

	} else {

	if ($new_sql) {
	$query_string = $_;
	$new_sql = 0;
	} else {
	$query_string .= $_;
	}
	}

	}


	##############################################
	# Display Output
	##############################################

	foreach my $query ( sort { $queries_tot{$b} <=> $queries_tot{$a} } keys %queries_tot ) {
	my $total = 0;
	my $cnt = 0;
	my @seconds = sort { $a <=> $b } @{$queries{$query}};
	my @rows = sort { $a <=> $b } @{$queries_rows{$query}};
	($total+=$_) for @seconds;
	($cnt++) for @seconds;

	print "Database: $ENV{DATABASE}";
	print " \| User: ".$users{$query};
	print " \| Host: ".$hosts{$query};
	print " \| NoQuery: ".@{$queries{$query}};
	print " \| TotalTime: " . $total;
	print " \| AverageTime: ".($total/$cnt);
	print " \| TimeTaken: ";
	print @seconds > $max_display ? "$seconds[0] to $seconds[-1]" : sec_joiner(\@seconds);
	print " seconds to complete ";
	print " \| RowsAnalyzed: ";
	print @rows > $max_display ? "$rows[0] - $rows[-1]": sec_joiner(\@rows);
	$orignal = $query;
	$query =~ s/\n/ /g;
	$queries_orig{$orignal} =~ s/\n/ /g;
	print " \| Pattern: $query";
	print " \| Orignal: ";
	print $queries_orig{$orignal}."\n";
	}


	sub sec_joiner {
	my ($seconds) = @_;
	$string = join(", ", @{$seconds});
	$string =~ s/, (\d+)$/ and $1/;
	return $string;
	}

	exit(0);