Python script for counting slow query log entries and sorting them.
#
# Count slow query log entries and sort them.
# Results are written to stdout.
#
# Log example:
"""
/rdsdbbin/oscar/bin/mysqld, Version: 5.6.10-log (MySQL Community Server (GPL)). started with:
Tcp port: 3306  Unix socket: /tmp/mysql.sock
Time                 Id Command    Argument
# Time: 201006  6:05:01
# User@Host: test[test] @ [10.0.1.2]  Id: 11198216
# Query_time: 1.638045  Lock_time: 0.000057  Rows_sent: 1  Rows_examined: 837061
use test_database;
SET timestamp=1601964301;
SELECT ...;
# Time: 201006  6:05:02
# User@Host: test[test] @ [10.0.1.2]  Id: 11198216
# Query_time: 0.676726  Lock_time: 0.000215  Rows_sent: 0  Rows_examined: 325854
SET timestamp=1601964302;
SELECT ...;
"""
import textwrap
from datetime import datetime

from SqlLogger import counterSlowQuery

#
# Config
#
ranking  = 100
hostname = 'example-db-01-01'
filename = 'example-mysql-slowquery_20210701-2100.log'

#
# Read log
#
counters = {}
counters[hostname] = counterSlowQuery(hostname)

with open(filename, "r") as f:
    log = f.read()
counters[hostname].putLog(log)
#
# Sort and cut ranking
#
all_queries = []
for counter in counters.values():
    all_queries.extend(counter.getQueries())

if not all_queries:
    print("No slow queries found.")
    raise SystemExit

rank_queries = sorted(all_queries, key=lambda x: x['total_query_time'], reverse=True)
rank_count   = len(rank_queries)
#
# Message
#
message = f"""
Reporting slow query log.
Found {rank_count} unique slow queries.
Sorted by total query time; showing the top {ranking} at most.
"""
number = 0
datetime_format = "%Y-%m-%dT%H:%M:%SZ"
for q in rank_queries[:ranking]:
    number += 1
    oldest_datetime = datetime.fromtimestamp(q['oldest_unix_time']).strftime(datetime_format)
    latest_datetime = datetime.fromtimestamp(q['latest_unix_time']).strftime(datetime_format)

    message += textwrap.dedent(f"""
        #
        # {number}
        #
        Host        : {q['host']}
        Database    : {q['database']}
        Found Time  : {oldest_datetime} - {latest_datetime}
        Query Count : {q['count']}
        Total Time  : {round(q['total_query_time'], 1)}
        Query Time  : {round(q['min_query_time'], 2)} - {round(q['max_query_time'], 2)}
        Lock Time   : {round(q['min_lock_time'], 2)} - {round(q['max_lock_time'], 2)}
        Target Rows : {q['min_rows']} - {q['max_rows']}
        Query       :
        {q['unique_query']}
    """)

print(message)
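The counters dict and the aggregation loop above are already shaped for more than one host, even though the config hardcodes a single host and file. Below is a minimal sketch of a multi-host variant of the "Read log" step, assuming one log file per host; the hostnames and paths are hypothetical.

# Hypothetical multi-host variant of the "Read log" step: one counter per
# host, merged unchanged by the existing sort/ranking code above.
sources = {
    'example-db-01-01': 'example-mysql-slowquery_20210701-2100.log',
    'example-db-01-02': 'example-mysql-slowquery_20210701-2100-02.log',  # hypothetical path
}
counters = {}
for host, path in sources.items():
    counters[host] = counterSlowQuery(host)
    with open(path, "r") as f:
        counters[host].putLog(f.read())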
SqlLogger.py:
#
# Normalize slow query log entries into unique queries and aggregate their stats.
#
import re
import hashlib


class counterSlowQuery:
    # Per-entry parser state, reset at the start of each log entry.
    init_keep_info = {
        'unix_time' : None,
        'src_ip'    : None,
        'query_time': None,
        'lock_time' : None,
        'rows'      : None,
        'query'     : "",
    }

    def __init__(self, host):
        self.host              = host
        self.queries           = {}
        self.keep_info         = {}
        self.keep_info.update(self.init_keep_info)
        self.next_is_query     = False
        self.selected_database = None
    def getQueries(self):
        return list(self.queries.values())

    def putLog(self, log):
        # Feed a whole log file; each line advances the parser state machine.
        for line in log.splitlines():
            self.sortLog(line)
    def sortLog(self, line):
        if re.match(r"[/T]", line):
            # Skip server banner and header lines ("/rdsdbbin/...", "Tcp port:", "Time ...").
            return
        elif re.match("# Th", line):
            # Skip other comment headers that carry no information we need.
            return
        elif re.match("# F", line):
            return
        elif re.match("# Time", line):
            # A new entry starts: reset the per-entry state.
            self.next_is_query = False
            self.keep_info.update(self.init_keep_info)
        elif re.match("# User", line):
            match = re.match(r".*@.*\[([0-9.]+)\].*", line)
            if not match: return
            self.keep_info['src_ip'] = match.group(1)
        elif re.match("# Query", line):
            regex = r"^.*Query_time: *([0-9.]+) *Lock_time: *([0-9.]+) *.*Rows_examined: *([0-9]+).*$"
            match = re.match(regex, line, flags=re.IGNORECASE)
            if not match: return
            self.keep_info['query_time'] = float(match.group(1))
            self.keep_info['lock_time']  = float(match.group(2))
            self.keep_info['rows']       = int(match.group(3))
        elif re.match(r"^use ", line, flags=re.IGNORECASE):
            match = re.match(r"^use +([^;]+);?$", line, flags=re.IGNORECASE)
            if not match: return
            self.selected_database = match.group(1)
        elif re.match("SET time", line, flags=re.IGNORECASE):
            if self.keep_info['query_time'] is None:
                self.next_is_query = False
                self.keep_info.update(self.init_keep_info)
                print("Comment header not found before SET timestamp line")
            else:
                match = re.match(r"^SET timestamp=([0-9]+);?$", line, flags=re.IGNORECASE)
                if not match: return
                self.next_is_query = True
                self.keep_info['unix_time'] = int(match.group(1))
        elif self.next_is_query is True:
            self.keep_info['query'] += line
            if not line: return
            # A query can span multiple lines; it ends when a line ends with ";".
            # (Slicing line[-2:] avoids an IndexError on one-character lines.)
            if ";" in line[-2:]:
                self.saveQueryInfo()
                self.keep_info.update(self.init_keep_info)
            else:
                self.keep_info['query'] += " "
        else:
            print("Found invalid line (%s)" % line)
            print("Keep info is")
            print(self.keep_info)
    def saveQueryInfo(self):
        # Use explicit None checks: Lock_time and Rows_examined can legitimately be 0.
        if (self.keep_info['unix_time'] is None or self.keep_info['query_time'] is None
                or self.keep_info['lock_time'] is None or self.keep_info['rows'] is None):
            return

        unique_query = self.uniqueQuery(self.keep_info['query'])
        query_hash   = hashlib.md5(unique_query.encode('utf-8')).hexdigest()[0:8]

        if query_hash not in self.queries:
            self.queries[query_hash] = {
                'hash'            : query_hash,
                'count'           : 0,
                'host'            : self.host,
                'src_ip'          : self.keep_info['src_ip'],
                'database'        : self.selected_database,
                'unique_query'    : unique_query,
                'oldest_unix_time': self.keep_info['unix_time'],
                'latest_unix_time': self.keep_info['unix_time'],
                'min_query_time'  : self.keep_info['query_time'],
                'max_query_time'  : self.keep_info['query_time'],
                'min_lock_time'   : self.keep_info['lock_time'],
                'max_lock_time'   : self.keep_info['lock_time'],
                'min_rows'        : self.keep_info['rows'],
                'max_rows'        : self.keep_info['rows'],
                'total_query_time': 0.0,
            }

        q = self.queries[query_hash]
        q['count'] += 1
        if q['src_ip'] is None and self.keep_info['src_ip'] is not None:
            q['src_ip'] = self.keep_info['src_ip']
        if q['database'] is None and self.selected_database is not None:
            q['database'] = self.selected_database
        q['total_query_time'] += self.keep_info['query_time']

        if self.keep_info['unix_time'] < q['oldest_unix_time']:
            q['oldest_unix_time'] = self.keep_info['unix_time']
        elif self.keep_info['unix_time'] > q['latest_unix_time']:
            q['latest_unix_time'] = self.keep_info['unix_time']
        if self.keep_info['query_time'] < q['min_query_time']:
            q['min_query_time'] = self.keep_info['query_time']
        elif self.keep_info['query_time'] > q['max_query_time']:
            q['max_query_time'] = self.keep_info['query_time']
        if self.keep_info['lock_time'] < q['min_lock_time']:
            q['min_lock_time'] = self.keep_info['lock_time']
        elif self.keep_info['lock_time'] > q['max_lock_time']:
            q['max_lock_time'] = self.keep_info['lock_time']
        if self.keep_info['rows'] < q['min_rows']:
            q['min_rows'] = self.keep_info['rows']
        elif self.keep_info['rows'] > q['max_rows']:
            q['max_rows'] = self.keep_info['rows']
    def uniqueQuery(self, query):
        query = query.rstrip(';')

        # Replace Japanese text with a placeholder before normalizing
        # (doing it after the value masking below would leave partial matches).
        # Full-width digits and punctuation count as Japanese text here;
        # ASCII digits must survive for the value masking below to work.
        ja_regex = r'[０-９ぁ-んァ-ン一-龥，．、。・ー]+'
        query = re.sub(ja_regex, '[ja]', query)

        # Normalize the query:
        # - Collapse runs of spaces into a single space.
        # - Replace literal values with "?".
        #   > Datetimes are converted to !TIMESTAMP! / !DATE! first so they can
        #     be told apart from plain strings, then converted back to a mask.
        #   > List forms such as IN (...) and VALUES (...) collapse to ",...".
        replacements = [
            [r" {2,}", " "],
            [r"'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'", "!TIMESTAMP!"],
            [r"'\d{4}-\d{2}-\d{2}'", "!DATE!"],
            [r"'[^']*'", "'?'"],
            [r"!TIMESTAMP!", "'????-??-?? ??:??:??'"],
            [r"!DATE!", "'????-??-??'"],
            [r"([ (,=])-*[\d.]+", r'\1?'],
            [r" (IN) \([^)]+\)", r' \1 (?,...)'],
            [r" (VALUES) (\([^)]+\),).*", r' \1 \2...'],
        ]
        for pattern, replacement in replacements:
            query = re.sub(pattern, replacement, query, flags=re.IGNORECASE)
        return query
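For reference, a short demonstration of what uniqueQuery's normalization produces. The sample query is made up, and the expected output assumes the regex fixes noted in the comments above:

# Standalone demonstration of the normalization (sample query is made up).
if __name__ == '__main__':
    counter = counterSlowQuery('demo-host')
    sample = ("SELECT * FROM t WHERE id = 123 AND name = 'abc' "
              "AND created > '2021-07-01 12:00:00' AND col IN (1, 2, 3);")
    print(counter.uniqueQuery(sample))
    # Expected output (approximately):
    # SELECT * FROM t WHERE id = ? AND name = '?' AND created > '????-??-?? ??:??:??' AND col IN (?,...)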