Last active
April 26, 2023 20:24
-
-
Save kyle-hailey/9fd5d01c21c8b2648c426dccffa2adcf to your computer and use it in GitHub Desktop.
parse postgres explain analyze buffers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
def parse_data(lines): | |
results = [] | |
current_object = "" | |
read_value = 0 | |
shared_hit_value = 0 | |
io_time = 0 | |
gb_value = 0 | |
finalize_aggregate_found = False | |
for line in lines: | |
if "Aggregate" in line: | |
finalize_aggregate_found = True | |
elif "Buffers: shared hit=" in line and finalize_aggregate_found: | |
finalize_aggregate_found = False | |
if "shared hit=" in line: | |
shared_hit_value = int(line.split("shared hit=")[1].split()[0]) | |
print("line:" ,line) | |
print("shared_hit:", shared_hit_value) | |
if "read=" in line: | |
read_value = int(line.split("read=")[1].split()[0]) | |
print("line:" ,line) | |
print("read_value:", read_value) | |
gb_value = 8 * (read_value + shared_hit_value) / (1024 * 1024) | |
elif "I/O Timings: read=" in line: | |
if "write=" in line: | |
line_parts = line.split() | |
line = ' '.join(part for part in line_parts if not part.startswith("write=")) | |
io_time_value = float(line.split("I/O Timings: read=")[1].split()[0]) | |
io_time = io_time_value / 1000 | |
print("line:" ,line) | |
print("io_time:" ,io_time_value) | |
elif "Execution Time:" in line: | |
time_value = float(line.split("Execution Time: ")[1].split(" ms")[0]) | |
running_time = time_value / 1000 | |
mb_per_second = (gb_value * 1024) / running_time | |
try: | |
print("io_mb_per_second", line) | |
print("read_value", read_value) | |
print("io_time", io_time) | |
io_mb_per_second = (read_value * 8)/1024 / io_time | |
print("io_mb_per_second", io_mb_per_second) | |
parallel_degree = io_time / running_time | |
except ZeroDivisionError: | |
io_mb_per_second = None | |
parallel_degree = None | |
if shared_hit_value is not None: | |
results.append((current_object, gb_value, mb_per_second, read_value, running_time, io_time, io_mb_per_second, parallel_degree, shared_hit_value)) | |
elif "Seq Scan on" in line: | |
current_object = line.split("Seq Scan on ")[1].split()[0].strip() | |
print(current_object) | |
return results | |
def print_table(results): | |
print("{:>15} {:>8} {:>8} {:>8} {:>8} {:>8} {:>8} {:>8} {:>8} {:>8}".format("Object", "size GB", "Hit %","tot MB/s", "IO MB/s", "IO Blcks", "Buf Blcks", "tot time", "I/O time", "Parallel")) | |
print("-" * 140) | |
for res in results: | |
current_object, gb_value, mb_per_second, read_value, running_time, io_time, io_mb_per_second, parallel_degree, shared_hit_value = res | |
hit_percentage = 100 * shared_hit_value / (shared_hit_value + read_value) | |
#if parallel_degree is not None: | |
if parallel_degree is not None and int(parallel_degree) > 0 : | |
print("{:>15} {:8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} ".format(current_object, | |
int(gb_value), | |
int(hit_percentage), | |
int(mb_per_second), | |
int(io_mb_per_second), | |
int(read_value), | |
int(shared_hit_value), | |
int(running_time), | |
int(io_time), | |
int(parallel_degree) | |
)) | |
else: | |
print("{:>15} {:8d} {:>8} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8d} {:>8} ".format(current_object, | |
int(gb_value), | |
int(hit_percentage), | |
int(mb_per_second), | |
int(io_mb_per_second or 0), | |
int(read_value), | |
int(shared_hit_value), | |
int(running_time), | |
int(io_time), | |
"-" | |
)) | |
for res in results: | |
current_object, gb_value, mb_per_second, read_value, running_time, io_time, io_mb_per_second, parallel_degree, shared_hit_value = res | |
hit_percentage = 100 * shared_hit_value / (shared_hit_value + read_value) | |
#if parallel_degree is not None: | |
if parallel_degree is not None and int(parallel_degree) > 0 : | |
print("{:8d}\t{:>8d} ".format( | |
int(hit_percentage), | |
int(mb_per_second) | |
)) | |
else: | |
print("{:8d}\t{:>8d} ".format( | |
int(hit_percentage), | |
int(mb_per_second) | |
)) | |
def process_file(file_path): | |
try: | |
with open(file_path, 'r') as f: | |
print("Successfully opened file:", file_path) | |
data = [] | |
results = [] | |
for line in f: | |
if "QUERY PLAN" in line: | |
if data: | |
results.extend(parse_data(data)) | |
data = [] | |
else: | |
data.append(line) | |
if "Aggregate" in line: | |
print("-" * 120) | |
results.extend(parse_data(data)) | |
print_table(results) | |
except FileNotFoundError: | |
print(f"Error: Could not open file {file_path}. Please check the file name and try again.") | |
except Exception as e: | |
print(f"Error: Could not process file {file_path}. Encountered error: {e}") | |
# Get file name from command line arguments | |
file_name = None | |
if len(sys.argv) > 1: | |
file_name = sys.argv[1] | |
# If file name is provided, process the file | |
if file_name: | |
process_file(file_name) | |
else: | |
# Prompt for file name if missing from command line | |
file_name = input("Enter file name: ") | |
if file_name: | |
process_file(file_name) | |
else: | |
print("Error: Please provide a file name") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment