#!/usr/bin/env python
# Script is maintained at https://github.com/keithf4/pg_bloat_check
import argparse, csv, json, psycopg2, re, sys
from psycopg2 import extras
from random import randint
version = "2.5.0"
parser = argparse.ArgumentParser(description="Provide a bloat report for PostgreSQL tables and/or indexes. This script uses the pgstattuple contrib module which must be installed first. Note that the query to check for bloat can be extremely expensive on very large databases or those with many tables. The script stores the bloat stats in a table so they can be queried again as needed without having to re-run the entire scan. The table contains a timestamp column to show when the statistics were collected.")
args_general = parser.add_argument_group(title="General options")
args_general.add_argument('-c','--connection', default="host=", help="""Connection string for use by psycopg. Defaults to "host=" (local socket).""")
args_general.add_argument('-e', '--exclude_object_file', help="""Full path to file containing a list of objects to exclude from the report (tables and/or indexes). Each line is a CSV entry in the format: objectname,bytes_wasted,percent_wasted. All objects must be schema qualified. bytes_wasted & percent_wasted are additional filter values on top of -s, -p, and -z to exclude the given object unless these values are also exceeded. Set either of these values to zero (or leave them off entirely) to exclude the object no matter what its bloat level. Comments are allowed if the line is prepended with "#". See the README.md for clearer examples of how to use this for more fine grained filtering.""")
args_general.add_argument('-f', '--format', default="simple", choices=["simple", "json", "jsonpretty", "dict"], help="Output formats. Simple is a plaintext version suitable for any output (ex: console, pipe to email). Json provides standardized json output which may be useful if taking input into something that needs a more structured format. Json also provides more details about dead tuples, empty space & free space. jsonpretty outputs in a more human readable format. Dict is the same as json but in the form of a python dictionary. Default is simple.")
args_general.add_argument('-m', '--mode', choices=["tables", "indexes", "both"], default="both", help="""Provide bloat reports for tables, indexes or both. Index bloat is always distinct from table bloat and reported as separate entries in the report. Default is "both". NOTE: GIN indexes are not supported at this time and will be skipped.""")
args_general.add_argument('-n', '--schema', help="Comma separated list of schemas to include in the report. All other schemas will be ignored.")
args_general.add_argument('-N', '--exclude_schema', help="Comma separated list of schemas to exclude.")
args_general.add_argument('--noanalyze', action="store_true", help="To ensure accurate fillfactor statistics, an ANALYZE of each object being scanned is done before the check for bloat. Set this to skip the analyze step and reduce overall runtime; however, your bloat statistics may not be as accurate.")
args_general.add_argument('--noscan', action="store_true", help="Set this option to have the script just read from the bloat statistics table without doing a scan of any tables again.")
args_general.add_argument('-p', '--min_wasted_percentage', type=float, default=0.1, help="Minimum percentage of wasted space an object must have to be included in the report. Default and minimum value is 0.1 (DO NOT include percent sign in given value).")
args_general.add_argument('-q', '--quick', action="store_true", help="Use the pgstattuple_approx() function instead of pgstattuple() for a quicker, but possibly less accurate bloat report. Only works for tables. Sets the 'approximate' column in the bloat statistics table to True. Note this only works in PostgreSQL 9.5+.")
args_general.add_argument('--quiet', action="store_true", help="Insert the data into the bloat statistics table without providing any console output.")
args_general.add_argument('-r', '--commit_rate', type=int, default=5, help="Sets how many tables are scanned before committing inserts into the bloat statistics table. Helps avoid long running transactions when scanning large tables. Default is 5. Set to 0 to avoid committing until all tables are scanned. NOTE: The bloat table is truncated on every run unless --noscan is set.")
args_general.add_argument('--rebuild_index', action="store_true", help="Output a series of SQL commands for each index that will rebuild it with minimal impact on database locks. This does NOT run the given sql, it only provides the commands to do so manually. This does not run a new scan and will use the indexes contained in the statistics table from the last run. If a unique index was previously defined as a constraint, it will be recreated as a unique index.")
args_general.add_argument('--recovery_mode_norun', action="store_true", help="Setting this option will cause the script to check if the database it is running against is a replica (in recovery mode) and cause it to skip running. Otherwise if it is not in recovery, it will run as normal. This is useful for when you want to ensure the bloat check always runs only on the primary after failover without having to edit crontabs or similar process managers.")
args_general.add_argument('-s', '--min_size', type=int, default=1, help="Minimum size in bytes of object to scan (table or index). Default and minimum value is 1.")
args_general.add_argument('-t', '--tablename', help="Scan for bloat only on the given table. Must be schema qualified. This always gets both table and index bloat and overrides all other filter options so you always get the bloat statistics for the table no matter what they are.")
args_general.add_argument('--version', action="store_true", help="Print version of this script.")
args_general.add_argument('-z', '--min_wasted_size', type=int, default=1, help="Minimum size of wasted space in bytes. Default and minimum is 1.")
args_general.add_argument('--debug', action="store_true", help="Output additional debugging information. Overrides quiet option.")
args_setup = parser.add_argument_group(title="Setup")
args_setup.add_argument('--pgstattuple_schema', help="If pgstattuple is not installed in the default search path, use this option to designate the schema where it is installed.")
args_setup.add_argument('--bloat_schema', help="Set the schema that the bloat report table is in if it's not in the default search path. Note this option can also be set when running --create_stats_table to set which schema you want the table created.")
args_setup.add_argument('--create_stats_table', action="store_true", help="Create the required tables that the bloat report uses (bloat_stats + two child tables). Places table in default search path unless --bloat_schema is set.")
args = parser.parse_args()
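
# Illustrative examples (not part of the script's logic). The script filename,
# connection string, database, and object names below are placeholders; every
# flag shown is defined above.
#
#   # One-time setup: create the bloat_stats table and its two child tables
#   ./pg_bloat_check.py -c "host=localhost dbname=mydb" --create_stats_table
#
#   # Scan only tables of at least 10MB, reporting anything wasting more than 5%
#   ./pg_bloat_check.py -c "host=localhost dbname=mydb" -m tables -s 10485760 -p 5
#
#   # Re-display the results of the previous scan without rescanning
#   ./pg_bloat_check.py -c "host=localhost dbname=mydb" --noscan
#
# Example contents for a file passed via --exclude_object_file (objects must be
# schema-qualified; "#" lines are comments):
#
#   # never report this table, regardless of bloat
#   public.pgbench_history,0,0
#   # only report this index if it exceeds 100MB wasted OR 50% wasted
#   public.pgbench_accounts_pkey,104857600,50
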
def check_pgstattuple(conn):
    sql = "SELECT e.extversion, n.nspname FROM pg_catalog.pg_extension e JOIN pg_catalog.pg_namespace n ON e.extnamespace = n.oid WHERE extname = 'pgstattuple'"
    cur = conn.cursor()
    cur.execute(sql)
    pgstattuple_info = cur.fetchone()
    if pgstattuple_info == None:
        print("pgstattuple extension not found. Please ensure it is installed in the database this script is connecting to.")
        close_conn(conn)
        sys.exit(2)
    if args.pgstattuple_schema != None:
        if args.pgstattuple_schema != pgstattuple_info[1]:
            print("pgstattuple not found in the schema given by --pgstattuple_schema option: " + args.pgstattuple_schema + ". Found instead in: " + pgstattuple_info[1] + ".")
            close_conn(conn)
            sys.exit(2)
    return pgstattuple_info[0]

def check_recovery_status(conn):
    sql = "SELECT pg_is_in_recovery FROM pg_catalog.pg_is_in_recovery()"
    cur = conn.cursor()
    cur.execute(sql)
    is_in_recovery = cur.fetchone()[0]
    cur.close()
    return is_in_recovery

def create_conn():
    conn = psycopg2.connect(args.connection)
    return conn


def close_conn(conn):
    conn.close()

def create_list(list_type, list_items):
    split_list = []
    if list_type == "csv":
        split_list = list_items.split(',')
    elif list_type == "file":
        with open(list_items, 'r') as csvfile:
            objectreader = csv.DictReader(csvfile, fieldnames=['objectname', 'max_wasted', 'max_perc'])
            for o in objectreader:
                if not o['objectname'].startswith('#'):
                    o['objectname'] = o['objectname'].strip()
                    if o['max_wasted'] != None:
                        o['max_wasted'] = int(o['max_wasted'])
                    else:
                        o['max_wasted'] = 0
                    if o['max_perc'] != None:
                        o['max_perc'] = float(o['max_perc'])
                    else:
                        o['max_perc'] = 0
                    split_list.append(o)
    return split_list

def create_stats_table(conn):
    if args.bloat_schema != None:
        parent_sql = args.bloat_schema + "." + "bloat_stats"
        tables_sql = args.bloat_schema + "." + "bloat_tables"
        indexes_sql = args.bloat_schema + "." + "bloat_indexes"
    else:
        parent_sql = "bloat_stats"
        tables_sql = "bloat_tables"
        indexes_sql = "bloat_indexes"
    drop_sql = "DROP TABLE IF EXISTS " + parent_sql + " CASCADE"
    sql = "CREATE TABLE " + parent_sql + """ (
            oid oid NOT NULL
            , schemaname text NOT NULL
            , objectname text NOT NULL
            , objecttype text NOT NULL
            , size_bytes bigint
            , live_tuple_count bigint
            , live_tuple_percent float8
            , dead_tuple_count bigint
            , dead_tuple_size_bytes bigint
            , dead_tuple_percent float8
            , free_space_bytes bigint
            , free_percent float8
            , stats_timestamp timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP
            , approximate boolean NOT NULL DEFAULT false
            , relpages bigint NOT NULL DEFAULT 1
            , fillfactor float8 NOT NULL DEFAULT 100)"""
    cur = conn.cursor()
    if args.debug:
        print(cur.mogrify("drop_sql: " + drop_sql))
    cur.execute(drop_sql)
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    sql = "CREATE TABLE " + tables_sql + " (LIKE " + parent_sql + " INCLUDING ALL) INHERITS (" + parent_sql + ")"
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    sql = "CREATE TABLE " + indexes_sql + " (LIKE " + parent_sql + " INCLUDING ALL) INHERITS (" + parent_sql + ")"
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    sql = "COMMENT ON TABLE " + parent_sql + " IS 'Table providing raw data for table & index bloat'"
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    sql = "COMMENT ON TABLE " + tables_sql + " IS 'Table providing raw data for table bloat'"
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    sql = "COMMENT ON TABLE " + indexes_sql + " IS 'Table providing raw data for index bloat'"
    if args.debug:
        print(cur.mogrify("sql: " + sql))
    cur.execute(sql)
    conn.commit()
    cur.close()
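
# Illustrative follow-up query (not run by this script): once a scan has populated
# the statistics tables above, previous results can be inspected directly. The
# column names come from the CREATE TABLE statement in create_stats_table(); note
# that the script's own report additionally subtracts expected fillfactor space.
#
#   SELECT schemaname, objectname, objecttype, stats_timestamp,
#          pg_size_pretty(dead_tuple_size_bytes + free_space_bytes) AS dead_plus_free
#   FROM bloat_stats
#   ORDER BY (dead_tuple_size_bytes + free_space_bytes) DESC;
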
def get_bloat(conn, exclude_schema_list, include_schema_list, exclude_object_list):
    sql = ""
    commit_counter = 0
    analyzed_tables = []
    cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    sql = "SELECT current_setting('block_size')"
    cur.execute(sql)
    block_size = int(cur.fetchone()[0])
    sql_tables = """ SELECT c.oid, c.relkind, c.relname, n.nspname, 'false' as indisprimary, c.reloptions
                    FROM pg_catalog.pg_class c
                    JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
                    WHERE relkind IN ('r', 'm')
                    AND c.relpersistence <> 't' """
    sql_indexes = """ SELECT c.oid, c.relkind, c.relname, n.nspname, i.indisprimary, c.reloptions
                    FROM pg_catalog.pg_class c
                    JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
                    JOIN pg_catalog.pg_index i ON c.oid = i.indexrelid
                    JOIN pg_catalog.pg_am a ON c.relam = a.oid
                    WHERE c.relkind = 'i'
                    AND a.amname <> 'gin' AND a.amname <> 'brin' """
    cur.execute("SELECT current_setting('server_version_num')::int >= 90300")
    if cur.fetchone()[0] == True:
        sql_indexes += " AND indislive = 'true' "
    if args.tablename != None:
        sql_tables += " AND n.nspname||'.'||c.relname = %s "
        sql_indexes += " AND i.indrelid::regclass = %s::regclass "
        sql_class = sql_tables + """
            UNION
            """ + sql_indexes
        if args.debug:
            print("sql_class: " + str(cur.mogrify(sql_class, [args.tablename, args.tablename])))
        cur.execute(sql_class, [args.tablename, args.tablename])
    else:
        # IN clauses work with python tuples. lists were converted by get_bloat() call
        if include_schema_list:
            sql_tables += " AND n.nspname IN %s"
            sql_indexes += " AND n.nspname IN %s"
            filter_list = include_schema_list
        elif exclude_schema_list:
            sql_tables += " AND n.nspname NOT IN %s"
            sql_indexes += " AND n.nspname NOT IN %s"
            filter_list = exclude_schema_list
        if args.mode == 'tables':
            sql_class = sql_tables
        elif args.mode == 'indexes':
            sql_class = sql_indexes
        elif args.mode == "both":
            sql_class = sql_tables + """
                UNION
                """ + sql_indexes
        if args.mode == "both":
            if args.debug:
                print("sql_class: " + str(cur.mogrify(sql_class, (filter_list, filter_list))))
            cur.execute(sql_class, (filter_list, filter_list))
        elif args.mode == "tables" or args.mode == "indexes":
            if args.debug:
                print("sql_class: " + str(cur.mogrify(sql_class, (filter_list,))))
            cur.execute(sql_class, (filter_list,))
        else:
            cur.execute(sql)
    object_list = cur.fetchall()

    sql = "TRUNCATE "
    if args.bloat_schema:
        sql += args.bloat_schema + "."
    if args.mode == "tables" or args.mode == "both":
        sql_table = sql + "bloat_tables"
        cur.execute(sql_table)
    if args.mode == "indexes" or args.mode == "both":
        sql_index = sql + "bloat_indexes"
        cur.execute(sql_index)
    conn.commit()

    if args.quick:
        approximate = True
    else:
        approximate = False

    for o in object_list:
        if args.debug:
            print(o)
        if exclude_object_list and args.tablename == None:
            # completely skip object being scanned if it's in the excluded file list with max values equal to zero
            match_found = False
            for e in exclude_object_list:
                if (e['objectname'] == o['nspname'] + "." + o['relname']) and (e['max_wasted'] == 0) and (e['max_perc'] == 0):
                    match_found = True
            if match_found:
                continue
        if o['relkind'] == "i":
            fillfactor = 90.0
        else:
            fillfactor = 100.0
        if o['reloptions'] != None:
            reloptions_dict = dict(opt.split('=') for opt in o['reloptions'])
            if 'fillfactor' in reloptions_dict:
                fillfactor = float(reloptions_dict['fillfactor'])
        sql = """ SELECT count(*) FROM pg_catalog.pg_class WHERE oid = %s """
        cur.execute(sql, [o['oid']])
        exists = cur.fetchone()[0]
        if args.debug:
            print("Checking for table existence before scanning: " + str(exists))
        if exists == 0:
            continue  # just skip over it. object was dropped since initial list was made
        if args.noanalyze != True:
            if o['relkind'] == "r" or o['relkind'] == "m":
                quoted_table = "\"" + o['nspname'] + "\".\"" + o['relname'] + "\""
            else:
                # get table that index is a part of
                sql = """SELECT n.nspname, c.relname
                        FROM pg_catalog.pg_class c
                        JOIN pg_catalog.pg_index i ON c.oid = i.indrelid
                        JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                        WHERE indexrelid = %s"""
                cur.execute(sql, [o['oid']])
                result = cur.fetchone()
                quoted_table = "\"" + result[0] + "\".\"" + result[1] + "\""
            # maintain a list of analyzed tables so that if a table was already analyzed, it's not again (ex. multiple indexes on same table)
            if quoted_table in analyzed_tables:
                if args.debug:
                    print("Table already analyzed. Skipping...")
                pass
            else:
                sql = "ANALYZE " + quoted_table
                if args.debug:
                    print("sql: " + sql)
                cur.execute(sql)
                analyzed_tables.append(quoted_table)
        # end noanalyze check
        sql = """ SELECT c.relpages FROM pg_catalog.pg_class c
                JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
                WHERE n.nspname = %s
                AND c.relname = %s """
        cur.execute(sql, [o['nspname'], o['relname']])
        relpages = int(cur.fetchone()[0])
        if args.quick:
            sql = "SELECT table_len, approx_tuple_count AS tuple_count, approx_tuple_len AS tuple_len, approx_tuple_percent AS tuple_percent, dead_tuple_count, "
            sql += "dead_tuple_len, dead_tuple_percent, approx_free_space AS free_space, approx_free_percent AS free_percent FROM "
        else:
            sql = "SELECT table_len, tuple_count, tuple_len, tuple_percent, dead_tuple_count, dead_tuple_len, dead_tuple_percent, free_space, free_percent FROM "
        if args.pgstattuple_schema != None:
            sql += " \"" + args.pgstattuple_schema + "\"."
        if args.quick:
            sql += "pgstattuple_approx(%s::regclass) "
            if args.tablename == None:
                sql += " WHERE table_len > %s"
                sql += " AND ( (dead_tuple_len + approx_free_space) > %s OR (dead_tuple_percent + approx_free_percent) > %s )"
        else:
            sql += "pgstattuple(%s::regclass) "
            if args.tablename == None:
                sql += " WHERE table_len > %s"
                sql += " AND ( (dead_tuple_len + free_space) > %s OR (dead_tuple_percent + free_percent) > %s )"
        if args.tablename == None:
            if args.debug:
                print("sql: " + str(cur.mogrify(sql, [o['oid']
                                                      , args.min_size
                                                      , args.min_wasted_size
                                                      , args.min_wasted_percentage])))
            cur.execute(sql, [o['oid']
                              , args.min_size
                              , args.min_wasted_size
                              , args.min_wasted_percentage])
        else:
            if args.debug:
                print("sql: " + str(cur.mogrify(sql, [o['oid']])))
            cur.execute(sql, [o['oid']])
        stats = cur.fetchall()
        if args.debug:
            print(stats)
        if stats:  # completely empty objects will be zero for all stats, so this would be an empty set
            # determine byte size of fillfactor pages
            ff_relpages_size = (relpages - (fillfactor/100 * relpages)) * block_size
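            # Worked example with illustrative numbers: at the default 8192-byte block
            # size, a 1000-page object with fillfactor 90 deliberately reserves
            # (1000 - (90/100 * 1000)) * 8192 = 819200 bytes. Subtracting ff_relpages_size
            # from free space (below, and again in the report query) keeps that intentional
            # fillfactor headroom from being counted as bloat.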
            if exclude_object_list and args.tablename == None:
                # If object in the exclude list has max values, compare them to see if it should be left out of report
                wasted_space = stats[0]['dead_tuple_len'] + (stats[0]['free_space'] - ff_relpages_size)
                wasted_perc = stats[0]['dead_tuple_percent'] + (stats[0]['free_percent'] - (100-fillfactor))
                for e in exclude_object_list:
                    if (e['objectname'] == o['nspname'] + "." + o['relname']):
                        if ((e['max_wasted'] < wasted_space) or (e['max_perc'] < wasted_perc)):
                            match_found = False
                        else:
                            match_found = True
                if match_found:
                    continue
            sql = "INSERT INTO "
            if args.bloat_schema != None:
                sql += args.bloat_schema + "."
            if o['relkind'] == "r" or o['relkind'] == "m":
                sql += "bloat_tables"
                if o['relkind'] == "r":
                    objecttype = "table"
                else:
                    objecttype = "materialized_view"
            elif o['relkind'] == "i":
                sql += "bloat_indexes"
                if o['indisprimary'] == True:
                    objecttype = "index_pk"
                else:
                    objecttype = "index"
            sql += """ (oid
                    , schemaname
                    , objectname
                    , objecttype
                    , size_bytes
                    , live_tuple_count
                    , live_tuple_percent
                    , dead_tuple_count
                    , dead_tuple_size_bytes
                    , dead_tuple_percent
                    , free_space_bytes
                    , free_percent
                    , approximate
                    , relpages
                    , fillfactor)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """
            if args.debug:
                print("insert sql: " + str(cur.mogrify(sql, [o['oid']
                                                             , o['nspname']
                                                             , o['relname']
                                                             , objecttype
                                                             , stats[0]['table_len']
                                                             , stats[0]['tuple_count']
                                                             , stats[0]['tuple_percent']
                                                             , stats[0]['dead_tuple_count']
                                                             , stats[0]['dead_tuple_len']
                                                             , stats[0]['dead_tuple_percent']
                                                             , stats[0]['free_space']
                                                             , stats[0]['free_percent']
                                                             , approximate
                                                             , relpages
                                                             , fillfactor
                                                             ])))
            cur.execute(sql, [o['oid']
                              , o['nspname']
                              , o['relname']
                              , objecttype
                              , stats[0]['table_len']
                              , stats[0]['tuple_count']
                              , stats[0]['tuple_percent']
                              , stats[0]['dead_tuple_count']
                              , stats[0]['dead_tuple_len']
                              , stats[0]['dead_tuple_percent']
                              , stats[0]['free_space']
                              , stats[0]['free_percent']
                              , approximate
                              , relpages
                              , fillfactor
                              ])
            commit_counter += 1
            if args.commit_rate > 0 and (commit_counter % args.commit_rate == 0):
                if args.debug:
                    print("Batch committed. Object scanned count: " + str(commit_counter))
                conn.commit()
    conn.commit()
    cur.close()
## end get_bloat()

def print_report(result_list):
    if args.format == "simple":
        for r in result_list:
            print(r)
    else:
        print(result_list)


def print_version():
    print("Version: " + version)

def rebuild_index(conn):
    if args.bloat_schema != None:
        index_table = args.bloat_schema + "." + "bloat_indexes"
    else:
        index_table = "bloat_indexes"
    sql = "SELECT oid, schemaname, objectname, objecttype FROM " + index_table + " ORDER BY 2,3,4"
    cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    cur.execute(sql)
    result = cur.fetchall()
    if result == []:
        print("Bloat statistics table contains no indexes.")
        close_conn(conn)
        sys.exit(0)
    for i in result:
        temp_index_name = "pgbloatcheck_rebuild_" + str(randint(1000,9999))
        quoted_index = "\"" + i['schemaname'] + "\".\"" + i['objectname'] + "\""
        # get table index is in
        sql = """SELECT n.nspname, c.relname, t.spcname
                FROM pg_catalog.pg_class c
                JOIN pg_catalog.pg_index i ON c.oid = i.indrelid
                JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                LEFT OUTER JOIN pg_catalog.pg_tablespace t ON c.reltablespace = t.oid
                WHERE indexrelid = %s"""
        cur.execute(sql, [i['oid']])
        table_info = cur.fetchone()
        quoted_table = "\"" + table_info[0] + "\".\"" + table_info[1] + "\""
        if table_info[2] != None:
            quoted_tablespace = "\"" + table_info[2] + "\""
        else:
            quoted_tablespace = None
        # create temp index definition
        sql = "SELECT pg_get_indexdef(%s::regclass)"
        cur.execute(sql, ["\"" + i['schemaname'] + "\".\"" + i['objectname'] + "\""])
        index_def = cur.fetchone()[0]
        index_def = re.sub(r' INDEX', ' INDEX CONCURRENTLY', index_def, 1)
        index_def = index_def.replace(i['objectname'], temp_index_name, 1)
        if quoted_tablespace != None:
            index_def += " TABLESPACE " + quoted_tablespace
        index_def += ";"
        # check if index is clustered
        sql = "SELECT indisclustered FROM pg_catalog.pg_index WHERE indexrelid = %s"
        cur.execute(sql, [i['oid']])
        indisclustered = cur.fetchone()[0]
        # start output
        print("")
        print(index_def)
        if indisclustered == True:
            print("ALTER TABLE " + quoted_table + " CLUSTER ON " + temp_index_name + ";")
        # analyze table
        print("ANALYZE " + quoted_table + ";")
        if i['objecttype'] == "index":
            # drop old index or unique constraint
            sql = "SELECT count(*) FROM pg_catalog.pg_constraint WHERE conindid = %s"
            cur.execute(sql, [i['oid']])
            isconstraint = int(cur.fetchone()[0])
            if isconstraint == 1:
                print("ALTER TABLE " + quoted_table + " DROP CONSTRAINT " + "\"" + i['objectname'] + "\";")
            else:
                print("DROP INDEX CONCURRENTLY " + quoted_index + ";")
            # analyze again
            print("ANALYZE " + quoted_table + ";")
            # rename temp index to original name
            print("ALTER INDEX \"" + i['schemaname'] + "\"." + temp_index_name + " RENAME TO \"" + i['objectname'] + "\";")
        elif i['objecttype'] == "index_pk":
            print("ALTER TABLE " + quoted_table + " DROP CONSTRAINT " + "\"" + i['objectname'] + "\";")
            # analyze again
            print("ANALYZE " + quoted_table + ";")
            print("ALTER TABLE " + quoted_table + " ADD CONSTRAINT \"" + i['objectname'] + "\" PRIMARY KEY USING INDEX " + temp_index_name + ";")
            # analyze again
            print("ANALYZE " + quoted_table + ";")
        if indisclustered == True:
            print("")
            print("-- WARNING: The following statement will exclusively lock the table for the duration of its runtime.")
            print("-- Uncomment it or manually run it to recluster the table on the newly created index.")
            print("-- CLUSTER " + quoted_table + ";")
            print("")
# end rebuild_index
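
# Illustrative --rebuild_index output for a hypothetical non-clustered,
# non-constraint index "public"."foo_idx" on table "public"."foo" (the index
# definition and the random suffix will differ; nothing below is executed by
# the script itself):
#
#   CREATE INDEX CONCURRENTLY pgbloatcheck_rebuild_1234 ON public.foo USING btree (some_col);
#   ANALYZE "public"."foo";
#   DROP INDEX CONCURRENTLY "public"."foo_idx";
#   ANALYZE "public"."foo";
#   ALTER INDEX "public".pgbloatcheck_rebuild_1234 RENAME TO "foo_idx";
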
if __name__ == "__main__":
if args.version:
print_version()
sys.exit(0)
if args.schema != None and args.exclude_schema != None:
print("--schema and --exclude_schema are exclusive options and cannot be set together")
sys.exit(2)
conn = create_conn()
if args.recovery_mode_norun == True:
is_in_recovery = check_recovery_status(conn)
if is_in_recovery == True:
if args.debug:
print("Recovery mode check found instance in recovery. Skipping run.")
close_conn(conn)
sys.exit(0)
else:
if args.debug:
print("Recovery mode check found primary instance. Running as normal.")
pgstattuple_version = float(check_pgstattuple(conn))
if args.quick:
if pgstattuple_version < 1.3:
print("--quick option requires pgstattuple version 1.3 or greater (PostgreSQL 9.5)")
close_conn(conn)
sys.exit(2)
if (args.mode == "indexes" or args.mode == "both"):
print("--quick option can only be used with --mode=tables")
close_conn(conn)
sys.exit(2)
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
if args.create_stats_table:
create_stats_table(conn)
close_conn(conn)
sys.exit(0)
sql = "SELECT tablename FROM pg_catalog.pg_tables WHERE tablename = %s"
if args.bloat_schema != None:
sql += " AND schemaname = %s"
cur.execute(sql, ['bloat_stats', args.bloat_schema])
else:
cur.execute(sql, ['bloat_stats'])
table_exists = cur.fetchone()
if table_exists == None:
print("Required statistics table does not exist. Please run --create_stats_table first before running a bloat scan.")
close_conn(conn)
sys.exit(2)
if args.rebuild_index:
rebuild_index(conn)
close_conn(conn)
sys.exit(0)
if args.exclude_schema != None:
exclude_schema_list = create_list('csv', args.exclude_schema)
else:
exclude_schema_list = []
exclude_schema_list.append('pg_toast')
if args.schema != None:
include_schema_list = create_list('csv', args.schema)
else:
include_schema_list = []
if args.exclude_object_file != None:
exclude_object_list = create_list('file', args.exclude_object_file)
else:
exclude_object_list = []
if args.noscan == False:
get_bloat(conn, tuple(exclude_schema_list), tuple(include_schema_list), exclude_object_list)
# Final commit to ensure transaction that inserted stats data closes
conn.commit()
counter = 1
result_list = []
if args.quiet == False or args.debug == True:
simple_cols = """schemaname
, objectname
, objecttype
, CASE
WHEN (dead_tuple_percent + (free_percent - (100-fillfactor))) < 0 THEN 0
ELSE (dead_tuple_percent + (free_percent - (100-fillfactor)))
END AS total_waste_percent
, CASE
WHEN (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) < 0 THEN '0 bytes'
ELSE pg_size_pretty((dead_tuple_size_bytes + (free_space_bytes - ((relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) )::bigint)
END AS total_wasted_size"""
dict_cols = "oid, schemaname, objectname, objecttype, size_bytes, live_tuple_count, live_tuple_percent, dead_tuple_count, dead_tuple_size_bytes, dead_tuple_percent, free_space_bytes, free_percent, approximate, relpages, fillfactor"
if args.format == "simple":
sql = "SELECT " + simple_cols + " FROM "
elif args.format == "dict" or args.format=="json" or args.format=="jsonpretty":
sql = "SELECT " + dict_cols + " FROM "
else:
print("Unsupported --format given. Use 'simple', 'dict' 'json', or 'jsonpretty'.")
close_conn(conn)
sys.exit(2)
if args.bloat_schema != None:
sql += args.bloat_schema + "."
if args.mode == "tables":
sql += "bloat_tables"
elif args.mode == "indexes":
sql += "bloat_indexes"
else:
sql += "bloat_stats"
sql += " WHERE (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) > %s "
sql += " AND (dead_tuple_percent + (free_percent - (100-fillfactor))) > %s "
sql += " ORDER BY (dead_tuple_size_bytes + (free_space_bytes - ((relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) )) DESC"
cur.execute(sql, [args.min_wasted_size, args.min_wasted_percentage])
result = cur.fetchall()
for r in result:
if args.format == "simple":
justify_space = 100 - len(str(counter) + ". " + r['schemaname'] + "." + r['objectname'] + "(" + str(r['total_waste_percent']) + "%)" + r['total_wasted_size'] + " wasted")
result_list.append(str(counter) + ". " + r['schemaname'] + "." + r['objectname'] + "."*justify_space + "(" + str(r['total_waste_percent']) + "%) " + r['total_wasted_size'] + " wasted")
counter += 1
elif args.format == "dict" or args.format == "json" or args.format == "jsonpretty":
result_dict = dict([ ('oid', r['oid'])
, ('schemaname', r['schemaname'])
, ('objectname', r['objectname'])
, ('objecttype', r['objecttype'])
, ('size_bytes', int(r['size_bytes']))
, ('live_tuple_count', int(r['live_tuple_count']))
, ('live_tuple_percent', str(r['live_tuple_percent'])+"%" )
, ('dead_tuple_count', int(r['dead_tuple_count']))
, ('dead_tuple_size_bytes', int(r['dead_tuple_size_bytes']))
, ('dead_tuple_percent', str(r['dead_tuple_percent'])+"%" )
, ('free_space_bytes', int(r['free_space_bytes']))
, ('free_percent', str(r['free_percent'])+"%" )
, ('approximate', r['approximate'])
])
result_list.append(result_dict)
if args.format == "json":
result_list = json.dumps(result_list)
elif args.format == "jsonpretty":
result_list = json.dumps(result_list, indent=4, separators=(',',': '))
if len(result_list) >= 1:
print_report(result_list)
else:
if args.quiet == False:
print("No bloat found for given parameters")
close_conn(conn)