Skip to content

Instantly share code, notes, and snippets.

@Andersson007
Last active July 26, 2018 05:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Andersson007/084e53dfba4e5c2e61042e9192b27eb1 to your computer and use it in GitHub Desktop.
Save Andersson007/084e53dfba4e5c2e61042e9192b27eb1 to your computer and use it in GitHub Desktop.
The script collects statistics about size growth of tables, saves it to stat files, calculate stat and sends a mail notification
#!/usr/bin/python
# pg_stat_growth_tables.py - Collects statistics
# about growth of table size, saves it to stat files,
# calculate stat and sends a mail notification that contents of:
# 1) top tables by size
# 2) top tables by 1 day growth (with a difference)
# 3) similar lists for 7 days and 30 days
#
# Also you can set up growth percentage threshold
# (GROWTH_PRSTG_ALERT in Main params section below).
# If a table has grown up more than it, a mail notification
# will be sent
#
# Author: Andrey Klychkov aaklychkov@mail.ru
# Licence: Copyleft free software
# Date: 25.07.2018
# Version: 1.1
#
# Requirements: psql,
# run it as the 'postgres' user
#
# Important: You must set up desired mail server
# settings to send mail notifications
#
# Usage: ./pg_stat_growth_tables.py DBNAME STAT_DIR
#
# Notification's content example:
# -------------------------------
# ## Top 20 tables by size (in MB) ##
# -----------------------------------
# 121025 test_table1
# 57025 test_table2
# 53670 test_table3
#
# ## Top tables by growth 1 day (in MB) ##
# ----------------------------------------------
# 229 test_table4 3%
# 211 test_table1 0%
# 125 test_table1 0%
# ...
# Similar lists for 7 and 30 days stat
from __future__ import print_function
import datetime
import os
import smtplib
import socket
import subprocess
import sys
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pwd import getpwnam
########################
# PARAMETERS BLOCK #
########################
# Check a number of command-line arguments:
if len(sys.argv) != 3:
print('Syntax: ./pg_stat_growth_tables.py DBNAME STAT_DIR')
sys.exit(1)
# Main params:
# ------------
DBNAME = sys.argv[1]
STAT_DIR = sys.argv[2]+'/'
# min table size for accounting, in bytes:
MIN_TBL_SIZE = 1000000000
# line number for notifications:
NUM_LINES_REPORT = 20
# Table growth alert percentage:
GROWTH_PRSTG_ALERT = 100
# Common params:
# --------------
LOCAL_USER = 'postgres'
HOSTNAME = socket.gethostname()
NOW = datetime.datetime.now()
DATE = NOW.strftime('%Y%m%d')
TIME = NOW.strftime('%Y.%m.%d %H:%M')
# Mail params:
# ------------
SEND_MAIL = 1
SENDER = 'report.mycompany@gmail.com'
RECIPIENT = ['mailbox1@example.com', 'mailbox2@example.com']
SMTP_SRV = 'smtp.gmail.com'
SMTP_PORT = 587
SMTP_PASS = 'PasswordForSenderHere'
# Mail subject template:
mail_sbj = '%s: database %s statistic' % (HOSTNAME, DBNAME)
def send_mail(sbj, ms):
if SEND_MAIL:
msg = MIMEMultipart()
msg['Subject'] = (sbj)
msg['From'] = 'root@%s' % HOSTNAME
msg['To'] = RECIPIENT[0]
body = MIMEText(ms, 'plain')
msg.attach(body)
smtpconnect = smtplib.SMTP(SMTP_SRV, SMTP_PORT)
smtpconnect.starttls()
smtpconnect.login(SENDER, SMTP_PASS)
smtpconnect.sendmail(SENDER, RECIPIENT, msg.as_string())
smtpconnect.quit()
else:
pass
def create_dir(dr):
try:
os.mkdir(dr)
os.chown(dr, getpwnam(LOCAL_USER).pw_uid, getpwnam(LOCAL_USER).pw_gid)
os.chmod(dr, 0700)
except Exception as e:
print(e)
sys.exit(1)
def check_dir(dr):
if not os.path.isdir(dr):
create_dir(dr)
def do_shell_cmd(cmd):
ret = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
out = ret.stdout.readlines()
ret.communicate()
return out
def do_psql_cmd(dbname, cmd):
psql_cmd = 'psql %s -t -A -c "%s"' % (dbname, cmd)
result = do_shell_cmd(psql_cmd)
return result
def db_write(db_file, entry):
'''
Write size stat from own stat files
'''
db = open(db_file, 'a')
try:
db.write(entry)
finally:
db.close()
def db_read(db_file):
'''
Read size stat from own stat files
'''
if not os.path.isfile(db_file):
with open(db_file, 'w') as f:
pass
db = open(db_file, 'r')
try:
stat = db.readlines()
return stat
finally:
db.close()
def growth_per_days(days):
growth_b = 0
days_num = len(t_db)
tm = days - 1
if days_num < tm:
t_size_futher_day = int(t_db[0].split(',')[1])
else:
t_size_futher_day = int(t_db[-tm].split(',')[1])
growth_b = t_size - t_size_futher_day
if t_size_futher_day != 0:
growth_p = int(growth_b / float(t_size_futher_day) * 100)
else:
growth_p = 0
return (growth_b, growth_p)
def get_top_by_size(t_list, num):
size_info = ['## Top %s tables by size (in MB) ##' % num,
'-----------------------------------']
for t in t_list[0:num]:
info = t.split('|')
name = info[0]
size = int(float(info[1]) / 1024 / 1024)
size_info.append('%s %s' % (size, name))
top_size = '\n'.join(size_info)
return top_size
if __name__ == '__main__':
# Initialize a notification's body:
report = ['%s\n\n' % TIME] # Report initializing
# Check script data directory,
# create it if doesn't exist:
check_dir(STAT_DIR)
# Get a table list with size in byte:
get_tbl_list_with_size = "SELECT c.relname,\
pg_catalog.pg_total_relation_size(c.oid)\
FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n\
ON n.oid = c.relnamespace WHERE c.relkind IN ('r','')\
AND n.nspname <> 'pg_catalog' AND n.nspname <> 'information_schema'\
AND n.nspname !~ '^pg_toast' AND pg_catalog.pg_table_is_visible(c.oid)\
ORDER BY pg_catalog.pg_total_relation_size(c.oid) DESC;"
tbl_list_with_size = do_psql_cmd(DBNAME, get_tbl_list_with_size)
# ####################################
# For you information: rows in a table
# stat file in the STAT_DIR contents:
# list = [0 date,
# 1 cursize,
# 2 growth_b_per_1_day,
# 3 growth_%_per_1_day,
# 4 growth_b_per_7_days,
# 5 growth_%_per_7_days,
# 6 growth_b_per_30_days,
# 7 growth_%_per_30_days,
# 8 growth_avrg_per_7_days,
# 9 growth_avrg_per_30_days]
######################################
for t in tbl_list_with_size:
t_spl = t.split('|')
t_name = t_spl[0]
t_size = int(t_spl[1].rstrip('\n'))
t_db_file = STAT_DIR+t_name+'.db'
if int(t_size) >= MIN_TBL_SIZE:
t_db = db_read(t_db_file)
# If t_db_file is empty:
if len(t_db) == 0:
t_stat_entry = '%s,%s,0,0,0,0,0,0,0,0\n' % (DATE, t_size)
db_write(t_db_file, t_stat_entry)
else:
prev_t_size = int(t_db[-1].split(',')[1])
# 2 growth_b_1_day:
growth_1_day_b = t_size - prev_t_size
# 3 growth_%_1_day:
if prev_t_size != 0:
growth_1_day_p = int(
growth_1_day_b / float(prev_t_size) * 100)
if growth_1_day_p >= GROWTH_PRSTG_ALERT:
mail_sbj = "ALERT: Table '%s' growth on %s" % (
t_name, HOSTNAME)
mail_msg = "table '%s' %s %% increase:\n"\
"prev size: %s B\n"\
"growth: %s B\n"\
"current size: %s B\n" % (
t_name, growth_1_day_p, prev_t_size,
growth_1_day_b, t_size)
send_mail(mail_sbj, mail_msg)
else:
growth_1_day_p = 0
# 4, 5 growth_7_days:
growth_7_days = growth_per_days(7)
growth_7_days_b = growth_7_days[0]
growth_7_days_p = growth_7_days[1]
# 6, 7 growth_30_days:
growth_30_days = growth_per_days(30)
growth_30_days_b = growth_30_days[0]
growth_30_days_p = growth_30_days[1]
# 8 growth_avrg_7_days:
growth_7_days_avg = growth_7_days_b / 7
# 9 growth_avrg_30_days:
growth_30_days_avg = growth_30_days_b / 30
entry_cur_day_stat = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
DATE,
t_size,
growth_1_day_b,
growth_1_day_p,
growth_7_days_b,
growth_7_days_p,
growth_30_days_b,
growth_30_days_p,
growth_7_days_avg,
growth_30_days_avg)
db_write(t_db_file, entry_cur_day_stat)
else:
tbl_list_with_size.remove(t)
# Generation parts of a notification:
# 0) Top tables by size:
top_by_current_size = get_top_by_size(tbl_list_with_size, NUM_LINES_REPORT)
# 1) Top tables by day's growth:
top_by_1_day_growth_list = []
top_by_7_day_growth_list = []
top_by_30_day_growth_list = []
for t in tbl_list_with_size:
t_spl = t.split('|')
t_name = t_spl[0]
t_size = int(t_spl[1].rstrip('\n'))
t_db_file = STAT_DIR+t_name+'.db'
if int(t_size) >= MIN_TBL_SIZE:
t_db = db_read(t_db_file)
growth_1_day_mb = int(float(t_db[-1].split(',')[2]) / 1024 / 1024)
growth_7_day_mb = int(float(t_db[-1].split(',')[4]) / 1024 / 1024)
growth_30_day_mb = int(float(t_db[-1].split(',')[6]) / 1024 / 1024)
growth_1_day_p = t_db[-1].split(',')[3]
growth_7_day_p = t_db[-1].split(',')[5]
growth_30_day_p = t_db[-1].split(',')[7]
if growth_1_day_mb > 0:
entry_growth1 = "%s %s %s%%\n" % (
growth_1_day_mb, t_name, growth_1_day_p)
top_by_1_day_growth_list.append(entry_growth1)
if growth_7_day_mb > 0:
entry_growth7 = "%s %s %s%%\n" % (
growth_7_day_mb, t_name, growth_7_day_p)
top_by_7_day_growth_list.append(entry_growth7)
if growth_30_day_mb > 0:
entry_growth30 = "%s %s %s%%\n" % (
growth_30_day_mb, t_name, growth_7_day_p)
top_by_30_day_growth_list.append(entry_growth30)
# Make a notification:
top_by_1_day_growth_sorted = sorted(
top_by_1_day_growth_list, key=lambda x: int(
x[:x.index(' ')]), reverse=True)
top_by_7_day_growth_sorted = sorted(
top_by_7_day_growth_list, key=lambda x: int(
x[:x.index(' ')]), reverse=True)
top_by_30_day_growth_sorted = sorted(
top_by_30_day_growth_list, key=lambda x: int(
x[:x.index(' ')]), reverse=True)
report_1 = ''.join(top_by_1_day_growth_sorted[:NUM_LINES_REPORT-1])
report_7 = ''.join(top_by_7_day_growth_sorted[:NUM_LINES_REPORT-1])
report_30 = ''.join(top_by_30_day_growth_sorted[:NUM_LINES_REPORT-1])
delim = (46 * '-')+'\n'
completed_report = top_by_current_size+'\n'
completed_report += '\n## Top tables by growth 1 day (in MB) '
completed_report += '##\n%s%s\n' % (delim, report_1)
completed_report += '## Top tables by growth 7 days (in MB) '
completed_report += '##\n%s%s\n' % (delim, report_7)
completed_report += '## Top tables by growth 30 days (in MB) '
completed_report += '##\n%s%s' % (delim, report_30)
# Send a notification:
mail_sbj = '%s: table growth statistic information' % HOSTNAME
send_mail(mail_sbj, completed_report)
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment