Last active
July 26, 2018 05:34
-
-
Save Andersson007/084e53dfba4e5c2e61042e9192b27eb1 to your computer and use it in GitHub Desktop.
The script collects statistics about size growth of tables, saves it to stat files, calculate stat and sends a mail notification
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# pg_stat_growth_tables.py - Collects statistics | |
# about growth of table size, saves it to stat files, | |
# calculate stat and sends a mail notification that contents of: | |
# 1) top tables by size | |
# 2) top tables by 1 day growth (with a difference) | |
# 3) similar lists for 7 days and 30 days | |
# | |
# Also you can set up growth percentage threshold | |
# (GROWTH_PRSTG_ALERT in Main params section below). | |
# If a table has grown up more than it, a mail notification | |
# will be sent | |
# | |
# Author: Andrey Klychkov aaklychkov@mail.ru | |
# Licence: Copyleft free software | |
# Date: 25.07.2018 | |
# Version: 1.1 | |
# | |
# Requirements: psql, | |
# run it as the 'postgres' user | |
# | |
# Important: You must set up desired mail server | |
# settings to send mail notifications | |
# | |
# Usage: ./pg_stat_growth_tables.py DBNAME STAT_DIR | |
# | |
# Notification's content example: | |
# ------------------------------- | |
# ## Top 20 tables by size (in MB) ## | |
# ----------------------------------- | |
# 121025 test_table1 | |
# 57025 test_table2 | |
# 53670 test_table3 | |
# | |
# ## Top tables by growth 1 day (in MB) ## | |
# ---------------------------------------------- | |
# 229 test_table4 3% | |
# 211 test_table1 0% | |
# 125 test_table1 0% | |
# ... | |
# Similar lists for 7 and 30 days stat | |
from __future__ import print_function | |
import datetime | |
import os | |
import smtplib | |
import socket | |
import subprocess | |
import sys | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
from pwd import getpwnam | |
######################## | |
# PARAMETERS BLOCK # | |
######################## | |
# Check a number of command-line arguments: | |
if len(sys.argv) != 3: | |
print('Syntax: ./pg_stat_growth_tables.py DBNAME STAT_DIR') | |
sys.exit(1) | |
# Main params: | |
# ------------ | |
DBNAME = sys.argv[1] | |
STAT_DIR = sys.argv[2]+'/' | |
# min table size for accounting, in bytes: | |
MIN_TBL_SIZE = 1000000000 | |
# line number for notifications: | |
NUM_LINES_REPORT = 20 | |
# Table growth alert percentage: | |
GROWTH_PRSTG_ALERT = 100 | |
# Common params: | |
# -------------- | |
LOCAL_USER = 'postgres' | |
HOSTNAME = socket.gethostname() | |
NOW = datetime.datetime.now() | |
DATE = NOW.strftime('%Y%m%d') | |
TIME = NOW.strftime('%Y.%m.%d %H:%M') | |
# Mail params: | |
# ------------ | |
SEND_MAIL = 1 | |
SENDER = 'report.mycompany@gmail.com' | |
RECIPIENT = ['mailbox1@example.com', 'mailbox2@example.com'] | |
SMTP_SRV = 'smtp.gmail.com' | |
SMTP_PORT = 587 | |
SMTP_PASS = 'PasswordForSenderHere' | |
# Mail subject template: | |
mail_sbj = '%s: database %s statistic' % (HOSTNAME, DBNAME) | |
def send_mail(sbj, ms): | |
if SEND_MAIL: | |
msg = MIMEMultipart() | |
msg['Subject'] = (sbj) | |
msg['From'] = 'root@%s' % HOSTNAME | |
msg['To'] = RECIPIENT[0] | |
body = MIMEText(ms, 'plain') | |
msg.attach(body) | |
smtpconnect = smtplib.SMTP(SMTP_SRV, SMTP_PORT) | |
smtpconnect.starttls() | |
smtpconnect.login(SENDER, SMTP_PASS) | |
smtpconnect.sendmail(SENDER, RECIPIENT, msg.as_string()) | |
smtpconnect.quit() | |
else: | |
pass | |
def create_dir(dr): | |
try: | |
os.mkdir(dr) | |
os.chown(dr, getpwnam(LOCAL_USER).pw_uid, getpwnam(LOCAL_USER).pw_gid) | |
os.chmod(dr, 0700) | |
except Exception as e: | |
print(e) | |
sys.exit(1) | |
def check_dir(dr): | |
if not os.path.isdir(dr): | |
create_dir(dr) | |
def do_shell_cmd(cmd): | |
ret = subprocess.Popen(cmd, shell=True, | |
stdout=subprocess.PIPE, | |
stderr=subprocess.STDOUT) | |
out = ret.stdout.readlines() | |
ret.communicate() | |
return out | |
def do_psql_cmd(dbname, cmd): | |
psql_cmd = 'psql %s -t -A -c "%s"' % (dbname, cmd) | |
result = do_shell_cmd(psql_cmd) | |
return result | |
def db_write(db_file, entry): | |
''' | |
Write size stat from own stat files | |
''' | |
db = open(db_file, 'a') | |
try: | |
db.write(entry) | |
finally: | |
db.close() | |
def db_read(db_file): | |
''' | |
Read size stat from own stat files | |
''' | |
if not os.path.isfile(db_file): | |
with open(db_file, 'w') as f: | |
pass | |
db = open(db_file, 'r') | |
try: | |
stat = db.readlines() | |
return stat | |
finally: | |
db.close() | |
def growth_per_days(days): | |
growth_b = 0 | |
days_num = len(t_db) | |
tm = days - 1 | |
if days_num < tm: | |
t_size_futher_day = int(t_db[0].split(',')[1]) | |
else: | |
t_size_futher_day = int(t_db[-tm].split(',')[1]) | |
growth_b = t_size - t_size_futher_day | |
if t_size_futher_day != 0: | |
growth_p = int(growth_b / float(t_size_futher_day) * 100) | |
else: | |
growth_p = 0 | |
return (growth_b, growth_p) | |
def get_top_by_size(t_list, num): | |
size_info = ['## Top %s tables by size (in MB) ##' % num, | |
'-----------------------------------'] | |
for t in t_list[0:num]: | |
info = t.split('|') | |
name = info[0] | |
size = int(float(info[1]) / 1024 / 1024) | |
size_info.append('%s %s' % (size, name)) | |
top_size = '\n'.join(size_info) | |
return top_size | |
if __name__ == '__main__': | |
# Initialize a notification's body: | |
report = ['%s\n\n' % TIME] # Report initializing | |
# Check script data directory, | |
# create it if doesn't exist: | |
check_dir(STAT_DIR) | |
# Get a table list with size in byte: | |
get_tbl_list_with_size = "SELECT c.relname,\ | |
pg_catalog.pg_total_relation_size(c.oid)\ | |
FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n\ | |
ON n.oid = c.relnamespace WHERE c.relkind IN ('r','')\ | |
AND n.nspname <> 'pg_catalog' AND n.nspname <> 'information_schema'\ | |
AND n.nspname !~ '^pg_toast' AND pg_catalog.pg_table_is_visible(c.oid)\ | |
ORDER BY pg_catalog.pg_total_relation_size(c.oid) DESC;" | |
tbl_list_with_size = do_psql_cmd(DBNAME, get_tbl_list_with_size) | |
# #################################### | |
# For you information: rows in a table | |
# stat file in the STAT_DIR contents: | |
# list = [0 date, | |
# 1 cursize, | |
# 2 growth_b_per_1_day, | |
# 3 growth_%_per_1_day, | |
# 4 growth_b_per_7_days, | |
# 5 growth_%_per_7_days, | |
# 6 growth_b_per_30_days, | |
# 7 growth_%_per_30_days, | |
# 8 growth_avrg_per_7_days, | |
# 9 growth_avrg_per_30_days] | |
###################################### | |
for t in tbl_list_with_size: | |
t_spl = t.split('|') | |
t_name = t_spl[0] | |
t_size = int(t_spl[1].rstrip('\n')) | |
t_db_file = STAT_DIR+t_name+'.db' | |
if int(t_size) >= MIN_TBL_SIZE: | |
t_db = db_read(t_db_file) | |
# If t_db_file is empty: | |
if len(t_db) == 0: | |
t_stat_entry = '%s,%s,0,0,0,0,0,0,0,0\n' % (DATE, t_size) | |
db_write(t_db_file, t_stat_entry) | |
else: | |
prev_t_size = int(t_db[-1].split(',')[1]) | |
# 2 growth_b_1_day: | |
growth_1_day_b = t_size - prev_t_size | |
# 3 growth_%_1_day: | |
if prev_t_size != 0: | |
growth_1_day_p = int( | |
growth_1_day_b / float(prev_t_size) * 100) | |
if growth_1_day_p >= GROWTH_PRSTG_ALERT: | |
mail_sbj = "ALERT: Table '%s' growth on %s" % ( | |
t_name, HOSTNAME) | |
mail_msg = "table '%s' %s %% increase:\n"\ | |
"prev size: %s B\n"\ | |
"growth: %s B\n"\ | |
"current size: %s B\n" % ( | |
t_name, growth_1_day_p, prev_t_size, | |
growth_1_day_b, t_size) | |
send_mail(mail_sbj, mail_msg) | |
else: | |
growth_1_day_p = 0 | |
# 4, 5 growth_7_days: | |
growth_7_days = growth_per_days(7) | |
growth_7_days_b = growth_7_days[0] | |
growth_7_days_p = growth_7_days[1] | |
# 6, 7 growth_30_days: | |
growth_30_days = growth_per_days(30) | |
growth_30_days_b = growth_30_days[0] | |
growth_30_days_p = growth_30_days[1] | |
# 8 growth_avrg_7_days: | |
growth_7_days_avg = growth_7_days_b / 7 | |
# 9 growth_avrg_30_days: | |
growth_30_days_avg = growth_30_days_b / 30 | |
entry_cur_day_stat = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % ( | |
DATE, | |
t_size, | |
growth_1_day_b, | |
growth_1_day_p, | |
growth_7_days_b, | |
growth_7_days_p, | |
growth_30_days_b, | |
growth_30_days_p, | |
growth_7_days_avg, | |
growth_30_days_avg) | |
db_write(t_db_file, entry_cur_day_stat) | |
else: | |
tbl_list_with_size.remove(t) | |
# Generation parts of a notification: | |
# 0) Top tables by size: | |
top_by_current_size = get_top_by_size(tbl_list_with_size, NUM_LINES_REPORT) | |
# 1) Top tables by day's growth: | |
top_by_1_day_growth_list = [] | |
top_by_7_day_growth_list = [] | |
top_by_30_day_growth_list = [] | |
for t in tbl_list_with_size: | |
t_spl = t.split('|') | |
t_name = t_spl[0] | |
t_size = int(t_spl[1].rstrip('\n')) | |
t_db_file = STAT_DIR+t_name+'.db' | |
if int(t_size) >= MIN_TBL_SIZE: | |
t_db = db_read(t_db_file) | |
growth_1_day_mb = int(float(t_db[-1].split(',')[2]) / 1024 / 1024) | |
growth_7_day_mb = int(float(t_db[-1].split(',')[4]) / 1024 / 1024) | |
growth_30_day_mb = int(float(t_db[-1].split(',')[6]) / 1024 / 1024) | |
growth_1_day_p = t_db[-1].split(',')[3] | |
growth_7_day_p = t_db[-1].split(',')[5] | |
growth_30_day_p = t_db[-1].split(',')[7] | |
if growth_1_day_mb > 0: | |
entry_growth1 = "%s %s %s%%\n" % ( | |
growth_1_day_mb, t_name, growth_1_day_p) | |
top_by_1_day_growth_list.append(entry_growth1) | |
if growth_7_day_mb > 0: | |
entry_growth7 = "%s %s %s%%\n" % ( | |
growth_7_day_mb, t_name, growth_7_day_p) | |
top_by_7_day_growth_list.append(entry_growth7) | |
if growth_30_day_mb > 0: | |
entry_growth30 = "%s %s %s%%\n" % ( | |
growth_30_day_mb, t_name, growth_7_day_p) | |
top_by_30_day_growth_list.append(entry_growth30) | |
# Make a notification: | |
top_by_1_day_growth_sorted = sorted( | |
top_by_1_day_growth_list, key=lambda x: int( | |
x[:x.index(' ')]), reverse=True) | |
top_by_7_day_growth_sorted = sorted( | |
top_by_7_day_growth_list, key=lambda x: int( | |
x[:x.index(' ')]), reverse=True) | |
top_by_30_day_growth_sorted = sorted( | |
top_by_30_day_growth_list, key=lambda x: int( | |
x[:x.index(' ')]), reverse=True) | |
report_1 = ''.join(top_by_1_day_growth_sorted[:NUM_LINES_REPORT-1]) | |
report_7 = ''.join(top_by_7_day_growth_sorted[:NUM_LINES_REPORT-1]) | |
report_30 = ''.join(top_by_30_day_growth_sorted[:NUM_LINES_REPORT-1]) | |
delim = (46 * '-')+'\n' | |
completed_report = top_by_current_size+'\n' | |
completed_report += '\n## Top tables by growth 1 day (in MB) ' | |
completed_report += '##\n%s%s\n' % (delim, report_1) | |
completed_report += '## Top tables by growth 7 days (in MB) ' | |
completed_report += '##\n%s%s\n' % (delim, report_7) | |
completed_report += '## Top tables by growth 30 days (in MB) ' | |
completed_report += '##\n%s%s' % (delim, report_30) | |
# Send a notification: | |
mail_sbj = '%s: table growth statistic information' % HOSTNAME | |
send_mail(mail_sbj, completed_report) | |
sys.exit(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment