duchenpaul/Linux_hardware_emonitor.sh

## Linux_hardware_emonitor.sh
#!/bin/bash
#filename server_moniter.sh

# Alert thresholds, in percent. Each watch_* check reports a warning when the
# value it measured is greater than the matching quota.
mem_quota=20  # compared with mem_usage in watch_mem
hd_quota=50   # compared with each monitored filesystem's usage in watch_hd
cpu_quota=80  # compared with cpu_usage in watch_cpu

# watch memory usage

# watch_mem [meminfo_file]
#
# Computes the share of memory used by processes, i.e.
# (MemTotal - MemFree - Buffers - Cached) * 100 / MemTotal, truncated to an
# integer, and compares it with $mem_quota.
#
# Arguments: $1 - optional meminfo-format file (default: /proc/meminfo)
# Globals:   mem_quota (read); mem_usage, mem_message (written)
# Returns:   1 if usage is greater than the quota, 0 if it is not,
#            2 if the memory figures could not be read
watch_mem()
{
  local meminfo_file=${1:-/proc/meminfo}

  if [ ! -r "$meminfo_file" ]; then
    echo "watch_mem: cannot read $meminfo_file" >&2
    return 2
  fi

  # A single awk pass with exact field names, so entries such as SwapCached
  # are never mistaken for Cached. The percentage is computed from the summed
  # remainder and truncated once; truncating each term separately (as
  # 100 - a - b - c) overstates the result by up to three points.
  mem_usage=$(awk '
    $1 == "MemTotal:" { total = $2 }
    $1 == "MemFree:"  { free = $2 }
    $1 == "Buffers:"  { buffers = $2 }
    $1 == "Cached:"   { cached = $2 }
    END {
      if (total <= 0) exit 1
      printf "%d\n", (total - free - buffers - cached) * 100 / total
    }' "$meminfo_file") || {
    echo "watch_mem: no usable MemTotal in $meminfo_file" >&2
    return 2
  }

  if [ "$mem_usage" -gt "$mem_quota" ]; then
    mem_message="WARN! The Memory usage is over than $mem_usage%"
    return 1
  fi
  return 0
}
# watch disk usage

# watch_hd
#
# Checks the usage percentage of every filesystem whose `df` line mentions
# sda1, sda2 or opt, and compares the highest one with $hd_quota.
#
# Globals: hd_quota (read); hd_message (written)
# Returns: 1 if any monitored filesystem is above the quota, 0 otherwise
#          (including when none of the monitored filesystems is present)
watch_hd()
{
  local max_usage

  # df -P keeps every filesystem on one line, so the percentage is always
  # column 5 even for long device names. Without -P such entries wrap onto
  # two lines and the percentage shifts to column 4.
  max_usage=$(df -P | awk '
    NR > 1 && /sda1|sda2|opt/ {
      pct = $5
      sub(/%/, "", pct)
      if (pct + 0 > max) max = pct + 0
      seen = 1
    }
    END { if (seen) print max + 0 }')

  # Nothing matched: there is nothing to compare, and an empty operand
  # would only make the numeric test fail with an error.
  if [ -z "$max_usage" ]; then
    return 0
  fi

  if [ "$max_usage" -gt "$hd_quota" ]; then
    hd_message="WARN! The Hard Disk usage is over than $hd_quota%"
    return 1
  fi
  return 0
}

# watch cpu usage in one minute
# get_cpu_info [stat_file]
#
# Sums the counters of every per-CPU line (cpu0, cpu1, ...) and prints
# "<used> <unused>" on one line: columns 2-4 count as used, columns 5-8 as
# unused. Per proc(5) these are user/nice/system and idle/iowait/irq/softirq.
#
# Arguments: $1 - optional stat-format file (default: /proc/stat)
# Outputs:   the two totals on stdout
get_cpu_info()
{
  local stat_file=${1:-/proc/stat}

  # %.0f keeps the totals in plain decimal; with a bare print some awk
  # implementations switch to exponent notation for very large sums.
  awk '
    /^cpu[0-9]/ {
      used   += $2 + $3 + $4
      unused += $5 + $6 + $7 + $8
    }
    END { printf "%.0f %.0f\n", used, unused }' "$stat_file"
}

# watch_cpu [interval]
#
# Takes two get_cpu_info samples <interval> seconds apart, derives the CPU
# usage percentage over that window and compares it with $cpu_quota.
#
# Arguments: $1 - optional seconds between the two samples (default: 60)
# Globals:   cpu_quota (read); cpu_usage, cpu_message (written)
# Returns:   1 if usage is greater than the quota, 0 otherwise
watch_cpu()
{
  local interval=${1:-60}
  local time_point_1 time_point_2

  time_point_1=$(get_cpu_info)
  sleep "$interval"
  time_point_2=$(get_cpu_info)

  # Each sample is "used unused", so awk sees: used1 unused1 used2 unused2.
  # A zero total delta is reported as 0% rather than dividing by zero.
  cpu_usage=$(printf '%s %s\n' "$time_point_1" "$time_point_2" | awk '{
    used = $3 - $1
    total = $3 + $4 - $1 - $2
    if (total > 0)
      print used * 100 / total
    else
      print 0
  }')

  # cpu_usage may be fractional, so the comparison is done numerically in
  # awk. Inside [[ ]] the > operator compares strings, which ranks "9.5"
  # above "80" and "100" below it.
  if awk -v usage="$cpu_usage" -v quota="$cpu_quota" \
    'BEGIN { exit !(usage + 0 > quota + 0) }'; then
    cpu_message="WARN! The CPU Usage is over than $cpu_quota%"
    return 1
  fi
  return 0
}

# proc_cpu_top10
#
# Stores in proc_busiest the `ps aux` header line followed by the ten
# processes with the highest value in column 3 (%CPU), highest first.
#
# Globals: proc_busiest (written)
proc_cpu_top10()
{
  local snapshot header

  snapshot=$(ps aux)
  header=${snapshot%%$'\n'*}

  # The header is split off before sorting so that it stays on top.
  # -k3,3nr attaches both n and r to the key: a key that carries its own
  # modifier (as in -nk3r) no longer inherits the global -n and would be
  # compared as text.
  proc_busiest=$(
    printf '%s\n' "$header"
    printf '%s\n' "$snapshot" | sed 1d | sort -k3,3nr | head -n 10
  )
}

# Main flow: run each check and, for every check that returns 1, append a
# timestamp and its warning to the report file. If a report exists at the
# end, mail it to the administrator.
report=/root/server_report.log

watch_mem
if [ $? -eq 1 ]; then
  {
    date
    echo "$mem_message"
  } >> "$report"
fi

watch_hd
if [ $? -eq 1 ]; then
  {
    date
    echo "$hd_message"
  } >> "$report"
fi

watch_cpu
if [ $? -eq 1 ]; then
  proc_cpu_top10
  {
    date
    echo "$cpu_message"
    echo "$proc_busiest"
  } >> "$report"
fi

# -e is the unambiguous existence test; a unary -a can be mistaken by `[`
# for its binary AND operator.
if [ -e "$report" ]; then
  # NOTE(review): "CUP" in the subject looks like a typo for "CPU"; it is
  # left unchanged in case mail filters match on the existing subject.
  if mail -s "CUP/MEM/DISK Stat of Alarm" monitor@ahlinux.com < "$report"; then
    rm -f -- "$report" # comment this line out to keep the report when testing
  else
    # Keep the report so that the alert is not lost when delivery fails.
    echo "server monitor: could not mail $report; keeping it" >&2
  fi
fi

脚本说明：


# 服务器CPU/MEM/DISK监控脚本(server_moniter.sh)
#------------------------------------------------------------------
# 对服务器的CPU/MEM/DISK设定阈值，动态监控利用率，超过阈值发送邮件
# 或者短信告警
#
# 本脚本通过调用watch_mem函数实现内存利用率监控，这里的内存利用率计算是进程实
# 际使用的内存，也就是used-buffer/cache，超过阈值发送消息。
#
# 通过调用watch_hd函数实现磁盘利用率监控，这里的磁盘利用率，我采用
# 一个一个磁盘或分区检索，这种方式对于磁盘或分区较多时，可能不太方
# 便，可以采用循环判断。
#
# 通过调用watch_cpu函数实现CPU利用率监控，这里是通过在一分钟内2次采
# 集/proc/stat中的CPU数据，再对每1次采集点的数据中的使用CPU时间与空闲CPU时
# 间累加求和，最后将2次采集点运算结果求差，获得CPU在一分钟内的使用时间利用率，
# 这种计算方式比较准确，如果超过阈值发送消息。
#
# 在每一个函数中设置一个判断返回值，如果超过阈值，条件为真则返回1，如果返回值
# 为1则将告警消息附加到report中，最后如果report这个文件
# 存在，发送邮件通知管理员，将report做为邮件的正文。
#
# 可以将这个脚本添加到定时任务，每隔10分钟执行一次检查。
#------------------------------------------------------------------
	#!/bin/bash
	#filename server_moniter.sh

	# Alert thresholds, in percent. Each watch_* check reports a warning when the
	# value it measured is greater than the matching quota.
	mem_quota=20  # compared with mem_usage in watch_mem
	hd_quota=50   # compared with each monitored filesystem's usage in watch_hd
	cpu_quota=80  # compared with cpu_usage in watch_cpu

	# watch memory usage

	# watch_mem [meminfo_file]
	#
	# Computes the share of memory used by processes, i.e.
	# (MemTotal - MemFree - Buffers - Cached) * 100 / MemTotal, truncated to an
	# integer, and compares it with $mem_quota.
	#
	# Arguments: $1 - optional meminfo-format file (default: /proc/meminfo)
	# Globals:   mem_quota (read); mem_usage, mem_message (written)
	# Returns:   1 if usage is greater than the quota, 0 if it is not,
	#            2 if the memory figures could not be read
	watch_mem()
	{
	  local meminfo_file=${1:-/proc/meminfo}

	  if [ ! -r "$meminfo_file" ]; then
	    echo "watch_mem: cannot read $meminfo_file" >&2
	    return 2
	  fi

	  # A single awk pass with exact field names, so entries such as SwapCached
	  # are never mistaken for Cached. The percentage is computed from the summed
	  # remainder and truncated once; truncating each term separately (as
	  # 100 - a - b - c) overstates the result by up to three points.
	  mem_usage=$(awk '
	    $1 == "MemTotal:" { total = $2 }
	    $1 == "MemFree:"  { free = $2 }
	    $1 == "Buffers:"  { buffers = $2 }
	    $1 == "Cached:"   { cached = $2 }
	    END {
	      if (total <= 0) exit 1
	      printf "%d\n", (total - free - buffers - cached) * 100 / total
	    }' "$meminfo_file") || {
	    echo "watch_mem: no usable MemTotal in $meminfo_file" >&2
	    return 2
	  }

	  if [ "$mem_usage" -gt "$mem_quota" ]; then
	    mem_message="WARN! The Memory usage is over than $mem_usage%"
	    return 1
	  fi
	  return 0
	}
	# watch disk usage

	# watch_hd
	#
	# Checks the usage percentage of every filesystem whose `df` line mentions
	# sda1, sda2 or opt, and compares the highest one with $hd_quota.
	#
	# Globals: hd_quota (read); hd_message (written)
	# Returns: 1 if any monitored filesystem is above the quota, 0 otherwise
	#          (including when none of the monitored filesystems is present)
	watch_hd()
	{
	  local max_usage

	  # df -P keeps every filesystem on one line, so the percentage is always
	  # column 5 even for long device names. Without -P such entries wrap onto
	  # two lines and the percentage shifts to column 4.
	  max_usage=$(df -P | awk '
	    NR > 1 && /sda1|sda2|opt/ {
	      pct = $5
	      sub(/%/, "", pct)
	      if (pct + 0 > max) max = pct + 0
	      seen = 1
	    }
	    END { if (seen) print max + 0 }')

	  # Nothing matched: there is nothing to compare, and an empty operand
	  # would only make the numeric test fail with an error.
	  if [ -z "$max_usage" ]; then
	    return 0
	  fi

	  if [ "$max_usage" -gt "$hd_quota" ]; then
	    hd_message="WARN! The Hard Disk usage is over than $hd_quota%"
	    return 1
	  fi
	  return 0
	}

	# watch cpu usage in one minute
	# get_cpu_info [stat_file]
	#
	# Sums the counters of every per-CPU line (cpu0, cpu1, ...) and prints
	# "<used> <unused>" on one line: columns 2-4 count as used, columns 5-8 as
	# unused. Per proc(5) these are user/nice/system and idle/iowait/irq/softirq.
	#
	# Arguments: $1 - optional stat-format file (default: /proc/stat)
	# Outputs:   the two totals on stdout
	get_cpu_info()
	{
	  local stat_file=${1:-/proc/stat}

	  # %.0f keeps the totals in plain decimal; with a bare print some awk
	  # implementations switch to exponent notation for very large sums.
	  awk '
	    /^cpu[0-9]/ {
	      used   += $2 + $3 + $4
	      unused += $5 + $6 + $7 + $8
	    }
	    END { printf "%.0f %.0f\n", used, unused }' "$stat_file"
	}

	# watch_cpu [interval]
	#
	# Takes two get_cpu_info samples <interval> seconds apart, derives the CPU
	# usage percentage over that window and compares it with $cpu_quota.
	#
	# Arguments: $1 - optional seconds between the two samples (default: 60)
	# Globals:   cpu_quota (read); cpu_usage, cpu_message (written)
	# Returns:   1 if usage is greater than the quota, 0 otherwise
	watch_cpu()
	{
	  local interval=${1:-60}
	  local time_point_1 time_point_2

	  time_point_1=$(get_cpu_info)
	  sleep "$interval"
	  time_point_2=$(get_cpu_info)

	  # Each sample is "used unused", so awk sees: used1 unused1 used2 unused2.
	  # A zero total delta is reported as 0% rather than dividing by zero.
	  cpu_usage=$(printf '%s %s\n' "$time_point_1" "$time_point_2" | awk '{
	    used = $3 - $1
	    total = $3 + $4 - $1 - $2
	    if (total > 0)
	      print used * 100 / total
	    else
	      print 0
	  }')

	  # cpu_usage may be fractional, so the comparison is done numerically in
	  # awk. Inside [[ ]] the > operator compares strings, which ranks "9.5"
	  # above "80" and "100" below it.
	  if awk -v usage="$cpu_usage" -v quota="$cpu_quota" \
	    'BEGIN { exit !(usage + 0 > quota + 0) }'; then
	    cpu_message="WARN! The CPU Usage is over than $cpu_quota%"
	    return 1
	  fi
	  return 0
	}

	# proc_cpu_top10
	#
	# Stores in proc_busiest the `ps aux` header line followed by the ten
	# processes with the highest value in column 3 (%CPU), highest first.
	#
	# Globals: proc_busiest (written)
	proc_cpu_top10()
	{
	  local snapshot header

	  snapshot=$(ps aux)
	  header=${snapshot%%$'\n'*}

	  # The header is split off before sorting so that it stays on top.
	  # -k3,3nr attaches both n and r to the key: a key that carries its own
	  # modifier (as in -nk3r) no longer inherits the global -n and would be
	  # compared as text.
	  proc_busiest=$(
	    printf '%s\n' "$header"
	    printf '%s\n' "$snapshot" | sed 1d | sort -k3,3nr | head -n 10
	  )
	}

	# Main flow: run each check and, for every check that returns 1, append a
	# timestamp and its warning to the report file. If a report exists at the
	# end, mail it to the administrator.
	report=/root/server_report.log

	watch_mem
	if [ $? -eq 1 ]; then
	  {
	    date
	    echo "$mem_message"
	  } >> "$report"
	fi

	watch_hd
	if [ $? -eq 1 ]; then
	  {
	    date
	    echo "$hd_message"
	  } >> "$report"
	fi

	watch_cpu
	if [ $? -eq 1 ]; then
	  proc_cpu_top10
	  {
	    date
	    echo "$cpu_message"
	    echo "$proc_busiest"
	  } >> "$report"
	fi

	# -e is the unambiguous existence test; a unary -a can be mistaken by `[`
	# for its binary AND operator.
	if [ -e "$report" ]; then
	  # NOTE(review): "CUP" in the subject looks like a typo for "CPU"; it is
	  # left unchanged in case mail filters match on the existing subject.
	  if mail -s "CUP/MEM/DISK Stat of Alarm" monitor@ahlinux.com < "$report"; then
	    rm -f -- "$report" # comment this line out to keep the report when testing
	  else
	    # Keep the report so that the alert is not lost when delivery fails.
	    echo "server monitor: could not mail $report; keeping it" >&2
	  fi
	fi

	脚本说明：


	# 服务器CPU/MEM/DISK监控脚本(server_moniter.sh)
	#------------------------------------------------------------------
	# 对服务器的CPU/MEM/DISK设定阈值，动态监控利用率，超过阈值发送邮件
	# 或者短信告警
	#
	# 本脚本通过调用watch_mem函数实现内存利用率监控，这里的内存利用率计算是进程实
	# 际使用的内存，也就是used-buffer/cache，超过阈值发送消息。
	#
	# 通过调用watch_hd函数实现磁盘利用率监控，这里的磁盘利用率，我采用
	# 一个一个磁盘或分区检索，这种方式对于磁盘或分区较多时，可能不太方
	# 便，可以采用循环判断。
	#
	# 通过调用watch_cpu函数实现CPU利用率监控，这里是通过在一分钟内2次采
	# 集/proc/stat中的CPU数据，再对每1次采集点的数据中的使用CPU时间与空闲CPU时
	# 间累加求和，最后将2次采集点运算结果求差，获得CPU在一分钟内的使用时间利用率，
	# 这种计算方式比较准确，如果超过阈值发送消息。
	#
	# 在每一个函数中设置一个判断返回值，如果超过阈值，条件为真则返回1，如果返回值
	# 为1则将告警消息附加到report中，最后如果report这个文件
	# 存在，发送邮件通知管理员，将report做为邮件的正文。
	#
	# 可以将这个脚本添加到定时任务，每隔10分钟执行一次检查。
	#------------------------------------------------------------------