Skip to content

Instantly share code, notes, and snippets.

@hrhv
Created May 23, 2026 08:00
Show Gist options
  • Select an option

  • Save hrhv/66e175d8a47d3bf0bf582344e80e80ca to your computer and use it in GitHub Desktop.

Select an option

Save hrhv/66e175d8a47d3bf0bf582344e80e80ca to your computer and use it in GitHub Desktop.
Audit your entire Proxmox setup in one shot.
#!/usr/bin/env bash
# Refuse to continue under a non-bash shell (e.g. `sh proxmox-audit.sh`): later
# sections use bash-only syntax such as `&>` and `echo -e`. The guard itself is
# plain POSIX sh so that it still parses when run by another shell.
if [ -z "${BASH_VERSION:-}" ]; then
  # Diagnostics belong on stderr so they are not mistaken for report output.
  echo "Please run with bash, not sh" >&2
  exit 1
fi
# =============================================================
# Proxmox Homelab Audit Script
# Run as root: bash proxmox-audit.sh 2>&1 | tee audit-output.txt
# =============================================================
# ANSI colour / attribute sequences used by the report helpers. They are kept
# as literal backslash sequences and only turn into real escapes when printed
# with escape interpretation enabled (e.g. `echo -e`).
RED='\033[0;31m'
YEL='\033[0;33m'
GRN='\033[0;32m'
BLU='\033[0;34m'
NC='\033[0m'
BOLD='\033[1m'
# Print a blank line followed by a bold blue horizontal rule.
# Reads the BLU, BOLD and NC colour variables at call time.
sep() {
  printf '%b\n' "\n${BLU}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
}
# Print a section heading: the title ($1) in bold, with a sep rule above and
# below it.
hdr() {
  local title=$1
  sep
  printf '%b\n' "${BOLD}${title}${NC}"
  sep
}
# Status-line helpers. Each prints " <coloured symbol> <message>" to stdout:
#   ok   - green  ✓   (check passed)
#   warn - yellow ⚠   (needs attention)
#   err  - red    ✗   (problem found)
#   info - blue   →   (plain information)
# Arguments: $1 - message text.
# Only the colour variables are escape-expanded (%b). The message goes through
# %s so that backslashes in data coming from smartctl, ethtool, etc. are
# printed literally instead of being interpreted as escape sequences.
ok()   { printf ' %b %s\n' "${GRN}✓${NC}" "$1"; }
warn() { printf ' %b %s\n' "${YEL}⚠${NC}" "$1"; }
err()  { printf ' %b %s\n' "${RED}✗${NC}" "$1"; }
info() { printf ' %b %s\n' "${BLU}→${NC}" "$1"; }
# Report banner: switch the terminal to bold, draw the title box with the
# current timestamp, then reset attributes.
printf '%b\n' "${BOLD}"
printf '%s\n' \
  "╔══════════════════════════════════════════════╗" \
  "║ PROXMOX HOMELAB AUDIT REPORT ║" \
  "║ $(date '+%Y-%m-%d %H:%M:%S') ║" \
  "╚══════════════════════════════════════════════╝"
printf '%b\n' "${NC}"
# ── 1. SYSTEM OVERVIEW ────────────────────────────────────────
# Host identity, uptime and basic CPU / memory sizing, one "label : value"
# line each.
hdr "1. SYSTEM OVERVIEW"
printf 'Hostname : %s\n' "$(hostname)"
printf 'Proxmox VE : %s\n' "$(pveversion 2>/dev/null | head -n 1)"
printf 'Kernel : %s\n' "$(uname -r)"
printf 'Uptime : %s\n' "$(uptime -p)"
printf 'Last boot : %s\n' "$(who -b | awk '{print $3, $4}')"
printf '\n'
# First "model name" entry only; xargs trims the surrounding whitespace.
printf 'CPU : %s\n' "$(grep 'model name' /proc/cpuinfo | head -n 1 | cut -d: -f2 | xargs)"
printf 'CPU cores : %s\n' "$(nproc)"
printf 'RAM total : %s\n' "$(free -h | awk '/Mem:/ {print $2}')"
printf 'Swap : %s\n' "$(free -h | awk '/Swap:/ {print $2}')"
# ── 2. KERNEL & CRITICAL ERRORS (last 2 boots) ───────────────
# Show the last 30 suspicious kernel-log lines for the previous boot (-1) and
# the current boot (0).
hdr "2. KERNEL ERRORS FROM LAST 2 BOOTS"
for boot_offset in -1 0; do
  if [ "$boot_offset" = "0" ]; then
    boot_label="Current boot"
  else
    boot_label="Previous boot"
  fi
  echo -e "\n--- $boot_label ---"
  # Case-insensitive keyword filter; journal separator lines ("-- ...") are
  # dropped before trimming to the newest 30 hits.
  journalctl -k -b "$boot_offset" --no-pager 2>/dev/null \
    | grep -iE "error|fail|hang|panic|oom|killed|lockup|mce|hardware|e1000e|corrupt|bad|warn" \
    | grep -v "^--" \
    | tail -30
done
# ── 3. DISK HEALTH (SMART) ────────────────────────────────────
# For every whole disk reported by lsblk: print the SMART overall-health
# verdict, flag non-zero reallocated / pending / uncorrectable counters as
# errors, and classify the NVMe wear indicators by their value so healthy
# readings no longer show up as warnings.
hdr "3. DISK HEALTH (SMART)"
if ! command -v smartctl &>/dev/null; then
  warn "smartmontools not installed — installing now..."
  # Report a failed install instead of silently printing an empty section.
  apt-get install -y smartmontools -qq \
    || err "Could not install smartmontools — SMART data will be missing"
fi
mapfile -t smart_disks < <(lsblk -dno NAME,TYPE | awk '$2=="disk"{print "/dev/"$1}')
for disk in "${smart_disks[@]}"; do
  echo -e "\n${BOLD}$disk${NC}"
  smartctl -H "$disk" 2>/dev/null | grep -E "SMART overall|result"
  smartctl -A "$disk" 2>/dev/null \
    | grep -E "Reallocated|Pending|Uncorrectable|Spin_Retry|Current_Pending|Power_On|Temperature|UDMA_CRC" \
    | while read -r line; do
        # Field 10 is the last column of the ATA attribute table (the raw
        # counter); it is empty for shorter, NVMe-style lines.
        val=$(awk '{print $10}' <<<"$line")
        if grep -qiE "Reallocated_Sector|Pending_Sector|Uncorrectable" <<<"$line" \
          && [ -n "$val" ] && [ "$val" != "0" ]; then
          err "$line"
        else
          info "$line"
        fi
      done
  # NVMe support: "Label:   value" lines from the full smartctl report.
  smartctl -a "$disk" 2>/dev/null \
    | grep -E "Percentage Used|Available Spare|Media and Data" \
    | {
        spare=""
        while read -r line; do
          # Numeric part of the value: drop the label, then every non-digit
          # ("%", thousands separators, padding).
          num=${line##*:}
          num=${num//[!0-9]/}
          case "$line" in
            "Media and Data"*)
              if [ -n "$num" ] && [ "$num" -gt 0 ]; then
                err "NVMe: $line"
              else
                info "NVMe: $line"
              fi
              ;;
            "Percentage Used"*)
              # 80% of rated endurance consumed is used as the warning mark.
              if [ -n "$num" ] && [ "$num" -ge 80 ]; then
                warn "NVMe: $line"
              else
                info "NVMe: $line"
              fi
              ;;
            "Available Spare Threshold"*)
              # The spare line is printed before its threshold, so the
              # remembered spare value can be compared here.
              if [ -n "$spare" ] && [ -n "$num" ] && [ "$spare" -le "$num" ]; then
                err "NVMe: $line (available spare is ${spare}%)"
              else
                info "NVMe: $line"
              fi
              ;;
            "Available Spare"*)
              spare=$num
              info "NVMe: $line"
              ;;
            *)
              info "NVMe: $line"
              ;;
          esac
        done
      }
done
# ── 4. ZFS POOL STATUS ────────────────────────────────────────
# Show pool health, capacity, ARC sizing and I/O stats when at least one pool
# is imported; otherwise fall back to an LVM summary.
hdr "4. ZFS POOL STATUS"
# `zpool list` exits 0 even when no pool exists, so on hosts that ship the ZFS
# tools without using them the LVM branch was never reached. Ask for the pool
# names in script mode (-H) and test whether any came back.
if command -v zpool &>/dev/null && [ -n "$(zpool list -H -o name 2>/dev/null)" ]; then
  zpool status -v
  echo ""
  zpool list
  echo ""
  echo "--- ZFS ARC Stats ---"
  arc_max=$(cat /sys/module/zfs/parameters/zfs_arc_max 2>/dev/null)
  arc_cur=$(awk '/^size/{print $3}' /proc/spl/kstat/zfs/arcstats 2>/dev/null)
  # Shell arithmetic instead of bc: bc is not guaranteed to be installed and
  # its absence produced empty "ARC max:  MB" lines.
  if [[ "$arc_max" =~ ^[0-9]+$ ]]; then
    if [ "$arc_max" -eq 0 ]; then
      # NOTE(review): 0 is understood to mean "no explicit limit, module
      # default applies" — confirm against the OpenZFS module parameter docs.
      info "ARC max: 0 MB (zfs_arc_max unset — module default applies)"
    else
      info "ARC max: $((arc_max / 1024 / 1024)) MB"
    fi
  fi
  if [[ "$arc_cur" =~ ^[0-9]+$ ]]; then
    info "ARC current: $((arc_cur / 1024 / 1024)) MB"
  fi
  echo "--- ZFS I/O Stats ---"
  zpool iostat -v 2>/dev/null | head -30
else
  info "No ZFS pool found (using LVM or directory storage)"
  echo ""
  echo "--- LVM status ---"
  vgdisplay 2>/dev/null | grep -E "VG Name|VG Size|Free PE"
  lvdisplay 2>/dev/null | grep -E "LV Name|LV Size|Block device"
fi
# ── 5. STORAGE / DISK USAGE ───────────────────────────────────
# Filesystem usage (tmpfs and udev entries hidden) followed by the storage
# overview from pvesm.
hdr "5. STORAGE & DISK USAGE"
df -h | grep -v -e tmpfs | grep -v -e udev
printf '\n'
printf '%s\n' "--- Proxmox storage ---"
pvesm status 2>/dev/null
# ── 6. MEMORY & SWAP ──────────────────────────────────────────
# Host memory usage, swap presence, and the sum of the RAM configured for all
# VMs compared with the host total.
# Sets RAM_TOTAL, RAM_AVAIL, SWAP_TOTAL (MB) and ALLOC (MB); RAM_TOTAL,
# SWAP_TOTAL and ALLOC are read again by the summary section.
hdr "6. MEMORY ANALYSIS"
free -h
echo ""
# One `free -m` call; "+0" forces numeric output even if a column is missing.
read -r RAM_TOTAL RAM_AVAIL SWAP_TOTAL < <(
  free -m | awk '/^Mem:/ {t = $2; a = $7} /^Swap:/ {s = $2} END {print t + 0, a + 0, s + 0}'
)
RAM_TOTAL=${RAM_TOTAL:-0}
RAM_AVAIL=${RAM_AVAIL:-0}
SWAP_TOTAL=${SWAP_TOTAL:-0}
if [ "$RAM_TOTAL" -gt 0 ]; then
  # awk instead of bc (not always installed); the guard avoids dividing by 0.
  PCT_USED=$(awk -v t="$RAM_TOTAL" -v a="$RAM_AVAIL" 'BEGIN {printf "%.1f", (t - a) * 100 / t}')
  info "RAM used: ${PCT_USED}%"
else
  warn "Could not determine total RAM from free"
fi
if [ "$SWAP_TOTAL" -eq 0 ]; then
  err "NO SWAP configured — OOM risk when RAM is full"
else
  ok "Swap present: ${SWAP_TOTAL}MB"
fi
echo ""
echo "--- VM memory allocation vs host RAM ---"
ALLOC=0
mapfile -t mem_vmids < <(qm list 2>/dev/null | awk 'NR>1{print $1}')
for vmid in "${mem_vmids[@]}"; do
  # Fetch the config once per VM; each qm invocation is comparatively slow.
  vm_conf=$(qm config "$vmid" 2>/dev/null)
  vmem=$(awk '/^memory:/{print $2}' <<<"$vm_conf")
  vname=$(awk '/^name:/{print $2}' <<<"$vm_conf")
  # Accept the "current=<MiB>" property-string form as well as a bare number,
  # and skip anything non-numeric so the arithmetic below cannot fail.
  vmem=${vmem#current=}
  vmem=${vmem%%,*}
  if [[ "$vmem" =~ ^[0-9]+$ ]]; then
    ALLOC=$((ALLOC + vmem))
    info "VM $vmid ($vname): ${vmem}MB allocated"
  fi
done
info "Total VM allocation: ${ALLOC}MB vs host RAM: ${RAM_TOTAL}MB"
if [ "$ALLOC" -gt "$RAM_TOTAL" ]; then
  err "OVERCOMMITTED: VMs allocated more RAM than host has"
elif [ "$ALLOC" -gt "$((RAM_TOTAL * 85 / 100))" ]; then
  warn "High allocation: VMs using >85% of host RAM"
else
  ok "RAM allocation looks healthy"
fi
# ── 7. CPU & LOAD ─────────────────────────────────────────────
# Load averages, cpufreq governor / average current frequency, and the ten
# most CPU-hungry processes.
hdr "7. CPU & LOAD"
uptime
echo ""
echo "--- CPU frequency / governor ---"
governor_file=/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
if [ -r "$governor_file" ]; then
  cat "$governor_file"
  # Average scaling_cur_freq over all CPUs, converted with /1000 to the MHz
  # figure that is printed. Nothing is printed when no file could be read,
  # which avoids the awk divide-by-zero of the old `&& … ||` chain.
  avg_mhz=$(cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq 2>/dev/null \
    | awk '{sum += $1; n++} END {if (n) printf "%.0f", sum / n / 1000}')
  if [ -n "$avg_mhz" ]; then
    echo "Avg freq: ${avg_mhz} MHz"
  else
    info "cpufreq info not available"
  fi
else
  info "cpufreq info not available"
fi
echo ""
echo "--- Top 10 processes by CPU ---"
# 11 lines = header + 10 processes.
ps aux --sort=-%cpu | head -11
# ── 8. THERMAL / TEMPS ────────────────────────────────────────
# lm-sensors output, kernel thermal zones (graded ok / High / CRITICAL) and
# drive temperatures reported by smartctl.
hdr "8. THERMAL STATUS"
if ! command -v sensors &>/dev/null; then
  warn "lm-sensors not installed — installing..."
  # Only probe for sensor chips when the install actually succeeded.
  if apt-get install -y lm-sensors -qq; then
    sensors-detect --auto &>/dev/null
  else
    err "Could not install lm-sensors"
  fi
fi
sensors 2>/dev/null || info "No sensor data available"
echo ""
echo "--- Thermal zones ---"
for zone in /sys/class/thermal/thermal_zone*; do
  # Covers both "glob matched nothing" and zones without a readable value.
  [ -r "$zone/temp" ] || continue
  zone_type=$(cat "$zone/type" 2>/dev/null)
  temp=$(cat "$zone/temp" 2>/dev/null)
  [[ "$temp" =~ ^-?[0-9]+$ ]] || continue
  # The raw reading is divided by 1000 to obtain the °C figure shown.
  temp_c=$((temp / 1000))
  # A real if/elif chain: the former `A && err || B && warn || ok` chain
  # printed both the CRITICAL and the High line for temperatures above 85.
  if [ "$temp_c" -gt 85 ]; then
    err "$zone_type: ${temp_c}°C (CRITICAL)"
  elif [ "$temp_c" -gt 70 ]; then
    warn "$zone_type: ${temp_c}°C (High)"
  else
    ok "$zone_type: ${temp_c}°C"
  fi
done
echo ""
echo "--- NVMe / drive temps ---"
mapfile -t temp_disks < <(lsblk -dno NAME,TYPE | awk '$2=="disk"{print "/dev/"$1}')
for disk in "${temp_disks[@]}"; do
  smartctl -A "$disk" 2>/dev/null | grep -i "temperature" | while read -r line; do
    info "$disk — $line"
  done
done
# ── 9. NETWORK INTERFACES & NIC HEALTH ───────────────────────
# Link statistics, Energy Efficient Ethernet state, offload features, the
# e1000e EEE module parameter and per-interface error / drop counters.
hdr "9. NETWORK INTERFACES & NIC HEALTH"
ip -s link show
echo ""
# Interfaces to probe with ethtool: everything except loopback, bridges, tap
# and firewall helper devices. `ip link` prints stacked devices as
# "name@parent"; ethtool needs the bare name, so the suffix is stripped.
mapfile -t nics < <(
  ip link show \
    | awk -F': ' '/^[0-9]/{print $2}' \
    | sed 's/@.*//' \
    | grep -v "^lo\|^vmbr\|^tap\|^fwbr\|^fwln\|^fwpr"
)
echo "--- EEE status (Energy Efficient Ethernet) ---"
for iface in "${nics[@]}"; do
  echo -n "$iface: "
  result=$(ethtool --show-eee "$iface" 2>&1)
  if grep -q "EEE status: enabled" <<<"$result"; then
    err "EEE is ENABLED on $iface — this caused your hang!"
  elif grep -q "EEE status: disabled" <<<"$result"; then
    ok "EEE disabled on $iface"
  else
    info "EEE not supported / not applicable on $iface"
  fi
done
echo ""
echo "--- NIC offload settings ---"
for iface in "${nics[@]}"; do
  echo "$iface offload features:"
  ethtool -k "$iface" 2>/dev/null \
    | grep -E "tcp-segmentation|generic-segmentation|generic-receive|scatter-gather" \
    | while read -r l; do
        if grep -q "on$" <<<"$l"; then warn " $l"; else ok " $l"; fi
      done
done
echo ""
echo "--- e1000e driver params ---"
# Read the parameter once; the old `cat … && info "$(cat …)"` printed it twice.
eee_param_file=/sys/module/e1000e/parameters/EEE
if [ -r "$eee_param_file" ]; then
  info "e1000e EEE param: $(cat "$eee_param_file")"
else
  info "e1000e not loaded or param not exposed"
fi
echo ""
echo "--- Network errors ---"
# The previous grep over `ip -s link` only ever matched the RX:/TX: column
# header lines, never the numbers or the interface name. Read the counters
# from sysfs for every interface whose operstate is "up".
for stat_dir in /sys/class/net/*/statistics; do
  [ -d "$stat_dir" ] || continue
  dev_dir=${stat_dir%/statistics}
  dev=${dev_dir##*/}
  [ "$(cat "$dev_dir/operstate" 2>/dev/null)" = "up" ] || continue
  rx_err=$(cat "$stat_dir/rx_errors" 2>/dev/null)
  tx_err=$(cat "$stat_dir/tx_errors" 2>/dev/null)
  rx_drop=$(cat "$stat_dir/rx_dropped" 2>/dev/null)
  tx_drop=$(cat "$stat_dir/tx_dropped" 2>/dev/null)
  counters="$dev: rx_errors=${rx_err:-0} tx_errors=${tx_err:-0} rx_dropped=${rx_drop:-0} tx_dropped=${tx_drop:-0}"
  # Only error counters raise a warning; drops are shown for information only.
  if [ "$(( ${rx_err:-0} + ${tx_err:-0} ))" -gt 0 ]; then
    warn "$counters"
  else
    info "$counters"
  fi
done
# ── 10. VM CONFIGURATION AUDIT ────────────────────────────────
# Per-VM review of memory, ballooning, CPU type, guest agent and NIC model.
hdr "10. VM CONFIGURATION AUDIT"

# Print the value (second whitespace-separated field) of the first line in
# $vm_conf whose key is "$1:".
# Globals:   vm_conf (read) - output of `qm config` for the current VM
# Arguments: $1 - config key without the trailing colon
vm_conf_value() {
  awk -v key="$1:" '$1 == key {print $2; exit}' <<<"$vm_conf"
}

mapfile -t audit_vmids < <(qm list 2>/dev/null | awk 'NR>1{print $1}')
for vmid in "${audit_vmids[@]}"; do
  # One `qm config` call per VM instead of eight.
  vm_conf=$(qm config "$vmid" 2>/dev/null)
  vname=$(vm_conf_value name)
  echo -e "\n${BOLD}VM $vmid — $vname${NC}"
  vmem=$(vm_conf_value memory)
  balloon=$(vm_conf_value balloon)
  cores=$(vm_conf_value cores)
  cpu=$(vm_conf_value cpu)
  onboot=$(vm_conf_value onboot)
  agent=$(vm_conf_value agent)
  info "RAM: ${vmem}MB | Cores: ${cores} | CPU type: ${cpu:-default}"
  info "Boot on start: ${onboot:-0} | QEMU agent: ${agent:-not set}"
  # Balloon check. Only an explicit "balloon: 0" turns the balloon device off;
  # a missing key means the default applies, so it is no longer reported as
  # DISABLED.
  if [ -z "$balloon" ]; then
    info "Memory ballooning: not configured (Proxmox default applies)"
  elif [ "$balloon" = "0" ]; then
    warn "Memory ballooning DISABLED — fixed allocation (OK if no balloon driver in guest)"
  elif [ "$balloon" -gt 0 ] 2>/dev/null; then
    warn "Memory ballooning enabled with min=${balloon}MB — ensure balloon driver is in guest OS"
  fi
  # CPU type check. The value may be "cputype=<model>" and/or carry
  # ",flags=…"; compare the bare model name only.
  cpu_model=${cpu%%,*}
  cpu_model=${cpu_model#cputype=}
  if [ -z "$cpu_model" ] || [ "$cpu_model" = "kvm64" ]; then
    warn "CPU type is kvm64 (generic) — consider 'host' for NUC single-node setups for better performance"
  elif [ "$cpu_model" = "host" ]; then
    ok "CPU type: host (optimal for single-node)"
  fi
  # QEMU agent. Accept both "1" and "enabled=1[,…]"; anything else counts as
  # not enabled.
  agent_flag=${agent%%,*}
  agent_flag=${agent_flag#enabled=}
  if [ "$agent_flag" = "1" ]; then
    ok "QEMU agent enabled"
  else
    warn "QEMU guest agent not enabled — graceful shutdown/freeze-fs won't work"
  fi
  # Network type: one verdict per netN line.
  while IFS= read -r nline; do
    case "$nline" in
      *e1000*) warn "NIC type: e1000 — consider virtio for better performance" ;;
      *virtio*) ok "NIC type: virtio (optimal)" ;;
    esac
  done < <(grep "^net" <<<"$vm_conf")
done
# ── 11. PROXMOX SERVICES ──────────────────────────────────────
# Report each core Proxmox unit as ok when systemd says "active", otherwise
# warn with whatever state was returned.
hdr "11. PROXMOX SERVICE STATUS"
for svc in pve-cluster pvedaemon pvestatd pveproxy corosync pve-ha-lrm pve-ha-crm; do
  status=$(systemctl is-active "$svc" 2>/dev/null)
  if [ "$status" = "active" ]; then
    ok "$svc: active"
  else
    warn "$svc: $status"
  fi
done
# ── 12. UPDATES & PACKAGE STATE ───────────────────────────────
# Refresh the package index, report how many packages can be upgraded (and
# list up to 20 of them), then print the detailed Proxmox version table.
hdr "12. UPDATES & PACKAGE STATE"
apt-get update -qq 2>/dev/null
# Capture the list once and reuse it for both the count and the listing.
upgradable=$(apt list --upgradable 2>/dev/null | grep -v "Listing")
# grep -c prints "0" itself when nothing matches (while exiting 1). The old
# `|| echo 0` therefore produced "0<newline>0" and broke the numeric test.
upgrades=$(grep -c upgradable <<<"$upgradable")
if [ "${upgrades:-0}" -gt 0 ]; then
  warn "$upgrades package(s) available to upgrade"
  head -20 <<<"$upgradable"
else
  ok "System is up to date"
fi
echo ""
echo "--- Proxmox version detail ---"
pveversion -v 2>/dev/null
# ── 13. CRON & BACKUP JOBS ────────────────────────────────────
# Dump the Proxmox job schedule, root's crontab, and the non-comment,
# non-blank lines of everything under /etc/cron.d.
hdr "13. BACKUP & MAINTENANCE JOBS"
echo "--- Scheduled backups ---"
if ! cat /etc/pve/jobs.cfg 2>/dev/null; then
  info "No jobs.cfg found"
fi
echo ""
echo "--- Cron jobs ---"
if ! crontab -l 2>/dev/null; then
  info "No root crontab"
fi
# List the directory first; its contents are only printed when that worked.
if ls /etc/cron.d/ 2>/dev/null; then
  cat /etc/cron.d/* 2>/dev/null | grep -v "^#\|^$"
fi
# ── 14. OPEN FILES & SYSTEM LIMITS ───────────────────────────
# File-descriptor usage and limits, the inotify watch limit, and a handful of
# VM / network sysctl values.
hdr "14. SYSTEM LIMITS & OPEN FILES"
# Fields 1 and 3 of file-nr, printed as "first/third".
printf 'Open file descriptors: %s\n' "$(cat /proc/sys/fs/file-nr | awk '{print $1 "/" $3}')"
printf 'Max file descriptors : %s\n' "$(cat /proc/sys/fs/file-max)"
printf 'Inotify watches : %s\n' "$(cat /proc/sys/fs/inotify/max_user_watches)"
printf '\n'
printf '%s\n' "--- Kernel parameters relevant to virtualization ---"
virt_sysctls=(
  vm.swappiness
  vm.dirty_ratio
  vm.dirty_background_ratio
  net.core.rmem_max
  net.core.wmem_max
  net.ipv4.tcp_rmem
  net.ipv4.tcp_wmem
)
sysctl "${virt_sysctls[@]}" 2>/dev/null
# ── 15. RECENT CRASH / REBOOT HISTORY ────────────────────────
# Recent reboots plus OOM-killer and kernel-panic traces from the previous
# boot's journal.
hdr "15. REBOOT & CRASH HISTORY"
echo "--- Last 10 reboots ---"
last reboot | head -10
echo ""
# Heading now matches what is queried (-b -1 is the previous boot only).
echo "--- OOM events (previous boot) ---"
# Capture first, then test for emptiness: in `… | tail -20 || info …` the
# pipeline status is tail's, which is always 0, so the fallback never ran.
oom_hits=$(journalctl --no-pager -b -1 2>/dev/null \
  | grep -iE "oom|out of memory|killed process" | tail -20)
if [ -n "$oom_hits" ]; then
  printf '%s\n' "$oom_hits"
else
  info "No OOM events in previous boot"
fi
echo ""
echo "--- Kernel panics / tracebacks ---"
panic_hits=$(journalctl -k -b -1 --no-pager 2>/dev/null \
  | grep -iE "panic|BUG:|kernel BUG|general protection|segfault" | tail -20)
if [ -n "$panic_hits" ]; then
  printf '%s\n' "$panic_hits"
else
  info "No panics in previous boot"
fi
# ── 16. SUMMARY ───────────────────────────────────────────────
# Condensed checklist built from values gathered earlier (SWAP_TOTAL, ALLOC,
# RAM_TOTAL) plus two quick live checks.
hdr "16. QUICK SUMMARY — PASTE THIS TO CLAUDE"
echo "Run: grep -E '✗|⚠' audit-output.txt to see all warnings and errors"
echo ""
echo "Critical items to review:"
hang_count=$(journalctl -k -b -1 --no-pager 2>/dev/null | grep -c "Hardware Unit Hang")
echo " e1000e hardware hangs in last boot: $hang_count"
# A failing numeric test (e.g. an empty variable) lands in the else branch.
if [ "$SWAP_TOTAL" -eq 0 ] 2>/dev/null; then
  echo " ✗ NO SWAP"
else
  echo " ✓ Swap present"
fi
if [ "$ALLOC" -gt "$RAM_TOTAL" ] 2>/dev/null; then
  echo " ✗ RAM OVERCOMMITTED"
else
  echo " ✓ RAM allocation OK"
fi
if zpool status 2>/dev/null | grep -q "DEGRADED\|FAULTED"; then
  echo " ✗ ZFS POOL DEGRADED"
else
  echo " ✓ ZFS healthy (or not in use)"
fi
echo ""
printf '%b\n' "${GRN}${BOLD}Audit complete. Share the full output of audit-output.txt for analysis.${NC}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment