Created
May 23, 2026 08:00
-
-
Save hrhv/66e175d8a47d3bf0bf582344e80e80ca to your computer and use it in GitHub Desktop.
Audit your entire Proxmox setup in one shot.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# =============================================================
# Proxmox Homelab Audit Script
# Run as root: bash proxmox-audit.sh 2>&1 | tee audit-output.txt
# =============================================================

# Refuse to run under a non-bash shell: the rest of the script uses bash-only
# syntax (for example `&>` redirections). This guard deliberately sticks to
# POSIX `[ ... ]` so that it still parses when the file is started with `sh`.
if [ -z "$BASH_VERSION" ]; then
  # Diagnostics belong on stderr.
  echo "Please run with bash, not sh" >&2
  exit 1
fi
# ANSI colour sequences, stored in backslash form and expanded only at print
# time (via printf's %b).
RED='\033[0;31m'; YEL='\033[0;33m'; GRN='\033[0;32m'; BLU='\033[0;34m'; NC='\033[0m'; BOLD='\033[1m'

# sep: print an empty line followed by a bold blue horizontal rule.
sep() { printf '\n%b━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━%b\n' "${BLU}${BOLD}" "${NC}"; }

# hdr TITLE: print TITLE in bold, framed by a rule above and below.
hdr() { sep; printf '%b%s%b\n' "${BOLD}" "$1" "${NC}"; sep; }

# ok / warn / err / info MESSAGE: print MESSAGE after a coloured status glyph.
# MESSAGE is printed through %s, so backslashes coming from tool output
# (paths, journal lines, ...) are shown verbatim instead of being interpreted
# as escape sequences; only the colour variables go through %b.
ok()   { printf ' %b✓%b %s\n' "${GRN}" "${NC}" "$1"; }
warn() { printf ' %b⚠%b %s\n' "${YEL}" "${NC}" "$1"; }
err()  { printf ' %b✗%b %s\n' "${RED}" "${NC}" "$1"; }
info() { printf ' %b→%b %s\n' "${BLU}" "${NC}" "$1"; }
# Report banner: a boxed title with the time the report was generated,
# printed in bold and followed by a colour reset.
printf '%b\n' "${BOLD}"
printf '%s\n' \
  "╔══════════════════════════════════════════════╗" \
  "║ PROXMOX HOMELAB AUDIT REPORT ║" \
  "║ $(date '+%Y-%m-%d %H:%M:%S') ║" \
  "╚══════════════════════════════════════════════╝"
printf '%b\n' "${NC}"
# ── 1. SYSTEM OVERVIEW ────────────────────────────────────────
# Host identity, Proxmox/kernel versions, uptime and basic CPU/RAM figures.
hdr "1. SYSTEM OVERVIEW"
printf 'Hostname : %s\n' "$(hostname)"
# Only the first line of pveversion's output is shown.
printf 'Proxmox VE : %s\n' "$(pveversion 2>/dev/null | head -1)"
printf 'Kernel : %s\n' "$(uname -r)"
printf 'Uptime : %s\n' "$(uptime -p)"
# Fields 3 and 4 of `who -b` are printed as the boot timestamp.
printf 'Last boot : %s\n' "$(who -b | awk '{print $3,$4}')"
printf '\n'
# First "model name" entry from /proc/cpuinfo; xargs trims the padding.
printf 'CPU : %s\n' "$(grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2 | xargs)"
printf 'CPU cores : %s\n' "$(nproc)"
printf 'RAM total : %s\n' "$(free -h | awk '/Mem:/{print $2}')"
printf 'Swap : %s\n' "$(free -h | awk '/Swap:/{print $2}')"
# ── 2. KERNEL & CRITICAL ERRORS (last 2 boots) ───────────────
# Show the last 30 suspicious kernel messages for the previous boot (-1) and
# the current boot (0).
hdr "2. KERNEL ERRORS FROM LAST 2 BOOTS"
for b in -1 0; do
  if [[ "$b" == "0" ]]; then
    label="Current boot"
  else
    label="Previous boot"
  fi
  printf '\n--- %s ---\n' "$label"

  # journalctl's own errors are discarded, so a boot with no stored journal
  # and a boot with no matching messages both yield empty output here.
  kernel_hits=$(
    journalctl -k -b "$b" --no-pager 2>/dev/null \
      | grep -iE "error|fail|hang|panic|oom|killed|lockup|mce|hardware|e1000e|corrupt|bad|warn" \
      | grep -v "^--" \
      | tail -30
  )

  if [[ -n "$kernel_hits" ]]; then
    printf '%s\n' "$kernel_hits"
  else
    # Say so explicitly instead of leaving a blank section in the report.
    info "No matching kernel messages (or no journal stored for this boot)"
  fi
done
# ── 3. DISK HEALTH (SMART) ────────────────────────────────────
# For every whole disk listed by lsblk: overall SMART verdict, selected ATA
# attributes, and NVMe wear / error counters.
hdr "3. DISK HEALTH (SMART)"
if ! command -v smartctl &>/dev/null; then
  warn "smartmontools not installed — installing now..."
  apt-get install -y smartmontools -qq \
    || err "Could not install smartmontools — SMART data unavailable"
fi

if command -v smartctl &>/dev/null; then
  # Attributes whose non-zero raw value indicates failing media.
  bad_attr_re='reallocated_sector|pending_sector|uncorrectable'

  mapfile -t smart_disks < <(lsblk -dno NAME,TYPE | awk '$2=="disk"{print "/dev/"$1}')
  for disk in "${smart_disks[@]}"; do
    printf '\n%b%s%b\n' "${BOLD}" "$disk" "${NC}"
    smartctl -H "$disk" 2>/dev/null | grep -E "SMART overall|result"

    # ATA attribute rows: the tenth whitespace-separated field is taken as the
    # raw value. `read` (default IFS) trims the row's leading padding.
    while read -r line; do
      read -r -a fields <<<"$line"
      val=${fields[9]:-}
      if [[ "${line,,}" =~ $bad_attr_re ]] && [[ "$val" != "0" ]]; then
        err "$line"
      else
        info "$line"
      fi
    done < <(
      smartctl -A "$disk" 2>/dev/null \
        | grep -E "Reallocated|Pending|Uncorrectable|Spin_Retry|Current_Pending|Power_On|Temperature|UDMA_CRC"
    )

    # NVMe support. Grade each counter instead of flagging every line as a
    # warning: healthy values must not show up in the ⚠/✗ summary.
    while read -r line; do
      # Digits after the colon, e.g. "Percentage Used: 3%" -> 3.
      nvme_val=${line##*:}
      nvme_val=${nvme_val//[^0-9]/}
      case "$line" in
        "Media and Data"*)
          if [[ "${nvme_val:-0}" != "0" ]]; then
            err "NVMe: $line"
          else
            ok "NVMe: $line"
          fi
          ;;
        "Percentage Used"*)
          # Heuristic: start warning once 80% of rated endurance is consumed.
          if (( 10#${nvme_val:-0} >= 80 )); then
            warn "NVMe: $line"
          else
            info "NVMe: $line"
          fi
          ;;
        *)
          info "NVMe: $line"
          ;;
      esac
    done < <(
      smartctl -a "$disk" 2>/dev/null \
        | grep -E "Percentage Used|Available Spare|Media and Data"
    )
  done
fi
# ── 4. ZFS POOL STATUS ────────────────────────────────────────
# Pool health, capacity, ARC size and I/O statistics when at least one ZFS
# pool is imported; otherwise a short LVM summary.
hdr "4. ZFS POOL STATUS"
# `zpool list` returns success even when no pool exists, so the presence of
# the tool alone says nothing: check that it actually reports a pool name.
if command -v zpool &>/dev/null && [[ -n "$(zpool list -H -o name 2>/dev/null)" ]]; then
  zpool status -v
  echo ""
  zpool list
  echo ""
  echo "--- ZFS ARC Stats ---"
  arc_max=$(cat /sys/module/zfs/parameters/zfs_arc_max 2>/dev/null)
  arc_cur=$(awk '/^size/{print $3}' /proc/spl/kstat/zfs/arcstats 2>/dev/null)
  # Shell arithmetic instead of `bc`, which is not guaranteed to be installed.
  if [[ "$arc_max" =~ ^[0-9]+$ ]]; then
    if (( arc_max == 0 )); then
      # 0 means the module parameter was never set, not "0 bytes".
      info "ARC max: not set (ZFS built-in default applies)"
    else
      info "ARC max: $(( arc_max / 1024 / 1024 )) MB"
    fi
  fi
  if [[ "$arc_cur" =~ ^[0-9]+$ ]]; then
    info "ARC current: $(( arc_cur / 1024 / 1024 )) MB"
  fi
  echo "--- ZFS I/O Stats ---"
  zpool iostat -v 2>/dev/null | head -30
else
  info "No ZFS pool found (using LVM or directory storage)"
  echo ""
  echo "--- LVM status ---"
  vgdisplay 2>/dev/null | grep -E "VG Name|VG Size|Free PE"
  lvdisplay 2>/dev/null | grep -E "LV Name|LV Size|Block device"
fi
# ── 5. STORAGE / DISK USAGE ───────────────────────────────────
# Filesystem usage with tmpfs/udev rows filtered out, followed by the
# storage list as reported by pvesm.
hdr "5. STORAGE & DISK USAGE"
df -h | grep -v -e tmpfs -e udev
printf '\n'
printf '%s\n' "--- Proxmox storage ---"
pvesm status 2>/dev/null
# ── 6. MEMORY & SWAP ──────────────────────────────────────────
# Host RAM/swap usage and the sum of RAM configured for all VMs.
# Sets RAM_TOTAL, RAM_AVAIL, SWAP_TOTAL (MB, from `free -m`) and ALLOC (MB);
# the summary section at the end of the script reads SWAP_TOTAL, ALLOC and
# RAM_TOTAL.
hdr "6. MEMORY ANALYSIS"
free -h
echo ""

# One `free -m` snapshot so the three figures are mutually consistent.
read -r RAM_TOTAL RAM_AVAIL SWAP_TOTAL < <(
  free -m | awk '/Mem:/{t=$2; a=$7} /Swap:/{s=$2} END{print t, a, s}'
)

# awk instead of `bc` (which may not be installed), guarded against an
# empty or zero total.
if [[ "$RAM_TOTAL" =~ ^[0-9]+$ && "$RAM_AVAIL" =~ ^[0-9]+$ ]] && (( RAM_TOTAL > 0 )); then
  PCT_USED=$(awk -v t="$RAM_TOTAL" -v a="$RAM_AVAIL" 'BEGIN{printf "%.1f", (t - a) * 100 / t}')
  info "RAM used: ${PCT_USED}%"
else
  warn "Could not determine RAM usage from 'free -m'"
fi

if [[ "$SWAP_TOTAL" =~ ^[0-9]+$ ]] && (( SWAP_TOTAL == 0 )); then
  err "NO SWAP configured — OOM risk when RAM is full"
else
  ok "Swap present: ${SWAP_TOTAL}MB"
fi

echo ""
echo "--- VM memory allocation vs host RAM ---"
ALLOC=0
mapfile -t mem_vmids < <(qm list 2>/dev/null | awk 'NR>1{print $1}')
for vmid in "${mem_vmids[@]}"; do
  # Fetch the config once per VM; skip VMs whose config cannot be read.
  vm_cfg=$(qm config "$vmid" 2>/dev/null)
  [[ -n "$vm_cfg" ]] || continue
  vmem=$(awk '/^memory:/{print $2}' <<<"$vm_cfg")
  vname=$(awk '/^name:/{print $2}' <<<"$vm_cfg")
  # NOTE(review): a config without a `memory:` line is counted with 512 MB,
  # the default documented for the qm `memory` option — confirm for your
  # Proxmox version.
  vmem=${vmem:-512}
  if [[ ! "$vmem" =~ ^[0-9]+$ ]]; then
    warn "VM $vmid ($vname): cannot parse memory setting '$vmem' — not counted"
    continue
  fi
  ALLOC=$(( ALLOC + vmem ))
  info "VM $vmid ($vname): ${vmem}MB allocated"
done

info "Total VM allocation: ${ALLOC}MB vs host RAM: ${RAM_TOTAL}MB"
host_ram_mb=${RAM_TOTAL:-0}
[[ "$host_ram_mb" =~ ^[0-9]+$ ]] || host_ram_mb=0
if (( host_ram_mb == 0 )); then
  warn "Host RAM unknown — cannot judge VM memory allocation"
elif (( ALLOC > host_ram_mb )); then
  err "OVERCOMMITTED: VMs allocated more RAM than host has"
elif (( ALLOC > host_ram_mb * 85 / 100 )); then
  warn "High allocation: VMs using >85% of host RAM"
else
  ok "RAM allocation looks healthy"
fi
# ── 7. CPU & LOAD ─────────────────────────────────────────────
# Load averages, cpufreq governor / average current frequency, and the ten
# most CPU-hungry processes.
hdr "7. CPU & LOAD"
uptime
echo ""
echo "--- CPU frequency / governor ---"
# Explicit if/else instead of an `a && b || c` chain, and the average is only
# computed when at least one frequency sample was read (no division by zero).
if cpu_governor=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null); then
  printf '%s\n' "$cpu_governor"
  avg_freq=$(
    cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq 2>/dev/null \
      | awk '{sum+=$1; n++} END{if (n) printf "Avg freq: %.0f MHz", sum/n/1000}'
  )
  if [[ -n "$avg_freq" ]]; then
    printf '%s\n' "$avg_freq"
  else
    info "cpufreq info not available"
  fi
else
  info "cpufreq info not available"
fi
echo ""
echo "--- Top 10 processes by CPU ---"
# 11 lines = header row + 10 processes.
ps aux --sort=-%cpu | head -11
# ── 8. THERMAL / TEMPS ────────────────────────────────────────
# lm-sensors readings, kernel thermal zones and drive temperatures.
hdr "8. THERMAL STATUS"
if ! command -v sensors &>/dev/null; then
  warn "lm-sensors not installed — installing..."
  if apt-get install -y lm-sensors -qq; then
    sensors-detect --auto &>/dev/null
  else
    err "Could not install lm-sensors"
  fi
fi
sensors 2>/dev/null || info "No sensor data available"

echo ""
echo "--- Thermal zones ---"
for zone in /sys/class/thermal/thermal_zone*; do
  # With no thermal zones the glob stays literal; skip it.
  [[ -e "$zone" ]] || continue
  zone_type=$(cat "$zone/type" 2>/dev/null)
  temp=$(cat "$zone/temp" 2>/dev/null)
  [[ "$temp" =~ ^-?[0-9]+$ ]] || continue
  # The value is divided by 1000 to get °C.
  temp_c=$(( temp / 1000 ))
  # A real if/elif ladder: chaining `&& err || ... && warn || ok` made a
  # reading above 85 print both the CRITICAL and the High line.
  if (( temp_c > 85 )); then
    err "$zone_type: ${temp_c}°C (CRITICAL)"
  elif (( temp_c > 70 )); then
    warn "$zone_type: ${temp_c}°C (High)"
  else
    ok "$zone_type: ${temp_c}°C"
  fi
done

echo ""
echo "--- NVMe / drive temps ---"
mapfile -t temp_disks < <(lsblk -dno NAME,TYPE | awk '$2=="disk"{print "/dev/"$1}')
for disk in "${temp_disks[@]}"; do
  while read -r line; do
    info "$disk — $line"
  done < <(smartctl -A "$disk" 2>/dev/null | grep -i "temperature")
done
# ── 9. NETWORK INTERFACES & NIC HEALTH ───────────────────────
# Link statistics, Energy Efficient Ethernet state, offload settings, the
# e1000e EEE module parameter and per-interface error counters.
hdr "9. NETWORK INTERFACES & NIC HEALTH"
ip -s link show

# Interfaces to probe with ethtool: everything except loopback, bridges, taps
# and the fw* helper devices. `ip link` prints stacked devices as
# "name@parent"; ethtool needs the bare name, so the suffix is stripped.
mapfile -t nic_list < <(
  ip link show \
    | awk -F': ' '/^[0-9]/{print $2}' \
    | sed 's/@.*$//' \
    | grep -v "^lo\|^vmbr\|^tap\|^fwbr\|^fwln\|^fwpr"
)

echo ""
echo "--- EEE status (Energy Efficient Ethernet) ---"
for iface in "${nic_list[@]}"; do
  printf '%s: ' "$iface"
  result=$(ethtool --show-eee "$iface" 2>&1)
  if [[ "$result" == *"EEE status: enabled"* ]]; then
    # The script cannot know what caused a hang, so it reports EEE as a
    # possible cause rather than the cause.
    err "EEE is ENABLED on $iface — possible cause of NIC hangs (disable with: ethtool --set-eee $iface eee off)"
  elif [[ "$result" == *"EEE status: disabled"* ]]; then
    ok "EEE disabled on $iface"
  else
    info "EEE not supported / not applicable on $iface"
  fi
done

echo ""
echo "--- NIC offload settings ---"
for iface in "${nic_list[@]}"; do
  echo "$iface offload features:"
  while read -r l; do
    # Lines ending in "on" are enabled offloads.
    if [[ "$l" == *on ]]; then warn " $l"; else ok " $l"; fi
  done < <(
    ethtool -k "$iface" 2>/dev/null \
      | grep -E "tcp-segmentation|generic-segmentation|generic-receive|scatter-gather"
  )
done

echo ""
echo "--- e1000e driver params ---"
# Read the parameter once (the value used to be printed twice).
if e1000e_eee=$(cat /sys/module/e1000e/parameters/EEE 2>/dev/null); then
  info "e1000e EEE param: $e1000e_eee"
else
  info "e1000e not loaded or param not exposed"
fi

echo ""
echo "--- Network errors ---"
# Read the counters from sysfs: grepping `ip -s link` for "errors|dropped"
# only matched the column header rows, never the numbers.
for net_dir in /sys/class/net/*; do
  [[ "$(cat "$net_dir/operstate" 2>/dev/null)" == "up" ]] || continue
  net_name=${net_dir##*/}
  rx_err=$(cat "$net_dir/statistics/rx_errors" 2>/dev/null)
  tx_err=$(cat "$net_dir/statistics/tx_errors" 2>/dev/null)
  rx_drop=$(cat "$net_dir/statistics/rx_dropped" 2>/dev/null)
  tx_drop=$(cat "$net_dir/statistics/tx_dropped" 2>/dev/null)
  net_msg="$net_name: rx_errors=${rx_err:-0} tx_errors=${tx_err:-0} rx_dropped=${rx_drop:-0} tx_dropped=${tx_drop:-0}"
  if (( ${rx_err:-0} + ${tx_err:-0} > 0 )); then
    warn "$net_msg"
  else
    info "$net_msg"
  fi
done
# ── 10. VM CONFIGURATION AUDIT ────────────────────────────────
# Per-VM review of memory ballooning, CPU type, guest agent and NIC model.
hdr "10. VM CONFIGURATION AUDIT"
mapfile -t audit_vmids < <(qm list 2>/dev/null | awk 'NR>1{print $1}')
for vmid in "${audit_vmids[@]}"; do
  # One `qm config` call per VM; every field below is parsed from this copy.
  vm_cfg=$(qm config "$vmid" 2>/dev/null)

  vname=$(awk '/^name:/{print $2}' <<<"$vm_cfg")
  printf '\n%bVM %s — %s%b\n' "${BOLD}" "$vmid" "$vname" "${NC}"
  vmem=$(awk '/^memory:/{print $2}' <<<"$vm_cfg")
  balloon=$(awk '/^balloon:/{print $2}' <<<"$vm_cfg")
  cores=$(awk '/^cores:/{print $2}' <<<"$vm_cfg")
  cpu=$(awk '/^cpu:/{print $2}' <<<"$vm_cfg")
  net=$(grep "^net" <<<"$vm_cfg")
  onboot=$(awk '/^onboot:/{print $2}' <<<"$vm_cfg")
  agent=$(awk '/^agent:/{print $2}' <<<"$vm_cfg")

  info "RAM: ${vmem}MB | Cores: ${cores} | CPU type: ${cpu:-default}"
  info "Boot on start: ${onboot:-0} | QEMU agent: ${agent:-not set}"

  # Balloon check. Only an explicit `balloon: 0` turns the balloon device
  # off; NOTE(review): an absent key is treated as the Proxmox default
  # (device present, no minimum) — confirm against the qm.conf documentation.
  if [[ "$balloon" == "0" ]]; then
    warn "Memory ballooning DISABLED — fixed allocation (OK if no balloon driver in guest)"
  elif [[ -z "$balloon" ]]; then
    info "Memory ballooning at Proxmox default (balloon device present, no minimum set)"
  elif [[ "$balloon" =~ ^[0-9]+$ ]]; then
    warn "Memory ballooning enabled with min=${balloon}MB — ensure balloon driver is in guest OS"
  fi

  # CPU type check. The value may carry extra properties
  # ("host,flags=+aes") or the long form ("cputype=host"); compare the bare
  # model name only.
  cpu_model=${cpu%%,*}
  cpu_model=${cpu_model#cputype=}
  if [[ -z "$cpu_model" || "$cpu_model" == "kvm64" ]]; then
    warn "CPU type is kvm64 (generic) — consider 'host' for NUC single-node setups for better performance"
  elif [[ "$cpu_model" == "host" ]]; then
    ok "CPU type: host (optimal for single-node)"
  fi

  # QEMU agent. Accept both "1,..." and "enabled=1,..." spellings.
  agent_flag=${agent%%,*}
  agent_flag=${agent_flag#enabled=}
  if [[ -z "$agent_flag" || "$agent_flag" == "0" ]]; then
    warn "QEMU guest agent not enabled — graceful shutdown/freeze-fs won't work"
  else
    ok "QEMU agent enabled"
  fi

  # Network type, one verdict per netN line.
  while read -r nline; do
    [[ -n "$nline" ]] || continue
    if [[ "$nline" == *e1000* ]]; then
      warn "NIC type: e1000 — consider virtio for better performance"
    elif [[ "$nline" == *virtio* ]]; then
      ok "NIC type: virtio (optimal)"
    fi
  done <<<"$net"
done
# ── 11. PROXMOX SERVICES ──────────────────────────────────────
# Report the systemd active-state of the core Proxmox services.
hdr "11. PROXMOX SERVICE STATUS"
for svc in pve-cluster pvedaemon pvestatd pveproxy corosync pve-ha-lrm pve-ha-crm; do
  status=$(systemctl is-active "$svc" 2>/dev/null)
  # systemctl may print nothing at all; never show an empty state.
  status=${status:-unknown}
  if [[ "$status" == "active" ]]; then
    ok "$svc: active"
  elif [[ "$svc" == "corosync" && ! -e /etc/pve/corosync.conf ]]; then
    # NOTE(review): a missing /etc/pve/corosync.conf is taken to mean the node
    # is standalone, where corosync is not expected to run — confirm.
    info "$svc: $status (no cluster configured on this node)"
  else
    warn "$svc: $status"
  fi
done
# ── 12. UPDATES & PACKAGE STATE ───────────────────────────────
# Refresh the package index, list pending upgrades, print component versions.
hdr "12. UPDATES & PACKAGE STATE"
apt-get update -qq 2>/dev/null

# Capture the upgradable entries once. The previous
# `grep -c upgradable || echo 0` produced "0<newline>0" when nothing was
# pending (grep -c prints 0 AND exits non-zero), which broke the integer test.
upgrade_list=$(apt list --upgradable 2>/dev/null | grep upgradable)
if [[ -n "$upgrade_list" ]]; then
  upgrades=$(grep -c . <<<"$upgrade_list")
else
  upgrades=0
fi

if (( upgrades > 0 )); then
  warn "$upgrades package(s) available to upgrade"
  # Cap the listing at 20 entries.
  head -20 <<<"$upgrade_list"
else
  ok "System is up to date"
fi

echo ""
echo "--- Proxmox version detail ---"
pveversion -v 2>/dev/null
# ── 13. CRON & BACKUP JOBS ────────────────────────────────────
# Dump the Proxmox job definitions, the invoking user's crontab, and the
# non-comment, non-empty lines of everything in /etc/cron.d.
hdr "13. BACKUP & MAINTENANCE JOBS"
printf '%s\n' "--- Scheduled backups ---"
if ! cat /etc/pve/jobs.cfg 2>/dev/null; then
  info "No jobs.cfg found"
fi
printf '\n'
printf '%s\n' "--- Cron jobs ---"
if ! crontab -l 2>/dev/null; then
  info "No root crontab"
fi
# File names first, then the concatenated contents.
if ls /etc/cron.d/ 2>/dev/null; then
  cat /etc/cron.d/* 2>/dev/null | grep -v -e "^#" -e "^$"
fi
# ── 14. OPEN FILES & SYSTEM LIMITS ───────────────────────────
# File-descriptor usage and limits, inotify watch limit, and a handful of
# VM/network sysctls.
hdr "14. SYSTEM LIMITS & OPEN FILES"
# Fields 1 and 3 of file-nr, printed as "first/third".
printf 'Open file descriptors: %s\n' "$(awk '{print $1"/"$3}' /proc/sys/fs/file-nr)"
printf 'Max file descriptors : %s\n' "$(cat /proc/sys/fs/file-max)"
printf 'Inotify watches : %s\n' "$(cat /proc/sys/fs/inotify/max_user_watches)"
printf '\n'
printf '%s\n' "--- Kernel parameters relevant to virtualization ---"
sysctl_keys=(
  vm.swappiness
  vm.dirty_ratio
  vm.dirty_background_ratio
  net.core.rmem_max
  net.core.wmem_max
  net.ipv4.tcp_rmem
  net.ipv4.tcp_wmem
)
sysctl "${sysctl_keys[@]}" 2>/dev/null
# ── 15. RECENT CRASH / REBOOT HISTORY ────────────────────────
# Recent reboots plus OOM and panic traces from the previous boot's journal.
hdr "15. REBOOT & CRASH HISTORY"
echo "--- Last 10 reboots ---"
last reboot | head -10

echo ""
# The query only covers the previous boot (-b -1), so the label says so.
echo "--- OOM events (previous boot) ---"
# Capture first, then test: with `... | tail -20 || info ...` the fallback
# could never run because tail always exits successfully.
oom_hits=$(
  journalctl --no-pager -b -1 2>/dev/null \
    | grep -iE "oom|out of memory|killed process" \
    | tail -20
)
if [[ -n "$oom_hits" ]]; then
  printf '%s\n' "$oom_hits"
else
  info "No OOM events in previous boot"
fi

echo ""
echo "--- Kernel panics / tracebacks ---"
panic_hits=$(
  journalctl -k -b -1 --no-pager 2>/dev/null \
    | grep -iE "panic|BUG:|kernel BUG|general protection|segfault" \
    | tail -20
)
if [[ -n "$panic_hits" ]]; then
  printf '%s\n' "$panic_hits"
else
  info "No panics in previous boot"
fi
# ── 16. SUMMARY ───────────────────────────────────────────────
# Short recap of the most important findings. Reads SWAP_TOTAL, ALLOC and
# RAM_TOTAL, which the memory section sets earlier in the script.
hdr "16. QUICK SUMMARY — PASTE THIS TO CLAUDE"
echo "Run: grep -E '✗|⚠' audit-output.txt to see all warnings and errors"
echo ""
echo "Critical items to review:"

hang_count=$(journalctl -k -b -1 --no-pager 2>/dev/null | grep -c "Hardware Unit Hang")
echo " e1000e hardware hangs in last boot: ${hang_count:-0}"

# Report "unknown" instead of a green tick when a value was never computed.
if [[ ! "$SWAP_TOTAL" =~ ^[0-9]+$ ]]; then
  echo " ⚠ Swap status unknown"
elif (( SWAP_TOTAL == 0 )); then
  echo " ✗ NO SWAP"
else
  echo " ✓ Swap present"
fi

if [[ ! "$ALLOC" =~ ^[0-9]+$ || ! "$RAM_TOTAL" =~ ^[0-9]+$ ]]; then
  echo " ⚠ RAM allocation unknown"
elif (( ALLOC > RAM_TOTAL )); then
  echo " ✗ RAM OVERCOMMITTED"
else
  echo " ✓ RAM allocation OK"
fi

# Besides DEGRADED/FAULTED, also catch the other non-healthy states that
# zpool status can print.
if zpool status 2>/dev/null | grep -qE "DEGRADED|FAULTED|UNAVAIL|SUSPENDED|REMOVED"; then
  echo " ✗ ZFS POOL DEGRADED"
else
  echo " ✓ ZFS healthy (or not in use)"
fi

echo ""
printf '%b%s%b\n' "${GRN}${BOLD}" "Audit complete. Share the full output of audit-output.txt for analysis." "${NC}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment