Skip to content

Instantly share code, notes, and snippets.

@roylez
Last active May 31, 2022 05:30
Show Gist options
  • Save roylez/a2f61802206d3ab7905f81254651d428 to your computer and use it in GitHub Desktop.
Save roylez/a2f61802206d3ab7905f81254651d428 to your computer and use it in GitHub Desktop.
#!/usr/bin/gawk -f
#
# Usage: ./rabbit-tell.awk <rabbitmqctl_report>
#
# Common RabbitMQ issues that this script can be used to identify
#
# - Partitioned cluster
#
# - High binary/queue memory usage. There are mainly two causes of this:
#
# + queue depth build up (some clients are disconnected)
# + rabbit does not do periodic garbage collection by default
#
# - High fd/socket usage. This is usually caused by excessive connections that can be analyzed
# together with connection breakdown
#
# - Unbalanced queue master count on nodes. Usually this means "queue_master_locator" (only in 3.6
# and newer) should be tuned.
#
# Format x as a string with exactly two digits after the decimal point.
function round(x)
{
    return sprintf("%.2f", x)
}
# Print one row of the node table: a 15-column left-aligned label followed by one
# 23-column right-aligned cell per index of `keys`, with the cell text taken from `array`.
#   label - row caption
#   keys  - array whose indices select which cells are printed
#   array - cell values, indexed like `keys`
#   color - optional ANSI colour code; when set, the row is wrapped in
#           "\033[<color>;1m" ... "\033[m"
#   i     - local loop variable (awk idiom for locals); callers do not pass it,
#           which keeps the loop from overwriting a global `i`
function format_status_line(label, keys, array, color,    i)
{
    if (color) { printf "\033[%s;1m", color }
    printf "%-15s", label
    for (i in keys) { printf "%23s", array[i] }
    if (color) { printf "%s", "\033[m" }
    print ""
}
# Render a byte count with a binary unit suffix (B, KB, MB, GB, TB, PB), choosing the
# largest unit for which the scaled value is at least 1, e.g. 1024 -> "1.00KB".
#   x - byte count; empty or non-numeric input is treated as 0
# Returns the scaled value formatted by round() immediately followed by the unit.
# Values below 1 (including 0, empty and negative input) are reported in bytes rather
# than searching for ever smaller units, which previously never terminated normally.
# units, n, i and y are locals (awk idiom); callers do not pass them.
function humanize(x,    units, n, i, y)
{
    n = split("B KB MB GB TB PB", units)
    x += 0
    # units[i] corresponds to a divisor of 2^(10*(i-1)); walk from PB down to KB
    for (i = n; i > 1; i--) {
        y = x / (2 ^ (10 * (i - 1)))
        if (y >= 1) { return round(y) units[i] }
    }
    return round(x) units[1]
}
# Wrap s in the escape sequence "\033[<color>m" and a trailing reset "\033[m".
function colorize(s, color)
{
    return sprintf("%s", "\033[" color "m" s "\033[m")
}
# Print s upper-cased as a bold, underlined heading with a blank line before and after.
function print_title(s)
{
    printf "\n\033[4;1m%s\033[m\n\n", toupper(s)
}
# Every record: strip ANSI colour/erase escape sequences (seen in 3.8 output)
# before any other rule looks at the line.
{ gsub(/\x1B\[[0-9;]*[mK]/, "") }

# "Status of node ..." banner: opens the node section, registers the node and
# decides which report format the rest of the input is parsed as.
$0 ~ /^Status of node/ {
    section = "node"
    gsub(/'/, "")               # drop the quotes around the node name
    node = substr($4, 8)        # field 4 minus its first 7 characters
    nodes[node] = node
    # a banner ending in "..." selects the 3.8 rules, anything else the 3.6 rules
    ver = ($0 ~ /\.\.\.$/) ? 38 : 36
}
# 36 format parser {{{
# A blank line ends whatever section is currently being parsed.
ver==36 && /^$/ { section=0 }
# Node section: take the x.y.z version string from the line mentioning rabbit,"RabbitMQ",
ver==36 && section=="node" && /rabbit,"RabbitMQ",/ {
match($0, /,"([0-9]+\.[0-9]+\.[0-9]+)/, res)
version[node]=res[1]
}
# Node section: memory figures, uptime and process usage for the current node.
# res[1] is the first run of digits that directly follows a comma on the line.
ver==36 && section=="node" && /(total|binary|vm_memory_limit|queue_procs|queue_slave_procs|processes|uptime),/ {
match($0, /,([0-9]+)/, res)
if ( /total/ ) mem_total[node]=res[1]
if ( /binary/ ) mem_binary[node]=res[1]
if ( /vm_memory_limit/ ) mem_limit[node]=res[1]
# queue_procs and queue_slave_procs are accumulated into a single per-node figure
if ( /queue_procs|queue_slave_procs/ ) mem_queue[node]+=res[1]
if ( /uptime/ ) uptime[node]=res[1]
if ( /processes/ ) {
# this second match overwrites res, so it has to stay after the checks above;
# only the number following "used," is kept
match($0, /used,([0-9]+)/, res)
process_used[node]=res[1]
}
}
# Node section: file descriptor and socket limits/usage for the current node.
ver==36 && section=="node" && /(total_limit|total_used|sockets_limit|sockets_used),/ {
match($0, /,([0-9]+)/, res)
if ( /total_limit/ ) fd_total[node]=res[1]
if ( /total_used/ ) fd_used[node]=res[1]
if ( /sockets_limit/ ) sockets_limit[node]=res[1]
if ( /sockets_used/ ) sockets_used[node]=res[1]
}
# 3.6: a "Cluster status of node ..." banner opens the cluster section and points
# `node` at the name found in field 5 (quotes removed, first 7 characters dropped).
ver == 36 && $0 ~ /^Cluster status of node/ {
    section = "cluster"
    gsub(/'/, "")
    node = substr($5, 8)
}

# 3.6: an empty partitions list ("partitions,[]") is recorded as "NO", anything else as "YES".
ver == 36 && section == "cluster" && $0 ~ /partitions,/ {
    if ($0 ~ /partitions,\[\]/) {
        partitioned[node] = "NO"
    } else {
        partitioned[node] = "YES"
    }
}
# "Connections:" banner: switch to tab-separated fields and open the connection section.
# In awk a new FS applies to records read after this one.
ver==36 && /^Connections:/ {
FS="\t"
section="connection"
}
# Connection rows (lines containing "rabbit"): field 17 is used as the user key and
# field 22 supplies the client name (text between connection_name"," and the first ":").
# Counts connections per user and per client, and remembers which user each client used.
# NOTE(review): field positions presumably mirror the 3.6 report's column order — confirm.
ver==36 && section=="connection" && /rabbit/ {
match($22, /connection_name","([^:]*):/, res)
c=res[1]
user[$17]++
uc[c]=$17
client[c]++
}
# "Channels:" banner: leaves the connection section; no rule here reads channel rows.
ver==36 && /^Channels:/ {
section="channel"
}
# "Queues on <vhost>:" banner: remember the vhost and open the queue section.
ver==36 && /^Queues on/ {
match($0, /Queues on (.+):/, res)
vhost=res[1]
section="queue"
}
# Queue rows containing a dot: count one queue for the name found in field 1
# between "@" and the next ".".
ver==36 && section=="queue" && /\./ {
match($1, /@([^.]+)\./, res)
queue_master[res[1]]++
}
# Queue rows whose field 10 is non-zero and contains a digit: record the vhost,
# field 10 (reported as messages) and field 15 (reported as consumers) under the
# queue name in field 2.
ver==36 && section=="queue" && $10 && $10 ~ /[0-9]+/ {
queue_vhost[$2]=vhost
queue_messages[$2]=$10
queue_consumers[$2]=$15
}
# "Exchanges on ..." banner: leaves the queue section so the queue rules stop matching.
ver==36 && /^Exchanges on/ {
section="exchange"
}
# }}}
# 3.8 formattings {{{
# Any line ending in "..." closes the current section. This rule must stay ahead of
# the banner rules below, which re-open a section on the same record.
ver==38 && /\.\.\.$/ { section=0 }
# "Listing connections" banner: switch to tab-separated fields, open the connection
# section and skip the remaining rules for this record.
ver==38 && /^Listing connections/ {
FS="\t"
section="connection"
next
}
# Connection rows (records with at least one field): field 19 is used as the user key
# and field 24 supplies the client name (text between connection_name"," and the first ":").
# NOTE(review): field positions presumably mirror the 3.8 listing's column order — confirm.
ver==38 && section=="connection" && NF {
# skip the column header row, whose first field is "pid"
if ( $1 == "pid" ) next
match($24, /connection_name","([^:]*):/, res)
c=res[1]
user[$19]++
uc[c]=$19
client[c]++
# rows whose field 19 is empty (or zero) are echoed verbatim to the output
# NOTE(review): looks like leftover debugging output — confirm it is intended
if (! $19 ) { print }
}
# "Listing queues for vhost <vhost> ..." banner: remember the vhost, open the queue
# section and skip the remaining rules for this record.
ver==38 && /^Listing queues for/ {
match($0, /^Listing queues for vhost (.+) /, res)
vhost=res[1]
section="queue"
next
}
# Queue rows containing a dot: count one queue for the name found in field 6
# between "@" and the next ".".
ver==38 && section=="queue" && /\./ {
# skip the column header row, whose first field is "name"
if ( $1 == "name" ) next
match($6, /@([^.]+)\./, res)
queue_master[res[1]]++
}
# Queue rows whose field 13 is non-zero and contains a digit: record the vhost,
# field 13 (reported as messages) and field 26 (reported as consumers) under the
# queue name in field 1.
ver==38 && section=="queue" && $13 && $13 ~ /[0-9]+/ {
queue_vhost[$1]=vhost
queue_messages[$1]=$13
queue_consumers[$1]=$26
}
# }}}
# outputs {{{
# Share of `part` in `whole` as a two-decimal percentage string (formatted by round()).
# A missing, empty or zero `whole` yields "0.00" instead of aborting the whole report
# with a fatal division-by-zero error.
function percent_of(part, whole)
{
    if (whole + 0 == 0) { return round(0) }
    return round(part / whole * 100)
}

# Final report:
#   1. per-node table (only for 3.6-format input, the only format whose node
#      statistics are collected by the parser rules),
#   2. connection counts grouped by user and client,
#   3. queues that currently hold messages,
#   4. number of queue masters per node.
END {
    if (ver == 36) {
        print_title("cluster nodes")
        format_status_line("node", nodes, nodes)
        format_status_line("version", nodes, version)
        format_status_line("uptime", nodes, uptime)

        for (n in nodes) { m_limit[n] = humanize(mem_limit[n]) }
        format_status_line("mem_limit", nodes, m_limit)

        # Each row below is printed in red (colour 31) as soon as any node
        # exceeds that row's threshold.
        for (n in nodes) {
            percent = percent_of(mem_total[n], mem_limit[n])
            if (int(percent) > 80) { m_total_color = 31 }
            m_total[n] = "(" percent "%) " humanize(mem_total[n])
        }
        format_status_line("mem_total", nodes, m_total, m_total_color)

        for (n in nodes) {
            percent = percent_of(mem_binary[n], mem_total[n])
            if (int(percent) > 50) { m_binary_color = 31 }
            m_binary[n] = "(" percent "%) " humanize(mem_binary[n])
        }
        format_status_line("mem_binary", nodes, m_binary, m_binary_color)

        # queue memory gets its own array instead of overwriting m_binary
        for (n in nodes) {
            percent = percent_of(mem_queue[n], mem_total[n])
            if (int(percent) > 50) { m_queue_color = 31 }
            m_queue[n] = "(" percent "%) " humanize(mem_queue[n])
        }
        format_status_line("mem_queue", nodes, m_queue, m_queue_color)

        for (n in nodes) {
            percent = percent_of(fd_used[n], fd_total[n])
            if (int(percent) > 50) { fd_color = 31 }
            fd[n] = "(" percent "%) " fd_used[n]
        }
        format_status_line("fd", nodes, fd, fd_color)

        for (n in nodes) {
            percent = percent_of(sockets_used[n], sockets_limit[n])
            if (int(percent) > 50) { sockets_color = 31 }
            sockets[n] = "(" percent "%) " sockets_used[n]
        }
        format_status_line("sockets", nodes, sockets, sockets_color)

        # NOTE(review): usage is measured against a fixed limit of 1048576; the
        # limit printed in the report itself is not parsed — confirm this is intended.
        for (n in nodes) {
            percent = percent_of(process_used[n], 1048576)
            if (int(percent) > 50) { process_color = 31 }
            process[n] = "(" percent "%) " process_used[n]
        }
        format_status_line("process_used", nodes, process, process_color)

        for (n in nodes) {
            if (partitioned[n] == "YES") { partition_color = 31 }
        }
        format_status_line("partitioned", nodes, partitioned, partition_color)
    }

    print_title("connections breakdown by user/client")
    for (u in user) {
        print u, user[u] ": "
        # list every client that connected as this user
        for (c in client) {
            if (uc[c] == u) { print "- " c, client[c] }
        }
    }

    print_title("queues with messages")
    if (length(queue_messages)) {
        printf "%-15s%45s%10s%10s\n", "vhost", "queue", "messages", "consumers"
        for (q in queue_messages) {
            printf "%-15s%45s%10s%10s\n", queue_vhost[q], q, queue_messages[q], queue_consumers[q]
        }
    }

    print_title("queue master count by node")
    for (n in queue_master) { print n ": \t" queue_master[n] }
}
# }}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment