Skip to content

Instantly share code, notes, and snippets.

@apconole
Created April 23, 2024 15:06
Show Gist options
  • Save apconole/c8e84af55f6962dee29abf6bb4debaee to your computer and use it in GitHub Desktop.
Save apconole/c8e84af55f6962dee29abf6bb4debaee to your computer and use it in GitHub Desktop.
#!/bin/bash
# Collects Open vSwitch diagnostics from cluster nodes.
#
# Environment:
#   OUT - optional output directory; defaults to
#         "${BASE_COLLECTION_PATH}/openvswitch".

BASE_COLLECTION_PATH="must-gather"
OVS_DUMP_PATH=${OUT:-"${BASE_COLLECTION_PATH}/openvswitch"}

# Create the directory that the per-node dumps are written under. The previous
# code created "${NETWORK_LOG_PATH}", which is never defined in this script, so
# the real output directory was never created.
mkdir -p "${OVS_DUMP_PATH}"

# Host paths (relative to the node's root filesystem) that are copied off
# every node in addition to the command output dump.
GLOBAL_OVS_FILE_COPY=(
  "/run/openvswitch/ovsdb-server.pid"
  "/run/openvswitch/ovs-vswitchd.pid"
  "/run/openvswitch/ovs-monitor-ipsec.pid"
  "/var/log/openvswitch/*"
  "/usr/local/var/log/openvswitch/*"
)
#######################################
# Run the OVS diagnostic command list on a node through `oc debug` and store
# everything the session prints in "${NODE_DUMP_PATH}/debug_node.log".
#
# The command list is fed to the debug session on stdin. It ends with
# `sleep 300`, so this function blocks until that sleep finishes or the
# session is terminated from outside.
#
# Globals:
#   NODE_DUMP_PATH (read) - directory that receives debug_node.log
# Arguments:
#   $1 - name of the node to debug
# Returns:
#   exit status of `oc debug`
#######################################
function debug_node_gather() {
  # The heredoc delimiter is quoted so the command list is sent verbatim.
  # NOTE: no PID is recorded here; nothing is started in the background by
  # this function, so "$!" would only name an unrelated, older job.
  oc debug --to-namespace="default" node/"$1" > "${NODE_DUMP_PATH}"/debug_node.log <<'EOF'
echo "chroot to host"
chroot /host
echo "=== dbclient list ==="
ovsdb-client -f list dump
echo "=== ovs-vsctl -t 5 show ==="
ovs-vsctl -t 5 show
echo "List the contents of important runtime directories"
ls -laZ /run/openvswitch
ls -laZ /dev/hugepages/
ls -laZ /dev/vfio
ls -laZ /var/lib/vhost_sockets
echo "=== Capture a list of all bond devices ==="
ovs-appctl bond/list
echo "=== Capture more details from bond devices ==="
ovs-appctl bond/show
echo "=== Capture LACP details ==="
ovs-appctl lacp/show
ovs-appctl lacp/show-stats
echo "=== Capture coverage stats ==="
ovs-appctl coverage/show
echo "=== Capture cached routes ==="
ovs-appctl ovs/route/show
echo "=== Capture tnl arp table ==="
ovs-appctl tnl/arp/show
echo "=== Capture a list of listening ports ==="
ovs-appctl tnl/ports/show -v
echo "=== Capture upcall information ==="
ovs-appctl upcall/show
echo "=== Capture DPDK and other parameters ==="
ovs-vsctl -t 5 get Open_vSwitch . other_config
echo "=== Capture OVS list ==="
ovs-vsctl -t 5 list Open_vSwitch
echo "=== Capture OVS interface list ==="
ovs-vsctl -t 5 list interface
echo "=== Capture OVS detailed information from all the bridges ==="
ovs-vsctl -t 5 list bridge
echo "=== Capture DPDK queue to pmd mapping ==="
ovs-appctl dpif-netdev/pmd-rxq-show
echo "=== Capture DPDK pmd stats ==="
ovs-appctl dpif-netdev/pmd-stats-show
echo "=== Capture DPDK pmd performance counters ==="
ovs-appctl dpif-netdev/pmd-perf-show
echo "=== Capture ofproto tunnel configs ==="
ovs-appctl ofproto/list-tunnels
echo "=== Capture ipsec tunnel information ==="
ovs-appctl -t ovs-monitor-ipsec tunnels/show
ovs-appctl -t ovs-monitor-ipsec xfrm/state
ovs-appctl -t ovs-monitor-ipsec xfrm/policies
echo "=== capture dps ==="
ovs-appctl dpctl/dump-dps
echo "=== capture dp-info ==="
echo "-- dpctl/show ovs-system --"
ovs-appctl dpctl/show -s ovs-system
echo "-- ovs-appctl dpctl/dump-flows --"
ovs-appctl dpctl/dump-flows -m ovs-system
echo "-- ovs-appctl dpctl/dump-conntrack --"
ovs-appctl dpctl/dump-conntrack -m ovs-system
sleep 300
EOF
}
#######################################
# Collect the OVS command dump and the OVS runtime/log files from one node.
#
# A debug session is started in the background (it stays up while its command
# list runs and sleeps), the files listed in GLOBAL_OVS_FILE_COPY are copied
# out of the debug pod while it is alive, and the function then waits for the
# session to end so the command dump is never cut short.
#
# Globals:
#   OVS_DUMP_PATH        (read)    - root output directory
#   GLOBAL_OVS_FILE_COPY (read)    - host paths to copy from the node
#   NODE_DUMP_PATH       (written) - per-node output dir, read by
#                                    debug_node_gather
# Arguments:
#   $1 - node name
# Returns:
#   exit status of the debug session (debug_node_gather)
#######################################
function get_ovs_data_off_node() {
  local node="$1"
  local debugPod=""
  local gather_pid=""
  local gather_rc=0
  local file=""
  local src=""
  local dest=""

  NODE_DUMP_PATH="${OVS_DUMP_PATH}/${node}"
  # -p: the parent may not exist yet and a re-run must not fail.
  mkdir -p "${NODE_DUMP_PATH}"

  #Get debug pod's name
  # NOTE(review): assumes the name printed here matches the pod that the real
  # debug session creates below - confirm for the oc version in use.
  debugPod=$(oc debug --to-namespace="default" node/"${node}" -o jsonpath='{.metadata.name}')

  # Start the debug session in the background so the pod is still up while
  # files are copied. debug_node_gather expects the node name and writes to
  # "${NODE_DUMP_PATH}/debug_node.log" itself.
  debug_node_gather "${node}" &
  gather_pid=$!

  if [ -n "${debugPod}" ]; then
    #Mimic a normal oc call, i.e pause between two successive calls to allow pod to register
    sleep 2
    if oc wait -n "default" --for=condition=Ready pod/"${debugPod}" --timeout=30s; then
      # copy relevant files; iterate over every array element, quoted so the
      # patterns are not expanded against the local filesystem.
      for file in "${GLOBAL_OVS_FILE_COPY[@]}"; do
        # `oc cp` is not expected to expand wildcards, so an entry of the
        # form "dir/*" is copied as the directory "dir".
        src="${file%/\*}"
        dest="${NODE_DUMP_PATH}${src}"
        mkdir -p "$(dirname "${dest}")"
        # Best effort: not every path exists on every node.
        oc cp --loglevel 1 -n "default" "${debugPod}:/host${src}" "${dest}" > /dev/null 2>&1
      done
    else
      echo "WARNING: ovs: debug pod ${debugPod} for node ${node} not ready; skipping file copy" >&2
    fi
  fi

  # Let the command dump finish before the pod is removed.
  wait "${gather_pid}" || gather_rc=$?

  if [ -z "${debugPod}" ]; then
    # Append so that any output the debug session did produce is kept.
    echo "Debug pod for node ""${node}"" never activated" >> "${NODE_DUMP_PATH}"/debug_node.log
  else
    #clean up debug pod after we are done using them
    oc delete pod "${debugPod}" -n "default" --ignore-not-found
  fi

  return "${gather_rc}"
}
#######################################
# Start one background collection job per node.
#
# Globals:
#   NODES (read)    - whitespace-separated list of node names
#   PIDS  (written) - PID of every background job is appended
# Arguments:
#   none
#######################################
function gather_ovs_data() {
  local node
  # NODES is a whitespace-separated string, so word splitting is intended.
  for node in ${NODES}; do
    get_ovs_data_off_node "${node}" &
    # Record the PID here, in the parent shell; an assignment made inside the
    # backgrounded function would be lost with its subshell.
    PIDS+=("$!")
  done
}
# Entry point: collect from the nodes named on the command line, or from every
# Linux node in the cluster when no node is given.
if [ $# -eq 0 ]; then
  echo "WARNING: ovs: Collecting ovs data from ALL nodes in your cluster."
  echo "WARNING: ovs: This can take a bit of time."
fi

# PIDs of the background per-node collection jobs.
PIDS=()
NODES="${*:-$(oc get nodes -o jsonpath='{.items[?(@.status.nodeInfo.operatingSystem=="linux")].metadata.name}')}"

gather_ovs_data

# The messages previously spoke of "core dump collection", which this script
# does not perform.
echo "INFO: Waiting for node OVS data collection to complete ..."
# With an empty PIDS array this degrades to a plain `wait`, i.e. wait for all
# background jobs.
wait "${PIDS[@]}"
echo "INFO: Node OVS data collection complete."

# Flush collected files to disk.
sync
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment