Created
April 12, 2024 01:46
-
-
Save huaigu/9b26506321fa7871d82d2a8df4e8e220 to your computer and use it in GitHub Desktop.
Kuzco Maintain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Author: https://raw.githubusercontent.com/DreamGallery/Kuzco-maintenance/main/kz-maint.sh
#
# Preamble: make sure the log archive directory exists and load settings
# (notably DISCORD_WEBHOOK_URL) from a .env file in the current directory.

# Directory check: -p makes this a no-op when kzlog already exists.
mkdir -p kzlog

#######################################
# Load KEY=VALUE settings from an env file and export them.
# The file is sourced as shell code, so comment lines and quoted values
# (including values with spaces) are handled by the shell itself instead
# of being word-split. Only point this at a file you trust.
# Arguments: $1 - path to the env file (default: .env)
# Returns:   0 when the file is missing; otherwise the status of `set +a`
#######################################
load_env_file() {
  local env_file=${1:-.env}

  [[ -f "$env_file" ]] || return 0

  # `source` looks up slash-less names in PATH first; anchor them to the
  # current directory so the local file is the one that gets read.
  [[ "$env_file" == */* ]] || env_file="./$env_file"

  # allexport: every variable assigned while sourcing is exported.
  set -a
  # shellcheck disable=SC1090
  source "$env_file"
  set +a
}

# Load Discord webhook URL from .env file
load_env_file .env
#######################################
# Post a text message to Discord through the configured webhook.
# Globals:   DISCORD_WEBHOOK_URL (read)
# Arguments: $1 - message text
# Outputs:   a timestamped notice on stdout when no webhook URL is set;
#            otherwise whatever curl prints
# Returns:   0 when the notification is skipped; otherwise curl's status
#######################################
send_discord_notification() {
  if [[ -z "${DISCORD_WEBHOOK_URL:-}" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") Discord webhook URL not set. Skipping notification."
    return 0
  fi

  local message=$1

  # Escape the characters that would otherwise break the JSON string
  # literal. Backslash must go first so later escapes are not doubled.
  message=${message//\\/\\\\}
  message=${message//\"/\\\"}
  message=${message//$'\n'/\\n}
  message=${message//$'\r'/\\r}
  message=${message//$'\t'/\\t}

  # --max-time keeps a stalled webhook from blocking the caller forever.
  curl --max-time 10 -H "Content-Type: application/json" -X POST \
    -d "{\"content\":\"$message\"}" "$DISCORD_WEBHOOK_URL"
}
# ---------------------------------------------------------------------------
# Watchdog main loop. Each cycle:
#   1. kills every process whose command line mentions "kuzco" and waits
#      until nothing is bound to port 14444 any more,
#   2. starts `kuzco worker start` in the background, appending its output
#      to kz-worker.log,
#   3. polls kz-worker.log about once per second and ends the cycle on a
#      TimeoutError, on a CUDA error, or after 1800 polls.
# ---------------------------------------------------------------------------

# Print the current local time in the format used by every log line.
timestamp() {
  date "+%Y-%m-%d %H:%M:%S"
}

# Copy kz-worker.log into kzlog/ under a timestamped name, then empty it.
# The log is truncated rather than removed because the worker keeps
# appending to the same file.
archive_worker_log() {
  cp kz-worker.log "kzlog/kz-worker-$(date +%Y%m%d%H%M%S).log"
  : > kz-worker.log
}

while true; do
  # --- Phase 1: stop leftover workers and wait for the port ---------------
  while true; do
    # pgrep -f matches against the full command line only (not the user
    # column, as `ps -ef | grep` did) and never lists itself. Also skip this
    # script's own PID in case its path or arguments contain "kuzco".
    kuzco_pids=()
    while IFS= read -r pid; do
      [[ "$pid" == "$$" ]] || kuzco_pids+=("$pid")
    done < <(pgrep -f kuzco)

    if (( ${#kuzco_pids[@]} > 0 )); then
      echo "$(timestamp) Kuzco task pids: ${kuzco_pids[*]}"
      for pid in "${kuzco_pids[@]}"; do
        sudo kill -9 "$pid"
        echo "$(timestamp) Killing process with PID: $pid"
      done
    fi

    # lsof -t prints bare PIDs with no header; empty output means the port
    # is free and the worker can be started.
    if [[ -n "$(sudo lsof -t -i :14444)" ]]; then
      echo "$(timestamp) Waiting for port 14444 to be released"
      sleep 10
    else
      break
    fi
  done

  # --- Phase 2: start the worker -------------------------------------------
  echo "$(timestamp) Starting..."
  sudo kuzco worker start >> kz-worker.log 2>&1 &
  send_discord_notification "$(timestamp) Starting..."

  # --- Phase 3: watch the log until a restart is needed --------------------
  runtime=0
  while true; do
    if grep -qF "TimeoutError: The operation timed out" kz-worker.log; then
      archive_worker_log
      error_message="$(timestamp) TimeoutError, preparing to restart..."
    elif grep -qF "CUDA error: " kz-worker.log; then
      # Wait before archiving so output written right after the error
      # still ends up in the archived copy.
      sleep 10
      archive_worker_log
      error_message="$(timestamp) CUDA error, preparing to restart..."
    else
      sleep 1
      runtime=$((runtime + 1))
      echo "$(timestamp) Running for ${runtime}s"
      # Keep polling until the scheduled-restart limit is reached.
      (( runtime >= 1800 )) || continue
      error_message="$(timestamp) Restarting scheduledly..."
    fi

    send_discord_notification "$error_message"
    echo "$error_message"
    break
  done
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment