Last active
June 19, 2023 09:40
-
-
Save andy108369/f211bf6c06f2a6e3635b20bdfb9f0fca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Filename: clean-stale-akash-resources.sh
# Author: andrey.arapov@nixaid.com
# Version: 1.3 - 19 June 2023
# Source: https://gist.github.com/andy108369/f211bf6c06f2a6e3635b20bdfb9f0fca
# Ref. https://docs.akash.network/providers/build-a-cloud-provider/akash-provider-troubleshooting/dangling-deployments
#
# Purpose: report stale Akash resources on a provider's K8s cluster. Every
# clean-up command is only printed with `echo`, never executed by this script.
#
# Make sure you have kubectl, akash and provider-services binaries.
# jq is required as well (every section below pipes JSON through it).
#
# akash https://github.com/ovrclk/akash/releases/latest
# provider-services https://github.com/ovrclk/provider-services/releases/latest

# RPC endpoint exported for the provider-services queries below.
# Uncomment one of the alternatives to use a different node.
export AKASH_NODE=https://rpc.akash.forbole.com:443
#export AKASH_NODE=https://akash-rpc.polkachu.com:443
#export AKASH_NODE="http://akash-node-1.akash-services.svc.cluster.local:26657"
#export KUBECONFIG=/etc/kubernetes/admin.conf
#export AKASH_NODE="http://$(kubectl -n akash-services get ep akash-node-1 -o jsonpath='{.subsets[0].addresses[0].ip}'):26657"
## 1) delete orphaned deployments
####
# the ones which have lease closed on the blockchain but remain active in the K8s cluster for some reason.
#
# For every namespace carrying Akash lease labels, the lease state is queried
# on chain; for leases reported as "closed" the matching delete commands are
# printed (not executed).
md_lid="akash.network/lease.id"
kubectl get ns -l akash.network,akash.network/lease.id.provider -o jsonpath='{.items[*].metadata.labels}' |
  jq --arg md_lid "$md_lid" -r '[."akash.network/namespace", .[$md_lid+".owner"], .[$md_lid+".dseq"], .[$md_lid+".gseq"], .[$md_lid+".oseq"], .[$md_lid+".provider"]] | @tsv' |
  while read -r ns owner dseq gseq oseq prov; do
    # stdin is redirected so the query cannot consume the loop's input.
    # stderr is dropped on purpose: a failed query leaves $state empty and the
    # entry is simply skipped.
    state=$(provider-services query market lease get \
      --owner "$owner" --dseq "$dseq" --gseq "$gseq" --oseq "$oseq" --provider "$prov" \
      -o json </dev/null 2>/dev/null | jq -r '.lease.state')
    if [[ "$state" == "closed" ]]; then
      echo kubectl delete ns "$ns" --wait=false
      echo kubectl -n lease delete providerhosts --selector="$md_lid.owner=$owner,$md_lid.dseq=$dseq,$md_lid.gseq=$gseq,$md_lid.oseq=$oseq" --wait=false
    fi
  done
## 2) delete stale manifests
####
# stale manifests are bad as when `akash-provider` starts, it reads them and starts to monitor their leases, attempting to withdraw from them, causing the following errors:
# D[2022-09-15|18:11:29.422] lease is out of fund. sending withdraw module=provider-service cmp=balance-checker lease=akash1ny9darqd92sykjeapnn55w3vgg7vtdw3v3tvp4/7192920/1/1/akash1q7spv2cw06yszgfp4f9ed59lkka6ytn8g4tkjf
# E[2022-09-15|18:11:29.439] failed to do lease withdrawal module=provider-service cmp=balance-checker err="rpc error: code = InvalidArgument desc = failed to execute message; message index: 0: payment closed: invalid request" LeaseID=akash1ny9darqd92sykjeapnn55w3vgg7vtdw3v3tvp4/7192920/1/1/akash1q7spv2cw06yszgfp4f9ed59lkka6ytn8g4tkjf
#
# For every manifest in the `lease` namespace the lease state is queried on
# chain; for leases reported as "closed" the delete commands are printed (not
# executed).
md_lid="akash.network/lease.id"
kubectl -n lease get manifest -l akash.network,akash.network/lease.id.provider -o jsonpath='{.items[*].metadata.labels}' |
  jq --arg md_lid "$md_lid" -r '[."akash.network/namespace", .[$md_lid+".owner"], .[$md_lid+".dseq"], .[$md_lid+".gseq"], .[$md_lid+".oseq"], .[$md_lid+".provider"]] | @tsv' |
  while read -r ns owner dseq gseq oseq prov; do
    # stdin is redirected so the query cannot consume the loop's input.
    # stderr is dropped on purpose: a failed query leaves $state empty and the
    # entry is simply skipped.
    state=$(provider-services query market lease get \
      --owner "$owner" --dseq "$dseq" --gseq "$gseq" --oseq "$oseq" --provider "$prov" \
      -o json </dev/null 2>/dev/null | jq -r '.lease.state')
    ## Error: rpc error: code = InvalidArgument desc = invalid lease: lease not found: invalid request
    ## Rare case, if chain has been reset, assume state=closed
    ## [[ $? -eq 0 && -z $state ]] && state=closed
    if [[ "$state" == "closed" ]]; then
      echo kubectl delete ns "$ns" --wait=false
      echo kubectl -n lease delete manifest "$ns"
    fi
  done
## 3) delete orphaned provider hosts
####
# provider hosts which have no namespace deployment parent.

# Print, one per line and de-duplicated, the entries of list $2 that do not
# appear in list $1. Both arguments are newline-separated strings; blank lines
# are ignored.
dseq_only_in_second() {
  local known=$1 candidates=$2
  # comm expects both inputs sorted under the same collation, so sort and
  # compare under the C locale regardless of the caller's environment.
  LC_ALL=C comm -13 \
    <(printf '%s\n' "$known" | sed '/^$/d' | LC_ALL=C sort -u) \
    <(printf '%s\n' "$candidates" | sed '/^$/d' | LC_ALL=C sort -u)
}

# Only compare when BOTH kubectl queries succeeded: a failed namespace query
# would otherwise yield an empty list and make every provider host look dangling.
if ns_labels_json=$(kubectl get ns -A -l akash.network,akash.network/lease.id.provider -o json) &&
  hosts_labels_json=$(kubectl -n lease get providerhosts -l akash.network,akash.network/lease.id.provider -o json); then
  DSEQ_NS=$(jq -r '.items[].metadata.labels | ."akash.network/lease.id.dseq" // empty' <<<"$ns_labels_json")
  DSEQ_PROVIDER_HOSTS=$(jq -r '.items[].metadata.labels | ."akash.network/lease.id.dseq" // empty' <<<"$hosts_labels_json")
  DSEQ_DANGLING="$(dseq_only_in_second "$DSEQ_NS" "$DSEQ_PROVIDER_HOSTS")"
  while IFS= read -r i; do
    [[ -n "$i" ]] || continue
    # Printed only; nothing is deleted by this script.
    echo kubectl -n lease delete providerhosts.akash.network -l "akash.network/lease.id.dseq=$i" --wait=false
  done <<<"$DSEQ_DANGLING"
else
  echo "WARN: could not list namespaces/providerhosts; skipping orphaned provider hosts check" >&2
fi
## 4) delete orphaned leases
####
# active leases without actual deployments
#
# Lists the provider's active leases on chain and, for each one that has no
# namespace with matching lease labels, prints the commands to inspect it and to
# close the bid (nothing is executed).
#
# Each prerequisite is checked before the comparison runs: with an empty
# provider address or a failed namespace listing, leases would be reported as
# orphaned even though nothing was actually compared.
if ! provider_pod=$(kubectl -n akash-services get pods -l app=akash-provider --output jsonpath='{.items[0].metadata.name}') || [[ -z "$provider_pod" ]]; then
  echo "WARN: akash-provider pod not found; skipping orphaned leases check" >&2
elif ! PROVIDER="$(kubectl -n akash-services exec -i "$provider_pod" -- sh -c "echo \$AKASH_FROM")" || [[ -z "$PROVIDER" ]]; then
  echo "WARN: could not read AKASH_FROM from $provider_pod; skipping orphaned leases check" >&2
elif ! LEASEDATA="$(provider-services query market lease list --provider "$PROVIDER" --gseq 0 --oseq 0 --page 1 --limit 10000 --state active -o json)"; then
  echo "WARN: could not list active leases; skipping orphaned leases check" >&2
elif ! NSDATA="$(kubectl get ns -o json)"; then
  echo "WARN: could not list namespaces; skipping orphaned leases check" >&2
else
  jq -r '.leases[].lease.lease_id | [.owner, .dseq, .gseq, .oseq, .provider] | @tsv' <<<"$LEASEDATA" |
    while read -r owner dseq gseq oseq provider; do
      # Number of namespaces whose lease labels match this lease exactly.
      ns_matches=$(jq --arg dseq "$dseq" --arg oseq "$oseq" --arg gseq "$gseq" --arg owner "$owner" --arg provider "$provider" -r \
        '[.items[] | select(.metadata.labels."akash.network/lease.id.dseq"==$dseq and .metadata.labels."akash.network/lease.id.gseq"==$gseq and .metadata.labels."akash.network/lease.id.oseq"==$oseq and .metadata.labels."akash.network/lease.id.owner"==$owner and .metadata.labels."akash.network/lease.id.provider"==$provider)] | length' \
        <<<"$NSDATA")
      if [[ "${ns_matches:-0}" -eq 0 ]]; then
        echo "=== Found orphaned lease ==="
        #echo kubectl get ns -l akash.network/lease.id.dseq=$dseq,akash.network/lease.id.gseq=$gseq,akash.network/lease.id.oseq=$oseq,akash.network/lease.id.owner=$owner,akash.network/lease.id.provider=$provider
        #echo kubectl -n lease get manifest -l akash.network/lease.id.dseq=$dseq,akash.network/lease.id.gseq=$gseq,akash.network/lease.id.oseq=$oseq,akash.network/lease.id.owner=$owner,akash.network/lease.id.provider=$provider
        # stdin is redirected so the command cannot consume the loop's input.
        ns=$(provider-services show-cluster-ns --dseq "$dseq" --owner "$owner" --provider "$provider" </dev/null)
        echo kubectl -n "$ns" get all
        echo "ACTION: close this lease if you can't find it is really running on your K8s cluster:"
        # The pod name was looked up once above instead of once per orphaned lease.
        echo kubectl -n akash-services exec -i "$provider_pod" -- bash -c "\"provider-services tx market bid close --owner $owner --dseq $dseq --gseq $gseq --oseq $oseq --from $provider\""
        echo "NOTE: However, executing an action from the provider address will cause the \`account sequence mismatch\` afterwards. Make sure to restart akash-provider service once done with running the \`tx market bid close\` command! Ideally, make sure akash-provider services is stopped first."
      fi
    done
fi
@andy108369 Line 76: it appears the akash cli has been transitioned into the provider-services cli. Can you update this line? Or, provide me instructions to install it. Thank you so much for the help here and I really love this script!
You can replace `akash` with `provider-services` directly in this script.
But you don't need to use `provider-services`; you can still use the `akash` binary directly.
You can get it from here: https://github.com/ovrclk/akash/releases/tag/v0.18.1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@andy108369 Line 76: it appears the akash cli has been transitioned into the provider-services cli. Can you update this line? Or, provide me instructions to install it. Thank you so much for the help here and I really love this script!