Last active
June 19, 2023 09:40
-
-
Save andy108369/f211bf6c06f2a6e3635b20bdfb9f0fca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Filename: clean-stale-akash-resources.sh
# Author: andrey.arapov@nixaid.com
# Version: 1.3 - 19 June 2023
# Source: https://gist.github.com/andy108369/f211bf6c06f2a6e3635b20bdfb9f0fca
# Ref. https://docs.akash.network/providers/build-a-cloud-provider/akash-provider-troubleshooting/dangling-deployments
#
# Make sure you have kubectl, jq, akash and provider-services binaries.
#
# akash https://github.com/ovrclk/akash/releases/latest
# provider-services https://github.com/ovrclk/provider-services/releases/latest

# Akash RPC endpoint (presumably picked up by the akash / provider-services
# CLI from the environment -- confirm against the CLI docs).
# A value already present in the environment wins; otherwise the default below
# is used. The commented lines are alternative endpoints.
export AKASH_NODE="${AKASH_NODE:-https://rpc.akash.forbole.com:443}"
#export AKASH_NODE=https://akash-rpc.polkachu.com:443
#export AKASH_NODE="http://akash-node-1.akash-services.svc.cluster.local:26657"
#export KUBECONFIG=/etc/kubernetes/admin.conf
#export AKASH_NODE="http://$(kubectl -n akash-services get ep akash-node-1 -o jsonpath='{.subsets[0].addresses[0].ip}'):26657"
## 1) delete orphaned deployments
####
# the ones which have lease closed on the blockchain but remain active in the K8s cluster for some reason.
# This section only PRINTS the kubectl delete commands (via echo); it deletes nothing itself.
md_lid="akash.network/lease.id"
kubectl get ns -l akash.network,akash.network/lease.id.provider -o jsonpath='{.items[*].metadata.labels}' |
  jq --arg md_lid "$md_lid" -r '[."akash.network/namespace", .[$md_lid+".owner"], .[$md_lid+".dseq"], .[$md_lid+".gseq"], .[$md_lid+".oseq"], .[$md_lid+".provider"]] | @tsv' |
  while read -r ns owner dseq gseq oseq prov; do
    # A missing label becomes an empty TSV field; `read` collapses the adjacent
    # tabs, the remaining fields shift left and the last one ends up empty.
    # Such a row cannot be mapped to a lease reliably, so skip it.
    [[ -n "$prov" ]] || continue
    # stderr is discarded on purpose: when the query fails, state stays empty
    # and the namespace is simply not reported.
    state=$(provider-services query market lease get \
      --owner "$owner" --dseq "$dseq" --gseq "$gseq" --oseq "$oseq" --provider "$prov" \
      -o json 2>/dev/null | jq -r '.lease.state')
    if [[ "$state" == "closed" ]]; then
      echo kubectl delete ns "$ns" --wait=false
      echo kubectl -n lease delete providerhosts --selector="${md_lid}.owner=$owner,${md_lid}.dseq=$dseq,${md_lid}.gseq=$gseq,${md_lid}.oseq=$oseq" --wait=false
    fi
  done
## 2) delete stale manifests
####
# stale manifests are bad as when `akash-provider` starts, it reads them and starts to monitor their leases, attempting to withdraw from them, causing the following errors:
# D[2022-09-15|18:11:29.422] lease is out of fund. sending withdraw module=provider-service cmp=balance-checker lease=akash1ny9darqd92sykjeapnn55w3vgg7vtdw3v3tvp4/7192920/1/1/akash1q7spv2cw06yszgfp4f9ed59lkka6ytn8g4tkjf
# E[2022-09-15|18:11:29.439] failed to do lease withdrawal module=provider-service cmp=balance-checker err="rpc error: code = InvalidArgument desc = failed to execute message; message index: 0: payment closed: invalid request" LeaseID=akash1ny9darqd92sykjeapnn55w3vgg7vtdw3v3tvp4/7192920/1/1/akash1q7spv2cw06yszgfp4f9ed59lkka6ytn8g4tkjf
# This section only PRINTS the kubectl delete commands (via echo); it deletes nothing itself.
md_lid="akash.network/lease.id"
kubectl -n lease get manifest -l akash.network,akash.network/lease.id.provider -o jsonpath='{.items[*].metadata.labels}' |
  jq --arg md_lid "$md_lid" -r '[."akash.network/namespace", .[$md_lid+".owner"], .[$md_lid+".dseq"], .[$md_lid+".gseq"], .[$md_lid+".oseq"], .[$md_lid+".provider"]] | @tsv' |
  while read -r ns owner dseq gseq oseq prov; do
    # A missing label becomes an empty TSV field; `read` collapses the adjacent
    # tabs, the remaining fields shift left and the last one ends up empty.
    # Such a row cannot be mapped to a lease reliably, so skip it.
    [[ -n "$prov" ]] || continue
    # stderr is discarded on purpose: when the query fails, state stays empty
    # and the manifest is simply not reported.
    state=$(provider-services query market lease get \
      --owner "$owner" --dseq "$dseq" --gseq "$gseq" --oseq "$oseq" --provider "$prov" \
      -o json 2>/dev/null | jq -r '.lease.state')
    ## Error: rpc error: code = InvalidArgument desc = invalid lease: lease not found: invalid request
    ## Rare case, if chain has been reset, assume state=closed
    ## [[ $? -eq 0 && -z $state ]] && state=closed
    if [[ "$state" == "closed" ]]; then
      echo kubectl delete ns "$ns" --wait=false
      echo kubectl -n lease delete manifest "$ns"
    fi
  done
## 3) delete orphaned provider hosts
####
# provider hosts which have no namespace deployment parent.
# This section only PRINTS the kubectl delete commands (via echo); it deletes nothing itself.

#######################################
# Print the entries of list $2 that do not occur in list $1.
# Arguments: $1, $2 - newline-separated lists (any order, duplicates allowed)
# Outputs:   entries unique to $2, one per line, byte-sorted, no blank lines
#######################################
dseq_only_in_second() {
  # comm requires both inputs sorted in the collation it runs under, so both
  # the sort and the comparison are pinned to LC_ALL=C. An empty list turns
  # into a single blank line; sed removes it from the result.
  LC_ALL=C comm -13 \
    <(printf '%s\n' "$1" | LC_ALL=C sort -u) \
    <(printf '%s\n' "$2" | LC_ALL=C sort -u) |
    sed '/^$/d'
}

dseq_filter='.items[].metadata.labels | ."akash.network/lease.id.dseq" // empty'
# Both listings must succeed: if the namespace query failed, its list would be
# empty and every provider host would be reported as dangling.
if ns_json=$(kubectl get ns -A -l akash.network,akash.network/lease.id.provider -o json) &&
  hosts_json=$(kubectl -n lease get providerhosts -l akash.network,akash.network/lease.id.provider -o json); then
  DSEQ_NS=$(jq -r "$dseq_filter" <<<"$ns_json" | LC_ALL=C sort -u)
  DSEQ_PROVIDER_HOSTS=$(jq -r "$dseq_filter" <<<"$hosts_json" | LC_ALL=C sort -u)
  DSEQ_DANGLING="$(dseq_only_in_second "$DSEQ_NS" "$DSEQ_PROVIDER_HOSTS")"
  while IFS= read -r dangling_dseq; do
    [[ -n "$dangling_dseq" ]] || continue
    echo kubectl -n lease delete providerhosts.akash.network -l "akash.network/lease.id.dseq=$dangling_dseq" --wait=false
  done <<<"$DSEQ_DANGLING"
else
  echo "WARNING: could not list namespaces/providerhosts; skipping the orphaned provider hosts check" >&2
fi
## 4) delete orphaned leases
####
# active leases without actual deployments
# Nothing is closed here: for every active lease that has no namespace carrying
# the same lease labels, this section prints the commands to inspect and close it.

# Look the akash-provider pod up once; it is needed both to read AKASH_FROM and
# to build the suggested `bid close` command.
provider_pod=$(kubectl -n akash-services get pods -l app=akash-provider --output jsonpath='{.items[0].metadata.name}')
PROVIDER=""
if [[ -n "$provider_pod" ]]; then
  # \$ keeps AKASH_FROM unexpanded locally so it is evaluated inside the pod.
  PROVIDER="$(kubectl -n akash-services exec -i "$provider_pod" -- sh -c "echo \$AKASH_FROM")"
fi
LEASEDATA=""
NSDATA=""
# Every input must be available: with missing namespace data each active lease
# would look orphaned.
if [[ -z "$PROVIDER" ]]; then
  echo "WARNING: could not determine the provider address (AKASH_FROM) from the akash-provider pod; skipping the orphaned leases check" >&2
elif ! LEASEDATA="$(provider-services query market lease list --provider "$PROVIDER" --gseq 0 --oseq 0 --page 1 --limit 10000 --state active -o json)"; then
  echo "WARNING: could not query active leases; skipping the orphaned leases check" >&2
elif ! NSDATA="$(kubectl get ns -o json)"; then
  echo "WARNING: could not list namespaces; skipping the orphaned leases check" >&2
else
  echo "$LEASEDATA" |
    jq -r '.leases[].lease.lease_id | [.owner, .dseq, .gseq, .oseq, .provider] | @tsv' |
    while read -r owner dseq gseq oseq provider; do
      # Number of namespaces labelled with exactly this lease id.
      if ! ns_matches=$(jq -r \
        --arg dseq "$dseq" --arg oseq "$oseq" --arg gseq "$gseq" \
        --arg owner "$owner" --arg provider "$provider" \
        '[.items[] | select(.metadata.labels."akash.network/lease.id.dseq"==$dseq and .metadata.labels."akash.network/lease.id.gseq"==$gseq and .metadata.labels."akash.network/lease.id.oseq"==$oseq and .metadata.labels."akash.network/lease.id.owner"==$owner and .metadata.labels."akash.network/lease.id.provider"==$provider)] | length' \
        <<<"$NSDATA"); then
        echo "WARNING: could not evaluate namespaces for lease $owner/$dseq/$gseq/$oseq/$provider; skipping it" >&2
        continue
      fi
      if [[ "$ns_matches" -eq 0 ]]; then
        echo "=== Found orphaned lease ==="
        #echo kubectl get ns -l akash.network/lease.id.dseq=$dseq,akash.network/lease.id.gseq=$gseq,akash.network/lease.id.oseq=$oseq,akash.network/lease.id.owner=$owner,akash.network/lease.id.provider=$provider
        #echo kubectl -n lease get manifest -l akash.network/lease.id.dseq=$dseq,akash.network/lease.id.gseq=$gseq,akash.network/lease.id.oseq=$oseq,akash.network/lease.id.owner=$owner,akash.network/lease.id.provider=$provider
        ns=$(provider-services show-cluster-ns --dseq "$dseq" --owner "$owner" --provider "$provider")
        echo kubectl -n "$ns" get all
        echo "ACTION: close this lease if you can't find it is really running on your K8s cluster:"
        echo "kubectl -n akash-services exec -i $provider_pod -- bash -c \"provider-services tx market bid close --owner $owner --dseq $dseq --gseq $gseq --oseq $oseq --from $provider\""
        echo "NOTE: However, executing an action from the provider address will cause the \`account sequence mismatch\` afterwards. Make sure to restart akash-provider service once done with running the \`tx market bid close\` command! Ideally, make sure akash-provider services is stopped first."
      fi
    done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can replace `akash` with `provider-services` directly in this script.
But you don't need to use `provider-services`; you can still use the `akash` binary directly.
You can get it from here: https://github.com/ovrclk/akash/releases/tag/v0.18.1