Skip to content

Instantly share code, notes, and snippets.

@tpsilva
Created February 23, 2020 13:54
Show Gist options
  • Save tpsilva/4f555801bb323acb825262bbc684d452 to your computer and use it in GitHub Desktop.
Save tpsilva/4f555801bb323acb825262bbc684d452 to your computer and use it in GitHub Desktop.
Recover juju machine agents
#!/usr/bin/env python
"""
This is a tool for recovering lost machine units in current model
Usage:
{0} model-name controller-ip dest-dir
"""
import json
import os
import shutil
import subprocess
import sys
import tempfile
# Shell pipeline template that prints the second whitespace-separated field
# of the "apipassword:" line in machine-0's agent.conf.
# Placeholder: the `juju ssh` target to read the file from.
MONGOPASS_CMD = (
    'juju ssh %s "sudo grep ^apipassword: '
    '/var/lib/juju/agents/machine-0/agent.conf" | '
    "awk -e '{print $2}'"
)

# Shell pipeline template that feeds a script file under /home/ubuntu to the
# mongo shell on the target and keeps only the second-to-last output line.
# Placeholders, in order: `juju ssh` target, password for user machine-0,
# file name of the script inside /home/ubuntu.
MONGO_CMD = (
    'juju ssh %s "mongo --port 37017 --sslAllowInvalidCertificates '
    "--ssl --authenticationDatabase admin -u machine-0 -p %s juju < "
    '/home/ubuntu/%s 2>/dev/null" | tail -n 2 | head -n 1'
)
def run(cmd, output=True, shell=True):
    """Echo *cmd* to stdout, execute it, and return its result.

    Args:
        cmd: Command to execute; a shell command line when ``shell`` is true.
        output: When true, capture the command's stdout and return it.
            When false, discard all output and return the exit status.
        shell: Passed through to :mod:`subprocess`.

    Returns:
        The command's stdout as text with surrounding whitespace stripped
        when ``output`` is true; otherwise the integer exit status.

    Raises:
        subprocess.CalledProcessError: ``output`` is true and the command
            exits with a non-zero status.
    """
    print(cmd)
    if output:
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3, so callers can format and split the result.
        return subprocess.check_output(
            cmd, shell=shell, universal_newlines=True).strip()
    # Send output to the null device rather than PIPE: call() never reads
    # the pipes, so a chatty child could fill them and block forever.
    with open(os.devnull, "w") as devnull:
        return subprocess.call(cmd, stderr=devnull, stdout=devnull,
                               shell=shell)
def exec_mongo_query(mongo_password, controller, query):
    """Run a mongo shell script on *controller* and return its result line.

    The script text is written to a local temporary file, copied to
    ``/home/ubuntu`` on the controller with ``juju scp`` and executed
    through ``MONGO_CMD``.

    Args:
        mongo_password: Password substituted into ``MONGO_CMD``.
        controller: ``juju ssh``/``juju scp`` target hosting mongo.
        query: Text of the mongo shell script to execute.

    Returns:
        Whatever ``run`` returns for the ``MONGO_CMD`` pipeline, i.e. the
        second-to-last line of the remote output.
    """
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file:
        tmp_file.write(query)
    # The file is closed (and therefore flushed) before it is copied.
    try:
        run("juju scp {} {}:/home/ubuntu".format(tmp_file.name, controller))
        return run(MONGO_CMD % (controller, mongo_password,
                                os.path.basename(tmp_file.name)))
    finally:
        # delete=False leaves the file behind; remove the local copy so
        # repeated queries do not accumulate temp files.
        os.remove(tmp_file.name)
def update_machine_password(mongo_password, controller, model, machineid,
                            passwordhash):
    """Store *passwordhash* on one document of the ``machines`` collection.

    The document is selected by ``model`` (matched against the
    ``model-uuid`` field) and ``machineid``. The update script is executed
    with ``exec_mongo_query`` and its output is ignored.
    """
    script_template = (
        'use juju\n'
        'db.machines.update({"model-uuid": "%s", "machineid": "%s"},\n'
        '{$set:{"passwordhash": "%s"}})\n'
    )
    script = script_template % (model, machineid, passwordhash)
    exec_mongo_query(mongo_password, controller, script)
def get_model_uuid(mongo_password, controller, model):
    """Return the ``_id`` of the ``models`` document named *model*.

    The line returned by ``exec_mongo_query`` is parsed as JSON and its
    ``_id`` member is returned.
    NOTE(review): the caller uses this value as the model UUID; confirm
    that ``_id`` holds the UUID in the target database.
    """
    script = (
        'use juju\n'
        'db.models.find({"name": "%s"}, {"modeluuid": 1})\n'
    ) % model
    document = json.loads(exec_mongo_query(mongo_password, controller, script))
    return document['_id']
def get_donor_password(mongo_password, controller, donor, model_uuid):
    """Return the ``passwordhash`` stored for machine *donor*.

    Looks up the ``machines`` document matching ``model_uuid`` and
    ``donor``, parses the line returned by ``exec_mongo_query`` as JSON
    and returns its ``passwordhash`` member.
    """
    script = (
        'use juju\n'
        'db.machines.find({"model-uuid": "%s", "machineid": "%s"},\n'
        '{"passwordhash": 1})\n'
    ) % (model_uuid, donor)
    document = json.loads(exec_mongo_query(mongo_password, controller, script))
    return document['passwordhash']
def get_machine_nonce(mongo_password, controller, machineid, model_uuid):
    """Return the ``nonce`` stored for machine *machineid*.

    Looks up the ``machines`` document matching ``model_uuid`` and
    ``machineid``, parses the line returned by ``exec_mongo_query`` as
    JSON and returns its ``nonce`` member.
    """
    script = (
        'use juju\n'
        'db.machines.find({"model-uuid": "%s", "machineid": "%s"},\n'
        '{"nonce": 1})\n'
    ) % (model_uuid, machineid)
    document = json.loads(exec_mongo_query(mongo_password, controller, script))
    return document['nonce']
def recover_machine(mongo_password, controller, machineid, juju_tar, donor,
                    passwordhash, model):
    """Rebuild the machine agent of *machineid* from the donor's files.

    Steps, in order: read the machine's nonce from mongo, upload and unpack
    ``juju_tar`` under ``/var/lib`` on the machine, rename the donor's
    agent and tools directories to this machine's id, write the nonce
    file, rewrite tag, service name and nonce in ``agent.conf``, store
    ``passwordhash`` for the machine in mongo, and restart the
    ``jujud-machine-<id>`` service.

    ``model`` is forwarded as the model UUID to the mongo helpers.
    """
    print("Recovering machine {}".format(machineid))

    # The nonce is looked up before anything is changed on the machine.
    nonce = get_machine_nonce(mongo_password, controller, machineid, model)

    def on_machine(remote_cmd):
        # Run one single-quoted command line on the target via juju ssh.
        return run("juju ssh {} '{}'".format(machineid, remote_cmd))

    # Ship the donor archive and unpack it in place.
    run("juju scp {} {}:/tmp/juju.tar".format(juju_tar, machineid))
    on_machine("sudo tar -xvf /tmp/juju.tar -C /var/lib")

    # The archive still carries the donor's directory names.
    for subdir in ("agents", "tools"):
        on_machine(
            "sudo mv /var/lib/juju/{0}/machine-{1} "
            "/var/lib/juju/{0}/machine-{2}".format(subdir, donor, machineid))
    on_machine(
        "sudo echo {} | sudo tee /var/lib/juju/nonce.txt".format(nonce))

    # Point agent.conf at this machine instead of the donor.
    agent_file = "/var/lib/juju/agents/machine-{}/agent.conf".format(machineid)
    sed_scripts = (
        "s/tag: machine-{}/tag: machine-{}/g".format(donor, machineid),
        "s/jujud-machine-{}/jujud-machine-{}/g".format(donor, machineid),
        "s/nonce: .*/nonce: {}/g".format(nonce),
    )
    for sed_script in sed_scripts:
        on_machine('sudo sed -i "{}" {}'.format(sed_script, agent_file))

    # Store the donor's password hash for this machine in mongo.
    update_machine_password(mongo_password, controller, model,
                            machineid, passwordhash)

    # Bring the agent back up with the recovered configuration.
    run("juju ssh {0} sudo systemctl restart jujud-machine-{0}".format(
        machineid))
def get_agent_from_donor(donor, destdir):
    """Fetch ``/var/lib/juju`` from *donor* and repack a trimmed archive.

    The donor's directory is archived, copied into ``destdir`` and
    unpacked there. Unit agents and tools, lock files, the meter status
    file, the metrics pool and the nonce file are removed, and what is
    left is packed again as ``<destdir>/juju.tar``. The unpacked tree is
    deleted afterwards.
    """
    print("Getting agent from machine {}".format(donor))

    # Archive on the donor, then pull the archive over and unpack it.
    run("juju ssh {} 'sudo tar -C /var/lib/ -cvf /tmp/juju.tar juju'".format(
        donor))
    run("juju scp {}:/tmp/juju.tar {}".format(donor, destdir))
    run("tar -xvf {0}/juju.tar -C {0}".format(destdir))

    # Paths (relative to destdir) that must not end up in the new archive;
    # the downloaded juju.tar goes last so it can be recreated below.
    discard = (
        "juju/agents/unit*",
        "juju/meter-status.yaml",
        "juju/locks/*",
        "juju/tools/unit*",
        "juju/metricspool",
        "juju/nonce.txt",
        "juju.tar",
    )
    for relpath in discard:
        run("rm -rf {}/{}".format(destdir, relpath))

    run("tar -cvf {0}/juju.tar -C {0} juju".format(destdir))
    run("rm -rf {}/juju".format(destdir))
def _as_text(value):
    """Return *value* as ``str``, decoding UTF-8 bytes when necessary."""
    if isinstance(value, str):
        return value
    return value.decode("utf-8")


def main():
    """Recover every machine listed as down in the given model.

    Reads ``model``, ``controller`` and ``destdir`` from ``sys.argv[1:4]``.
    ``destdir`` is deleted if it exists and created afresh. The first
    machine listed as started is used as donor: its password hash is read
    from mongo and its agent files are packed into ``<destdir>/juju.tar``.
    Each machine listed as down is then passed to ``recover_machine``.

    Exits with an error message when no started machine is available, and
    returns without doing anything further when no machine is down.
    """
    model = sys.argv[1]
    controller = sys.argv[2]
    destdir = sys.argv[3]

    # Always start from an empty working directory.
    if os.path.exists(destdir):
        shutil.rmtree(destdir)
    os.mkdir(destdir)

    run("juju switch {}".format(model))
    # Command output is normalised to text so it can be formatted into
    # later commands and split on newlines regardless of what run() yields.
    mongo_password = _as_text(run(MONGOPASS_CMD % controller))
    model_uuid = get_model_uuid(mongo_password, controller, model)

    # Query first healthy unit to select as donor
    donor = _as_text(
        run("juju machines | grep started | head -n 1 | cut -d ' ' -f 1"))
    if not donor:
        # The pipeline succeeds with empty output when nothing matches.
        sys.exit("No started machine found to use as donor")
    passwordhash = get_donor_password(mongo_password, controller, donor,
                                      model_uuid)
    get_agent_from_donor(donor, destdir)
    juju_tar = "{}/juju.tar".format(destdir)

    try:
        machines = _as_text(run("juju machines | grep down"))
    except subprocess.CalledProcessError as err:
        # grep exits with status 1 when no line matches; anything else is
        # a real failure and is propagated.
        if err.returncode != 1:
            raise
        print("No machines are down; nothing to recover")
        return

    for machine in machines.splitlines():
        fields = machine.split()
        if not fields:
            # Skip blank lines instead of failing on fields[0].
            continue
        recover_machine(mongo_password, controller, fields[0], juju_tar,
                        donor, passwordhash, model_uuid)
if __name__ == "__main__":
    # Script name plus exactly three arguments: model, controller, dest-dir.
    expected_argc = 4
    if len(sys.argv) == expected_argc:
        main()
    else:
        # Wrong argument count: show the usage text and exit with failure.
        print(__doc__.format(sys.argv[0]))
        sys.exit(-1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment