Created
February 23, 2020 13:54
-
-
Save tpsilva/4f555801bb323acb825262bbc684d452 to your computer and use it in GitHub Desktop.
Recover juju machine agents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
This is a tool for recovering lost machine units in current model | |
Usage: | |
{0} model-name controller-ip dest-dir | |
""" | |
import json | |
import os | |
import shutil | |
import subprocess | |
import sys | |
import tempfile | |
# Shell pipeline that prints the second field of the "apipassword:" line in
# the machine-0 agent.conf. ``%s`` is the juju ssh target to read it from.
# The pipe sits outside the quoted remote command, so awk runs on the local
# host; it is invoked without the non-POSIX ``-e`` flag so that any awk
# implementation accepts it.
MONGOPASS_CMD = (
    "juju ssh %s \"sudo grep ^apipassword: "
    "/var/lib/juju/agents/machine-0/agent.conf\" | "
    "awk '{print $2}'"
)

# Shell pipeline that feeds a script file from /home/ubuntu to the mongo
# shell on the target and keeps only the second-to-last line of its output
# (``tail -n 2 | head -n 1``). Placeholders, in order: juju ssh target,
# password for the machine-0 user, script file name.
MONGO_CMD = (
    "juju ssh %s \"mongo --port 37017 --sslAllowInvalidCertificates "
    "--ssl --authenticationDatabase admin -u machine-0 -p %s juju < "
    "/home/ubuntu/%s 2>/dev/null\" | tail -n 2 | head -n 1"
)
def run(cmd, output=True, shell=True):
    """Echo *cmd*, execute it and report the result.

    Args:
        cmd: Command to execute (a shell string when *shell* is true).
        output: When true, capture and return the command's stdout;
            otherwise discard all output and return the exit status.
        shell: Passed through to ``subprocess``.

    Returns:
        The stripped stdout as text when *output* is true, else the
        integer exit status of the command.

    Raises:
        subprocess.CalledProcessError: If *output* is true and the command
            exits with a non-zero status.
    """
    print(cmd)
    if output:
        # universal_newlines=True makes check_output return text instead of
        # bytes, so callers can interpolate the result into other commands
        # and split it on "\n" regardless of the Python version.
        return subprocess.check_output(
            cmd, shell=shell, universal_newlines=True).strip()
    # Discard output through the null device rather than PIPE: call() never
    # reads from pipes, so a chatty child could fill the pipe buffer and
    # block forever.
    with open(os.devnull, "w") as devnull:
        return subprocess.call(cmd, stderr=devnull, stdout=devnull,
                               shell=shell)
def exec_mongo_query(mongo_password, controller, query):
    """Run a mongo shell script on the controller and return its result line.

    The script text is written to a local temporary file, copied to
    /home/ubuntu on *controller* with ``juju scp`` and then executed there
    through ``MONGO_CMD``.

    Args:
        mongo_password: Password substituted into ``MONGO_CMD``.
        controller: juju ssh/scp target that runs the mongo shell.
        query: Text of the mongo shell script to execute.

    Returns:
        Whatever ``run`` returns for the ``MONGO_CMD`` pipeline.
    """
    # delete=False because the file has to outlive the ``with`` block: it
    # must be closed (and therefore flushed) before it is copied.
    tmp_file = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with tmp_file:
            tmp_file.write(query)
        run("juju scp {} {}:/home/ubuntu".format(tmp_file.name, controller))
        return run(MONGO_CMD % (controller, mongo_password,
                                os.path.basename(tmp_file.name)))
    finally:
        # Remove the local copy so repeated queries do not pile up temp
        # files. NOTE(review): the copy uploaded to the controller is left
        # in place, as before.
        os.remove(tmp_file.name)
def update_machine_password(mongo_password, controller, model, machineid,
                            passwordhash):
    """Set the ``passwordhash`` field of one document in ``db.machines``.

    The document is selected by ``model-uuid`` (the *model* argument) and
    ``machineid``. The update script is executed via ``exec_mongo_query``
    and its output is discarded.
    """
    script_template = """use juju
db.machines.update({"model-uuid": "%s", "machineid": "%s"},
{$set:{"passwordhash": "%s"}})
"""
    substitutions = (model, machineid, passwordhash)
    exec_mongo_query(mongo_password, controller,
                     script_template % substitutions)
def get_model_uuid(mongo_password, controller, model):
    """Return the ``_id`` of the ``db.models`` document named *model*.

    The line returned by ``exec_mongo_query`` is parsed as JSON and its
    ``_id`` value is handed back to the caller.
    """
    script = """use juju
db.models.find({"name": "%s"}, {"modeluuid": 1})
""" % (model)
    reply = exec_mongo_query(mongo_password, controller, script)
    document = json.loads(reply)
    return document['_id']
def get_donor_password(mongo_password, controller, donor, model_uuid):
    """Return the ``passwordhash`` stored for machine *donor*.

    Looks the machine up in ``db.machines`` by ``model-uuid`` and
    ``machineid``, parses the line returned by ``exec_mongo_query`` as JSON
    and returns its ``passwordhash`` value.
    """
    script = """use juju
db.machines.find({"model-uuid": "%s", "machineid": "%s"},
{"passwordhash": 1})
""" % (model_uuid, donor)
    reply = exec_mongo_query(mongo_password, controller, script)
    return json.loads(reply)['passwordhash']
def get_machine_nonce(mongo_password, controller, machineid, model_uuid):
    """Return the ``nonce`` stored for machine *machineid*.

    Looks the machine up in ``db.machines`` by ``model-uuid`` and
    ``machineid``, parses the line returned by ``exec_mongo_query`` as JSON
    and returns its ``nonce`` value.
    """
    script = """use juju
db.machines.find({"model-uuid": "%s", "machineid": "%s"},
{"nonce": 1})
""" % (model_uuid, machineid)
    reply = exec_mongo_query(mongo_password, controller, script)
    return json.loads(reply)['nonce']
def recover_machine(mongo_password, controller, machineid, juju_tar, donor,
                    passwordhash, model):
    """Rebuild the machine agent of *machineid* from the donor's tarball.

    Steps, in order:
      1. read the machine's nonce from mongo (*model* is the model-uuid
         filter value);
      2. upload *juju_tar*, unpack it under /var/lib and rename the
         donor's ``machine-<donor>`` agent and tools directories to
         ``machine-<machineid>``;
      3. write the nonce to /var/lib/juju/nonce.txt and patch the tag,
         the jujud service name and the nonce inside agent.conf;
      4. store *passwordhash* for the machine in mongo;
      5. restart the ``jujud-machine-<machineid>`` service.
    """
    print("Recovering machine {}".format(machineid))

    # Get machine nonce from mongodb
    nonce = get_machine_nonce(mongo_password, controller, machineid, model)
    agent_file = "/var/lib/juju/agents/machine-{}/agent.conf".format(machineid)

    # File restoration first, then in-place edits of agent.conf; every
    # command is run on the target machine through juju.
    remote_steps = [
        "juju scp {} {}:/tmp/juju.tar".format(
            juju_tar, machineid),
        "juju ssh {} 'sudo tar -xvf /tmp/juju.tar -C /var/lib'".format(
            machineid),
        "juju ssh {} 'sudo mv /var/lib/juju/agents/machine-{} "
        "/var/lib/juju/agents/machine-{}'".format(machineid, donor, machineid),
        "juju ssh {} 'sudo mv /var/lib/juju/tools/machine-{} "
        "/var/lib/juju/tools/machine-{}'".format(machineid, donor, machineid),
        "juju ssh {} 'sudo echo {} | sudo tee /var/lib/juju/nonce.txt'".format(
            machineid, nonce),
        "juju ssh {} "
        "'sudo sed -i \"s/tag: machine-{}/tag: machine-{}/g\" {}'".format(
            machineid, donor, machineid, agent_file),
        "juju ssh {} "
        "'sudo sed -i \"s/jujud-machine-{}/jujud-machine-{}/g\" {}'".format(
            machineid, donor, machineid, agent_file),
        "juju ssh {} "
        "'sudo sed -i \"s/nonce: .*/nonce: {}/g\" {}'".format(
            machineid, nonce, agent_file),
    ]
    for step in remote_steps:
        run(step)

    # Update mongo
    update_machine_password(mongo_password, controller, model,
                            machineid, passwordhash)

    # Restart services
    run("juju ssh {} sudo systemctl restart jujud-machine-{}".format(
        machineid, machineid))
def get_agent_from_donor(donor, destdir):
    """Fetch /var/lib/juju from *donor* and repack a trimmed juju.tar.

    The donor's ``juju`` directory is archived remotely, copied into
    *destdir* and unpacked there. A fixed list of paths is deleted from the
    unpacked tree (along with the downloaded archive), then the tree is
    archived again as ``<destdir>/juju.tar`` and the unpacked copy removed.
    """
    print("Getting agent from machine {}".format(donor))

    # Get dir from donor
    run("juju ssh {} 'sudo tar -C /var/lib/ -cvf /tmp/juju.tar juju'".format(
        donor))
    run("juju scp {}:/tmp/juju.tar {}".format(donor, destdir))
    run("tar -xvf {}/juju.tar -C {}".format(destdir, destdir))

    # Remove whatever is not needed, finishing with the original archive so
    # it can be rebuilt from the trimmed tree.
    cleanup_templates = (
        "rm -rf {}/juju/agents/unit*",
        "rm -rf {}/juju/meter-status.yaml",
        "rm -rf {}/juju/locks/*",
        "rm -rf {}/juju/tools/unit*",
        "rm -rf {}/juju/metricspool",
        "rm -rf {}/juju/nonce.txt",
        "rm -rf {}/juju.tar",
    )
    for template in cleanup_templates:
        run(template.format(destdir))

    run("tar -cvf {}/juju.tar -C {} juju".format(destdir, destdir))
    run("rm -rf {}/juju".format(destdir))
def _to_text(value):
    """Return *value* as text, decoding it first when it is a byte string."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def main():
    """Recover every machine reported as down in the given model.

    Reads the model name, controller target and working directory from
    ``sys.argv[1:4]``. The working directory is deleted and recreated. The
    first machine listed as "started" is used as donor: its password hash
    and a trimmed copy of its /var/lib/juju are applied to each machine
    whose ``juju machines`` line matches "down".

    Exits with an error message when no started machine is available, and
    returns without doing anything further when no machine is down.
    """
    model = sys.argv[1]
    controller = sys.argv[2]
    destdir = sys.argv[3]

    # Always start from an empty working directory.
    if os.path.exists(destdir):
        shutil.rmtree(destdir)
    os.mkdir(destdir)

    run("juju switch {}".format(model))

    # Command output is normalised to text before it is interpolated into
    # further commands; byte strings would otherwise be rendered as b'...'.
    mongo_password = _to_text(run(MONGOPASS_CMD % controller))
    model_uuid = get_model_uuid(mongo_password, controller, model)

    # Query first healthy unit to select as donor
    donor = _to_text(
        run("juju machines | grep started | head -n 1 | cut -d ' ' -f 1"))
    if not donor:
        sys.exit("No started machine found to use as donor")

    passwordhash = get_donor_password(mongo_password, controller, donor,
                                      model_uuid)
    get_agent_from_donor(donor, destdir)
    juju_tar = "{}/juju.tar".format(destdir)

    try:
        machines = _to_text(run("juju machines | grep down"))
    except subprocess.CalledProcessError as err:
        # grep exits with status 1 when no line matched, which simply means
        # there is nothing to recover; anything else is a real failure.
        if err.returncode != 1:
            raise
        machines = ""

    down_ids = [line.split()[0]
                for line in machines.splitlines() if line.strip()]
    if not down_ids:
        print("No machines are down; nothing to recover")
        return

    for machineid in down_ids:
        recover_machine(mongo_password, controller, machineid, juju_tar, donor,
                        passwordhash, model_uuid)
if __name__ == "__main__":
    # Exactly three arguments are required (model, controller, dest-dir);
    # anything else prints the module usage text and exits with -1.
    if len(sys.argv) == 4:
        main()
    else:
        print(__doc__.format(sys.argv[0]))
        sys.exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment