-
-
Save grishatsuker/f3a749fc4759034b1a3d2df6271049a3 to your computer and use it in GitHub Desktop.
Script to move control services to control nodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
import os | |
import sys | |
import subprocess | |
import waiting | |
import json | |
from datetime import datetime, timedelta | |
from time import sleep | |
import retrying | |
from strato_common import credentials | |
from strato_common import admin_creds | |
from vm_manager_client import client as vm_client_module | |
from neutronclient.neutron import client as neutron_client_module | |
from strato_kv.clustermanagement import clustermanagementapi | |
from strato_kv.consulutils.consulkeyvaluestoreclient import ConsulKeyValueStoreClient | |
# Seconds to pause between putting successive services into/out of maintenance mode.
MAINTENANCE_MODE_COOLDOWN_TIME = 2
# Generic cooldown between operations, in seconds.
COOLDOWN_TIME = 10
# Max seconds to wait for a moved service to report healthy.
TIMEOUT_FOR_SERVICE_HEALTH_CHECK = 300
# Polling interval (seconds) for the service health check.
INTERVAL_FOR_SERVICE_HEALTH_CHECK = 10
# Consul node names of the designated (future) control nodes.
CONTROL_NODES = ["stratonode0.node.strato", "stratonode2.node.strato", "stratonode1.node.strato"]
# Openstack control-plane services that are migrated separately
# (see migrate_openstack_servers_to_future_control_nodes).
openstack_services = [
    'openstack-nova-api',
    'openstack-nova-scheduler',
    'openstack-nova-conductor',
    'neutron-server',
    'neutron-rpc-server',
    'openstack-keystone',
]
# Per-compute-node services toggled by run_compute_maintenance /
# run_compute_unmaintenance.
compute_services = ['neutron-openvswitch-agent', 'neutron-ovs-cleanup', 'neutron-metering-agent', 'multipathd',
                    'snapshot-manager-worker', 'servicesgw', 'neutron-l3-agent', 'nrad', 'neutron-dhcp-agent',
                    'strato-filebeat', 'neutron-metadata-agent', 'openstack-nova-compute']
# Control services eligible to be (re)placed onto control nodes.
# Commented-out entries look like leftovers from an earlier
# {service: [node_type, replica_count]} mapping and are either handled
# specially elsewhere in this script (mysql, openstack/neutron servers)
# or deliberately excluded -- TODO confirm before re-enabling any of them.
SERVICES_WITH_NODE_TYPE = [
    'opa-policy-manager',
    'acm-api',
    'alarms-engine',
    'alarms-service',
    'api-explorer',
    'app-catalog-periodic-tasks',
    'app-catalog',
    'app-catalog-worker',
    'asg-api',
    'autoscaling-groups',
    'autoscaling-groups-worker',
    'aws-auth',
    'cassandra-engine',
    'certificate-manager-api',
    'cloudwatch-api',
    'cloudwatch-backend-api',
    'cloudwatch-backend-worker',
    'conversions',
    'conversions-worker',
    'credit-manager',
    'crs-manager-api',
    'dbc-manager-api',
    'dbs-manager',
    'docker-registry',
    'ec2-compute',
    'elb-api',
    'emr-api',
    'engine-manager-api',
    'engine-manager-worker',
    'events-service',
    'external-endpoint-manager',
    'galeramariadb-engine',
    'gargantua',
    'gcm',
    'grafana',
    'guestnet-admin-tool-api',
    'guestnet-admin-tool-beat',
    'guestnet-admin-tool-worker',
    'hot-upgrade',
    'hot-upgrade-worker',
    'http-proxy-service',
    'iam',
    'identity-manager',
    'image-manager-api',
    'image-manager-worker',
    # 'influxdb': ['control', 2],
    'inspector-api',
    'inspector-worker',
    'jaeger',
    'kafka-engine',
    'kubernetes-manager',
    'kubernetes-worker',
    'lbaas-manager',
    'lbaas-worker',
    'logserver',
    'maestro-auth',
    'maestro-data-reporter',
    'maestro-events-reporter',
    'maestro-gotty',
    'maestro-tunnel-client',
    'mancala-dr',
    'mancala-externalstorage',
    'mapreduce-api',
    'melet-api',
    'metrics-service',
    'mongodb-engine',
    'mssql-engine',
    # 'mysql': ['control', 3],
    'net-metrics-collector-worker',
    'neutron-db-init',
    #'neutron-rpc-server': ['control', 3],
    #'neutron-server': ['control', 3],
    'nfs-manager-api',
    'ntpd-server',
    'oauth2-proxy',
    'oort',
    'openotp-ldap-bridge',
    'openstack-cinder-api',
    'openstack-cinder-scheduler',
    'openstack-cinder-volume',
    # 'openstack-keystone': ['control', 3],
    # 'openstack-nova-api': ['control', 3],
    'openstack-nova-cert',
    # 'openstack-nova-conductor': ['control', 3],
    'openstack-nova-consoleauth',
    'openstack-nova-novncproxy',
    # 'openstack-nova-scheduler': ['control', 3],
    'placementapi',
    'placement',
    # 'policy-enforcer': ['control', 3],
    'policy-store',
    'protection-scheduler-api',
    'protection-scheduler-worker',
    'quotas-manager',
    # 'rack-storage-mgr': ['control', 3],
    # 'rack-storage-monitor': ['control', 3],
    # 'rack-storage-radosgw': ['control', 3],
    'rds-api',
    'redis-cache',
    'redis-engine',
    'region',
    'resource-tracker',
    'route53',
    's3-manager-api',
    's3-manager-worker',
    's3-scality',
    's3-vault',
    'scality-engine',
    'service-provisioner',
    'services-metrics-collector',
    'snapshot-manager',
    'sns-api',
    'sns-backend',
    'sqs-engine',
    'sqs-service-api',
    'strato-kapacitor',
    'stratonet-frontend',
    'stratonet-garbagecollector',
    'stratonet-ipam',
    'ui-backend',
    'ui-console',
    'updatemanagerapi',
    'vault-manager',
    'vault',
    'virtual-api2',
    'virtual-dr',
    'virtual-installation',
    'virtual-maestro',
    'virtual-nb',
    'virtual-region',
    'virtual-servicesgw',
    'vm-manager',
    'vm-manager-worker',
    'vms-monitor',
    'volumehealth',
    'volume-manager',
    'vpc-backend-api',
    'vpc-backend-periodic-tasks',
    'vpc-backend-worker',
]
def is_service_healthy(service, to_node):
    """Return True if *service* resolves in DNS and is registered on *to_node*.

    Both shell checks must succeed (exit status 0): `dig` must resolve
    <service>.service.strato with 'status: NOERROR', and `consul catalog`
    must list the service on the node.
    """
    command = (
        "dig {service}.service.strato | grep 'status: NOERROR' && "
        "consul catalog services -node {to_node} | grep '{service}' ".format(
            service=service, to_node=to_node
        )
    )
    print("Running cmd: {}".format(command))
    try:
        # os.system returns the shell exit status; 0 means both checks passed.
        res = os.system(command)
        return res == 0
    except Exception:  # was a bare except: -- never hide SystemExit/KeyboardInterrupt
        return False
def flip_placement_map(placement_map):
    '''Flips a nested dict inside out
    takes {hostname: {service: state}}, returns {service: [hostnames]}
    '''
    # Drop nodes with empty service maps first (mutates the argument in place).
    remove_empty_values_in_dict(placement_map)
    flipped = {}
    for node, services_on_node in placement_map.items():
        for service_name in services_on_node:
            flipped.setdefault(service_name, []).append(node)
    return flipped
@retrying.retry(stop_max_attempt_number=3, wait_fixed=5000)
def move_service_with_retry(service, from_node, to_node):
    '''Move a service to a node, retrying (3 attempts, 5s apart) if it fails.

    Raises RuntimeError on a non-zero exit status so @retrying.retry has an
    exception to retry on -- the original discarded the exit status, which
    made the retry decorator a no-op.
    '''
    command = 'inspector tools cm move-service {service} {from_node} {to_node} -q'.format(
        service=service, from_node=from_node, to_node=to_node
    )
    print("Running CMD: {}".format(command))
    # Bound each attempt with a 45s timeout.
    status = os.system('timeout 45s {}'.format(command))
    if status != 0:
        raise RuntimeError('move-service exited with status {}'.format(status))
def progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Render a single-line textual progress bar to stdout, updated in place
    via carriage return; a newline is emitted once the bar completes."""
    percent_text = ('{0:.' + str(decimals) + 'f}').format(100 * (iteration / float(total)))
    done_cells = int(length * iteration // total)
    bar_text = fill * done_cells + '-' * (length - done_cells)
    sys.stdout.write('\r%s |%s| %s%% %s' % (prefix, bar_text, percent_text, suffix))
    sys.stdout.flush()
    if iteration == total:
        sys.stdout.write('\n')
def print_moving_services_table(data):
    """Pretty-print the planned moves as a Service / From Node / To Node table.

    *data* maps service name -> list of {'from': node, 'to': node} dicts.
    Uses explicit key lookups instead of unpacking dict.values(): value
    ordering is not a reliable contract, and on insertion-ordered dicts the
    original actually printed the From/To columns swapped.  .items() (not the
    Python-2-only .iteritems()) keeps this working on Python 3.
    """
    print('{:<30} {:<30} {:<30}'.format('Service', 'From Node', 'To Node'))
    for service_name, node_move_mappings in data.items():
        for node_move_mapping in node_move_mappings:
            from_node = node_move_mapping['from']
            to_node = node_move_mapping['to']
            print('{:<30} {:<30} {:<30}'.format(service_name, from_node, to_node))
    print('-------------------------- Total services to move: {} --------------------------'.format(len(data)))
def system_command_with_timeout(command):
    """Run *command* through the shell, killed after 45 seconds via `timeout`.

    Exceptions are logged and re-raised.  Uses str(e) instead of e.message:
    Exception.message was deprecated in Python 2.6 and removed in Python 3,
    so the original error path itself raised AttributeError.
    """
    try:
        os.system('timeout 45s {}'.format(command))
    except Exception as e:
        print(str(e))
        raise
def move_services_to_control_nodes(cmapi, services_to_filter=None):
    """Move every misplaced control service onto a control node.

    Builds the move plan via summary_of_moving_services(), executes each move
    with retries, and waits for the service to become healthy.  Failures are
    logged and skipped so one bad service does not abort the whole run.
    """
    services_to_move = summary_of_moving_services(cmapi, print_results=False, services_to_filter=services_to_filter)
    total_moves = len(services_to_move)
    moves_counter = 0
    # .items() (not Python-2-only .iteritems()) so this runs on Python 3 too.
    for control_service, node_migrations in services_to_move.items():
        print("control services: {} node migrations:{}".format(control_service, node_migrations))
        for path in node_migrations:
            # Explicit key access: unpacking path.values() depended on dict
            # ordering and could swap the source and target nodes.
            from_node = path['from']
            to_node = path['to']
            moves_counter += 1
            progress_bar(moves_counter, total_moves, prefix='Step {}'.format(moves_counter),
                         suffix='Moving service {} from {} to node {}'.format(control_service, from_node, to_node))
            try:
                move_service_with_retry(control_service, from_node, to_node)
                wait_for_service_to_be_healthy(control_service, to_node)
            except Exception:  # narrow from a bare except: keep Ctrl-C working
                print('Failed to move service {} from {} to node {}'.format(control_service, from_node, to_node))
                continue
    print('Done moving {} services'.format(moves_counter))
def wait_for_service_to_be_healthy(service, host):
    """Block until *service* reports healthy on *host*; raises on timeout.

    The consul node name is the hostname without its '.node.strato' suffix.
    """
    node_name = host.replace(".node.strato", "")
    waiting.wait(
        lambda: is_service_healthy(service, node_name),
        timeout_seconds=TIMEOUT_FOR_SERVICE_HEALTH_CHECK,
        sleep_seconds=INTERVAL_FOR_SERVICE_HEALTH_CHECK,
        waiting_for='Service {} to be healthy'.format(service),
    )
def find_vms_on_control_nodes(vm_client):
    """Report any VMs currently hosted on the designated control nodes."""
    control_node_vms = [vm for vm in vm_client.list() if vm["hostname"] in CONTROL_NODES]
    if not control_node_vms:
        print('No VMs found on control nodes')
        return
    print('Found {} VMs on control nodes:'.format(len(control_node_vms)))
    for vm in control_node_vms:
        print('VM {} is on node {}'.format(vm["name"], vm["hostname"]))
def run_validators(vm_client):
    """Run the pre-migration validations.

    Currently a single check: no VMs may sit on a future control node.
    """
    print('\n\n[] Validation that no VMs are found on a future control node')
    find_vms_on_control_nodes(vm_client)
def get_services_to_node_mapping(cmapi, services_to_filter=None):
    """
    Get a mapping of services to nodes
    In the format: {service: [node1, node2, ...]}

    Reads the control-services placement map ({node: {service: state}}) from
    the cluster registry, optionally keeps only *services_to_filter*, and
    flips it to {service: [nodes]}.
    """
    control_map = cmapi.registry.get('cluster/control_services_placement_map')
    if services_to_filter:
        wanted = set(services_to_filter)
        # .items() (not Python-2-only .iteritems()) so this runs on Python 3 too.
        control_map = {
            node: {svc: state for svc, state in node_services.items() if svc in wanted}
            for node, node_services in control_map.items()
        }
    return flip_placement_map(control_map)
def summary_of_moving_services(cmapi, print_results=True, services_to_filter=None):
    """Build (and optionally print) the plan of service moves onto control nodes.

    Returns {service: [{'from': node, 'to': node}, ...]} for every service
    currently running on a non-control node.  Targets are picked round-robin
    among control nodes that do not already host the service.
    """
    flipped_control_map = get_services_to_node_mapping(cmapi, services_to_filter)
    services_dict = {}
    total_moves = 0
    services_to_move = services_to_filter or SERVICES_WITH_NODE_TYPE
    print("services to move:{}".format(services_to_move))
    for control_service in services_to_move:
        services_dict[control_service] = []
        # .get(): a service missing from the placement map should not abort
        # the whole summary with a KeyError.
        current_service_nodes = flipped_control_map.get(control_service, [])
        from_nodes = list(set(current_service_nodes) - set(CONTROL_NODES))  # The nodes that are not control nodes need to move
        to_nodes = list(set(CONTROL_NODES) - set(current_service_nodes))  # The nodes that already have the service cannot be the targets
        print("Service: {} to move from nodes: {}".format(control_service, from_nodes))
        print("Service: {} to move to nodes: {}".format(control_service, to_nodes))
        if not from_nodes or not to_nodes:
            # Nothing to move, or nowhere to move it (every control node
            # already hosts the service) -- the latter used to crash with a
            # modulo-by-zero on `total_moves % len(to_nodes)`.
            continue
        for from_node in from_nodes:
            to_node = to_nodes[total_moves % len(to_nodes)]
            if from_node in CONTROL_NODES:
                continue
            if from_node == to_node:
                continue
            total_moves += 1
            services_dict[control_service].append({'from': str(from_node), 'to': to_node})
    remove_empty_values_in_dict(services_dict)
    if print_results:
        print_moving_services_table(services_dict)
    return services_dict
def remove_empty_values_in_dict(dict_to_remove):
    """Delete, in place, every key whose value is falsy (empty list, None, ...).

    Iterates a snapshot of the items: deleting from a dict while iterating
    its live .items() view raises RuntimeError on Python 3 (on Python 2
    .items() happened to return a list, masking the bug).
    """
    for key, value in list(dict_to_remove.items()):
        if not value:
            del dict_to_remove[key]
def get_none_compute_from_nodes(service_nodes, control_nodes):
    """Return the deduplicated service nodes that are not control nodes
    (element order is unspecified, as with the original set difference)."""
    control = set(control_nodes)
    return [node for node in set(service_nodes) if node not in control]
def consul_backup():
    """Snapshot the consul cluster state into a timestamped file under /."""
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    snapshot_path = '/cluster_consul_backup_{}.snap'.format(timestamp)
    system_command_with_timeout('consul snapshot save {}'.format(snapshot_path))
    print('Saved consul snapshot to {}'.format(snapshot_path))
def fix_rack_storage_monitor_issue(cmapi):
    """Drop all ceph monitor registrations from Consul.

    Manual follow-up is required on the affected nodes (printed below).
    """
    print('Deleting all ceph storage monitors from Consul')
    cmapi.registry.delete('cluster/cephMonitors', recursive=True)
    print('*** Now delete /var/lib/ceph/mon/ from the relevant nodes, and restart the service ***')
def run_compute_maintenance():
    """Put every compute-node service into maintenance mode, one at a time,
    with a short cooldown between services."""
    for svc in compute_services:
        print('Putting service {} in maintenance mode'.format(svc))
        system_command_with_timeout('inspector tools cm service-to-maintenance {}'.format(svc))
        sleep(MAINTENANCE_MODE_COOLDOWN_TIME)
    print('Done putting services in maintenance mode')
def run_compute_unmaintenance():
    """Take every compute-node service back out of maintenance mode, one at a
    time, with a short cooldown between services."""
    for svc in compute_services:
        print('Putting service {} out of maintenance mode'.format(svc))
        system_command_with_timeout('inspector tools cm service-from-maintenance {}'.format(svc))
        sleep(MAINTENANCE_MODE_COOLDOWN_TIME)
    print('Done putting services out of maintenance mode')
def print_mysql_info():
    """Dump mysql placement info: hosting nodes, the consul master key, and
    the noded process PIDs on all nodes."""
    print_service_hosts("mysql")
    print("mysql master info:")
    os.system("consul kv get cluster/service_master_node/mysql")
    print("noded process PIDs:")
    os.system(""" inspector exec 'pgrep -f "python -m strato.noded.main" -P 1' """)
def print_service_hosts(service_name):
    """Print the consul catalog of nodes currently running *service_name*."""
    print('Nodes running service {}:'.format(service_name))
    os.system('consul catalog nodes -service={}'.format(service_name))
def wait_for_mysql_synced(node_name, timeout=30):
    """Poll mysql's wsrep state on *node_name* until it reports 'Synced'.

    Returns True once synced; waiting.wait raises if *timeout* seconds pass
    first.  Exceptions from the probe are logged and re-raised (and tolerated
    by waiting via expected_exceptions).
    """
    print("Waiting for mysql synced on node: %s" % node_name)

    def _check_mysql_wsrep_status():
        # One-off probe: run `show global status like '%wsrep_local_state_comment%'`
        # on the node through inspector.
        command = '''inspector exec -n {} "mysql --skip-column-names -B -e \\"show global status like '%wsrep_local_state_comment%'\\""'''.format(node_name)
        try:
            value = subprocess.check_output([command], shell=True)
            if "Synced" in str(value):
                print("mysql is synced on %s" % node_name)
                return True
            # Bug fix: both %-operands must be in one tuple; the original
            # `% node_name, value` raised TypeError (not enough arguments
            # for format string) on the not-synced path.
            print("mysql is not in sync on %s: %s" % (node_name, value))
        except Exception as ex:  # pylint: disable=broad-except
            print("Error in _wait_for_mysql_synced: %s" % ex)
            raise

    waiting.wait(_check_mysql_wsrep_status,
                 timeout_seconds=timeout,
                 sleep_seconds=10,
                 expected_exceptions=Exception)
    return True
def does_node_have_mysql(node_name):
    """Return True iff consul lists *node_name* among the mysql service nodes
    (grep exit status 0)."""
    grep_cmd = 'consul catalog nodes -service=mysql | grep {}'.format(node_name)
    try:
        exit_code = subprocess.call([grep_cmd], shell=True)
    except Exception:
        return False
    return exit_code == 0
def is_mysql_fit_for_master(node_name):
    """A node is fit to become mysql master when it hosts the mysql service
    and its wsrep state reaches 'Synced'."""
    if not does_node_have_mysql(node_name):
        return False
    return wait_for_mysql_synced(node_name)
def set_new_mysql_master(cmapi, node, noded_pid):
    """Promote *node* to mysql master, refusing (and raising) when it is not
    fit (not hosting mysql, or not wsrep-Synced)."""
    if not is_mysql_fit_for_master(node):
        msg = "Node {} is not fit for mysql master".format(node)
        print(msg)
        raise Exception(msg)
    set_key_for_mysql_master(cmapi, node, noded_pid)
def set_key_for_mysql_master(consul_kv_client, node, noded_pid):
    """Write the mysql master election key: the node's full hostname (with
    the .node.strato suffix appended) plus the noded PID."""
    master_record = {"hostname": "{}.node.strato".format(node), "pid": noded_pid}
    consul_kv_client.set('cluster/service_master_node/mysql', master_record)
def increase_openstack_server_count(dry_run=False):
    """Resize both openstack worker pools: compute/identity, then neutron."""
    increase_compute_and_identity_server_count(dry_run=dry_run)
    increase_neutron_server_count(dry_run=dry_run)
def increase_compute_and_identity_server_count(dry_run=False):
    """Run the compute/identity worker resizer (optionally as a dry run).

    Best-effort: failures are logged and swallowed.  The unused `output`
    binding was dropped and the 'alligner' typo in the log line fixed.
    """
    print("Increasing openstack server count according to worker aligner")
    command = "inspector tools compute all-workers-resizer --lcs {}".format(
        "--dry-run" if dry_run else ''
    )
    try:
        subprocess.check_output(command, shell=True)
    except Exception as e:
        print("Failed to run all-workers-resizer: %s" % e)
def increase_neutron_server_count(dry_run=False):
    """Run the neutron worker auto-aligner (optionally as a dry run).

    Best-effort: failures are logged and swallowed.
    """
    print("Increasing network server count according to worker aligner")
    # Bug fix: the original format string had no '{}' placeholder, so
    # .format() silently dropped the --dry-run flag and dry runs were real.
    command = "inspector tools network neutron worker-auto-align --lcs {}".format(
        "--dry-run" if dry_run else ''
    )
    try:
        subprocess.check_output(command, shell=True)
    except Exception as e:
        # Also name the tool actually invoked (was copy-pasted as
        # 'all-workers-resizer').
        print("Failed to run worker-auto-align: %s" % e)
def migrate_openstack_servers_to_future_control_nodes(cmapi):
    """Move just the openstack control-plane services (openstack_services)
    onto the designated control nodes."""
    print("Migrating openstack services to future control nodes")
    move_services_to_control_nodes(cmapi=cmapi, services_to_filter=openstack_services)
def get_services_nodes(consul_kv_client, service_name):
    """Fetch the node list registered for *service_name* from the consul KV store."""
    return consul_kv_client.get('cluster/services/{}/nodes'.format(service_name))
def set_rpc_agent_down_time(agent_down_time, force_restart=False, cmapi=None):
    """Raise the neutron/nova agent-down thresholds cluster-wide.

    Writes the value into the local config files via crudini, pushes the
    files to all remotes, and (when *force_restart*) rolling-restarts the
    matching RPC server.  *cmapi* is only needed when *force_restart* is set.
    """
    targets = [
        ('Neutron RPC server', '/etc/neutron/neutron.conf', 'agent_down_time', 'neutron-rpc-server'),
        ('Nova RPC server', '/etc/nova/nova.conf', 'service_down_time', 'openstack-nova-conductor'),
    ]
    for label, conf_path, option, service in targets:
        print('Setting {} agent down time to {}'.format(label, agent_down_time))
        system_command_with_timeout('crudini --set {} DEFAULT {} {}'.format(conf_path, option, agent_down_time))
        system_command_with_timeout('inspector copy to-remotes --src-path {}'.format(conf_path))
        if force_restart:
            rolling_restart_for_service(cmapi, service)
def migreate_rabbit_to_control_node(cmapi, to_node):
    """Request rabbit leadership on *to_node* and wait (up to 30s) for the move.

    No-ops when the current leader already matches; rejects node names that
    lack the '.node.strato' suffix.  (Function name typo kept: callers use it.)
    """
    leader_host = cmapi.registry.get('cluster/rabbit/leader')['hostname']
    if to_node in leader_host:
        print("Rabbit is already on the requested node")
        return
    if not to_node.endswith('.node.strato'):
        print('Invalid node name: {}'.format(to_node))
        return
    print("Migrating rabbit to node: %s" % to_node)
    cmapi.registry.set('cluster/rabbit/requested-node', to_node)
    print("Waiting for rabbit to be migrated to node: %s" % to_node)
    rabbit_moved = lambda: cmapi.registry.get('cluster/rabbit/leader')['hostname'] == to_node
    waiting.wait(rabbit_moved, timeout_seconds=30, sleep_seconds=5)
def rolling_restart_for_services(consul_kv_client, services):
    """Rolling-restart each service in *services*, one node at a time.

    Bug fix: print() was called logging-style with two arguments
    ("...%s", services), so it printed a literal '%s' followed by the list;
    use %-formatting instead.
    """
    print("Restarting services: %s" % (services,))
    for service in services:
        rolling_restart_for_service(consul_kv_client=consul_kv_client, service=service)
def rolling_restart_for_openstack_servers(consul_kv_client):
    """Rolling-restart the whole openstack control-plane service set."""
    rolling_restart_for_services(consul_kv_client=consul_kv_client, services=openstack_services)
def rolling_restart_for_service(consul_kv_client, service):
    """Restart *service* host by host, waiting for health between restarts
    so the service never loses all replicas at once."""
    placement = get_services_to_node_mapping(cmapi=consul_kv_client, services_to_filter=[service])
    for host in placement[service]:
        print("Restarting service: %s on host: %s" % (service, host))
        restart_service_on_host(service, host)
        wait_for_service_to_be_healthy(service, host)
def restart_service_on_host(service, host):
    """systemctl-restart *service* on *host* through inspector (45s timeout)."""
    restart_cmd = "inspector exec -n {} \'systemctl restart {} \'".format(host, service)
    print("Restarting service with command: {}".format(restart_cmd))
    system_command_with_timeout(restart_cmd)
def verify_agents_are_up(neutron_client, restart_unsynced_agents=False):
    """Check neutron agent heartbeats; optionally restart the stale ones.

    The unsynced list is fetched a second time when the first pass finds
    anything, to filter out agents that were merely slow once.
    """
    print("Verifying that all agents are synced")
    stale_agents = get_list_of_unsynced_agents(neutron_client)
    if stale_agents:
        print("Retrying to get agent list, incase of any unsynced agents on first try")
        stale_agents = get_list_of_unsynced_agents(neutron_client)
    print("Unsynced agents: %s" % stale_agents)
    if stale_agents and restart_unsynced_agents:
        print("Unsynced agents found, restarting them")
        restart_agent_services(stale_agents)
def restart_agent_services(agent_to_restart_list):
    """Restart each agent's backing service on the host it runs on."""
    for agent in agent_to_restart_list:
        print("Restarting agent: %s" % agent['id'])
        restart_service_on_host(service=agent['binary'], host=agent['host'])
        # wait_for_service_to_be_healthy(service=agent['binary'], host=agent['host'])
def get_list_of_unsynced_agents(neutron_client):
    """Return the neutron agents whose last heartbeat is older than 20 seconds.

    NOTE(review): compares naive datetimes -- assumes agent heartbeat
    timestamps share the local clock's timezone; confirm against the
    neutron deployment.
    """
    cutoff = datetime.now() - timedelta(seconds=20)
    stale_agents = []
    for agent in neutron_client.list_agents()['agents']:
        last_beat = datetime.strptime(agent['heartbeat_timestamp'], '%Y-%m-%d %H:%M:%S')
        print("last heartbeat: {}, agent heartbeats {}".format(cutoff, last_beat))
        if last_beat < cutoff:
            print("Agent %s is down" % agent['id'])
            stale_agents.append(agent)
    return stale_agents
def migrate_openstack_and_rabbit_stack_all_in_one(cmapi, neutron_client, to_node):
    """Full migration pipeline: raise agent timeouts, scale worker pools,
    move openstack services, move rabbit, rolling-restart, verify agents."""
    set_rpc_agent_down_time(agent_down_time=900, force_restart=True)
    increase_openstack_server_count()
    migrate_openstack_servers_to_future_control_nodes(cmapi)
    migreate_rabbit_to_control_node(cmapi, to_node)
    rolling_restart_for_openstack_servers(cmapi)
    verify_agents_are_up(neutron_client)
def main():
    """Command-line entry point: dispatch on sys.argv[1] to one of the
    cluster-maintenance operations (backup, validate, move, ...).

    Every branch catches Exception broadly and prints a retry hint -- this is
    an interactive operator tool.  All uses of `e.message` were replaced with
    the exception object itself: Exception.message was deprecated in Python
    2.6 and does not exist in Python 3, so the error paths themselves raised
    AttributeError.
    """
    cmapi = clustermanagementapi.ClusterManagementAPI()
    vm_client = vm_client_module.Client(headers=credentials.get_internal_headers()).api.v2.compute.vms
    token = admin_creds.get_credentials().token
    neutron_client = neutron_client_module.Client(
        '2.0',
        token=token,
        endpoint_url='http://neutron-server.service.strato:9696',
        insecure=True
    )
    options = [
        'backup', 'validate', 'summary', 'move',
        'fix-rack', 'compute-maint', 'compute-unmaint',
        'print-mysql-info', 'set-new-mysql-master',
        'migrate-openstack-rabbit', 'set-rpc-agent-down-time',
        'increase-openstack-server-count',
        'migrate-openstack-servers-to-future-control-nodes',
        'migrate-rabbit-to-control-node',
        'rolling-restart-for-openstack-servers',
        'verify-agents-are-up',
        'rolling-restart-for-services',
    ]
    if len(sys.argv) == 1:
        print('Choose the following flags: {}'.format(", ".join(options)))
        return
    flag = sys.argv[1]
    # run consul backup
    if flag == 'backup':
        try:
            consul_backup()
        except Exception as e:
            print('Error while trying to take a consul snapshot {} - try again!'.format(e))
        return
    # run validators
    if flag == 'validate':
        try:
            run_validators(vm_client)
        except Exception as e:
            print('Error while running validations {} - try again!'.format(e))
        return
    # run a summary of services to move
    if flag == 'summary':
        try:
            summary_of_moving_services(cmapi)
        except Exception as e:
            print('Error while trying get a summary of all moving services {} - try again!'.format(e))
        return
    # run the actual move
    if flag == 'move':
        try:
            move_services_to_control_nodes(cmapi)
        except Exception as e:
            print('Error while trying to move services {} - try again!'.format(e))
        return
    # fix rack-storage-monitor issue
    if flag == 'fix-rack':
        print('Fixing rack storage monitor issue')
        try:
            fix_rack_storage_monitor_issue(cmapi)
        except Exception as e:
            print('Error while trying to fix rack-storage-monitor issue {} - try again!'.format(e))
        return
    # run compute services maintenance
    if flag == 'compute-maint':
        try:
            run_compute_maintenance()
        except Exception as e:
            print('Error while trying to run compute services maintenance {} - try again!'.format(e))
        return
    # run compute services un-maintenance
    if flag == 'compute-unmaint':
        try:
            run_compute_unmaintenance()
        except Exception as e:
            print('Error while trying to run compute services un-maintenance {} - try again!'.format(e))
        return
    # set new mysql master
    # usage: python move_services.py set-new-mysql-master <node> <noded_pid>
    # example: python move_services.py set-new-mysql-master stratonode2 1234
    # this will set the mysql master to stratonode2
    if flag == 'set-new-mysql-master':
        if len(sys.argv) != 4:
            print('Usage: python move_services.py set-new-mysql-master <node> <noded_pid>')
            return
        try:
            consul_kv_client = ConsulKeyValueStoreClient()
            set_new_mysql_master(consul_kv_client, sys.argv[2], sys.argv[3])
        except Exception as e:
            print('Error while trying to set new mysql master {} - try again!'.format(e))
        return
    # print mysql info
    if flag == 'print-mysql-info':
        try:
            print_mysql_info()
        except Exception as e:
            print('Error while trying to print mysql info {} - try again!'.format(e))
        return
    # Migrate Openstack and rabbit
    if flag == 'migrate-openstack-rabbit':
        if len(sys.argv) != 3:
            print('Usage: python move_services.py migrate-openstack-rabbit <node>')
            return
        try:
            migrate_openstack_and_rabbit_stack_all_in_one(cmapi, neutron_client, sys.argv[2])
        except Exception as e:
            print('Error while trying to migrate openstack and rabbit {} - try again!'.format(e))
        return
    # Set RPC agent downtime to a set time
    if flag == 'set-rpc-agent-down-time':
        force_restart = False
        agent_down_time = 900
        if '--down-time' in sys.argv:
            if sys.argv[2] == '--down-time':
                print("Setting downtime:")
                try:
                    if sys.argv[3].isdigit():
                        agent_down_time = int(sys.argv[3])
                    else:
                        print("Down time flag not set correctly,Usage: python move_services.py set-rpc-agent-down-time --down-time <down_time_int> --force-restart")
                        return
                except Exception:
                    print("Down time flag not set correctly,Usage: python move_services.py set-rpc-agent-down-time --down-time <down_time_int> --force-restart ")
                    return
        try:
            if '--force-restart' in sys.argv:
                if sys.argv[4] == '--force-restart':
                    force_restart = True
        except Exception:
            print("Force restart flag not set correctly, setting to False. Usage: python move_services.py set-rpc-agent-down-time --down-time <down_time_int> --force-restart")
            force_restart = False
        try:
            set_rpc_agent_down_time(agent_down_time=agent_down_time, force_restart=force_restart, cmapi=cmapi)
        except Exception as e:
            print('Error while trying to set rpc agent down time {} - try again!'.format(e))
        return
    # Increase openstack server count
    if flag == 'increase-openstack-server-count':
        dry_run = False
        if "--dry-run" in sys.argv:
            if sys.argv[2] == "--dry-run":
                print("Running dry run")
                dry_run = True
            else:
                print("Dry run flag not set correctly, Usage: python move_services.py increase-openstack-server-count --dry-run")
                return
        try:
            increase_openstack_server_count(dry_run=dry_run)
        except Exception as e:
            print('Error while trying to increase openstack server count {} - try again!'.format(e))
        return
    # migrate_openstack_servers_to_future_control_nodes
    if flag == 'migrate-openstack-servers-to-future-control-nodes':
        try:
            migrate_openstack_servers_to_future_control_nodes(cmapi)
        except Exception as e:
            print('Error while trying to migrate openstack servers to future control nodes {} - try again!'.format(e))
        return
    # migreate_rabbit_to_control_node
    if flag == 'migrate-rabbit-to-control-node':
        if len(sys.argv) != 3:
            print('Usage: python move_services.py migrate-rabbit-to-control-node <node>')
            return
        try:
            migreate_rabbit_to_control_node(cmapi, sys.argv[2])
        except Exception as e:
            print('Error while trying to migrate rabbit to control node {} - try again!'.format(e))
        return
    # rolling_restart_for_openstack_servers
    if flag == 'rolling-restart-for-openstack-servers':
        try:
            rolling_restart_for_openstack_servers(cmapi)
        except Exception as e:
            print('Error while trying to rolling restart for openstack servers {} - try again!'.format(e))
        return
    # Rolling restart for any list of services
    # usage: python move_services.py rolling-restart-for-services <service1> <service2> <service3> ...
    # example: python move_services.py rolling-restart-for-services nova-api nova-scheduler nova-conductor
    if flag == 'rolling-restart-for-services':
        if len(sys.argv) < 3:
            print('Usage: python move_services.py rolling-restart-for-services <service1> <service2> <service3> ...')
            return
        try:
            rolling_restart_for_services(cmapi, sys.argv[2:])
        except Exception as e:
            print('Error while trying to rolling restart for services {} - try again!'.format(e))
        return
    # verify_agents_are_up
    if flag == 'verify-agents-are-up':
        restart_unsynced_agents = False
        if "--restart-unsynced-agents" in sys.argv:
            if sys.argv[2] == "--restart-unsynced-agents":
                print("Restarting unsynced agents")
                restart_unsynced_agents = True
            else:
                print("Restart unsynced agents flag not set correctly, Usage: python move_services.py verify-agents-are-up --restart-unsynced-agents")
                return
        try:
            verify_agents_are_up(neutron_client, restart_unsynced_agents)
        except Exception as e:
            print('Error while trying to verify agents are up {} - try again!'.format(e))
        return
    if flag not in options:
        print('Choose the following flags: {}'.format(", ".join(options)))
        return


# Guarded so importing this module for reuse does not immediately run the tool.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@grishatsuker
Just setting the key for mysql master node is enough? don't we need to also release the lock beforehand?