ajkavanagh/p-r-a-charm.py

## p-r-a-charm.py
# FIRST make sure you find all of the code that might call service_start(), service_restart() or service_reload()
# I use:
# ack "service_(restart|start|reload)" --ignore-dir=charmhelpers --ignore-dir=trusty --ignore-dir=unit_tests --ignore-dir=.tox

# This will provide a list of files/locations that definitely need an "if not is_unit_paused_set():" type stanza.

# 0. Sync charm-helpers to the latest version - it includes the code that's needed for
#    maintenance mode (pause/resume).

# 1. Change the charm’s xxx_utils.py file (in lib/ or hooks/)
#  - Remove is_paused() if it exists.
#  - Change/add assess_status()
#  - Add assess_status_func() -- leave off ports=... as we don’t want to check the ports due to a race hazard.
#  - Add pause_unit_helper()
#  - Add resume_unit_helper()
#  - Add _pause_resume_helper() -- and leave off ports=... as we don’t want to check the ports.
#  - tie in the use of assess_status() to the main hooks function, and anywhere else it is needed.

# Repeat: REMEMBER to tie in the assess_status() function into the hooks! - i.e. see how ceilometer or keystone does it.
# e.g. add it into the main() function after executing the hook.

# In xxx_utils.py:

def assess_status(configs):
    """Work out and publish the workload status of this unit.

    Obtains the status-assessment callable from assess_status_func() and
    invokes it straight away.

    SIDE EFFECT: the callable is expected to call
    set_os_workload_status(...), which sets the workload status of the
    unit, and to call status_set(...) directly if the paused state isn't
    complete (behaviour provided by charmhelpers, not by this function).

    @param configs: a templating.OSConfigRenderer() object
    @returns None - this function is executed for its side-effect
    """
    status_checker = assess_status_func(configs)
    status_checker()


def assess_status_func(configs):
    """Build the callable that assesses this unit's workload status.

    Delegates to
    charmhelpers.contrib.openstack.utils.make_assess_status_func() and
    returns whatever it produces.  The result is used directly by
    assess_status() and also when pausing and resuming the unit.

    NOTE(ajkavanagh) ports are not checked due to race hazards with services
    that don't behave synchronously w.r.t their service scripts.  e.g.
    apache2.

    @param configs: a templating.OSConfigRenderer() object
    @return f() -> None : a function that assesses the unit's workload status
    """
    unit_services = services()
    # ports=None deliberately disables the port checks (see NOTE above).
    status_func = make_assess_status_func(
        configs,
        REQUIRED_INTERFACES,
        charm_func=check_optional_relations,
        services=unit_services,
        ports=None,
    )
    return status_func


def pause_unit_helper(configs):
    """Pause the unit and refresh its workload status.

    Thin wrapper that hands charmhelpers.contrib.openstack.utils.pause_unit()
    to _pause_resume_helper(), which supplies the status-assessment function
    so that the status is correctly updated afterwards.

    @param configs: a templating.OSConfigRenderer() object
    @returns None - this function is executed for its side-effect
    """
    operation = pause_unit
    _pause_resume_helper(operation, configs)

def resume_unit_helper(configs):
    """Resume the unit and refresh its workload status.

    Thin wrapper that hands charmhelpers.contrib.openstack.utils.resume_unit()
    to _pause_resume_helper(), which supplies the status-assessment function
    so that the status is correctly updated afterwards.

    @param configs: a templating.OSConfigRenderer() object
    @returns None - this function is executed for its side-effect
    """
    operation = resume_unit
    _pause_resume_helper(operation, configs)


def _pause_resume_helper(f, configs):
    """Run a pause/resume operation with this charm's status assessor.

    Creates the assess-status function via assess_status_func() and passes
    it, together with the unit's services, to ``f``.

    @param f: the pause or resume function to call; it receives the
              assess-status function, ``services=`` and ``ports=``
    @param configs: a templating.OSConfigRenderer() object
    @returns None - this function is executed for its side-effect
    """
    assessor = assess_status_func(configs)
    unit_services = services()
    # TODO(ajkavanagh) - ports are passed as None (i.e. not checked) because
    # of the race hazard that exists due to service_start()
    f(assessor, services=unit_services, ports=None)

# 2. Add tests to test_xxx_utils.py unit test:
#  - Remove any test against is_paused().
#  - Change test_assess_status(self):  if it exists.
#  - Add test_assess_status_func(self)
#  - Add test_pause_unit_helper(self)
#  - Add test_pause_resume_helper(self)
#  - You may have to tweak the imports/etc to get lint/tests to pass.

class TestSomeThing(CharmTestCase):
    """Unit tests for the pause/resume helpers added to xxx_utils.py."""

    # ...

    def test_assess_status(self):
        """assess_status() builds the assessor and calls it with no args."""
        with patch.object(utils, 'assess_status_func') as asf:
            callee = MagicMock()
            asf.return_value = callee
            utils.assess_status('test-config')
            asf.assert_called_once_with('test-config')
            callee.assert_called_once_with()

    @patch.object(utils, 'REQUIRED_INTERFACES')
    @patch.object(utils, 'services')
    @patch.object(utils, 'determine_ports')
    @patch.object(utils, 'make_assess_status_func')
    def test_assess_status_func(self,
                                make_assess_status_func,
                                determine_ports,
                                services,
                                REQUIRED_INTERFACES):
        """assess_status_func() forwards the expected arguments."""
        services.return_value = 's1'
        determine_ports.return_value = 'p1'
        utils.assess_status_func('test-config')
        # ports=None whilst port checks are disabled.
        # charm_func must be part of the expected call: assess_status_func()
        # always passes charm_func=check_optional_relations, and
        # assert_called_once_with() compares the complete argument list.
        make_assess_status_func.assert_called_once_with(
            'test-config', REQUIRED_INTERFACES,
            charm_func=utils.check_optional_relations,
            services='s1', ports=None)

    def test_pause_unit_helper(self):
        """pause/resume helpers delegate to _pause_resume_helper()."""
        with patch.object(utils, '_pause_resume_helper') as prh:
            utils.pause_unit_helper('random-config')
            prh.assert_called_once_with(utils.pause_unit, 'random-config')
        with patch.object(utils, '_pause_resume_helper') as prh:
            utils.resume_unit_helper('random-config')
            prh.assert_called_once_with(utils.resume_unit, 'random-config')

    @patch.object(utils, 'services')
    @patch.object(utils, 'determine_ports')
    def test_pause_resume_helper(self, determine_ports, services):
        """_pause_resume_helper() calls f with the assessor and services."""
        f = MagicMock()
        services.return_value = 's1'
        determine_ports.return_value = 'p1'
        with patch.object(utils, 'assess_status_func') as asf:
            asf.return_value = 'assessor'
            utils._pause_resume_helper(f, 'some-config')
            asf.assert_called_once_with('some-config')
            # ports=None whilst port checks are disabled.
            f.assert_called_once_with('assessor', services='s1', ports=None)


# 3.1 Add/edit an actions/actions.py to incorporate pause/resume:
#
# Generally actions/actions.py is where we’re putting pause and resume actions.
# Thus if the file doesn’t exist, create it and symlink pause and resume to
# actions.py.  Then make the file look like:

#!/usr/bin/python

import os
import sys

from charmhelpers.core.hookenv import action_fail
from xxx_utils import (
    pause_unit_helper,
    resume_unit_helper,
    register_configs,
)


def pause(args):
    """Pause the Ceilometer services.

    Registers the charm configs and passes them to pause_unit_helper().

    @param args: the action's argument list (not used here)
    @raises Exception should the service fail to stop.
    """
    configs = register_configs()
    pause_unit_helper(configs)


def resume(args):
    """Resume the Ceilometer services.

    Registers the charm configs and passes them to resume_unit_helper().

    @param args: the action's argument list (not used here)
    @raises Exception should the service fail to start.
    """
    configs = register_configs()
    resume_unit_helper(configs)


# Maps each action name (the basename the script is invoked as) to the
# callable that implements it; every callable takes the argument list.
ACTIONS = {
    "pause": pause,
    "resume": resume,
}


def main(args):
    """Dispatch to the action named by the basename of ``args[0]``.

    @param args: argv-style list; args[0] is the path the script was run as
    @returns an error string when the action name is not in ACTIONS,
             otherwise None (also when the action raised, in which case the
             failure is reported through action_fail())
    """
    action_name = os.path.basename(args[0])
    if action_name not in ACTIONS:
        message = "Action {} undefined".format(action_name)
        action_fail(message)
        return message

    handler = ACTIONS[action_name]
    try:
        handler(args)
    except Exception as exc:
        # Report the failure via action_fail() rather than letting the
        # exception propagate out of the script.
        action_fail("Action {} failed: {}".format(action_name, str(exc)))


if __name__ == "__main__":
    # main() returns an error string for an unknown action, else None.
    exit_status = main(sys.argv)
    sys.exit(exit_status)


# 3.2 Fix up/add the unit_tests/test_actions.py file to look like this (at least for the pause/resume
#     actions).  If the file doesn't exist then it needs to look something like this:

import mock
from mock import patch

from test_utils import CharmTestCase

# Import actions while register_configs is patched, so that the name the
# actions module binds is a mock returning 'test-config'.
with patch('xxx_utils.register_configs',
           return_value='test-config') as configs:
    import actions


class PauseTestCase(CharmTestCase):
    """Tests for the ``pause`` action entry point."""

    def setUp(self):
        # NOTE(review): CharmTestCase.setUp presumably patches the listed
        # names on ``actions`` and exposes them as attributes - confirm.
        patched_names = ["pause_unit_helper"]
        super(PauseTestCase, self).setUp(actions, patched_names)

    def test_pauses_services(self):
        """pause() passes the registered configs to pause_unit_helper()."""
        no_args = []
        actions.pause(no_args)
        self.pause_unit_helper.assert_called_once_with('test-config')


class ResumeTestCase(CharmTestCase):
    """Tests for the ``resume`` action entry point."""

    def setUp(self):
        # NOTE(review): CharmTestCase.setUp presumably patches the listed
        # names on ``actions`` and exposes them as attributes - confirm.
        super(ResumeTestCase, self).setUp(
            actions, ["resume_unit_helper"])

    def test_resumes_services(self):
        """resume() passes the registered configs to resume_unit_helper()."""
        actions.resume([])
        self.resume_unit_helper.assert_called_once_with('test-config')

class MainTestCase(CharmTestCase):
    """Tests for actions.main(), the action dispatcher."""

    def setUp(self):
        super(MainTestCase, self).setUp(actions, ["action_fail"])

    def test_invokes_action(self):
        """main() calls the callable registered for the action name."""
        dummy_calls = []

        def dummy_action(args):
            dummy_calls.append(True)

        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
            actions.main(["foo"])
        self.assertEqual(dummy_calls, [True])

    def test_unknown_action(self):
        """Unknown actions aren't a traceback."""
        exit_string = actions.main(["foo"])
        self.assertEqual("Action foo undefined", exit_string)

    def test_failing_action(self):
        """Actions which traceback trigger action_fail() calls."""
        dummy_calls = []

        self.action_fail.side_effect = dummy_calls.append

        def dummy_action(args):
            raise ValueError("uh oh")

        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
            actions.main(["foo"])
        # main() wraps the exception text as
        # "Action <name> failed: <error>" before calling action_fail().
        self.assertEqual(dummy_calls, ["Action foo failed: uh oh"])


# 3.3 Fix up/add the actions.yaml file to include:

pause:
  description: Pause the xxx unit.  This action will stop xxx services.
resume:
  description: Resume the xxx unit.  This action will start xxx services.

# 3.4 Add amulet tests to verify that pause and resume work.  The tests/basic_deployment.py needs test 910
# added (or ensure that it is there) for the pause and resume to be tested:

    def _run_action(self, unit_id, action, *args):
        """Queue a juju action on a unit and return the queued action's id.

        @param unit_id: the unit to run the action on, e.g. "ceilometer/0"
        @param action: the name of the action
        @param args: extra command-line arguments appended to the command
        @returns the value of the 'Action queued with id' key in juju's
                 JSON output
        """
        command = ["juju", "action", "do", "--format=json", unit_id, action]
        command += args
        print("Running command: %s\n" % " ".join(command))
        raw_output = subprocess.check_output(command)
        reply = json.loads(raw_output.decode(encoding="UTF-8"))
        return reply[u'Action queued with id']

    def _wait_on_action(self, action_id):
        """Poll a queued juju action until it completes or fails.

        Fetches the action every 2 seconds; there is no upper bound on how
        long this waits.

        @param action_id: the id returned when the action was queued
        @returns True if the action status is "completed"; False if it is
                 "failed" or the fetch command itself raised
        """
        fetch_command = ["juju", "action", "fetch", "--format=json", action_id]
        while True:
            try:
                raw_output = subprocess.check_output(fetch_command)
            except Exception as e:
                print(e)
                return False
            status = json.loads(raw_output.decode(encoding="UTF-8"))[u"status"]
            if status == "completed":
                return True
            if status == "failed":
                return False
            # Any other status: wait and poll again.
            time.sleep(2)

    def test_910_pause_and_resume(self):
        """The services can be paused and resumed. """
        u.log.debug('Checking pause and resume actions...')
        unit_name = "ceilometer/0"
        unit = self.d.sentry.unit[unit_name]

        assert u.status_get(unit)[0] == "active"

        # (action, message if the action fails, expected workload status)
        steps = (
            ("pause", "Pause action failed.", "maintenance"),
            ("resume", "Resume action failed.", "active"),
        )
        for action_name, failure_message, expected_status in steps:
            action_id = self._run_action(unit_name, action_name)
            assert self._wait_on_action(action_id), failure_message
            assert u.status_get(unit)[0] == expected_status
        u.log.debug('OK')


# 4. If xxx_utils.py contains a git_post_install() then the service_restart() needs to be gated against
#    is_unit_paused_set() from charmhelpers.

from charmhelpers.contrib.openstack.utils import (
    ...
    is_unit_paused_set,
    ...
)

def git_post_install(projects_yaml):
    """Perform keystone post-install setup.

    Template snippet: the ``...`` below stands in for the charm's existing
    post-install steps.  The point of the example is that the final
    service_restart() is gated on is_unit_paused_set().

    @param projects_yaml: not used by the lines shown here
    """

    ...
    # Don't restart if the unit is supposed to be paused.
    if not is_unit_paused_set():
        service_restart('keystone')

# This is true for any other actions that might cause a service to be started or restarted.

# 5. in the hooks file (hooks/xxx_hooks.py) replace the 'restart_on_change' import with:

from charmhelpers.contrib.openstack.utils import (
    ...
    pausable_restart_on_change as restart_on_change,
    ...
)

# 6. run tests, fix import problems, run lint, run functional tests & make tea.

# DONE!
	# FIRST make sure you find all of the code that might call service_start(), service_restart() or service_reload()
	# I use:
	# ack "service_(restart|start|reload)" --ignore-dir=charmhelpers --ignore-dir=trusty --ignore-dir=unit_tests --ignore-dir=.tox

	# This will provide a list of files/locations that definitely need an "if not is_unit_paused_set():" type stanza.

	# 0. Sync charm-helpers to the latest version - it includes the code that's needed for
	# maintenance mode (pause/resume).

	# 1. Change the charm’s xxx_utils.py file (in lib/ or hooks/)
	# - Remove is_paused() if it exists.
	# - Change/add assess_status()
	# - Add assess_status_func() -- leave off ports=... as we don’t want to check the ports due to a race hazard.
	# - Add pause_unit_helper()
	# - Add resume_unit_helper()
	# - Add _pause_resume_helper() -- and leave off ports=... as we don’t want to check the ports.
	# - tie in the use of assess_status() to the main hooks function, and anywhere else it is needed.

	# Repeat: REMEMBER to tie in the assess_status() function into the hooks! - i.e. see how ceilometer or keystone does it.
	# e.g. add it into the main() function after executing the hook.

	# In xxx_utils.py:

	def assess_status(configs):
	    """Assess status of current unit

	    Decides what the state of the unit should be based on the current
	    configuration.

	    SIDE EFFECT: calls set_os_workload_status(...) which sets the workload
	    status of the unit.
	    Also calls status_set(...) directly if paused state isn't complete.

	    @param configs: a templating.OSConfigRenderer() object
	    @returns None - this function is executed for its side-effect
	    """
	    # Build the assessor and invoke it immediately.
	    assess_status_func(configs)()


	def assess_status_func(configs):
	    """Helper function to create the function that will assess_status() for
	    the unit.
	    Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to
	    create the appropriate status function and then returns it.
	    Used directly by assess_status() and also for pausing and resuming
	    the unit.

	    NOTE(ajkavanagh) ports are not checked due to race hazards with services
	    that don't behave synchronously w.r.t their service scripts. e.g.
	    apache2.

	    @param configs: a templating.OSConfigRenderer() object
	    @return f() -> None : a function that assesses the unit's workload status
	    """
	    return make_assess_status_func(
	        configs, REQUIRED_INTERFACES,
	        charm_func=check_optional_relations,
	        services=services(), ports=None)


	def pause_unit_helper(configs):
	    """Helper function to pause a unit, and then call assess_status(...) in
	    effect, so that the status is correctly updated.
	    Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work.

	    @param configs: a templating.OSConfigRenderer() object
	    @returns None - this function is executed for its side-effect
	    """
	    _pause_resume_helper(pause_unit, configs)

	def resume_unit_helper(configs):
	    """Helper function to resume a unit, and then call assess_status(...) in
	    effect, so that the status is correctly updated.
	    Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work.

	    @param configs: a templating.OSConfigRenderer() object
	    @returns None - this function is executed for its side-effect
	    """
	    _pause_resume_helper(resume_unit, configs)


	def _pause_resume_helper(f, configs):
	    """Helper function that uses the make_assess_status_func(...) from
	    charmhelpers.contrib.openstack.utils to create an assess_status(...)
	    function that can be used with the pause/resume of the unit

	    @param f: the function to be used with the assess_status(...) function
	    @param configs: a templating.OSConfigRenderer() object
	    @returns None - this function is executed for its side-effect
	    """
	    # TODO(ajkavanagh) - ports are passed as None (i.e. not checked) because
	    # of the race hazard that exists due to service_start()
	    f(assess_status_func(configs),
	      services=services(),
	      ports=None)

	# 2. Add tests to test_xxx_utils.py unit test:
	# - Remove any test against is_paused().
	# - Change test_assess_status(self): if it exists.
	# - Add test_assess_status_func(self)
	# - Add test_pause_unit_helper(self)
	# - Add test_pause_resume_helper(self)
	# - You may have to tweak the imports/etc to get lint/tests to pass.

	class TestSomeThing(CharmTestCase):
	    """Unit tests for the pause/resume helpers added to xxx_utils.py."""

	    # ...

	    def test_assess_status(self):
	        """assess_status() builds the assessor and calls it with no args."""
	        with patch.object(utils, 'assess_status_func') as asf:
	            callee = MagicMock()
	            asf.return_value = callee
	            utils.assess_status('test-config')
	            asf.assert_called_once_with('test-config')
	            callee.assert_called_once_with()

	    @patch.object(utils, 'REQUIRED_INTERFACES')
	    @patch.object(utils, 'services')
	    @patch.object(utils, 'determine_ports')
	    @patch.object(utils, 'make_assess_status_func')
	    def test_assess_status_func(self,
	                                make_assess_status_func,
	                                determine_ports,
	                                services,
	                                REQUIRED_INTERFACES):
	        """assess_status_func() forwards the expected arguments."""
	        services.return_value = 's1'
	        determine_ports.return_value = 'p1'
	        utils.assess_status_func('test-config')
	        # ports=None whilst port checks are disabled.
	        # charm_func must be part of the expected call: assess_status_func()
	        # always passes charm_func=check_optional_relations, and
	        # assert_called_once_with() compares the complete argument list.
	        make_assess_status_func.assert_called_once_with(
	            'test-config', REQUIRED_INTERFACES,
	            charm_func=utils.check_optional_relations,
	            services='s1', ports=None)

	    def test_pause_unit_helper(self):
	        """pause/resume helpers delegate to _pause_resume_helper()."""
	        with patch.object(utils, '_pause_resume_helper') as prh:
	            utils.pause_unit_helper('random-config')
	            prh.assert_called_once_with(utils.pause_unit, 'random-config')
	        with patch.object(utils, '_pause_resume_helper') as prh:
	            utils.resume_unit_helper('random-config')
	            prh.assert_called_once_with(utils.resume_unit, 'random-config')

	    @patch.object(utils, 'services')
	    @patch.object(utils, 'determine_ports')
	    def test_pause_resume_helper(self, determine_ports, services):
	        """_pause_resume_helper() calls f with the assessor and services."""
	        f = MagicMock()
	        services.return_value = 's1'
	        determine_ports.return_value = 'p1'
	        with patch.object(utils, 'assess_status_func') as asf:
	            asf.return_value = 'assessor'
	            utils._pause_resume_helper(f, 'some-config')
	            asf.assert_called_once_with('some-config')
	            # ports=None whilst port checks are disabled.
	            f.assert_called_once_with('assessor', services='s1', ports=None)


	# 3.1 Add/edit an actions/actions.py to incorporate pause/resume:
	#
	# Generally actions/actions.py is where we’re putting pause and resume actions.
	# Thus if the file doesn’t exist, create it and symlink pause and resume to
	# actions.py. Then make the file look like:

	#!/usr/bin/python

	import os
	import sys

	from charmhelpers.core.hookenv import action_fail
	from xxx_utils import (
	pause_unit_helper,
	resume_unit_helper,
	register_configs,
	)


	def pause(args):
	    """Pause the Ceilometer services.

	    @param args: the action's argument list (not used here)
	    @raises Exception should the service fail to stop.
	    """
	    pause_unit_helper(register_configs())


	def resume(args):
	    """Resume the Ceilometer services.

	    @param args: the action's argument list (not used here)
	    @raises Exception should the service fail to start.
	    """
	    resume_unit_helper(register_configs())


	# Maps each action name (the basename the script is invoked as) to the
	# callable that implements it; every callable takes the argument list.
	ACTIONS = {
	    "pause": pause,
	    "resume": resume,
	}


	def main(args):
	    """Dispatch to the action named by the basename of ``args[0]``.

	    @param args: argv-style list; args[0] is the path the script was run as
	    @returns an error string when the action name is not in ACTIONS,
	             otherwise None (also when the action raised, in which case
	             the failure is reported through action_fail())
	    """
	    action_name = os.path.basename(args[0])
	    try:
	        action = ACTIONS[action_name]
	    except KeyError:
	        s = "Action {} undefined".format(action_name)
	        action_fail(s)
	        return s
	    else:
	        try:
	            action(args)
	        except Exception as e:
	            # Report the failure via action_fail() rather than letting
	            # the exception propagate out of the script.
	            action_fail("Action {} failed: {}".format(action_name, str(e)))


	if __name__ == "__main__":
	    # main() returns an error string for an unknown action, else None.
	    sys.exit(main(sys.argv))


	# 3.2 Fix up/add the unit_tests/test_actions.py file to look like this (at least for the pause/resume
	# actions). If the file doesn't exist then it needs to look something like this:

	import mock
	from mock import patch

	from test_utils import CharmTestCase

	# Import actions while register_configs is patched, so that the name the
	# actions module binds is a mock returning 'test-config'.
	with patch('xxx_utils.register_configs') as configs:
	    configs.return_value = 'test-config'
	    import actions


	class PauseTestCase(CharmTestCase):
	    """Tests for the ``pause`` action entry point."""

	    def setUp(self):
	        super(PauseTestCase, self).setUp(
	            actions, ["pause_unit_helper"])

	    def test_pauses_services(self):
	        """pause() passes the registered configs to pause_unit_helper()."""
	        actions.pause([])
	        self.pause_unit_helper.assert_called_once_with('test-config')


	class ResumeTestCase(CharmTestCase):
	    """Tests for the ``resume`` action entry point."""

	    def setUp(self):
	        super(ResumeTestCase, self).setUp(
	            actions, ["resume_unit_helper"])

	    def test_resumes_services(self):
	        """resume() passes the registered configs to resume_unit_helper()."""
	        actions.resume([])
	        self.resume_unit_helper.assert_called_once_with('test-config')

	class MainTestCase(CharmTestCase):
	    """Tests for actions.main(), the action dispatcher."""

	    def setUp(self):
	        super(MainTestCase, self).setUp(actions, ["action_fail"])

	    def test_invokes_action(self):
	        """main() calls the callable registered for the action name."""
	        dummy_calls = []

	        def dummy_action(args):
	            dummy_calls.append(True)

	        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
	            actions.main(["foo"])
	        self.assertEqual(dummy_calls, [True])

	    def test_unknown_action(self):
	        """Unknown actions aren't a traceback."""
	        exit_string = actions.main(["foo"])
	        self.assertEqual("Action foo undefined", exit_string)

	    def test_failing_action(self):
	        """Actions which traceback trigger action_fail() calls."""
	        dummy_calls = []

	        self.action_fail.side_effect = dummy_calls.append

	        def dummy_action(args):
	            raise ValueError("uh oh")

	        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
	            actions.main(["foo"])
	        # main() wraps the exception text as
	        # "Action <name> failed: <error>" before calling action_fail().
	        self.assertEqual(dummy_calls, ["Action foo failed: uh oh"])


	# 3.3 Fix up/add the actions.yaml file to include:

	pause:
	  description: Pause the xxx unit. This action will stop xxx services.
	resume:
	  description: Resume the xxx unit. This action will start xxx services.

	# 3.4 Add amulet tests to verify that pause and resume work. The tests/basic_deployment.py needs test 910
	# added (or ensure that it is there) for the pause and resume to be tested:

	def _run_action(self, unit_id, action, *args):
	    """Queue a juju action on a unit and return the queued action's id.

	    @param unit_id: the unit to run the action on, e.g. "ceilometer/0"
	    @param action: the name of the action
	    @param args: extra command-line arguments appended to the command
	    @returns the value of the 'Action queued with id' key in juju's
	             JSON output
	    """
	    command = ["juju", "action", "do", "--format=json", unit_id, action]
	    command.extend(args)
	    print("Running command: %s\n" % " ".join(command))
	    output = subprocess.check_output(command)
	    output_json = output.decode(encoding="UTF-8")
	    data = json.loads(output_json)
	    action_id = data[u'Action queued with id']
	    return action_id

	def _wait_on_action(self, action_id):
	    """Poll a queued juju action until it completes or fails.

	    Fetches the action every 2 seconds; there is no upper bound on how
	    long this waits.

	    @param action_id: the id returned when the action was queued
	    @returns True if the action status is "completed"; False if it is
	             "failed" or the fetch command itself raised
	    """
	    command = ["juju", "action", "fetch", "--format=json", action_id]
	    while True:
	        try:
	            output = subprocess.check_output(command)
	        except Exception as e:
	            print(e)
	            return False
	        output_json = output.decode(encoding="UTF-8")
	        data = json.loads(output_json)
	        if data[u"status"] == "completed":
	            return True
	        elif data[u"status"] == "failed":
	            return False
	        # Any other status: wait and poll again.
	        time.sleep(2)

	def test_910_pause_and_resume(self):
	    """The services can be paused and resumed. """
	    u.log.debug('Checking pause and resume actions...')
	    unit_name = "ceilometer/0"
	    unit = self.d.sentry.unit[unit_name]

	    assert u.status_get(unit)[0] == "active"

	    action_id = self._run_action(unit_name, "pause")
	    assert self._wait_on_action(action_id), "Pause action failed."
	    assert u.status_get(unit)[0] == "maintenance"

	    action_id = self._run_action(unit_name, "resume")
	    assert self._wait_on_action(action_id), "Resume action failed."
	    assert u.status_get(unit)[0] == "active"
	    u.log.debug('OK')


	# 4. If xxx_utils.py contains a git_post_install() then the service_restart() needs to be gated against
	# is_unit_paused_set() from charmhelpers.

	from charmhelpers.contrib.openstack.utils import (
	...
	is_unit_paused_set,
	...
	)

	def git_post_install(projects_yaml):
	    """Perform keystone post-install setup.

	    Template snippet: the ``...`` below stands in for the charm's existing
	    post-install steps.  The point of the example is that the final
	    service_restart() is gated on is_unit_paused_set().

	    @param projects_yaml: not used by the lines shown here
	    """

	    ...
	    # Don't restart if the unit is supposed to be paused.
	    if not is_unit_paused_set():
	        service_restart('keystone')

	# This is true for any other actions that might cause a service to be started or restarted.

	# 5. in the hooks file (hooks/xxx_hooks.py) replace the 'restart_on_change' import with:

	from charmhelpers.contrib.openstack.utils import (
	...
	pausable_restart_on_change as restart_on_change,
	...
	)

	# 6. run tests, fix import problems, run lint, run functional tests & make tea.

	# DONE!