summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker
diff options
context:
space:
mode:
authorOpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>2017-09-22 21:02:35 -0700
committerGitHub <noreply@github.com>2017-09-22 21:02:35 -0700
commite1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d (patch)
treea286bae582ca912e1b7badd1052f5d2ad52ec91e /roles/openshift_health_checker
parentd407d4fc3cb1d76d7fef52088e2459a420952c16 (diff)
parent9698f76b641b7cc5964d2f4f318ea71702aa2245 (diff)
downloadopenshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.gz
openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.bz2
openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.tar.xz
openshift-e1650a2e1da3b1a365a8e4f021d1c55fe4ffc72d.zip
Merge pull request #5491 from sosiouxme/20170920-diagnostics-check
Automatic merge from submit-queue health checks: add diagnostics check Adds a health check that runs `oc adm diagnostics` with each individual diagnostic. Also, moved `is_first_master` method into superclass for reuse. And look at `oo_first_master` and `ansible_host` instead of `masters` and `ansible_ssh_host`.
Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r--roles/openshift_health_checker/library/ocutil.py11
-rw-r--r--roles/openshift_health_checker/openshift_checks/__init__.py17
-rw-r--r--roles/openshift_health_checker/openshift_checks/diagnostics.py62
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_volume.py2
-rw-r--r--roles/openshift_health_checker/openshift_checks/logging/logging.py8
-rw-r--r--roles/openshift_health_checker/test/diagnostics_test.py50
-rw-r--r--roles/openshift_health_checker/test/logging_check_test.py8
7 files changed, 138 insertions, 20 deletions
diff --git a/roles/openshift_health_checker/library/ocutil.py b/roles/openshift_health_checker/library/ocutil.py
index 2e60735d6..c72f4c5b3 100644
--- a/roles/openshift_health_checker/library/ocutil.py
+++ b/roles/openshift_health_checker/library/ocutil.py
@@ -40,18 +40,17 @@ def main():
module = AnsibleModule(
argument_spec=dict(
- namespace=dict(type="str", required=True),
+ namespace=dict(type="str", required=False),
config_file=dict(type="str", required=True),
cmd=dict(type="str", required=True),
extra_args=dict(type="list", default=[]),
),
)
- cmd = [
- locate_oc_binary(),
- '--config', module.params["config_file"],
- '-n', module.params["namespace"],
- ] + shlex.split(module.params["cmd"])
+ cmd = [locate_oc_binary(), '--config', module.params["config_file"]]
+ if module.params["namespace"]:
+ cmd += ['-n', module.params["namespace"]]
+ cmd += shlex.split(module.params["cmd"]) + module.params["extra_args"]
failed = True
try:
diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py
index 28cb53cc5..ce05b44a4 100644
--- a/roles/openshift_health_checker/openshift_checks/__init__.py
+++ b/roles/openshift_health_checker/openshift_checks/__init__.py
@@ -13,6 +13,7 @@ from importlib import import_module
from ansible.module_utils import six
from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin
+from ansible.module_utils.six import string_types
from ansible.plugins.filter.core import to_bool as ansible_to_bool
@@ -110,6 +111,11 @@ class OpenShiftCheck(object):
"""Returns true if this check applies to the ansible-playbook run."""
return True
+ def is_first_master(self):
+ """Determine if running on first master. Returns: bool"""
+ masters = self.get_var("groups", "oo_first_master", default=None) or [None]
+ return masters[0] == self.get_var("ansible_host")
+
@abstractmethod
def run(self):
"""Executes a check against a host and returns a result hash similar to Ansible modules.
@@ -283,6 +289,17 @@ class OpenShiftCheck(object):
))
@staticmethod
+ def normalize(name_list):
+ """Return a clean list of names.
+
+ The input may be a comma-separated string or a sequence. Leading and
+ trailing whitespace characters are removed. Empty items are discarded.
+ """
+ if isinstance(name_list, string_types):
+ name_list = name_list.split(',')
+ return [name.strip() for name in name_list if name.strip()]
+
+ @staticmethod
def get_major_minor_version(openshift_image_tag):
"""Parse and return the deployed version of OpenShift as a tuple."""
if openshift_image_tag and openshift_image_tag[0] == 'v':
diff --git a/roles/openshift_health_checker/openshift_checks/diagnostics.py b/roles/openshift_health_checker/openshift_checks/diagnostics.py
new file mode 100644
index 000000000..1cfdc1129
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/diagnostics.py
@@ -0,0 +1,62 @@
+"""
+A check to run relevant diagnostics via `oc adm diagnostics`.
+"""
+
+import os
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException
+
+
+DIAGNOSTIC_LIST = (
+ "AggregatedLogging ClusterRegistry ClusterRoleBindings ClusterRoles "
+ "ClusterRouter DiagnosticPod NetworkCheck"
+).split()
+
+
+class DiagnosticCheck(OpenShiftCheck):
+ """A check to run relevant diagnostics via `oc adm diagnostics`."""
+
+ name = "diagnostics"
+ tags = ["health"]
+
+ def is_active(self):
+ return super(DiagnosticCheck, self).is_active() and self.is_first_master()
+
+ def run(self):
+ if self.exec_diagnostic("ConfigContexts"):
+ # only run the other diagnostics if that one succeeds (otherwise, all will fail)
+ diagnostics = self.get_var("openshift_check_diagnostics", default=DIAGNOSTIC_LIST)
+ for diagnostic in self.normalize(diagnostics):
+ self.exec_diagnostic(diagnostic)
+ return {}
+
+ def exec_diagnostic(self, diagnostic):
+ """
+ Execute an 'oc adm diagnostics' command on the remote host.
+ Raises OcNotFound or registers OcDiagFailed.
+ Returns True on success or False on failure (non-zero rc).
+ """
+ config_base = self.get_var("openshift.common.config_base")
+ args = {
+ "config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
+ "cmd": "adm diagnostics",
+ "extra_args": [diagnostic],
+ }
+
+ result = self.execute_module("ocutil", args, save_as_name=diagnostic + ".failure.json")
+ self.register_file(diagnostic + ".txt", result['result'])
+ if result.get("failed"):
+ if result['result'] == '[Errno 2] No such file or directory':
+ raise OpenShiftCheckException(
+ "OcNotFound",
+ "This host is supposed to be a master but does not have the `oc` command where expected.\n"
+ "Has an installation been run on this host yet?"
+ )
+
+ self.register_failure(OpenShiftCheckException(
+ 'OcDiagFailed',
+ 'The {diag} diagnostic reported an error:\n'
+ '{error}'.format(diag=diagnostic, error=result['result'])
+ ))
+ return False
+ return True
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
index e5d93ff3f..79955cb2f 100644
--- a/roles/openshift_health_checker/openshift_checks/etcd_volume.py
+++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
@@ -16,7 +16,7 @@ class EtcdVolume(OpenShiftCheck):
def is_active(self):
etcd_hosts = self.get_var("groups", "etcd", default=[]) or self.get_var("groups", "masters", default=[]) or []
- is_etcd_host = self.get_var("ansible_ssh_host") in etcd_hosts
+ is_etcd_host = self.get_var("ansible_host") in etcd_hosts
return super(EtcdVolume, self).is_active() and is_etcd_host
def run(self):
diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py
index 06bdfebf6..05ba73ca1 100644
--- a/roles/openshift_health_checker/openshift_checks/logging/logging.py
+++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py
@@ -30,14 +30,6 @@ class LoggingCheck(OpenShiftCheck):
logging_deployed = self.get_var("openshift_hosted_logging_deploy", convert=bool, default=False)
return logging_deployed and super(LoggingCheck, self).is_active() and self.is_first_master()
- def is_first_master(self):
- """Determine if running on first master. Returns: bool"""
- # Note: It would be nice to use membership in oo_first_master group, however for now it
- # seems best to avoid requiring that setup and just check this is the first master.
- hostname = self.get_var("ansible_ssh_host") or [None]
- masters = self.get_var("groups", "masters", default=None) or [None]
- return masters[0] == hostname
-
def run(self):
return {}
diff --git a/roles/openshift_health_checker/test/diagnostics_test.py b/roles/openshift_health_checker/test/diagnostics_test.py
new file mode 100644
index 000000000..800889fa7
--- /dev/null
+++ b/roles/openshift_health_checker/test/diagnostics_test.py
@@ -0,0 +1,50 @@
+import pytest
+
+from openshift_checks.diagnostics import DiagnosticCheck, OpenShiftCheckException
+
+
+@pytest.fixture()
+def task_vars():
+ return dict(
+ openshift=dict(
+ common=dict(config_base="/etc/origin/")
+ )
+ )
+
+
+def test_module_succeeds(task_vars):
+ check = DiagnosticCheck(lambda *_: {"result": "success"}, task_vars)
+ check.is_first_master = lambda: True
+ assert check.is_active()
+ check.exec_diagnostic("spam")
+ assert not check.failures
+
+
+def test_oc_not_there(task_vars):
+ def exec_module(*_):
+ return {"failed": True, "result": "[Errno 2] No such file or directory"}
+
+ check = DiagnosticCheck(exec_module, task_vars)
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.exec_diagnostic("spam")
+ assert excinfo.value.name == "OcNotFound"
+
+
+def test_module_fails(task_vars):
+ def exec_module(*_):
+ return {"failed": True, "result": "something broke"}
+
+ check = DiagnosticCheck(exec_module, task_vars)
+ check.exec_diagnostic("spam")
+ assert check.failures and check.failures[0].name == "OcDiagFailed"
+
+
+def test_names_executed(task_vars):
+ task_vars["openshift_check_diagnostics"] = diagnostics = "ConfigContexts,spam,,eggs"
+
+ def exec_module(module, args, *_):
+ assert "extra_args" in args
+ assert args["extra_args"][0] in diagnostics
+ return {"result": "success"}
+
+ DiagnosticCheck(exec_module, task_vars).run()
diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py
index 1a1c190f6..59c703214 100644
--- a/roles/openshift_health_checker/test/logging_check_test.py
+++ b/roles/openshift_health_checker/test/logging_check_test.py
@@ -98,21 +98,19 @@ def test_oc_failure(problem, expect):
assert expect in str(excinfo)
-groups_with_first_master = dict(masters=['this-host', 'other-host'])
-groups_with_second_master = dict(masters=['other-host', 'this-host'])
-groups_not_a_master = dict(masters=['other-host'])
+groups_with_first_master = dict(oo_first_master=['this-host'])
+groups_not_a_master = dict(oo_first_master=['other-host'], oo_masters=['other-host'])
@pytest.mark.parametrize('groups, logging_deployed, is_active', [
(groups_with_first_master, True, True),
(groups_with_first_master, False, False),
(groups_not_a_master, True, False),
- (groups_with_second_master, True, False),
(groups_not_a_master, True, False),
])
def test_is_active(groups, logging_deployed, is_active):
task_vars = dict(
- ansible_ssh_host='this-host',
+ ansible_host='this-host',
groups=groups,
openshift_hosted_logging_deploy=logging_deployed,
)