From eb9914420e8c327a998531a4bb7a6b8406b4316f Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Mon, 6 Nov 2017 12:08:26 -0500 Subject: Retry restarting master controllers Currently, master controller services may fail to restart if master api services are not fully initialized. This commit enables retry of master controllers. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1509837 --- playbooks/common/openshift-master/scaleup.yml | 7 ++++++- playbooks/common/openshift-master/tasks/wire_aggregator.yml | 7 ++++++- roles/nuage_master/handlers/main.yaml | 7 ++++++- roles/openshift_hosted_metrics/handlers/main.yml | 7 ++++++- roles/openshift_logging/handlers/main.yml | 7 ++++++- roles/openshift_master/handlers/main.yml | 9 ++++++--- roles/openshift_metrics/handlers/main.yml | 7 ++++++- 7 files changed, 42 insertions(+), 9 deletions(-) diff --git a/playbooks/common/openshift-master/scaleup.yml b/playbooks/common/openshift-master/scaleup.yml index f4dc9df8a..05b37d59f 100644 --- a/playbooks/common/openshift-master/scaleup.yml +++ b/playbooks/common/openshift-master/scaleup.yml @@ -22,8 +22,13 @@ - name: restart master api service: name={{ openshift.common.service_type }}-master-controllers state=restarted notify: verify api server + # We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - service: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 - name: verify api server command: > curl --silent --tlsv1.2 diff --git a/playbooks/common/openshift-master/tasks/wire_aggregator.yml b/playbooks/common/openshift-master/tasks/wire_aggregator.yml index 560eea785..df3ea27b4 100644 --- a/playbooks/common/openshift-master/tasks/wire_aggregator.yml +++ b/playbooks/common/openshift-master/tasks/wire_aggregator.yml @@ -179,8 +179,13 @@ - yedit_output.changed - openshift.master.cluster_method == 'native' +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: - yedit_output.changed - openshift.master.cluster_method == 'native' diff --git a/roles/nuage_master/handlers/main.yaml b/roles/nuage_master/handlers/main.yaml index 21da6b953..410b739e9 100644 --- a/roles/nuage_master/handlers/main.yaml +++ b/roles/nuage_master/handlers/main.yaml @@ -7,8 +7,13 @@ openshift.master.cluster_method == 'native' # TODO: need to fix up ignore_errors here +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: > (openshift_master_ha | bool) and (not master_controllers_service_status_changed | default(false)) and diff --git a/roles/openshift_hosted_metrics/handlers/main.yml b/roles/openshift_hosted_metrics/handlers/main.yml index ce7688581..88b893448 100644 --- a/roles/openshift_hosted_metrics/handlers/main.yml +++ b/roles/openshift_hosted_metrics/handlers/main.yml @@ -4,8 +4,13 @@ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server diff --git a/roles/openshift_logging/handlers/main.yml b/roles/openshift_logging/handlers/main.yml index ce7688581..acc838bd1 100644 --- a/roles/openshift_logging/handlers/main.yml +++ b/roles/openshift_logging/handlers/main.yml @@ -4,8 +4,13 @@ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "{{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server diff --git a/roles/openshift_master/handlers/main.yml b/roles/openshift_master/handlers/main.yml index d5094c2c9..f88c4a7dc 100644 --- a/roles/openshift_master/handlers/main.yml +++ b/roles/openshift_master/handlers/main.yml @@ -9,10 +9,13 @@ notify: - Verify API Server +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: - name: "{{ openshift.common.service_type }}-master-controllers" - state: restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: - not (master_controllers_service_status_changed | default(false) | bool) - openshift.master.cluster_method == 'native' diff --git a/roles/openshift_metrics/handlers/main.yml b/roles/openshift_metrics/handlers/main.yml index ce7688581..88b893448 100644 --- a/roles/openshift_metrics/handlers/main.yml +++ b/roles/openshift_metrics/handlers/main.yml @@ -4,8 +4,13 @@ when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' notify: Verify API Server +# We retry the controllers because the API may not be 100% initialized yet. - name: restart master controllers - systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted + command: "systemctl restart {{ openshift.common.service_type }}-master-controllers" + retries: 3 + delay: 5 + register: result + until: result.rc == 0 when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native' - name: Verify API Server -- cgit v1.2.1