summary refs log tree commit diff stats
path: root/roles/openshift_master/tasks/main.yml
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_master/tasks/main.yml')
-rw-r--r-- roles/openshift_master/tasks/main.yml 23
1 files changed, 23 insertions, 0 deletions
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index ce2f96723..1a59717c7 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -168,10 +168,21 @@
- include: set_loopback_context.yml
when: openshift.common.version_gte_3_2_or_1_2
+# TODO: Master startup can fail when ec2 transparently reallocates the block
+# storage, causing etcd writes to temporarily fail. Retry failures blindly just
+# once to allow time for this transient condition to resolve and for systemd
+# to restart the master (which will eventually succeed).
+#
+# https://github.com/coreos/etcd/issues/3864
+# https://github.com/openshift/origin/issues/6065
+# https://github.com/openshift/origin/issues/6447
- name: Start and enable master
service: name={{ openshift.common.service_type }}-master enabled=yes state=started
when: not openshift_master_ha | bool
register: start_result
+ until: not start_result | failed
+ retries: 1
+ delay: 60
notify: Verify API Server
- name: Check for non-HA master service presence
@@ -202,6 +213,9 @@
state: started
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
register: start_result
+ until: not start_result | failed
+ retries: 1
+ delay: 60
- set_fact:
master_api_service_status_changed: "{{ start_result | changed }}"
@@ -218,6 +232,9 @@
state: started
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
register: start_result
+ until: not start_result | failed
+ retries: 1
+ delay: 60
- set_fact:
master_api_service_status_changed: "{{ start_result | changed }}"
@@ -251,6 +268,9 @@
state: started
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
register: start_result
+ until: not start_result | failed
+ retries: 1
+ delay: 60
- pause:
seconds: 15
@@ -263,6 +283,9 @@
state: started
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
register: start_result
+ until: not start_result | failed
+ retries: 1
+ delay: 60
- set_fact:
master_controllers_service_status_changed: "{{ start_result | changed }}"