summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDevan Goodwin <dgoodwin@redhat.com>2016-12-15 14:55:10 -0400
committerDevan Goodwin <dgoodwin@redhat.com>2016-12-15 14:55:10 -0400
commitf8d5693489cb95e6a7ccfcc5b33d99115f7da5d3 (patch)
tree77da11c92076f176798cc8cf0e659c932905c413
parent002fdef1769baccdd6c90a4caa8c0028ec9559db (diff)
downloadopenshift-f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3.tar.gz
openshift-f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3.tar.bz2
openshift-f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3.tar.xz
openshift-f8d5693489cb95e6a7ccfcc5b33d99115f7da5d3.zip
Wait for nodes to be ready before proceeding with upgrade.
Near the end of node upgrade, we now wait for the node to report Ready before marking it schedulable again. This should help eliminate delays when pods need to relocate as the next node in line is evacuated. Because the wait happens near the end of the process, the only remaining task is to mark the node schedulable again, so a failure here is easy for admins to detect and recover from.
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml13
1 files changed, 13 insertions, 0 deletions
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index cefc7d12b..b3ac34d90 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -87,6 +87,19 @@
# Restart the node service on hosts being upgraded; the condition below
# excludes containerized installs.
- name: Restart rpm node service
  service:
    name: "{{ openshift.common.service_type }}-node"
    state: restarted
  when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
+
+  - name: Wait for node to be ready
+    command: >
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} --no-headers
+    register: node_output
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # Retry until the second whitespace-separated field of the output starts with 'Ready'; the slice makes empty output count as not ready.
+    # Gives the node two minutes (24 tries, 5s apart) to come back online. We pre-pull images now, so containerized services should restart quickly as well.
+    until: "(node_output.stdout.split()[1:2] | join('')).startswith('Ready')"
+    retries: 24
+    delay: 5
+
- name: Set node schedulability
command: >
{{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true