#!/bin/bash
#
# OpenShift cluster health probe.
#
# Changes into the script's own directory, sources opts.sh (presumably the
# place where $host is set - TODO confirm), pings the host and then inspects
# the cluster through `oc`.  A human-readable detail line is printed for each
# problem found, followed by one final status line:
#
#   <online> <healthy> <version> <nodes summary>
#
#   online   raw output of ../scripts/ping.pl for $host
#   healthy  starts out equal to <online>; 0 when `oc version` reports
#            nothing; 2 as soon as any of the checks below fails
#   version  "OpenShift <version>" as reported by `oc version`
#   nodes    etcd/node summary, prefixed with a literal ${color gray} tag
#
# Arguments:
#   $2  minimum number of Ready nodes expected (0 when omitted)

# Pods younger than an hour have an AGE (column 6) that starts with a seconds
# or minutes figure ("45s", "12m", "5m30s").  Only the leading unit is looked
# at, so compound ages such as "5h30m" are not mistaken for young pods.
young_age_re='^[0-9]+[sm]'

# Prints the number of non-blank lines on stdin (0 for empty input).
count_lines() {
  awk 'NF { n++ } END { print n + 0 }'
}

# Reduces `oc get cs` output to the etcd members reported Healthy,
# formatted as "* NAME STATUS".
healthy_etcd() {
  awk '/etcd/ && /Healthy/ { print "* " $1, $2 }'
}

# Keeps the `oc get nodes` lines whose STATUS column carries the exact
# condition "Ready" (so "NotReady" is not matched).
ready_nodes() {
  awk 'index("," $2 ",", ",Ready,") > 0'
}

# Keeps Ready nodes that are not marked SchedulingDisabled.
active_nodes() {
  awk 'index("," $2 ",", ",Ready,") > 0 &&
       tolower($2) !~ /schedulingdisabled/'
}

# Lists nodes that are not Ready as "* NAME STATUS", skipping the header row
# and blank lines.
offline_nodes() {
  awk 'NF && $2 != "STATUS" && index("," $2 ",", ",Ready,") == 0 {
         print "* " $1, $2
       }'
}

# Keeps pods at least an hour old that are Terminating or Pending.
stalled_pods() {
  awk -v young="$young_age_re" '
    $6 ~ young { next }
    $4 ~ /Terminating|Pending/
  '
}

# Keeps Running pods at least an hour old with no ready container and at
# least $1 restarts.  The threshold is handed over with -v: inside a
# single-quoted awk program a shell variable is not expanded.
restarting_pods() {
  awk -v young="$young_age_re" -v min="${1:-0}" '
    $6 ~ young        { next }
    $4 !~ /Running/   { next }
    $5 + 0 < min + 0  { next }
    $3 ~ /^0/
  '
}

# Keeps pods of our own projects (namespace or pod name matching
# adei|adai|bora) that are at least an hour old and in CrashLoopBackOff or
# Error state.
failing_own_pods() {
  awk -v young="$young_age_re" '
    ($1 " " $2) !~ /adei|adai|bora/ { next }
    $6 ~ young                      { next }
    $4 ~ /CrashLoopBackOff|Error/
  '
}

# Keeps pods stuck in ContainerCreating for more than 3 minutes.  Each pod is
# printed once; ages counted in hours, days or years always qualify.
slow_scheduling() {
  awk '
    $4 !~ /ContainerCreating/ { next }
    $6 ~ /^[0-9]+[hdy]/       { print; next }
    $6 ~ /^[0-9]+m/ && $6 + 0 > 3
  '
}

# report_pods TITLE LIST
# When LIST holds at least $e_pods lines, flags the cluster unhealthy and
# prints TITLE followed by at most $p_pods lines with whitespace squeezed.
report_pods() {
  local title=$1 list=$2
  local count
  count=$(printf '%s\n' "$list" | count_lines)
  if (( count >= e_pods )); then
    healthy=2
    printf '%s\n' "$title"
    printf '%s\n' "$list" | head -n "$p_pods" | sed -e 's/[[:space:]]\+/ /g'
  fi
}

# Succeeds while the cluster is not flagged offline (healthy != 0).  An empty
# or non-numeric value makes the comparison fail and is treated as offline,
# which is why the diagnostics of `[` are silenced here.
cluster_reachable() {
  [ "$healthy" -ne 0 ] 2>/dev/null
}

cd "$(dirname "$0")" || exit 1
. opts.sh

e_nodes=${2:-0}   # minimum number of Ready nodes expected
e_pods=2          # offending pods needed before a pod check fails
e_restarts=10     # restarts from which a not-ready pod counts as looping
p_pods=10         # maximum number of pods listed per report

online=$(../scripts/ping.pl "$host")
healthy=$online

version=$(oc version | head -n 1 | awk '{ print $2 }')
if [[ -z "$version" ]]; then
  healthy=0
else
  version="OpenShift $version"
fi

# NOTE(review): this check also runs when the cluster was just flagged
# offline (healthy=0) and then raises the flag to 2 - confirm that this is
# what the consumer of the status line expects.
etcd_members=$(oc get cs | healthy_etcd)
etcd=$(printf '%s\n' "$etcd_members" | count_lines)
if (( etcd < 3 )); then
  healthy=2
  if [[ -n "$etcd_members" ]]; then
    printf '%s\n' "$etcd_members"
  fi
fi

# None of the checks below can reset the flag to 0, so one guard covers all.
if cluster_reachable; then
  # Nodes: enough of them Ready, and how many still accept new pods.
  node_list=$(oc get nodes)
  ready=$(printf '%s\n' "$node_list" | ready_nodes | count_lines)
  active=$(printf '%s\n' "$node_list" | active_nodes | count_lines)
  if (( ready >= e_nodes )); then
    nodes=" \${color gray}/ $etcd etcd, $ready nodes"
    if (( active != ready )); then
      nodes="$nodes ($active active)"
    fi
  else
    offline_list=$(printf '%s\n' "$node_list" | offline_nodes)
    offline=$(printf '%s\n' "$offline_list" | count_lines)
    if [[ -n "$offline_list" ]]; then
      printf '%s\n' "$offline_list"
    fi
    nodes=" \${color gray}/ $etcd etcd, $ready ready, $offline offline"
    healthy=2
  fi

  # One wide listing feeds the three pod-state checks.
  pod_list=$(oc get pods --all-namespaces -o wide)

  # Find pods in unexpected states
  report_pods "Pods stalled in wrong states:" \
    "$(printf '%s\n' "$pod_list" | stalled_pods)"

  # Find not-ready running pods with large restart number
  report_pods "Pods restarting:" \
    "$(printf '%s\n' "$pod_list" | restarting_pods "$e_restarts")"

  # Find own pods in error states
  report_pods "Pods with errors:" \
    "$(printf '%s\n' "$pod_list" | failing_own_pods)"

  # Find out whether scheduling takes too long
  slow_list=$(oc get pods --all-namespaces | slow_scheduling)
  if [[ -n "$slow_list" ]]; then
    healthy=2
    echo "Scheduling problems for the following pods:"
    printf '%s\n' "$slow_list"
  fi
fi

printf '%s\n' "$online $healthy $version $nodes"