summaryrefslogtreecommitdiffstats
path: root/service
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@suren.me>2019-01-06 01:56:17 +0100
committerSuren A. Chilingaryan <csa@suren.me>2019-01-06 01:56:17 +0100
commitaf658521b46751a20a5953bd9c7f3ef01c0a74d7 (patch)
tree11745af7d1b73c6e56db9ffa49a92b1bb9389f80 /service
parent55783753ae8f2d857a7225b7a93c1d47039e5a90 (diff)
downloadconky-af658521b46751a20a5953bd9c7f3ef01c0a74d7.tar.gz
conky-af658521b46751a20a5953bd9c7f3ef01c0a74d7.tar.bz2
conky-af658521b46751a20a5953bd9c7f3ef01c0a74d7.tar.xz
conky-af658521b46751a20a5953bd9c7f3ef01c0a74d7.zip
Added more checks to kaas and adei
Diffstat (limited to 'service')
-rwxr-xr-xservice/check_adei.sh12
-rwxr-xr-xservice/check_kaas.sh24
2 files changed, 35 insertions, 1 deletions
diff --git a/service/check_adei.sh b/service/check_adei.sh
index f08ab5f..4edad93 100755
--- a/service/check_adei.sh
+++ b/service/check_adei.sh
@@ -72,6 +72,18 @@ size=$(query "$url/info.php?target=size&encoding=text")
[[ "$size" =~ "Error:" ]] && size=""
[ -n "$size" ] && msg="\${color gray}/ $((size / 1024 / 1024 / 1024)) GB"
+# Check pending administrative scripts
+if [ $healthy -ne 0 ]; then
+ scripts=$(query "$url/info.php?target=scripts")
+ waiting=$(echo $scripts | xmllint --format - | grep "Value" | sed -e "s/^\(.*mtime=\"\([^\"]*\)\".*\)$/\\2\\1/" | awk -v date="$(date +%s)" '{duration=date - $1} duration > 3600 { print duration }' | sort -rn)
+ num_waiting=$(echo $waiting | tr ' ' '\n' | wc -l)
+ long_waiting=$(echo $waiting | cut -d ' ' -f 1)
+ if [ $num_waiting -gt 0 ]; then
+ healthy=2
+ echo "$num_waiting pending scripts, longest for $(format_time $long_waiting)"
+ fi
+fi
+
# Verify offset (for selected database)
if [ $healthy -ne 0 -a -n "$src" ]; then
diff --git a/service/check_kaas.sh b/service/check_kaas.sh
index d6e7300..e69e73d 100755
--- a/service/check_kaas.sh
+++ b/service/check_kaas.sh
@@ -4,7 +4,9 @@ cd "$(dirname "$0")"
. opts.sh
e_nodes=$2
-
+e_pods=2
+e_restarts=10
+p_pods=10
online=$(../scripts/ping.pl "$host")
healthy=$online
@@ -34,4 +36,24 @@ if [ $healthy -ne 0 ]; then
fi
fi
+# Find pods in wrong states
+if [ $healthy -ne 0 ]; then
+ pods=$(oc get pods --all-namespaces | awk '$6~/s|m/ { next } { print }' | grep -P "Terminating|Pending|CrashLoopBackOff" | wc -l)
+ if [ $pods -ge $e_pods ]; then
+ healthy=2
+ echo "Pods stalled in wrong states:"
+ oc get pods --all-namespaces -o wide | awk '$6~/s|m/ { next } { print }' | grep -P "(Terminating|Pending|CrashLoopBackOff)" | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g'
+ fi
+fi
+
+# Find not-ready running pods with large restart number
+if [ $healthy -ne 0 ]; then
+ pods=$(oc get pods --all-namespaces | awk '$6~/s|m/ { next } $5<$e_restarts { next } $3~/^0/ { print $0 }' | grep Running | wc -l)
+ if [ $pods -ge $e_pods ]; then
+ healthy=2
+ echo "Pods restarting:"
+ oc get pods --all-namespaces -o wide | awk '$6~/s|m/ { next } $5<$e_restarts { next } $3~/^0/ { print $0 }' | grep Running | head -n $p_pods | sed -e 's/[[:space:]]\+/ /g'
+ fi
+fi
+
echo "$online $healthy $version $nodes"