X-Git-Url: https://gerrit.akraino.org/r/gitweb?p=ta%2Fcaas-kubernetes.git;a=blobdiff_plain;f=ansible%2Froles%2Fkubelet%2Ftemplates%2Fkubelet_healthcheck.sh;h=6c9f46ddd4d1fce91ef4cfc656668792d2cdc81b;hp=7cf4a0856fb3390911845c889c1fa0ea41b9bc92;hb=4fa2d523a4eced951f6e250cf926ce3d7edf1649;hpb=d5d007c85cb90ac892dbcda51a0419267743d9cc

NOTE(review): the line breaks and indentation of this patch were restored from
a whitespace-collapsed copy. The two-space indentation, and the blank context
line between the first closing `fi` and the `if [ "$error" -ge "5" ]` test
(added so the hunk matches its declared 61/36 line counts), are inferred.
Confirm against the upstream commit before applying.

Summary of the change, as visible in the hunk below: the certificate wait
helper and the wget-over-TLS probe are removed; the loop now probes the local
kubelet healthz port with curl, uncordons the node after each successful
probe, skips the restart while systemd reports kubelet as activating or
deactivating, and polls every second instead of every 30 seconds.

diff --git a/ansible/roles/kubelet/templates/kubelet_healthcheck.sh b/ansible/roles/kubelet/templates/kubelet_healthcheck.sh
index 7cf4a08..6c9f46d 100644
--- a/ansible/roles/kubelet/templates/kubelet_healthcheck.sh
+++ b/ansible/roles/kubelet/templates/kubelet_healthcheck.sh
@@ -13,61 +13,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-wait_for_file () {
-  while [[ ! -f $1 ]]
-  do
-    echo "Waiting for file $1"
-    sleep 1
-  done
-}
-
-
-CERT_AUTH="/etc/openssl/ca.pem"
-CLIENT_CER="/etc/kubernetes/ssl/kubelet-server.pem"
-CLIENT_KEY="/etc/kubernetes/ssl/kubelet-server-key.pem"
-wait_for_file $CERT_AUTH
-wait_for_file $CLIENT_CER
-wait_for_file $CLIENT_KEY
-
-
-keepdoing="true"
 error=0
 while true
 do
-  if [[ "$keepdoing" == "true" ]]
-  then
-    echo "Waiting for kubernetes node to become ready..."
-    uncordon_ready=$( /usr/bin/kubectl get node --show-labels | grep -i "{{ nodename }}" | grep -i "ready" | grep -i "SchedulingDisabled" | wc -l )
-    if [[ "$uncordon_ready" -eq "1" ]]
-    then
-      keepdoing="false"
-      /usr/bin/kubectl uncordon {{ ansible_host }} || echo "Post start kubelet, this node was never cordoned."
-      echo "Node uncordoned, and ready!"
-    fi
-    node_ready=$( /usr/bin/kubectl get node --show-labels | grep -i "{{ nodename }}" | grep -i " ready " | wc -l )
-    if [[ "$node_ready" -eq "1" ]]
-    then
-      keepdoing="false"
-      echo "Node become ready."
-    fi
-  fi
   set +e
-  result="$(wget --timeout 10 --tries 5 --ca-certificate $CERT_AUTH --certificate $CLIENT_CER --private-key $CLIENT_KEY --spider https://{{ ansible_host }}:10250/healthz 2>&1 | grep 'HTTP' | grep -E -o '[[:digit:]]{3}')"
-
+  result="$(curl 127.0.0.1:{{ kubelet_healthcheck_port }}/healthz)"
   set -e
-  if [ "$result" == "200" ]
+  if [ "$result" == "ok" ]
   then
-    echo "Healtcheck success"
+    echo "Healtcheck success."
     error=0
+    set +e
+    uncordonresult="$(/usr/bin/kubectl uncordon {{ ansible_host }} 2>&1)"
+    set -e
+    echo "$uncordonresult"
   else
-    echo "Healtcheck failed"
+    echo "Healtcheck failed."
     error=$(($error+1))
   fi
 
   if [ "$error" -ge "5" ]
   then
-    echo "Error with kubelet (Healtcheck failed 5 times) restarting it"
-    systemctl restart kubelet.service
+    activeState="$(systemctl show -p ActiveState --value kubelet)"
+    if [[ "$activeState" == "deactivating" ]] || [[ "$activeState" == "activating" ]]
+    then
+      echo "Kubelet is possibly restarting."
+      error=0
+    else
+      echo "Error with kubelet (Healtcheck failed 5 times) restarting it."
+      systemctl restart kubelet.service
+    fi
   fi
-  sleep 30
+  sleep 1
 done