1 from robot.api import logger
2 from robot.libraries.BuiltIn import BuiltIn
3 from decorators_for_robot_functionalities import *
# Extend the import search path with <this file's dir>/../../libraries/common;
# presumably that is where test_constants and common_utils live -- TODO confirm.
6 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '../libraries/common'))
7 from test_constants import * # noqa
8 import common_utils # noqa
# Handles to Robot Framework libraries, looked up once when this module is imported.
# `ex` is used below to run shell commands (locally and on remote nodes);
# `STACK_INFOS` is used below to obtain the node collections to check.
11 ex = BuiltIn().get_library_instance('execute_command')
12 STACK_INFOS = BuiltIn().get_library_instance('stack_infos')
def tc_002_pod_health_check():
    """Entry point of test case 002 (pod health check).

    Passes the ordered list of step keyword names to
    ``common_utils.keyword_runner``.
    """
    common_utils.keyword_runner([
        'step1_check_componentstatus',
        'step2_check_kubelet_is_running',
        'step3_check_apiserver_is_running',
        'step4_check_all_kubernetes_pod',
        'step5_check_services_with_systemctl',
    ])
# Step 1: query the Kubernetes component statuses and walk the result line by line.
# pabot_lock is not defined in this file (it arrives through a star import);
# presumably it serializes this step across parallel pabot workers -- TODO confirm.
24 @pabot_lock("health_check_1")
25 @pabot_lock("health_check_2")
26 def step1_check_componentstatus():
# The jq filter prints the `type` field of every condition of every listed component.
27 stdout = ex.execute_unix_command("kubectl get componentstatus -o json | jq .items[].conditions[].type")
# NOTE(review): the body of this loop is not visible in this excerpt, so what is
# done with each output line (and when the step fails) cannot be stated from here.
29 for line in stdout.split('\n'):
# Helper: check on remote nodes that a container matching `name` is running.
#   name  -- string used both as the `docker ps` name filter and as the grep pattern
#   nodes -- indexable by `key`; nodes[key] is passed as the target of the remote command
# Raises Exception (with the `docker ps` listing appended) on the failure path.
# NOTE(review): the statement that introduces `key` and the condition that selects
# between the success log and the raise are not visible in this excerpt.
37 def check_container_is_running(name, nodes):
# Count running containers whose name matches, ignoring "pause" containers.
39 stdout = ex.execute_unix_command_on_remote_as_user("docker ps --filter status=running --filter name=" + name +
40 " | grep -v pause | grep " + name + " | wc -l ", nodes[key])
42 logger.console("\n" + name + " container is running on node " + key + ".")
# Failure path: re-list the matching containers (any status) to include them in the error.
44 stdout = ex.execute_unix_command_on_remote_as_user("docker ps | grep -v pause | grep " + name, nodes[key])
# NOTE(review): there is no space between `name` and "container" in this message.
45 raise Exception(name + "container is NOT running on node " + key + "\n" + stdout)
# Helper: check on remote nodes that a process whose `ps -aux` line matches `name` exists.
#   name  -- pattern inserted (single-quoted) into the remote grep command
#   nodes -- indexable by `key` (used in the log and error messages)
# Raises Exception (with the matching `ps` lines appended) on the failure path.
# NOTE(review): the `key` loop, the success/failure condition and the closing
# arguments of both remote calls (they end on a trailing comma here) are not
# visible in this excerpt.
49 def check_program_is_running(name, nodes):
# Count matching process lines; `grep -v 'color'` presumably filters out the grep
# process itself -- TODO confirm.
51 stdout = ex.execute_unix_command_on_remote_as_user("ps -aux | grep '" + name + "' | grep -v 'color' | wc -l ",
54 logger.console("\n" + name + " is running on node " + key + ".")
# Failure path: fetch the matching lines themselves so they can be attached to the error.
56 stdout = ex.execute_unix_command_on_remote_as_user("ps -aux | grep '" + name + "' | grep -v 'color'",
58 raise Exception(name + " is NOT running on node " + key + "\n" + stdout)
def step2_check_kubelet_is_running():
    """Step 2: verify the kubelet processes via ``check_program_is_running``.

    Both the ``/kubelet `` binary and the ``/kubelet_healthcheck.sh`` script
    are checked, in that order, against the nodes returned by
    ``STACK_INFOS.get_all_nodes()``.
    """
    nodes = STACK_INFOS.get_all_nodes()
    # The trailing space in "/kubelet " is part of the pattern and must be kept.
    for process_pattern in ("/kubelet ", "/kubelet_healthcheck.sh"):
        check_program_is_running(process_pattern, nodes)
def step3_check_apiserver_is_running():
    """Step 3: verify the ``kube-apiserver`` container.

    Delegates to ``check_container_is_running`` with the nodes returned by
    ``STACK_INFOS.get_crf_nodes()``.
    """
    check_container_is_running("kube-apiserver", STACK_INFOS.get_crf_nodes())
# Step 4: list kube-system pods that are not in an accepted state, collect their
# logs and fail with the listing.
# NOTE(review): several lines of this function are not visible in this excerpt
# (the body of the `pods_skipped` loop, the end of the grep pattern, the condition
# that selects between the success log and the failure path, and whatever turns
# `line` into fields), so the notes below describe only what is shown.
72 @pabot_lock("health_check_1")
73 def step4_check_all_kubernetes_pod():
# os.path.join with a single argument returns it unchanged: LOG_DIR is this file's directory.
74 LOG_DIR = os.path.join(os.path.dirname(__file__))
# Start of the shell pipeline: drop the header row (tail -n +2), then keep only lines
# NOT matching a Perl-style regex that begins with `Running`. The shell quote opened
# before `Running` is not closed on this line.
75 command = "kubectl get po -n kube-system | tail -n +2 | grep -vP 'Running"
# pods_skipped is not defined in this file; presumably it comes from the
# test_constants star import -- TODO confirm.
76 for pod in pods_skipped:
# A non-zero exit code is tolerated here; element [0] of the returned value is used
# as the command output (presumably the call returns an (output, rc) pair -- TODO confirm).
79 stdout = ex.execute_unix_command(command, fail_on_non_zero_rc=False, skip_prompt_in_command_output=True)[0]
81 logger.console("\nAll kubernetes PODs are running.")
83 for line in stdout.split("\n"):
# NOTE(review): line[0] is used as the namespace and line[1] as the pod name; verify
# this against the column order of `kubectl get po -n kube-system` output.
85 command = "kubectl logs --namespace " + line[0] + " " + line[1]
# NOTE(review): the file name prefix says "tc004_step1_" although this is step 4 of tc_002.
86 filename = "tc004_step1_" + line[1] + ".log"
87 common_utils.gather_logs(command, filename, LOG_DIR)
88 raise Exception(stdout)
# Step 5: check the systemd state on remote nodes through `systemctl status`, and
# inspect `systemctl --failed` when the expected state markers are not all present.
# NOTE(review): the statement that introduces `key`, the `else` branches and the
# closing arguments of two remote calls (they end on a trailing comma here) are
# not visible in this excerpt.
91 def step5_check_services_with_systemctl():
92 all_nodes = STACK_INFOS.get_all_nodes()
# Keep only the three summary lines that describe a healthy system.
93 command = "systemctl status | grep -E 'State: running|Jobs: 0 queued|Failed: 0 units' | grep -v grep"
96 stdout = "\nsystemctl status output:\n" + ex.execute_unix_command_on_remote_as_user(command, all_nodes[key])
# Healthy only if all three markers survived the grep filter above.
97 if all(x in stdout for x in ["State: running", "Jobs: 0 queued", "Failed: 0 units"]):
98 logger.console(stdout)
100 # cat is needed here to remove the coloring of the systemctl for the robot logs
101 failedservices = ex.execute_unix_command_on_remote_as_user("systemctl --failed | cat", all_nodes[key])
102 # TODO: cloud-final.service fails with unknown reason
# services_skipped is not defined in this file; presumably it comes from the
# test_constants star import -- TODO confirm. The test is a substring match of each
# skipped service name against the `systemctl --failed` output.
103 if any(service in failedservices for service in services_skipped):
# NOTE(review): `systemctl --failed | cat` is executed again here instead of reusing
# `failedservices`.
104 stdout = stdout + "\n" + ex.execute_unix_command_on_remote_as_user("systemctl --failed | cat",
106 logger.console(stdout)
108 stdout = stdout + "\n" + ex.execute_unix_command_on_remote_as_user("systemctl --failed | cat",
110 raise Exception(stdout)