robot tcs, test charts, robot container added
[ta/cloudtaf.git] / testcases / basic_func_tests / tc_002_pod_health_check.py
1 from robot.api import logger
2 from robot.libraries.BuiltIn import BuiltIn
3 from decorators_for_robot_functionalities import *
4 import sys
5 import os
6 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '../libraries/common'))
7 from test_constants import *  # noqa
8 import common_utils  # noqa
9
10
# Robot Framework library instances used by every step in this module; they
# are looked up once, at import time, through a single BuiltIn handle.
_builtin = BuiltIn()
ex = _builtin.get_library_instance('execute_command')
STACK_INFOS = _builtin.get_library_instance('stack_infos')
13
14
def tc_002_pod_health_check():
    """Entry point of test case 002.

    Hands the names of the five health-check steps, in order, to
    ``common_utils.keyword_runner``.
    """
    step_names = [
        'step1_check_componentstatus',
        'step2_check_kubelet_is_running',
        'step3_check_apiserver_is_running',
        'step4_check_all_kubernetes_pod',
        'step5_check_services_with_systemctl',
    ]
    common_utils.keyword_runner(step_names)
22
23
@pabot_lock("health_check_1")
@pabot_lock("health_check_2")
def step1_check_componentstatus():
    """Fail unless every component status condition reports healthy.

    Runs ``kubectl get componentstatus`` and prints, one compact JSON object
    per line, the ``type`` and ``status`` of every condition. Each healthy
    line is echoed to the console.

    Raises:
        Exception: on the first line that is not a ``Healthy`` condition with
            status ``True``. An empty command output yields an empty line and
            is therefore reported as a failure as well.
    """
    # Per the Kubernetes API, a ComponentStatus condition always has the type
    # "Healthy"; whether the component really is healthy is carried by its
    # status ("True", "False" or "Unknown"). Looking at the type alone would
    # accept unhealthy components, so both fields are requested and checked.
    stdout = ex.execute_unix_command(
        "kubectl get componentstatus -o json | jq -c '.items[].conditions[] | {type, status}'")
    logger.console('\n')
    for line in stdout.split('\n'):
        if '"type":"Healthy"' in line and '"status":"True"' in line:
            logger.console(line)
        else:
            raise Exception("Unhealthy component status condition: " + line)
34
35
@robot_log
def check_container_is_running(name, nodes):
    """Check that exactly one running, non-pause container matches *name* on each node.

    Args:
        name: text used both as the ``docker ps --filter name=`` value and as
            the ``grep`` pattern.
        nodes: mapping of node name to the value that
            ``ex.execute_unix_command_on_remote_as_user`` takes as its second
            argument (presumably the node's connection details; verify
            against ``stack_infos``).

    Raises:
        Exception: for the first node where the container count is not ``'1'``;
            the message carries the node's filtered ``docker ps`` listing.
    """
    # The command does not depend on the node, so it is built once.
    count_command = ("docker ps --filter status=running --filter name=" + name +
                     " | grep -v pause | grep " + name + " | wc -l ")
    for key, node in nodes.items():
        stdout = ex.execute_unix_command_on_remote_as_user(count_command, node)
        if stdout == '1':
            logger.console("\n" + name + " container is running on node " + key + ".")
        else:
            # Fetch the matching listing so the failure shows what is there.
            stdout = ex.execute_unix_command_on_remote_as_user("docker ps | grep -v pause | grep " + name, node)
            raise Exception(name + " container is NOT running on node " + key + "\n" + stdout)
46
47
@robot_log
def check_program_is_running(name, nodes):
    """Check that exactly one process line matches *name* on each node.

    Args:
        name: pattern handed to ``grep`` over the ``ps -aux`` output.
        nodes: mapping of node name to the value that
            ``ex.execute_unix_command_on_remote_as_user`` takes as its second
            argument.

    Raises:
        Exception: for the first node where the match count is not ``'1'``;
            the message carries the matching ``ps`` lines.
    """
    # "grep -v 'color'" presumably removes the grep process itself (run with a
    # --color option) from the listing; confirm on the target hosts.
    listing_command = "ps -aux | grep '" + name + "' | grep -v 'color'"
    counting_command = "ps -aux | grep '" + name + "' | grep -v 'color' | wc -l "
    for node_name, node in nodes.items():
        if ex.execute_unix_command_on_remote_as_user(counting_command, node) == '1':
            logger.console("\n" + name + " is running on node " + node_name + ".")
            continue
        listing = ex.execute_unix_command_on_remote_as_user(listing_command, node)
        raise Exception(name + " is NOT running on node " + node_name + "\n" + listing)
59
60
def step2_check_kubelet_is_running():
    """Check on all nodes that kubelet and its health-check script are running."""
    every_node = STACK_INFOS.get_all_nodes()
    # The trailing space in "/kubelet " keeps the first pattern from also
    # matching "/kubelet_healthcheck.sh".
    for pattern in ("/kubelet ", "/kubelet_healthcheck.sh"):
        check_program_is_running(pattern, every_node)
65
66
def step3_check_apiserver_is_running():
    """Check that the kube-apiserver container is running on every CRF node."""
    check_container_is_running("kube-apiserver", STACK_INFOS.get_crf_nodes())
70
71
@pabot_lock("health_check_1")
def step4_check_all_kubernetes_pod():
    """Fail if any kube-system pod is neither Running nor listed in ``pods_skipped``.

    Lists the pods of the ``kube-system`` namespace and filters out every
    line that matches ``Running`` or one of the ``pods_skipped`` patterns.
    If nothing is left, the step passes. Otherwise the logs of each remaining
    pod are gathered next to this file and the step fails.

    Raises:
        Exception: carrying the filtered pod listing when at least one pod is
            left after filtering.
    """
    log_dir = os.path.dirname(__file__)
    namespace = "kube-system"
    # grep -vP drops every line matching "Running" or a skipped pod pattern,
    # so whatever survives is a pod that needs attention.
    ignore_pattern = "|".join(["Running"] + list(pods_skipped))
    command = "kubectl get po -n " + namespace + " | tail -n +2 | grep -vP '" + ignore_pattern + "'"
    stdout = ex.execute_unix_command(command, fail_on_non_zero_rc=False, skip_prompt_in_command_output=True)[0]
    if not stdout:
        logger.console("\nAll kubernetes PODs are running.")
        return
    for line in stdout.split("\n"):
        columns = line.split()
        if not columns:
            # Blank line in the output: nothing to collect.
            continue
        # Without --all-namespaces the listing has no NAMESPACE column, so the
        # first column is the pod name and the namespace is the one queried.
        pod_name = columns[0]
        command = "kubectl logs --namespace " + namespace + " " + pod_name
        filename = "tc002_step4_" + pod_name + ".log"
        common_utils.gather_logs(command, filename, log_dir)
    raise Exception(stdout)
89
90
def step5_check_services_with_systemctl():
    """Check the overall systemd state of every node.

    A node passes when its ``systemctl status`` summary contains
    ``State: running``, ``Jobs: 0 queued`` and ``Failed: 0 units``. Otherwise
    the failed-unit listing is fetched; the node is still tolerated when that
    listing mentions any entry of ``services_skipped``.

    Raises:
        Exception: carrying the status summary and the failed-unit listing of
            the first node that is not healthy and shows no skipped service.
    """
    all_nodes = STACK_INFOS.get_all_nodes()
    expected_fragments = ["State: running", "Jobs: 0 queued", "Failed: 0 units"]
    command = "systemctl status | grep -E 'State: running|Jobs: 0 queued|Failed: 0 units' | grep -v grep"
    for key in all_nodes:
        logger.console(key)
        stdout = "\nsystemctl status output:\n" + ex.execute_unix_command_on_remote_as_user(command, all_nodes[key])
        if all(x in stdout for x in expected_fragments):
            logger.console(stdout)
            continue
        # cat is needed here to remove the coloring of the systemctl for the robot logs
        failedservices = ex.execute_unix_command_on_remote_as_user("systemctl --failed | cat", all_nodes[key])
        # The listing is fetched once and reused for both the check and the report.
        stdout = stdout + "\n" + failedservices
        # TODO: cloud-final.service fails with unknown reason
        # NOTE(review): a single skipped service in the listing also tolerates
        # every other failed unit on the node - confirm this is intended.
        if not any(service in failedservices for service in services_skipped):
            raise Exception(stdout)
        logger.console(stdout)