From: Ferenc Tóth
Date: Fri, 14 Jun 2019 12:12:03 +0000 (+0200)
Subject: Add initContainer to SR-IOV Device Plugin Pod
X-Git-Url: https://gerrit.akraino.org/r/gitweb?a=commitdiff_plain;h=6065f4ec13b9bb575a43ccb0aaf8cbde7e3537c1;p=ta%2Fcaas-install.git

Add initContainer to SR-IOV Device Plugin Pod

InitContainer waits until VFs are available, so SR-IOV DP can be
started (it won't run into crash loop).
The consequence is that the infra helm chart won't be ready until
VFs are available, so we need to check the Pods status to determine
when the chart is deployed.

Signed-off-by: Ferenc Tóth
Change-Id: I3a7f5dd63893b389422db1f3e60071425caaa109
Depends-On: I19893e39652003640f995a6a88342ceb12c7a9d3
---

diff --git a/SPECS/infra-charts.spec b/SPECS/infra-charts.spec
index 9b6e692..69c096e 100644
--- a/SPECS/infra-charts.spec
+++ b/SPECS/infra-charts.spec
@@ -15,7 +15,7 @@
 %define COMPONENT infra-charts
 %define RPM_NAME caas-%{COMPONENT}
 %define RPM_MAJOR_VERSION 1.0.0
-%define RPM_MINOR_VERSION 4
+%define RPM_MINOR_VERSION 5
 
 Name: %{RPM_NAME}
 Version: %{RPM_MAJOR_VERSION}
@@ -26,6 +26,8 @@ BuildArch: x86_64
 Vendor: %{_platform_vendor}
 Source0: %{name}-%{version}.tar.gz
 
+Requires: rsync
+
 %description
 This rpm contains the necessary helm charts to deploy the caas subsystem.
 
diff --git a/SPECS/instantiate.spec b/SPECS/instantiate.spec
index e4b6a5c..ea2c0a0 100644
--- a/SPECS/instantiate.spec
+++ b/SPECS/instantiate.spec
@@ -15,7 +15,7 @@
 %define COMPONENT instantiate
 %define RPM_NAME caas-%{COMPONENT}
 %define RPM_MAJOR_VERSION 1.0.0
-%define RPM_MINOR_VERSION 4
+%define RPM_MINOR_VERSION 5
 
 Name: %{RPM_NAME}
 Version: %{RPM_MAJOR_VERSION}
@@ -26,6 +26,8 @@ BuildArch: x86_64
 Vendor: %{_platform_vendor}
 Source0: %{name}-%{version}.tar.gz
 
+Requires: rsync
+
 %description
 This rpm contains the necessary playbooks to instantiate the caas subsystem.
 
diff --git a/ansible/roles/install_caas_infra/tasks/main.yaml b/ansible/roles/install_caas_infra/tasks/main.yaml
index b8c69a9..a70fd64 100644
--- a/ansible/roles/install_caas_infra/tasks/main.yaml
+++ b/ansible/roles/install_caas_infra/tasks/main.yaml
@@ -33,18 +33,24 @@
 - name: update helm repo
   shell: HELM_HOST={{ caas.tiller_ip }}:{{ caas.tiller_port }} helm repo update
 
-- name: helm-list
+- name: check helm chart availability
   shell: HELM_HOST={{ caas.tiller_ip }}:{{ caas.tiller_port }} helm get {{ infra_chart_name }}
   register: caas_list_result
   failed_when: "( caas_list_result.rc != 0 ) and ( not (caas_list_result.stderr | search('Error: release')) )"
 
 - block:
-  - name: Install infra_chart with helm
-    shell: HELM_HOST={{ caas.tiller_ip }}:{{ caas.tiller_port }} helm install --wait --timeout {{ caas.helm_operation_timeout }} --name {{ infra_chart_name }} default/{{ infra_chart_name }}
-    when: "caas_list_result is defined and ( ( caas_list_result.stderr | search('Error: release:') ) or ( caas_list_result.stdout == '' ) )"
+  - name: install {{ infra_chart_name }} with helm
+    shell: HELM_HOST={{ caas.tiller_ip }}:{{ caas.tiller_port }} helm install --name {{ infra_chart_name }} default/{{ infra_chart_name }}
+
+  - name: poll infra pod status
+    shell: kubectl get pods --no-headers --namespace=kube-system | grep -vEw "1/1|2/2|3/3" | grep -vE "sriovdp|webhook-cfg-cleaner" | wc -l
+    register: poll
+    until: poll.stdout == "0"
+    retries: "{{ ((caas.helm_operation_timeout | int) / 5) | int }}"
+    delay: 5
 
   rescue:
-  - name: Delete the caas_infra_chart
+  - name: delete the {{ infra_chart_name }} chart
     shell: HELM_HOST={{ caas.tiller_ip }}:{{ caas.tiller_port }} helm delete --purge {{ infra_chart_name }}
 
   - name: fail the current playbook run, because helm install failed
diff --git a/infra-charts/templates/sriovdp.yml b/infra-charts/templates/sriovdp-ds.yaml
similarity index 81%
rename from infra-charts/templates/sriovdp.yml
rename to infra-charts/templates/sriovdp-ds.yaml
index 53956b2..bba35dc 100644
--- a/infra-charts/templates/sriovdp.yml
+++ b/infra-charts/templates/sriovdp-ds.yaml
@@ -15,7 +15,7 @@ limitations under the License.
 */}}
 {{ if .Values.sriovdp.required }}
 ---
-apiVersion: apps/v1beta2
+apiVersion: apps/v1
 kind: DaemonSet
 metadata:
   name: sriovdp-ds
@@ -60,6 +60,17 @@ spec:
         - name: sriovdp-config
           mountPath: /etc/pcidp/
           readOnly: true
+      initContainers:
+      - name: init-sriov
+        image: {{ .Values.sriovdp.image_name }}
+        command: ['sh', '-c', 'source /init/init.sh']
+        volumeMounts:
+        - name: sriovdp-config
+          mountPath: /etc/pcidp/
+          readOnly: true
+        - name: sriovdp-init
+          mountPath: /init/
+          readOnly: true
       volumes:
       - name: time-mount
         hostPath:
@@ -73,5 +84,8 @@ spec:
       - name: sriovdp-config
         hostPath:
           path: /etc/pcidp/
+      - name: sriovdp-init
+        configMap:
+          name: sriovdp-initcm
       terminationGracePeriodSeconds: 1
 {{ end }}
diff --git a/infra-charts/templates/sriovdp-initcm.yaml b/infra-charts/templates/sriovdp-initcm.yaml
new file mode 100644
index 0000000..e8d9aa9
--- /dev/null
+++ b/infra-charts/templates/sriovdp-initcm.yaml
@@ -0,0 +1,47 @@
+{{/*
+Copyright 2019 Nokia
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+{{ if .Values.sriovdp.required }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: sriovdp-initcm
+  namespace: kube-system
+  labels:
+    app: sriovdp-app
+data:
+  init.sh: |-
+    while true; do  # poll every 10s; exit 0 only once every SR-IOV designated PF reports VFs
+      date
+      rootdevs=`jq -r '.resourceList[].rootDevices[]' </etc/pcidp/config.json 2>/dev/null`
+      if [[ -n "$rootdevs" ]]; then
+        for pci in $rootdevs; do  # same-shell loop (no pipeline subshell) so "continue 2" reaches the outer loop
+          vf=`cat /sys/bus/pci/devices/0000:$pci/sriov_numvfs`
+          echo "$pci: $vf VFs"
+          if [[ -z "$vf" || "$vf" == "0" ]]; then
+            echo "No VFs found -> SR-IOV DP cannot be started -> sleep 10"
+            sleep 10
+            continue 2  # restart the check; must not fall through to the success exit below
+          fi
+        done
+        echo "Every SR-IOV designated PF has VF configured -> SR-IOV DP can be started -> rc=0"
+        exit 0
+      else
+        echo "No SR-IOV designated PF found -> SR-IOV DP cannot be started -> sleep 10"
+        sleep 10
+      fi
+    done
+{{ end }}