From b2e0fce40b43fcbf977cf26f482cafe980fc909a Mon Sep 17 00:00:00 2001 From: Krisztian Lengyel Date: Fri, 30 Aug 2019 15:33:15 +0200 Subject: [PATCH] Make Prometheus data storage configurable - From now on, Prometheus uses a persistent volume to store its data. The volume size is set to 1Gi by default. - Data retention is also fine-tuned to avoid out-of-space situations in normal operation (max retention size and WAL compression). - The `kubernetes-apiservers` job is deleted because its data is unused. Change-Id: Ifc801f966c39b41ca65bf917257f84ed95d6e5f8 Signed-off-by: Krisztian Lengyel --- SPECS/infra-charts.spec | 4 +-- SPECS/instantiate.spec | 4 +-- SPECS/utils.spec | 4 +-- cm_config/caas.yaml | 2 ++ infra-charts/templates/prometheus-cfg.yaml | 48 ++++++++---------------------- infra-charts/templates/prometheus-dep.yaml | 5 ++++ infra-charts/templates/prometheus-pvc.yaml | 11 +++++++ infra-charts/values.yaml.j2 | 8 +++++ 8 files changed, 45 insertions(+), 41 deletions(-) create mode 100644 infra-charts/templates/prometheus-pvc.yaml diff --git a/SPECS/infra-charts.spec b/SPECS/infra-charts.spec index 1ca011b..33d643c 100644 --- a/SPECS/infra-charts.spec +++ b/SPECS/infra-charts.spec @@ -15,14 +15,14 @@ %define COMPONENT infra-charts %define RPM_NAME caas-%{COMPONENT} %define RPM_MAJOR_VERSION 1.0.0 -%define RPM_MINOR_VERSION 27 +%define RPM_MINOR_VERSION 28 Name: %{RPM_NAME} Version: %{RPM_MAJOR_VERSION} Release: %{RPM_MINOR_VERSION}%{?dist} Summary: Containers as a Service helm charts License: %{_platform_license} -BuildArch: x86_64 +BuildArch: noarch Vendor: %{_platform_vendor} Source0: %{name}-%{version}.tar.gz diff --git a/SPECS/instantiate.spec b/SPECS/instantiate.spec index a0e3632..65d86cc 100644 --- a/SPECS/instantiate.spec +++ b/SPECS/instantiate.spec @@ -15,14 +15,14 @@ %define COMPONENT instantiate %define RPM_NAME caas-%{COMPONENT} %define RPM_MAJOR_VERSION 1.0.0 -%define RPM_MINOR_VERSION 16 +%define RPM_MINOR_VERSION 17 Name: %{RPM_NAME} Version: %{RPM_MAJOR_VERSION} Release: 
%{RPM_MINOR_VERSION}%{?dist} Summary: Containers as a Service instantiate playbooks License: %{_platform_license} -BuildArch: x86_64 +BuildArch: noarch Vendor: %{_platform_vendor} Source0: %{name}-%{version}.tar.gz diff --git a/SPECS/utils.spec b/SPECS/utils.spec index ecb041e..dc5fe99 100644 --- a/SPECS/utils.spec +++ b/SPECS/utils.spec @@ -15,7 +15,7 @@ %define COMPONENT utils %define RPM_NAME caas-%{COMPONENT} %define RPM_MAJOR_VERSION 1.0.0 -%define RPM_MINOR_VERSION 5 +%define RPM_MINOR_VERSION 6 %define KUBELET_PLUGINS_LOGDIR /var/log/kubelet-plugins/ Name: %{RPM_NAME} @@ -23,7 +23,7 @@ Version: %{RPM_MAJOR_VERSION} Release: %{RPM_MINOR_VERSION}%{?dist} Summary: Containers as a Service supplementary utils License: %{_platform_license} -BuildArch: x86_64 +BuildArch: noarch Vendor: %{_platform_vendor} Source0: %{name}-%{version}.tar.gz diff --git a/cm_config/caas.yaml b/cm_config/caas.yaml index ee79a5a..98bf332 100644 --- a/cm_config/caas.yaml +++ b/cm_config/caas.yaml @@ -78,6 +78,8 @@ kubernetes_service_url: kubernetes.default.svc prometheus_port: 9090 prometheus_url: prometheus.kube-system.svc.{{ dns_domain }} prometheus: https://{{ prometheus_url }}:{{ prometheus_port }} +prometheus_storage_size: "1Gi" +prometheus_data_retention_hours: 6 custom_metrics_api_port: 6443 custom_metrics_api_serviceport: 443 metrics_server_port: 443 diff --git a/infra-charts/templates/prometheus-cfg.yaml b/infra-charts/templates/prometheus-cfg.yaml index 214c5f3..021c4d8 100644 --- a/infra-charts/templates/prometheus-cfg.yaml +++ b/infra-charts/templates/prometheus-cfg.yaml @@ -47,40 +47,6 @@ data: evaluation_interval: 1m scrape_configs: - - job_name: 'kubernetes-apiservers' - - kubernetes_sd_configs: - - role: endpoints - - # Default to scraping over https. If required, just disable this or change to - # `http`. - scheme: https - - # This TLS & bearer token file config is used to connect to the actual scrape - # endpoints for cluster components. 
This is separate to discovery auth - # configuration because discovery & scraping are two separate concerns in - # Prometheus. The discovery auth config is automatic if Prometheus runs inside - # the cluster. Otherwise, more config options have to be provided within the - # . - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - # If your node certificates are self-signed or use a different CA to the - # master CA, then disable certificate verification below. Note that - # certificate verification is an integral part of a secure infrastructure - # so this should only be disabled in a controlled environment. You can - # disable certificate verification by uncommenting the line below. - # - insecure_skip_verify: true - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - # Keep only the default/kubernetes service endpoints for the https port. This - # will add targets for each API server which Kubernetes adds an endpoint to - # the default/kubernetes service. - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: default;kubernetes;https - # Scrape config for service endpoints. 
# # The relabeling allows the actual service scrape endpoint to be configured @@ -97,6 +63,11 @@ data: kubernetes_sd_configs: - role: endpoints + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: false + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] action: keep @@ -184,6 +155,7 @@ data: http { server { listen {{ .Values.prometheus.port }} ssl; + access_log /dev/stdout; ssl_certificate /var/run/serving-cert/prometheus.crt; ssl_certificate_key /var/run/serving-cert/prometheus.key; ssl_client_certificate /etc/openssl/ca.pem; @@ -222,7 +194,13 @@ data: supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface [program:prometheus] - command=/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.retention=1h --web.listen-address=127.0.0.1:19090 + command=/bin/prometheus + --config.file=/etc/prometheus/prometheus.yml + --storage.tsdb.path=/data + --storage.tsdb.retention.time={{ .Values.prometheus.data_retention }}h + --storage.tsdb.retention.size={{ .Values.prometheus.data_retension_size }} + --web.listen-address=127.0.0.1:19090 + --storage.tsdb.wal-compression startsecs=3 startretries=1 stopwaitsecs = 3 diff --git a/infra-charts/templates/prometheus-dep.yaml b/infra-charts/templates/prometheus-dep.yaml index 6970921..03047aa 100644 --- a/infra-charts/templates/prometheus-dep.yaml +++ b/infra-charts/templates/prometheus-dep.yaml @@ -64,6 +64,8 @@ spec: readOnly: true - mountPath: /etc/prometheus name: prometheus-config + - mountPath: /data + name: prometheus-data livenessProbe: httpGet: path: /api/v1/status/config @@ -94,4 +96,7 @@ spec: - key: supervisord.conf path: supervisord.conf mode: 0644 + - name: prometheus-data + persistentVolumeClaim: + claimName: prometheus-pvc {{ end }} diff --git a/infra-charts/templates/prometheus-pvc.yaml 
b/infra-charts/templates/prometheus-pvc.yaml new file mode 100644 index 0000000..809ddb7 --- /dev/null +++ b/infra-charts/templates/prometheus-pvc.yaml @@ -0,0 +1,11 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: prometheus-pvc + namespace: kube-system +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.prometheus.storage_size }} diff --git a/infra-charts/values.yaml.j2 b/infra-charts/values.yaml.j2 index 3a864bd..98c0752 100644 --- a/infra-charts/values.yaml.j2 +++ b/infra-charts/values.yaml.j2 @@ -1,3 +1,4 @@ +#jinja2: lstrip_blocks: True {# Copyright 2019 Nokia @@ -32,6 +33,13 @@ prometheus: prometheus_url: {{ caas.prometheus }} server_cert: {{ prometheus_cert_b64 }} server_key: {{ prometheus_cert_key_b64 }} + storage_size: {{ caas.prometheus_storage_size }} + data_retention: {{ caas.prometheus_data_retention_hours }} +{# human_to_bytes handles decimal prefix as binary prefix #} + {% set prometheus_storage_size_in_mbi = (caas.prometheus_storage_size | replace('i','') | human_to_bytes) %} + {% set prometheus_wal_size = (3 * 128) | human_to_bytes('MB') %} + {% set prometheus_db_size = ((prometheus_storage_size_in_mbi | int) * 0.8) - (prometheus_wal_size | int) %} + data_retension_size: {{ ((prometheus_db_size | int) / (1024 | pow(2))) | int }}MB custom_metrics_api: required: true -- 2.16.6