Make Prometheus data storage configurable 09/1509/9
author Krisztian Lengyel <krisztian.lengyel@nokia.com>
Fri, 30 Aug 2019 13:33:15 +0000 (15:33 +0200)
committer Krisztian Lengyel <krisztian.lengyel@nokia.com>
Wed, 25 Sep 2019 14:33:59 +0000 (16:33 +0200)
- From now on, Prometheus uses a persistent volume to store its data. The
volume size is configurable and defaults to 1Gi.
- Data retention is also fine-tuned to avoid out-of-space situations in
normal operation (max retention size and WAL compression).
- The `kubernetes-apiservers` job is deleted because its data is unused.

Change-Id: Ifc801f966c39b41ca65bf917257f84ed95d6e5f8
Signed-off-by: Krisztian Lengyel <krisztian.lengyel@nokia.com>
SPECS/infra-charts.spec
SPECS/instantiate.spec
SPECS/utils.spec
cm_config/caas.yaml
infra-charts/templates/prometheus-cfg.yaml
infra-charts/templates/prometheus-dep.yaml
infra-charts/templates/prometheus-pvc.yaml [new file with mode: 0644]
infra-charts/values.yaml.j2

index 1ca011b..33d643c 100644 (file)
 %define COMPONENT infra-charts
 %define RPM_NAME caas-%{COMPONENT}
 %define RPM_MAJOR_VERSION 1.0.0
-%define RPM_MINOR_VERSION 27
+%define RPM_MINOR_VERSION 28
 
 Name:           %{RPM_NAME}
 Version:        %{RPM_MAJOR_VERSION}
 Release:        %{RPM_MINOR_VERSION}%{?dist}
 Summary:        Containers as a Service helm charts
 License:        %{_platform_license}
-BuildArch:      x86_64
+BuildArch:      noarch
 Vendor:         %{_platform_vendor}
 Source0:        %{name}-%{version}.tar.gz
 
index a0e3632..65d86cc 100644 (file)
 %define COMPONENT instantiate
 %define RPM_NAME caas-%{COMPONENT}
 %define RPM_MAJOR_VERSION 1.0.0
-%define RPM_MINOR_VERSION 16
+%define RPM_MINOR_VERSION 17
 
 Name:           %{RPM_NAME}
 Version:        %{RPM_MAJOR_VERSION}
 Release:        %{RPM_MINOR_VERSION}%{?dist}
 Summary:        Containers as a Service instantiate playbooks
 License:        %{_platform_license}
-BuildArch:      x86_64
+BuildArch:      noarch
 Vendor:         %{_platform_vendor}
 Source0:        %{name}-%{version}.tar.gz
 
index ecb041e..dc5fe99 100644 (file)
@@ -15,7 +15,7 @@
 %define COMPONENT utils
 %define RPM_NAME caas-%{COMPONENT}
 %define RPM_MAJOR_VERSION 1.0.0
-%define RPM_MINOR_VERSION 5
+%define RPM_MINOR_VERSION 6
 %define KUBELET_PLUGINS_LOGDIR /var/log/kubelet-plugins/
 
 Name:           %{RPM_NAME}
@@ -23,7 +23,7 @@ Version:        %{RPM_MAJOR_VERSION}
 Release:        %{RPM_MINOR_VERSION}%{?dist}
 Summary:        Containers as a Service supplementary utils
 License:        %{_platform_license}
-BuildArch:      x86_64
+BuildArch:      noarch
 Vendor:         %{_platform_vendor}
 Source0:        %{name}-%{version}.tar.gz
 
index ee79a5a..98bf332 100644 (file)
@@ -78,6 +78,8 @@ kubernetes_service_url: kubernetes.default.svc
 prometheus_port: 9090
 prometheus_url: prometheus.kube-system.svc.{{ dns_domain }}
 prometheus: https://{{ prometheus_url }}:{{ prometheus_port }}
+prometheus_storage_size: "1Gi"
+prometheus_data_retention_hours: 6
 custom_metrics_api_port: 6443
 custom_metrics_api_serviceport: 443
 metrics_server_port: 443
index 214c5f3..021c4d8 100644 (file)
@@ -47,40 +47,6 @@ data:
       evaluation_interval: 1m
 
     scrape_configs:
-    - job_name: 'kubernetes-apiservers'
-
-      kubernetes_sd_configs:
-      - role: endpoints
-
-      # Default to scraping over https. If required, just disable this or change to
-      # `http`.
-      scheme: https
-
-      # This TLS & bearer token file config is used to connect to the actual scrape
-      # endpoints for cluster components. This is separate to discovery auth
-      # configuration because discovery & scraping are two separate concerns in
-      # Prometheus. The discovery auth config is automatic if Prometheus runs inside
-      # the cluster. Otherwise, more config options have to be provided within the
-      # <kubernetes_sd_config>.
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        # If your node certificates are self-signed or use a different CA to the
-        # master CA, then disable certificate verification below. Note that
-        # certificate verification is an integral part of a secure infrastructure
-        # so this should only be disabled in a controlled environment. You can
-        # disable certificate verification by uncommenting the line below.
-        #
-        insecure_skip_verify: true
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      # Keep only the default/kubernetes service endpoints for the https port. This
-      # will add targets for each API server which Kubernetes adds an endpoint to
-      # the default/kubernetes service.
-      relabel_configs:
-      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
-        action: keep
-        regex: default;kubernetes;https
-
     # Scrape config for service endpoints.
     #
     # The relabeling allows the actual service scrape endpoint to be configured
@@ -97,6 +63,11 @@ data:
       kubernetes_sd_configs:
       - role: endpoints
 
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: false
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
       relabel_configs:
       - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
         action: keep
@@ -184,6 +155,7 @@ data:
     http {
         server {
             listen                {{ .Values.prometheus.port }} ssl;
+            access_log /dev/stdout;
             ssl_certificate       /var/run/serving-cert/prometheus.crt;
             ssl_certificate_key   /var/run/serving-cert/prometheus.key;
             ssl_client_certificate /etc/openssl/ca.pem;
@@ -222,7 +194,13 @@ data:
     supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
 
     [program:prometheus]
-    command=/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.retention=1h --web.listen-address=127.0.0.1:19090
+    command=/bin/prometheus
+      --config.file=/etc/prometheus/prometheus.yml
+      --storage.tsdb.path=/data
+      --storage.tsdb.retention.time={{ .Values.prometheus.data_retention }}h
+      --storage.tsdb.retention.size={{ .Values.prometheus.data_retension_size }}
+      --web.listen-address=127.0.0.1:19090
+      --storage.tsdb.wal-compression
     startsecs=3
     startretries=1
     stopwaitsecs = 3
index 6970921..03047aa 100644 (file)
@@ -64,6 +64,8 @@ spec:
           readOnly: true
         - mountPath: /etc/prometheus
           name: prometheus-config
+        - mountPath: /data
+          name: prometheus-data
         livenessProbe:
           httpGet:
             path: /api/v1/status/config
@@ -94,4 +96,7 @@ spec:
               - key: supervisord.conf
                 path: supervisord.conf
                 mode: 0644
+        - name: prometheus-data
+          persistentVolumeClaim:
+            claimName: prometheus-pvc
 {{ end }}
diff --git a/infra-charts/templates/prometheus-pvc.yaml b/infra-charts/templates/prometheus-pvc.yaml
new file mode 100644 (file)
index 0000000..809ddb7
--- /dev/null
@@ -0,0 +1,11 @@
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: prometheus-pvc
+  namespace: kube-system
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.prometheus.storage_size }}
index 3a864bd..98c0752 100644 (file)
@@ -1,3 +1,4 @@
+#jinja2: lstrip_blocks: True
 {#
 Copyright 2019 Nokia
 
@@ -32,6 +33,13 @@ prometheus:
   prometheus_url: {{ caas.prometheus }}
   server_cert: {{ prometheus_cert_b64 }}
   server_key: {{ prometheus_cert_key_b64 }}
+  storage_size: {{ caas.prometheus_storage_size }}
+  data_retention: {{ caas.prometheus_data_retention_hours }}
+{# human_to_bytes treats decimal prefixes as binary ones (e.g. 1G = 1024^3 bytes), so the 'i' of 'Gi' is stripped before conversion #}
+  {% set prometheus_storage_size_in_mbi = (caas.prometheus_storage_size | replace('i','') | human_to_bytes) %}
+  {% set prometheus_wal_size = (3 * 128) | human_to_bytes('MB') %}
+  {% set prometheus_db_size = ((prometheus_storage_size_in_mbi | int) * 0.8) - (prometheus_wal_size | int) %}
+  data_retension_size: {{ ((prometheus_db_size | int) / (1024 | pow(2))) | int }}MB
 
 custom_metrics_api:
   required: true