Merge pull request #2708 from verejoel/feature/flexible-autoscaling

[loki-distributed] Enable custom metrics for HPA and update Loki to 2.9.2
grafana · Oct 17, 2023 · e005972 · e005972
2 parents f92df77 + 1ba8fea
commit e005972
Show file tree

Hide file tree

Showing 10 changed files with 165 additions and 8 deletions.
diff --git a/charts/loki-distributed/Chart.yaml b/charts/loki-distributed/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
 name: loki-distributed
 description: Helm chart for Grafana Loki in microservices mode
 type: application
-appVersion: 2.9.1
-version: 0.75.0
+appVersion: 2.9.2
+version: 0.76.0
 home: https://grafana.github.io/helm-charts
 sources:
   - https://github.com/grafana/loki

diff --git a/charts/loki-distributed/README.md b/charts/loki-distributed/README.md
@@ -1,6 +1,6 @@
 # loki-distributed
 
-![Version: 0.75.0](https://img.shields.io/badge/Version-0.75.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.1](https://img.shields.io/badge/AppVersion-2.9.1-informational?style=flat-square)
+![Version: 0.76.0](https://img.shields.io/badge/Version-0.76.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.2](https://img.shields.io/badge/AppVersion-2.9.2-informational?style=flat-square)
 
 Helm chart for Grafana Loki in microservices mode
 
@@ -24,6 +24,18 @@ helm repo add grafana https://grafana.github.io/helm-charts
 
 Major version upgrades listed here indicate that there is an incompatible breaking change needing manual actions.
 
+### From 0.74.x to 0.75.0
+The Index Gateway and Query Scheduler now expose the memberlist port 7946. For these components to
+join the memberlist ring, enable their ring settings in `loki.structuredConfig`:
+```yaml
+loki:
+  structuredConfig:
+    index_gateway:
+      mode: ring
+    query_scheduler:
+      use_scheduler_ring: true
+```
+
 ### From 0.68.x to 0.69.0
 The in-memory `fifocache` has been renamed to more general `embedded_cache`, which currently doesn't have a `max_size_items` attribute.
 ```yaml
@@ -114,6 +126,7 @@ kubectl delete statefulset RELEASE_NAME-loki-distributed-querier -n LOKI_NAMESPA
 | distributor.autoscaling.behavior.enabled | bool | `false` | Enable autoscaling behaviours |
 | distributor.autoscaling.behavior.scaleDown | object | `{}` | define scale down policies, must conform to HPAScalingRules |
 | distributor.autoscaling.behavior.scaleUp | object | `{}` | define scale up policies, must conform to HPAScalingRules |
+| distributor.autoscaling.customMetrics | list | `[]` | Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) |
 | distributor.autoscaling.enabled | bool | `false` | Enable autoscaling for the distributor |
 | distributor.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the distributor |
 | distributor.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the distributor |
@@ -146,6 +159,7 @@ kubectl delete statefulset RELEASE_NAME-loki-distributed-querier -n LOKI_NAMESPA
 | gateway.autoscaling.behavior.enabled | bool | `false` | Enable autoscaling behaviours |
 | gateway.autoscaling.behavior.scaleDown | object | `{}` | define scale down policies, must conform to HPAScalingRules |
 | gateway.autoscaling.behavior.scaleUp | object | `{}` | define scale up policies, must conform to HPAScalingRules |
+| gateway.autoscaling.customMetrics | list | `[]` | Allows one to define custom metrics using the HPA/v2 schema (for example, Resource, Object or External metrics) |
 | gateway.autoscaling.enabled | bool | `false` | Enable autoscaling for the gateway |
 | gateway.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the gateway |
 | gateway.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the gateway |
@@ -254,6 +268,7 @@ kubectl delete statefulset RELEASE_NAME-loki-distributed-querier -n LOKI_NAMESPA
 | ingester.autoscaling.behavior.enabled | bool | `false` | Enable autoscaling behaviours |
 | ingester.autoscaling.behavior.scaleDown | object | `{}` | define scale down policies, must conform to HPAScalingRules |
 | ingester.autoscaling.behavior.scaleUp | object | `{}` | define scale up policies, must conform to HPAScalingRules |
+| ingester.autoscaling.customMetrics | list | `[]` | Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) |
 | ingester.autoscaling.enabled | bool | `false` | Enable autoscaling for the ingester |
 | ingester.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the ingester |
 | ingester.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the ingester |
@@ -462,6 +477,7 @@ kubectl delete statefulset RELEASE_NAME-loki-distributed-querier -n LOKI_NAMESPA
 | querier.autoscaling.behavior.enabled | bool | `false` | Enable autoscaling behaviours |
 | querier.autoscaling.behavior.scaleDown | object | `{}` | define scale down policies, must conform to HPAScalingRules |
 | querier.autoscaling.behavior.scaleUp | object | `{}` | define scale up policies, must conform to HPAScalingRules |
+| querier.autoscaling.customMetrics | list | `[]` | Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) |
 | querier.autoscaling.enabled | bool | `false` | Enable autoscaling for the querier, this is only used if `indexGateway.enabled: true` |
 | querier.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the querier |
 | querier.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the querier |
@@ -502,6 +518,7 @@ kubectl delete statefulset RELEASE_NAME-loki-distributed-querier -n LOKI_NAMESPA
 | queryFrontend.autoscaling.behavior.enabled | bool | `false` | Enable autoscaling behaviours |
 | queryFrontend.autoscaling.behavior.scaleDown | object | `{}` | define scale down policies, must conform to HPAScalingRules |
 | queryFrontend.autoscaling.behavior.scaleUp | object | `{}` | define scale up policies, must conform to HPAScalingRules |
+| queryFrontend.autoscaling.customMetrics | list | `[]` | Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) |
 | queryFrontend.autoscaling.enabled | bool | `false` | Enable autoscaling for the query-frontend |
 | queryFrontend.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the query-frontend |
 | queryFrontend.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the query-frontend |

diff --git a/charts/loki-distributed/README.md.gotmpl b/charts/loki-distributed/README.md.gotmpl
@@ -22,6 +22,18 @@ helm repo add grafana https://grafana.github.io/helm-charts
 
 Major version upgrades listed here indicate that there is an incompatible breaking change needing manual actions.
 
+### From 0.74.x to 0.75.0
+The Index Gateway and Query Scheduler now expose the memberlist port 7946. For these components to
+join the memberlist ring, enable their ring settings in `loki.structuredConfig`:
+```yaml
+loki:
+  structuredConfig:
+    index_gateway:
+      mode: ring
+    query_scheduler:
+      use_scheduler_ring: true
+```
+
 ### From 0.68.x to 0.69.0
 The in-memory `fifocache` has been renamed to more general `embedded_cache`, which currently doesn't have a `max_size_items` attribute.
 ```yaml

diff --git a/charts/loki-distributed/templates/distributor/hpa.yaml b/charts/loki-distributed/templates/distributor/hpa.yaml
@@ -38,6 +38,9 @@ spec:
         targetAverageUtilization: {{ . }}
         {{- end }}
   {{- end }}
+  {{- with .Values.distributor.autoscaling.customMetrics }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   {{- if .Values.distributor.autoscaling.behavior.enabled }}
   behavior:
     {{- with .Values.distributor.autoscaling.behavior.scaleDown }}

diff --git a/charts/loki-distributed/templates/gateway/hpa.yaml b/charts/loki-distributed/templates/gateway/hpa.yaml
@@ -38,6 +38,9 @@ spec:
         targetAverageUtilization: {{ . }}
         {{- end }}
   {{- end }}
+  {{- with .Values.gateway.autoscaling.customMetrics }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   {{- if .Values.gateway.autoscaling.behavior.enabled }}
   behavior:
     {{- with .Values.gateway.autoscaling.behavior.scaleDown }}

diff --git a/charts/loki-distributed/templates/ingester/hpa.yaml b/charts/loki-distributed/templates/ingester/hpa.yaml
@@ -39,6 +39,9 @@ spec:
         targetAverageUtilization: {{ . }}
         {{- end }}
   {{- end }}
+  {{- with .Values.ingester.autoscaling.customMetrics }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   {{- if .Values.ingester.autoscaling.behavior.enabled }}
   behavior:
     {{- with .Values.ingester.autoscaling.behavior.scaleDown }}

diff --git a/charts/loki-distributed/templates/querier/hpa.yaml b/charts/loki-distributed/templates/querier/hpa.yaml
@@ -39,6 +39,9 @@ spec:
         targetAverageUtilization: {{ . }}
         {{- end }}
   {{- end }}
+  {{- with .Values.querier.autoscaling.customMetrics }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   {{- if .Values.querier.autoscaling.behavior.enabled }}
   behavior:
     {{- with .Values.querier.autoscaling.behavior.scaleDown }}

diff --git a/charts/loki-distributed/templates/query-frontend/hpa.yaml b/charts/loki-distributed/templates/query-frontend/hpa.yaml
@@ -39,6 +39,9 @@ spec:
         targetAverageUtilization: {{ . }}
         {{- end }}
   {{- end }}
+  {{- with .Values.queryFrontend.autoscaling.customMetrics }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
   {{- if .Values.queryFrontend.autoscaling.behavior.enabled }}
   behavior:
     {{- with .Values.queryFrontend.autoscaling.behavior.scaleDown }}

diff --git a/charts/loki-distributed/values.test.yaml b/charts/loki-distributed/values.test.yaml
@@ -1,3 +1,67 @@
 loki:
   annotations:
     foo: bar
+
+ingester:  # test values: Pods-type custom metric for the ingester HPA
+  autoscaling:
+    enabled: true
+    customMetrics:
+      - type: Pods
+        pods:  # payload key must match the metric type (Pods -> pods)
+          metric:
+            name: loki_lines_total
+          target:
+            type: AverageValue
+            averageValue: 10000
+
+distributor:  # test values: Pods-type custom metric for the distributor HPA
+  autoscaling:
+    enabled: true
+    customMetrics:
+      - type: Pods
+        pods:  # payload key must match the metric type (Pods -> pods)
+          metric:
+            name: loki_lines_total
+          target:
+            type: AverageValue
+            averageValue: 10000
+
+querier:  # test values: External-type custom metric for the querier HPA
+  autoscaling:
+    enabled: true
+    customMetrics:
+      - type: External
+        external:
+          metric:
+            name: loki_inflight_queries
+          target:
+            type: AverageValue
+            averageValue: 12
+
+queryFrontend:  # test values: Pods-type custom metric for the query-frontend HPA
+  autoscaling:
+    enabled: true
+    customMetrics:
+      - type: Pods
+        pods:
+          metric:
+            name: loki_query_rate
+          target:
+            type: AverageValue
+            averageValue: 100
+
+gateway:  # test values: Object-type custom metric for the gateway HPA
+  autoscaling:
+    enabled: true
+    customMetrics:
+      - type: Object
+        object:
+          metric:
+            name: requests-per-second
+          describedObject:
+            apiVersion: networking.k8s.io/v1
+            kind: Ingress
+            name: main-route
+          target:
+            type: Value  # valid target types are Utilization, Value, AverageValue
+            value: 10k
diff --git a/charts/loki-distributed/values.yaml b/charts/loki-distributed/values.yaml
@@ -391,7 +391,16 @@ ingester:
     # -- Target CPU utilisation percentage for the ingester
     targetCPUUtilizationPercentage: 60
     # -- Target memory utilisation percentage for the ingester
-    targetMemoryUtilizationPercentage:
+    targetMemoryUtilizationPercentage: null
+    # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics)
+    customMetrics: []
+    # - type: Pods
+    #   pods:
+    #     metric:
+    #       name: loki_lines_total
+    #     target:
+    #       type: AverageValue
+    #       averageValue: 10k
     behavior:
       # -- Enable autoscaling behaviours
       enabled: false
@@ -521,7 +530,16 @@ distributor:
     # -- Target CPU utilisation percentage for the distributor
     targetCPUUtilizationPercentage: 60
     # -- Target memory utilisation percentage for the distributor
-    targetMemoryUtilizationPercentage:
+    targetMemoryUtilizationPercentage: null
+    # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics)
+    customMetrics: []
+    # - type: Pods
+    #   pods:
+    #     metric:
+    #       name: loki_lines_total
+    #     target:
+    #       type: AverageValue
+    #       averageValue: 10k
     behavior:
       # -- Enable autoscaling behaviours
       enabled: false
@@ -610,7 +628,16 @@ querier:
     # -- Target CPU utilisation percentage for the querier
     targetCPUUtilizationPercentage: 60
     # -- Target memory utilisation percentage for the querier
-    targetMemoryUtilizationPercentage:
+    targetMemoryUtilizationPercentage: null
+    # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics)
+    customMetrics: []
+    # - type: External
+    #   external:
+    #     metric:
+    #       name: loki_inflight_queries
+    #     target:
+    #       type: AverageValue
+    #       averageValue: 12
     behavior:
       # -- Enable autoscaling behaviours
       enabled: false
@@ -725,7 +752,16 @@ queryFrontend:
     # -- Target CPU utilisation percentage for the query-frontend
     targetCPUUtilizationPercentage: 60
     # -- Target memory utilisation percentage for the query-frontend
-    targetMemoryUtilizationPercentage:
+    targetMemoryUtilizationPercentage: null
+    # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics)
+    customMetrics: []
+    # - type: Pods
+    #   pods:
+    #     metric:
+    #       name: loki_query_rate
+    #     target:
+    #       type: AverageValue
+    #       averageValue: 100
     behavior:
       # -- Enable autoscaling behaviours
       enabled: false
@@ -982,7 +1018,20 @@ gateway:
     # -- Target CPU utilisation percentage for the gateway
     targetCPUUtilizationPercentage: 60
     # -- Target memory utilisation percentage for the gateway
-    targetMemoryUtilizationPercentage:
+    targetMemoryUtilizationPercentage: null
+    # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Resource, Object or External metrics)
+    customMetrics: []
+    # - type: Object
+    #   object:
+    #     metric:
+    #       name: requests-per-second
+    #     describedObject:
+    #       apiVersion: networking.k8s.io/v1
+    #       kind: Ingress
+    #       name: main-route
+    #     target:
+    #       type: Value
+    #       value: 10k
     behavior:
       # -- Enable autoscaling behaviours
       enabled: false