From 0eb92ef5910b9c9e497a96a7f52f148db4216e0f Mon Sep 17 00:00:00 2001 From: alperencelik Date: Sun, 26 Nov 2023 16:54:50 +0300 Subject: [PATCH 1/2] Add some custom dashboards --- PROJECT | 2 + grafana/controller-resources-metrics.json | 306 ++++++++ grafana/controller-runtime-metrics.json | 710 ++++++++++++++++++ grafana/custom-metrics/config.yaml | 24 + .../custom-metrics-dashboard.json | 299 ++++++++ 5 files changed, 1341 insertions(+) create mode 100644 grafana/controller-resources-metrics.json create mode 100644 grafana/controller-runtime-metrics.json create mode 100644 grafana/custom-metrics/config.yaml create mode 100644 grafana/custom-metrics/custom-metrics-dashboard.json diff --git a/PROJECT b/PROJECT index bda3032..6817898 100644 --- a/PROJECT +++ b/PROJECT @@ -6,6 +6,8 @@ domain: alperen.cloud layout: - go.kubebuilder.io/v4 multigroup: true +plugins: + grafana.kubebuilder.io/v1-alpha: {} projectName: kubemox repo: github.com/alperencelik/kubemox resources: diff --git a/grafana/controller-resources-metrics.json b/grafana/controller-resources-metrics.json new file mode 100644 index 0000000..629e0d3 --- /dev/null +++ b/grafana/controller-resources-metrics.json @@ -0,0 +1,306 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "rate(process_cpu_seconds_total{job=\"$job\", namespace=\"$namespace\", pod=\"$pod\"}[5m]) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Pod: {{pod}} | Container: {{container}}", + "refId": "A", + "step": 10 + } + ], + "title": "Controller CPU Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "process_resident_memory_bytes{job=\"$job\", namespace=\"$namespace\", pod=\"$pod\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Pod: {{pod}} | Container: {{container}}", + "refId": "A", + "step": 10 + } + ], + "title": "Controller Memory Usage", + "type": "timeseries" + } + ], + "refresh": "", + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "observability", + "value": "observability" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "hide": 2, + "includeAll": true, + "label": "pod", + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Controller-Resources-Metrics", + "weekStart": "" +} diff --git a/grafana/controller-runtime-metrics.json b/grafana/controller-runtime-metrics.json new file mode 100644 index 0000000..70023a4 --- /dev/null +++ b/grafana/controller-runtime-metrics.json @@ -0,0 +1,710 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Reconciliation Metrics", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of reconciliations per controller", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cpm" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(controller_runtime_reconcile_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, pod)", + "interval": "", + "legendFormat": "{{instance}} {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Reconciliation Count Per Controller", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of reconciliation errors per controller", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cpm" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(controller_runtime_reconcile_errors_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, pod)", + "interval": "", + "legendFormat": "{{instance}} {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Reconciliation Error Count Per Controller", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 11, + "panels": [], + "title": "Work Queue Metrics", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "How long in seconds an item stays in workqueue before being requested", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "max", + "mean" + ], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "interval": "", + "legendFormat": "P50 {{name}} {{instance}} ", + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "hide": false, + "interval": "", + "legendFormat": "P90 {{name}} {{instance}} ", + "refId": "B" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "hide": false, + "interval": "", + "legendFormat": "P99 {{name}} {{instance}} ", + "refId": "C" + } + ], + "title": "Seconds For Items Stay In Queue (before being requested) (P50, P90, P99)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum(rate(workqueue_adds_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name)", + "interval": "", + "legendFormat": "{{name}} {{instance}}", + "refId": "A" + } + ], + "title": "Work Queue Add Rate", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "How long in seconds processing an item from workqueue takes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "max", + "mean" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "interval": "", + "legendFormat": "P50 {{name}} {{instance}} ", + "refId": "A" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "hide": false, + "interval": "", + "legendFormat": "P90 {{name}} {{instance}} ", + "refId": "B" + }, + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "hide": false, + "interval": "", + "legendFormat": "P99 {{name}} {{instance}} ", + "refId": "C" + } + ], + "title": "Seconds Processing Items From WorkQueue (P50, P90, P99)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of retries handled by workqueue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum(rate(workqueue_retries_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name)", + "interval": "", + "legendFormat": "{{name}} {{instance}} ", + "refId": "A" + } + ], + "title": "Work Queue Retries Rate", + "type": "timeseries" + } + ], + "refresh": "", + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "hide": 2, + "includeAll": true, + "label": "pod", + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Controller-Runtime-Metrics", + "weekStart": "" +} diff --git a/grafana/custom-metrics/config.yaml b/grafana/custom-metrics/config.yaml new file mode 100644 index 0000000..da7d10f --- /dev/null +++ b/grafana/custom-metrics/config.yaml @@ -0,0 +1,24 @@ +--- +customMetrics: + - metric: virtualmachine_cpu_cores + type: gauge + unit: none + expr: sum by (name, namespace) (virtualmachine_cpu_cores) + - metric: virtualmachine_memory + type: gauge + unit: megabytes + expr: sum by (name, namespace) (virtualmachine_memory) + +# - metric: # Raw custom metric (required) +# type: # Metric type: counter/gauge/histogram (required) +# expr: # Prom_ql for the metric (optional) +# unit: # Unit of measurement, examples: s,none,bytes,percent,etc. (optional) +# +# +# Example: +# --- +# customMetrics: +# - metric: foo_bar +# unit: none +# type: histogram +# expr: histogram_quantile(0.90, sum by(instance, le) (rate(foo_bar{job=\"$job\", namespace=\"$namespace\"}[5m]))) diff --git a/grafana/custom-metrics/custom-metrics-dashboard.json b/grafana/custom-metrics/custom-metrics-dashboard.json new file mode 100644 index 0000000..1836877 --- /dev/null +++ b/grafana/custom-metrics/custom-metrics-dashboard.json @@ -0,0 +1,299 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (virtualmachine_cpu_cores)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "virtualmachine_cpu_cores (gauge)", + "type": "timeseries" + }, + + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "megabytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (virtualmachine_memory)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "virtualmachine_memory (gauge)", + "type": "timeseries" + } + ], + "refresh": "", + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "observability", + "value": "observability" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "hide": 2, + "includeAll": true, + "label": "pod", + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\", job=~\"$job\"}, pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Custom-Metrics", + "weekStart": "" +} From 770f53cea2a50a12e15d91bcd3b165c8364da853 Mon Sep 17 00:00:00 2001 From: alperencelik Date: Tue, 12 Dec 2023 22:28:24 +0300 Subject: [PATCH 2/2] feat: Add more custom metrics --- grafana/custom-metrics/config.yaml | 20 +- .../custom-metrics-dashboard.json | 352 +++++++++++++++++- .../managedvirtualmachine_controller.go | 3 + .../proxmox/virtualmachine_controller.go | 4 + pkg/proxmox/proxmox.go | 9 + service-monitor.yaml | 18 + 6 files changed, 402 insertions(+), 4 deletions(-) create mode 100644 service-monitor.yaml diff --git a/grafana/custom-metrics/config.yaml b/grafana/custom-metrics/config.yaml index da7d10f..790b74b 100644 --- a/grafana/custom-metrics/config.yaml +++ b/grafana/custom-metrics/config.yaml @@ -1,13 +1,29 @@ --- customMetrics: + - metric: kubemox_virtualmachine_count + type: gauge + unit: none + expr: sum by (name, namespace) (kubemox_virtual_machine_count) + - metric: kubemox_managed_virtual_machine_count + type: gauge + unit: none + expr: sum by (name, namespace) (kubemox_managed_virtual_machine_count) - metric: virtualmachine_cpu_cores type: gauge unit: none - expr: sum by (name, namespace) (virtualmachine_cpu_cores) + expr: sum by (name, namespace) (kubemox_virtual_machine_cpu_cores) - metric: virtualmachine_memory type: gauge unit: megabytes - expr: sum by (name, namespace) (virtualmachine_memory) + expr: sum by (name, namespace) (kubemox_virtual_machine_memory) + - metric: managed_virtualmachine_cpu_cores + type: gauge + unit: none + expr: sum by (name, namespace) (kubemox_managed_virtual_machine_cpu_cores) + - metric: managed_virtualmachine_memory + type: gauge + unit: megabytes + expr: sum by (name, namespace) (kubemox_managed_virtual_machine_memory) # - metric: # Raw custom metric (required) # type: # Metric type: counter/gauge/histogram (required) diff --git a/grafana/custom-metrics/custom-metrics-dashboard.json b/grafana/custom-metrics/custom-metrics-dashboard.json index 1836877..304f5ea 100644 --- a/grafana/custom-metrics/custom-metrics-dashboard.json +++ b/grafana/custom-metrics/custom-metrics-dashboard.json @@ -117,7 +117,181 @@ { "datasource": "${DS_PROMETHEUS}", "exemplar": true, - "expr": "sum by (name, namespace) (virtualmachine_cpu_cores)", + "expr": "sum by (name, namespace) (kubemox_virtual_machine_count)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "kubemox_virtualmachine_count (gauge)", + "type": "timeseries" + }, + + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (kubemox_managed_virtual_machine_count)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "kubemox_managed_virtual_machine_count (gauge)", + "type": "timeseries" + }, + + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (kubemox_virtual_machine_cpu_cores)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -204,7 +378,7 @@ { "datasource": "${DS_PROMETHEUS}", "exemplar": true, - "expr": "sum by (name, namespace) (virtualmachine_memory)", + "expr": "sum by (name, namespace) (kubemox_virtual_machine_memory)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -214,6 +388,180 @@ ], "title": "virtualmachine_memory (gauge)", "type": "timeseries" + }, + + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (kubemox_managed_virtual_machine_cpu_cores)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "managed_virtualmachine_cpu_cores (gauge)", + "type": "timeseries" + }, + + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "megabytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24 + }, + "interval": "1m", + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "sum by (name, namespace) (kubemox_managed_virtual_machine_memory)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "title": "managed_virtualmachine_memory (gauge)", + "type": "timeseries" } ], "refresh": "", diff --git a/internal/controller/proxmox/managedvirtualmachine_controller.go b/internal/controller/proxmox/managedvirtualmachine_controller.go index b82d88c..03c506f 100644 --- a/internal/controller/proxmox/managedvirtualmachine_controller.go +++ b/internal/controller/proxmox/managedvirtualmachine_controller.go @@ -134,6 +134,9 @@ func (r *ManagedVirtualMachineReconciler) SetupWithManager(mgr ctrl.Manager) err if err != nil { log.Log.Info(fmt.Sprintf("ManagedVM %v could not be created", ManagedVM)) } + // Add metrics + metrics.SetManagedVirtualMachineCPUCores(managedVM.Name, managedVM.Namespace, float64(managedVM.Spec.Cores)) + metrics.SetManagedVirtualMachineMemory(managedVM.Name, managedVM.Namespace, float64(managedVM.Spec.Memory)) } metrics.IncManagedVirtualMachineCount() } diff --git a/internal/controller/proxmox/virtualmachine_controller.go b/internal/controller/proxmox/virtualmachine_controller.go index 381535f..bee70d2 100644 --- a/internal/controller/proxmox/virtualmachine_controller.go +++ b/internal/controller/proxmox/virtualmachine_controller.go @@ -136,11 +136,15 @@ func (r *VirtualMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reque proxmox.CreateVMFromTemplate(vm) proxmox.StartVM(vmName, nodeName) kubernetes.CreateVMKubernetesEvent(vm, Clientset, "Created") + // metrics.SetVirtualMachineCPUCores(vmName, vm.Namespace, float64(vm.Spec.Template.Cores)) + // metrics.SetVirtualMachineMemory(vmName, vm.Namespace, float64(vm.Spec.Template.Memory)) } else if vmType == "scratch" { kubernetes.CreateVMKubernetesEvent(vm, Clientset, "Creating") proxmox.CreateVMFromScratch(vm) proxmox.StartVM(vmName, nodeName) kubernetes.CreateVMKubernetesEvent(vm, Clientset, "Created") + // metrics.SetVirtualMachineCPUCores(vmName, vm.Namespace, float64(vm.Spec.VmSpec.Cores)) + // metrics.SetVirtualMachineMemory(vmName, vm.Namespace, float64(vm.Spec.VmSpec.Memory)) } else { Log.Info(fmt.Sprintf("VM %s doesn't have any template or vmSpec defined", vmName)) } diff --git a/pkg/proxmox/proxmox.go b/pkg/proxmox/proxmox.go index 19363ea..8ead941 100644 --- a/pkg/proxmox/proxmox.go +++ b/pkg/proxmox/proxmox.go @@ -14,6 +14,7 @@ import ( proxmoxv1alpha1 "github.com/alperencelik/kubemox/api/proxmox/v1alpha1" kubernetes "github.com/alperencelik/kubemox/pkg/kubernetes" + "github.com/alperencelik/kubemox/pkg/metrics" proxmox "github.com/luthermonson/go-proxmox" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -733,6 +734,8 @@ func UpdateVM(vmName, nodeName string, vm *proxmoxv1alpha1.VirtualMachine) { DiskSize = strconv.Itoa(vm.Spec.Template.Disk[0].Size) + "G" Disk = vm.Spec.Template.Disk[0].Type + "0" DiskSizeInt = vm.Spec.Template.Disk[0].Size + metrics.SetVirtualMachineCPUCores(vmName, vm.Namespace, float64(vm.Spec.Template.Cores)) + metrics.SetVirtualMachineMemory(vmName, vm.Namespace, float64(vm.Spec.Template.Memory)) } else if CheckVMType(vm) == "scratch" { cpuOption.Value = vm.Spec.VmSpec.Cores memoryOption.Value = uint64(vm.Spec.VmSpec.Memory) @@ -741,6 +744,8 @@ func UpdateVM(vmName, nodeName string, vm *proxmoxv1alpha1.VirtualMachine) { DiskSize = DiskValue + "G" DiskSizeInt, _ = strconv.Atoi(DiskValue) Disk = vm.Spec.VmSpec.Disk.Name + metrics.SetVirtualMachineCPUCores(vmName, vm.Namespace, float64(vm.Spec.VmSpec.Cores)) + metrics.SetVirtualMachineMemory(vmName, vm.Namespace, float64(vm.Spec.VmSpec.Memory)) } else { log.Log.Info(fmt.Sprintf("VM %s doesn't have any template or vmSpec defined", vmName)) } @@ -894,6 +899,10 @@ func UpdateManagedVM(managedVMName, nodeName string, managedVM *proxmoxv1alpha1. // Revert the update since it's not possible to shrink disk managedVM.Spec.Disk = int(VirtualMachineMaxDisk) } + // Add metrics + metrics.SetManagedVirtualMachineCPUCores(managedVMName, managedVM.Namespace, float64(managedVM.Spec.Cores)) + metrics.SetManagedVirtualMachineMemory(managedVMName, managedVM.Namespace, float64(managedVM.Spec.Memory)) + if VirtualMachine.CPUs != managedVM.Spec.Cores || VirtualMachineMem != uint64(managedVM.Spec.Memory) { // Update VM // log.Log.Info(fmt.Sprintf("The comparison between CR and external resource: CPU: %d, %d || Memory: %d, %d", managedVM.Spec.Cores, VirtualMachine.CPUs, managedVM.Spec.Memory, VirtualMachineMem)) diff --git a/service-monitor.yaml b/service-monitor.yaml new file mode 100644 index 0000000..904d36c --- /dev/null +++ b/service-monitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kubemox-monitor + labels: + prometheus: kube-prom-stack + release: kube-prom-stack +spec: + selector: + matchLabels: + app.kubernetes.io/name: kubemox + endpoints: + - port: http + path: /metrics + interval: 30s + namespaceSelector: + matchNames: + - default