Skip to content

Commit

Permalink
Merge pull request #318 from klihub/devel/config-manager/nri-plugin-t…
Browse files Browse the repository at this point in the history
…imeouts

config-manager: allow configuring NRI timeouts.
  • Loading branch information
fmuyassarov authored May 21, 2024
2 parents 712848d + d63bba3 commit 1f45c70
Show file tree
Hide file tree
Showing 28 changed files with 568 additions and 145 deletions.
106 changes: 98 additions & 8 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bufio"
"bytes"
"context"
"flag"
"fmt"
"os"
"time"
Expand All @@ -43,7 +44,26 @@ var (
log = logrus.StandardLogger()
)

type nriConfig struct {
registrationTimeout string
requestTimeout string
}

func main() {
var cfg nriConfig

flag.StringVar(&cfg.requestTimeout,
"nri-plugin-request-timeout", "", "NRI plugin request timeout to patch, as time.Duration")
flag.StringVar(&cfg.registrationTimeout,
"nri-plugin-registration-timeout", "", "NRI plugin registration timeout to patch, as time.Duration")

flag.Parse()

err := cfg.check()
if err != nil {
log.Fatalf("invalid NRI configuration requested: %v", err)
}

unit, conn, err := detectRuntime()
if err != nil {
log.Fatalf("failed to autodetect container runtime: %v", err)
Expand All @@ -52,9 +72,9 @@ func main() {

switch unit {
case containerdUnit:
err = enableNriForContainerd()
err = configureNriForContainerd(&cfg)
case crioUnit:
err = enableNriForCrio()
err = configureNriForCrio(&cfg)
default:
log.Fatalf("unknown container runtime %q", unit)
}
Expand Down Expand Up @@ -83,14 +103,14 @@ func main() {
log.Println("enabled NRI for", unit)
}

func enableNriForContainerd() error {
log.Infof("enabling NRI in containerd configuration...")
func configureNriForContainerd(cfg *nriConfig) error {
log.Infof("configuring NRI for containerd...")
tomlMap, err := readConfig(containerdConfigFile)
if err != nil {
return fmt.Errorf("error reading TOML file: %w", err)
}

updatedTomlMap := updateContainerdConfig(tomlMap)
updatedTomlMap := updateContainerdConfig(tomlMap, cfg)

err = writeToContainerdConfig(containerdConfigFile, updatedTomlMap)
if err != nil {
Expand All @@ -99,8 +119,8 @@ func enableNriForContainerd() error {
return nil
}

func enableNriForCrio() error {
log.Infof("enabling NRI in CRI-O configuration...")
func configureNriForCrio(cfg *nriConfig) error {
log.Infof("configuring NRI for CRI-O...")
f, err := os.Create(crioConfigFile)
if err != nil {
return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
Expand All @@ -111,6 +131,12 @@ func enableNriForCrio() error {
if err != nil {
return fmt.Errorf("error writing a drop-in file for CRI-O: %w", err)
}

err = cfg.writeCrioConfig(f)
if err != nil {
return fmt.Errorf("error writing NRI configuration for CRI-O: %w", err)
}

return nil
}

Expand Down Expand Up @@ -149,7 +175,7 @@ func readConfig(file string) (map[string]interface{}, error) {
return tomlMap, nil
}

func updateContainerdConfig(config map[string]interface{}) map[string]interface{} {
func updateContainerdConfig(config map[string]interface{}, cfg *nriConfig) map[string]interface{} {
plugins, exists := config["plugins"].(map[string]interface{})
if !exists {
log.Println("top level plugins section not found, adding it to enable NRI...")
Expand All @@ -165,9 +191,73 @@ func updateContainerdConfig(config map[string]interface{}) map[string]interface{
}

nri["disable"] = false

cfg.updateContainerdConfig(config)

return config
}

func (cfg *nriConfig) check() error {
switch {
case cfg.registrationTimeout == "" && cfg.requestTimeout == "":
return nil
case cfg.registrationTimeout != "" && cfg.requestTimeout == "":
return fmt.Errorf("NRI plugin registration timeout set without request timeout")
case cfg.registrationTimeout == "" && cfg.requestTimeout != "":
return fmt.Errorf("NRI plugin request timeout set without registration timeout")
}

register, err := time.ParseDuration(cfg.registrationTimeout)
if err != nil {
return fmt.Errorf("invalid plugin registration timeout: %w", err)
}
request, err := time.ParseDuration(cfg.requestTimeout)
if err != nil {
return fmt.Errorf("invalid plugin request timeout: %w", err)
}

if register <= request {
return fmt.Errorf("NRI plugin registration timeout (%s) must be > request timeout (%s)",
register, request)
}

return nil
}

func (cfg *nriConfig) writeCrioConfig(f *os.File) error {
const (
registrationTimeout = "nri_plugin_registration_timeout"
requestTimeout = "nri_plugin_request_timeout"
)
if cfg.registrationTimeout != "" {
key, value := registrationTimeout, "\""+cfg.registrationTimeout+"\""
if _, err := f.WriteString(key + " = " + value + "\n"); err != nil {
return err
}
}
if cfg.requestTimeout != "" {
key, value := requestTimeout, "\""+cfg.requestTimeout+"\""
if _, err := f.WriteString(key + " = " + value + "\n"); err != nil {
return err
}
}
return nil
}

func (cfg *nriConfig) updateContainerdConfig(tomlCfg map[string]interface{}) {
const (
registrationTimeout = "plugin_registration_timeout"
requestTimeout = "plugin_request_timeout"
)

if cfg.registrationTimeout != "" {
tomlCfg[registrationTimeout] = cfg.registrationTimeout
}
if cfg.requestTimeout != "" {
tomlCfg[requestTimeout] = cfg.requestTimeout
}
}

func detectRuntime() (string, *dbus.Conn, error) {
log.Infof("setting up D-Bus connection...")
conn, err := dbus.NewSystemConnectionContext(context.Background())
Expand Down
19 changes: 17 additions & 2 deletions config/crd/bases/config.nri_nriplugindeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,23 @@ spec:
nri:
type: object
properties:
patchRuntimeConfig:
type: boolean
runtime:
type: object
properties:
patchConfig:
type: boolean
config:
type: object
required:
- pluginRegistrationTimeout
- pluginRequestTimeout
properties:
pluginRegistrationTimeout:
type: string
pattern: "^(([5-9])|([1-2][0-9])|(30))s$"
pluginRequestTimeout:
type: string
pattern: "^(([2-9])|([1-2][0-9])|(30))s$"
image:
type: object
description: image defines Plugin DeamonSet image name and tag
Expand Down
23 changes: 14 additions & 9 deletions deployment/helm/balloons/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ are disjoint CPU pools.
[these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd)
detailed instructions. You can optionally enable the NRI in containerd
using the Helm chart during the chart installation simply by setting the
`nri.patchRuntimeConfig` parameter. For instance,
`nri.runtime.patchConfig` parameter. For instance,

```sh
helm install my-balloons nri-plugins/nri-resource-policy-balloons --set nri.patchRuntimeConfig=true --namespace kube-system
helm install my-balloons nri-plugins/nri-resource-policy-balloons --set nri.runtime.patchConfig=true --namespace kube-system
```

Enabling `nri.patchRuntimeConfig` creates an init container to turn on
Enabling `nri.runtime.patchConfig` creates an init container to turn on
NRI feature in containerd and only after that proceed the plugin
installation.

Expand All @@ -34,10 +34,10 @@ are disjoint CPU pools.
[these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table)
detailed instructions. You can optionally enable the NRI in CRI-O using
the Helm chart during the chart installation simply by setting the
`nri.patchRuntimeConfig` parameter. For instance,
`nri.runtime.patchConfig` parameter. For instance,

```sh
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.patchRuntimeConfig=true
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.runtime.patchConfig=true
```

## Installing the Chart
Expand All @@ -57,14 +57,17 @@ values.yaml file and provide it using the `-f` flag. For example:

```sh
# Install the balloons plugin with custom values provided using the --set option
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.patchRuntimeConfig=true
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.runtime.patchConfig=true
```

```sh
# Install the balloons plugin with custom values specified in a custom values.yaml file
cat <<EOF > myPath/values.yaml
nri:
patchRuntimeConfig: true
runtime:
patchConfig: true
plugin:
index: 10
tolerations:
- key: "node-role.kubernetes.io/control-plane"
Expand Down Expand Up @@ -99,8 +102,10 @@ customize with their own values, along with the default values.
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | see [helm chart values](tree:/deployment/helm/balloons/values.yaml) for the default configuration | plugin configuration data |
| `configGroupLabel` | config.nri/group | node label for grouping configuration |
| `nri.patchRuntimeConfig` | false | enable NRI in containerd or CRI-O |
| `nri.pluginIndex` | 90 | NRI plugin index to register with |
| `nri.runtime.config.pluginRegistrationTimeout` | "" | set NRI plugin registration timeout in NRI config of containerd or CRI-O |
| `nri.runtime.config.pluginRequestTimeout` | "" | set NRI plugin request timeout in NRI config of containerd or CRI-O |
| `nri.runtime.patchConfig` | false | patch NRI configuration in containerd or CRI-O |
| `nri.plugin.index` | 90 | NRI plugin index to register with
| `initImage.name` | [ghcr.io/containers/nri-plugins/config-manager](https://ghcr.io/containers/nri-plugins/config-manager) | init container image name |
| `initImage.tag` | unstable | init container image tag |
| `initImage.pullPolicy` | Always | init container image pull policy |
Expand Down
13 changes: 10 additions & 3 deletions deployment/helm/balloons/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,16 @@ spec:
{{- with .Values.nodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.nri.patchRuntimeConfig }}
{{- if .Values.nri.runtime.patchConfig }}
initContainers:
- name: patch-runtime
{{- if (not (or (eq .Values.nri.runtime.config nil) (eq .Values.nri.runtime.config.pluginRegistrationTimeout ""))) }}
args:
- -nri-plugin-registration-timeout
- {{ .Values.nri.runtime.config.pluginRegistrationTimeout }}
- -nri-plugin-request-timeout
- {{ .Values.nri.runtime.config.pluginRequestTimeout }}
{{- end }}
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
volumeMounts:
Expand All @@ -55,7 +62,7 @@ spec:
- -metrics-interval
- 5s
- --nri-plugin-index
- "{{ .Values.nri.pluginIndex | int | printf "%02d" }}"
- "{{ .Values.nri.plugin.index | int | printf "%02d" }}"
{{- if .Values.configGroupLabel }}
- --config-group-label
- {{ .Values.configGroupLabel }}
Expand Down Expand Up @@ -116,7 +123,7 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchRuntimeConfig }}
{{- if .Values.nri.runtime.patchConfig }}
- name: containerd-config
hostPath:
path: /etc/containerd/
Expand Down
51 changes: 43 additions & 8 deletions deployment/helm/balloons/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,52 @@
"nri": {
"type": "object",
"required": [
"patchRuntimeConfig",
"pluginIndex"
"plugin",
"runtime"
],
"properties": {
"patchRuntimeConfig": {
"type": "boolean"
"plugin": {
"type": "object",
"required": [
"index"
],
"properties": {
"index": {
"type": "integer",
"minimum": 0,
"maximum": 99
}
}
},
"pluginIndex": {
"type": "integer",
"minimum": 0,
"maximum": 99
"runtime": {
"type": "object",
"required": [
"patchConfig"
],
"properties": {
"patchConfig": {
"type": "boolean"
},
"config": {
"type": "object",
"required": [
"pluginRegistrationTimeout",
"pluginRequestTimeout"
],
"properties": {
"pluginRegistrationTimeout": {
"type": "string",
"$comment": "allowed range is 5-30s",
"pattern": "^(([5-9])|([1-2][0-9])|(30))s$"
},
"pluginRequestTimeout": {
"type": "string",
"$comment": "allowed range is 2-30s",
"pattern": "^(([2-9])|([1-2][0-9])|(30))s$"
}
}
}
}
}
}
},
Expand Down
9 changes: 7 additions & 2 deletions deployment/helm/balloons/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@ resources:
memory: 512Mi

nri:
patchRuntimeConfig: false
pluginIndex: 90
plugin:
index: 90
runtime:
patchConfig: false
# config:
# pluginRegistrationTimeout: 5s
# pluginRequestTimeout: 2s

initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-config-manager
Expand Down
Loading

0 comments on commit 1f45c70

Please sign in to comment.