Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

config-manager: allow configuring NRI timeouts. #318

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 98 additions & 8 deletions cmd/config-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bufio"
"bytes"
"context"
"flag"
"fmt"
"os"
"time"
Expand All @@ -43,7 +44,26 @@ var (
log = logrus.StandardLogger()
)

type nriConfig struct {
registrationTimeout string
requestTimeout string
}

func main() {
var cfg nriConfig

flag.StringVar(&cfg.requestTimeout,
"nri-plugin-request-timeout", "", "NRI plugin request timeout to patch, as time.Duration")
flag.StringVar(&cfg.registrationTimeout,
"nri-plugin-registration-timeout", "", "NRI plugin registration timeout to patch, as time.Duration")

flag.Parse()

err := cfg.check()
if err != nil {
log.Fatalf("invalid NRI configuration requested: %v", err)
}

unit, conn, err := detectRuntime()
if err != nil {
log.Fatalf("failed to autodetect container runtime: %v", err)
Expand All @@ -52,9 +72,9 @@ func main() {

switch unit {
case containerdUnit:
err = enableNriForContainerd()
err = configureNriForContainerd(&cfg)
case crioUnit:
err = enableNriForCrio()
err = configureNriForCrio(&cfg)
default:
log.Fatalf("unknown container runtime %q", unit)
}
Expand Down Expand Up @@ -83,14 +103,14 @@ func main() {
log.Println("enabled NRI for", unit)
}

func enableNriForContainerd() error {
log.Infof("enabling NRI in containerd configuration...")
func configureNriForContainerd(cfg *nriConfig) error {
log.Infof("configuring NRI for containerd...")
tomlMap, err := readConfig(containerdConfigFile)
if err != nil {
return fmt.Errorf("error reading TOML file: %w", err)
}

updatedTomlMap := updateContainerdConfig(tomlMap)
updatedTomlMap := updateContainerdConfig(tomlMap, cfg)

err = writeToContainerdConfig(containerdConfigFile, updatedTomlMap)
if err != nil {
Expand All @@ -99,8 +119,8 @@ func enableNriForContainerd() error {
return nil
}

func enableNriForCrio() error {
log.Infof("enabling NRI in CRI-O configuration...")
func configureNriForCrio(cfg *nriConfig) error {
log.Infof("configuring NRI for CRI-O...")
f, err := os.Create(crioConfigFile)
if err != nil {
return fmt.Errorf("error creating a drop-in file for CRI-O: %w", err)
Expand All @@ -111,6 +131,12 @@ func enableNriForCrio() error {
if err != nil {
return fmt.Errorf("error writing a drop-in file for CRI-O: %w", err)
}

err = cfg.writeCrioConfig(f)
if err != nil {
return fmt.Errorf("error writing NRI configuration for CRI-O: %w", err)
}

return nil
}

Expand Down Expand Up @@ -149,7 +175,7 @@ func readConfig(file string) (map[string]interface{}, error) {
return tomlMap, nil
}

func updateContainerdConfig(config map[string]interface{}) map[string]interface{} {
func updateContainerdConfig(config map[string]interface{}, cfg *nriConfig) map[string]interface{} {
plugins, exists := config["plugins"].(map[string]interface{})
if !exists {
log.Println("top level plugins section not found, adding it to enable NRI...")
Expand All @@ -165,9 +191,73 @@ func updateContainerdConfig(config map[string]interface{}) map[string]interface{
}

nri["disable"] = false

cfg.updateContainerdConfig(config)

return config
}

func (cfg *nriConfig) check() error {
switch {
case cfg.registrationTimeout == "" && cfg.requestTimeout == "":
return nil
case cfg.registrationTimeout != "" && cfg.requestTimeout == "":
return fmt.Errorf("NRI plugin registration timeout set without request timeout")
case cfg.registrationTimeout == "" && cfg.requestTimeout != "":
return fmt.Errorf("NRI plugin request timeout set without registration timeout")
}

register, err := time.ParseDuration(cfg.registrationTimeout)
if err != nil {
return fmt.Errorf("invalid plugin registration timeout: %w", err)
}
request, err := time.ParseDuration(cfg.requestTimeout)
if err != nil {
return fmt.Errorf("invalid plugin request timeout: %w", err)
}

if register <= request {
return fmt.Errorf("NRI plugin registration timeout (%s) must be > request timeout (%s)",
register, request)
}

return nil
}

func (cfg *nriConfig) writeCrioConfig(f *os.File) error {
const (
registrationTimeout = "nri_plugin_registration_timeout"
requestTimeout = "nri_plugin_request_timeout"
)
if cfg.registrationTimeout != "" {
klihub marked this conversation as resolved.
Show resolved Hide resolved
key, value := registrationTimeout, "\""+cfg.registrationTimeout+"\""
if _, err := f.WriteString(key + " = " + value + "\n"); err != nil {
return err
}
}
if cfg.requestTimeout != "" {
key, value := requestTimeout, "\""+cfg.requestTimeout+"\""
if _, err := f.WriteString(key + " = " + value + "\n"); err != nil {
return err
}
}
return nil
}

func (cfg *nriConfig) updateContainerdConfig(tomlCfg map[string]interface{}) {
const (
registrationTimeout = "plugin_registration_timeout"
requestTimeout = "plugin_request_timeout"
)
klihub marked this conversation as resolved.
Show resolved Hide resolved

if cfg.registrationTimeout != "" {
klihub marked this conversation as resolved.
Show resolved Hide resolved
tomlCfg[registrationTimeout] = cfg.registrationTimeout
}
if cfg.requestTimeout != "" {
tomlCfg[requestTimeout] = cfg.requestTimeout
}
}

func detectRuntime() (string, *dbus.Conn, error) {
log.Infof("setting up D-Bus connection...")
conn, err := dbus.NewSystemConnectionContext(context.Background())
Expand Down
19 changes: 17 additions & 2 deletions config/crd/bases/config.nri_nriplugindeployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,23 @@ spec:
nri:
type: object
properties:
patchRuntimeConfig:
type: boolean
runtime:
type: object
properties:
patchConfig:
type: boolean
fmuyassarov marked this conversation as resolved.
Show resolved Hide resolved
config:
type: object
required:
- pluginRegistrationTimeout
- pluginRequestTimeout
properties:
pluginRegistrationTimeout:
type: string
pattern: "^(([5-9])|([1-2][0-9])|(30))s$"
pluginRequestTimeout:
type: string
pattern: "^(([2-9])|([1-2][0-9])|(30))s$"
image:
type: object
description: image defines Plugin DeamonSet image name and tag
Expand Down
23 changes: 14 additions & 9 deletions deployment/helm/balloons/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ are disjoint CPU pools.
[these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd)
detailed instructions. You can optionally enable the NRI in containerd
using the Helm chart during the chart installation simply by setting the
`nri.patchRuntimeConfig` parameter. For instance,
`nri.runtime.patchConfig` parameter. For instance,

```sh
helm install my-balloons nri-plugins/nri-resource-policy-balloons --set nri.patchRuntimeConfig=true --namespace kube-system
helm install my-balloons nri-plugins/nri-resource-policy-balloons --set nri.runtime.patchConfig=true --namespace kube-system
```

Enabling `nri.patchRuntimeConfig` creates an init container to turn on
Enabling `nri.runtime.patchConfig` creates an init container to turn on
NRI feature in containerd and only after that proceed the plugin
installation.

Expand All @@ -34,10 +34,10 @@ are disjoint CPU pools.
[these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table)
detailed instructions. You can optionally enable the NRI in CRI-O using
the Helm chart during the chart installation simply by setting the
`nri.patchRuntimeConfig` parameter. For instance,
`nri.runtime.patchConfig` parameter. For instance,

```sh
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.patchRuntimeConfig=true
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.runtime.patchConfig=true
```

## Installing the Chart
Expand All @@ -57,14 +57,17 @@ values.yaml file and provide it using the `-f` flag. For example:

```sh
# Install the balloons plugin with custom values provided using the --set option
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.patchRuntimeConfig=true
helm install my-balloons nri-plugins/nri-resource-policy-balloons --namespace kube-system --set nri.runtime.patchConfig=true
```

```sh
# Install the balloons plugin with custom values specified in a custom values.yaml file
cat <<EOF > myPath/values.yaml
nri:
patchRuntimeConfig: true
runtime:
patchConfig: true
plugin:
index: 10

tolerations:
- key: "node-role.kubernetes.io/control-plane"
Expand Down Expand Up @@ -99,8 +102,10 @@ customize with their own values, along with the default values.
| `hostPort` | 8891 | metrics port to expose on the host |
| `config` | see [helm chart values](tree:/deployment/helm/balloons/values.yaml) for the default configuration | plugin configuration data |
| `configGroupLabel` | config.nri/group | node label for grouping configuration |
| `nri.patchRuntimeConfig` | false | enable NRI in containerd or CRI-O |
| `nri.pluginIndex` | 90 | NRI plugin index to register with |
| `nri.runtime.config.pluginRegistrationTimeout` | "" | set NRI plugin registration timeout in NRI config of containerd or CRI-O |
| `nri.runtime.config.pluginRequestTimeout` | "" | set NRI plugin request timeout in NRI config of containerd or CRI-O |
| `nri.runtime.patchConfig` | false | patch NRI configuration in containerd or CRI-O |
| `nri.plugin.index` | 90 | NRI plugin index to register with
| `initImage.name` | [ghcr.io/containers/nri-plugins/config-manager](https://ghcr.io/containers/nri-plugins/config-manager) | init container image name |
| `initImage.tag` | unstable | init container image tag |
| `initImage.pullPolicy` | Always | init container image pull policy |
Expand Down
13 changes: 10 additions & 3 deletions deployment/helm/balloons/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,16 @@ spec:
{{- with .Values.nodeSelector }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.nri.patchRuntimeConfig }}
{{- if .Values.nri.runtime.patchConfig }}
initContainers:
- name: patch-runtime
{{- if (not (or (eq .Values.nri.runtime.config nil) (eq .Values.nri.runtime.config.pluginRegistrationTimeout ""))) }}
args:
klihub marked this conversation as resolved.
Show resolved Hide resolved
- -nri-plugin-registration-timeout
- {{ .Values.nri.runtime.config.pluginRegistrationTimeout }}
- -nri-plugin-request-timeout
- {{ .Values.nri.runtime.config.pluginRequestTimeout }}
{{- end }}
image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }}
volumeMounts:
Expand All @@ -55,7 +62,7 @@ spec:
- -metrics-interval
- 5s
- --nri-plugin-index
- "{{ .Values.nri.pluginIndex | int | printf "%02d" }}"
- "{{ .Values.nri.plugin.index | int | printf "%02d" }}"
{{- if .Values.configGroupLabel }}
- --config-group-label
- {{ .Values.configGroupLabel }}
Expand Down Expand Up @@ -116,7 +123,7 @@ spec:
hostPath:
path: /var/run/nri
type: DirectoryOrCreate
{{- if .Values.nri.patchRuntimeConfig }}
{{- if .Values.nri.runtime.patchConfig }}
- name: containerd-config
hostPath:
path: /etc/containerd/
Expand Down
51 changes: 43 additions & 8 deletions deployment/helm/balloons/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,52 @@
"nri": {
"type": "object",
"required": [
"patchRuntimeConfig",
"pluginIndex"
"plugin",
"runtime"
],
"properties": {
"patchRuntimeConfig": {
"type": "boolean"
"plugin": {
"type": "object",
"required": [
"index"
],
"properties": {
"index": {
"type": "integer",
"minimum": 0,
"maximum": 99
}
}
},
"pluginIndex": {
"type": "integer",
"minimum": 0,
"maximum": 99
"runtime": {
"type": "object",
"required": [
"patchConfig"
],
"properties": {
"patchConfig": {
"type": "boolean"
},
"config": {
"type": "object",
"required": [
"pluginRegistrationTimeout",
"pluginRequestTimeout"
],
"properties": {
"pluginRegistrationTimeout": {
"type": "string",
"$comment": "allowed range is 5-30s",
"pattern": "^(([5-9])|([1-2][0-9])|(30))s$"
},
"pluginRequestTimeout": {
"type": "string",
"$comment": "allowed range is 2-30s",
"pattern": "^(([2-9])|([1-2][0-9])|(30))s$"
}
}
}
}
}
}
},
Expand Down
9 changes: 7 additions & 2 deletions deployment/helm/balloons/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@ resources:
memory: 512Mi

nri:
patchRuntimeConfig: false
pluginIndex: 90
plugin:
index: 90
runtime:
patchConfig: false
# config:
# pluginRegistrationTimeout: 5s
# pluginRequestTimeout: 2s

initContainerImage:
name: ghcr.io/containers/nri-plugins/nri-config-manager
Expand Down
Loading