diff --git a/cmd/plugins/balloons/policy/balloons-policy.go b/cmd/plugins/balloons/policy/balloons-policy.go index 8a8068f62..490d43fe3 100644 --- a/cmd/plugins/balloons/policy/balloons-policy.go +++ b/cmd/plugins/balloons/policy/balloons-policy.go @@ -53,6 +53,9 @@ const ( // virtDevReservedCpus is the name of a virtual device close to // CPUs that are configured as ReservedResources. virtDevReservedCpus = "reserved CPUs" + // virtDevIsolatedCpus is the name of a virtual device close to + // host isolated CPUs. + virtDevIsolatedCpus = "isolated CPUs" ) // balloons contains configuration and runtime attributes of the balloons policy @@ -558,6 +561,7 @@ func (p *balloons) newBalloon(blnDef *BalloonDef, confCpus bool) (*Balloon, erro preferFarFromDevices: blnDef.PreferFarFromDevices, virtDevCpusets: map[string][]cpuset.CPUSet{ virtDevReservedCpus: {p.reserved}, + virtDevIsolatedCpus: {p.options.System.Isolated()}, }, } if blnDef.AllocatorTopologyBalancing != nil { @@ -994,6 +998,9 @@ func (p *balloons) validateConfig(bpoptions *BalloonsOptions) error { blnDef.Name, blnDef.MaxBalloons) } } + if blnDef.PreferIsolCpus && blnDef.ShareIdleCpusInSame != "" { + log.Warn("WARNING: using PreferIsolCpus with ShareIdleCpusInSame is highly discouraged") + } } return nil } @@ -1030,6 +1037,7 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error { if err = p.validateConfig(bpoptions); err != nil { return balloonsError("invalid configuration: %w", err) } + p.fillCloseToDevices(bpoptions.BalloonDefs) p.fillFarFromDevices(bpoptions.BalloonDefs) // Preparation and configuration validation is now done @@ -1189,6 +1197,14 @@ func (p *balloons) fillBuiltinBalloonDefs(bpoptions *BalloonsOptions) (*BalloonD return reservedBalloonDef, defaultBalloonDef, nil } +func (p *balloons) fillCloseToDevices(blnDefs []*BalloonDef) { + for _, blnDef := range blnDefs { + if blnDef.PreferIsolCpus { + blnDef.PreferCloseToDevices = append(blnDef.PreferCloseToDevices, virtDevIsolatedCpus) + } + 
} +} + // fillFarFromDevices adds BalloonDefs implicit device anti-affinities // towards devices that other BalloonDefs prefer to be close to. func (p *balloons) fillFarFromDevices(blnDefs []*BalloonDef) { @@ -1201,6 +1217,9 @@ func (p *balloons) fillFarFromDevices(blnDefs []*BalloonDef) { // beginning of the list will be more effectively avoided than // devices later in the list. avoidDevs := []string{} + if p.options.System.Isolated().Size() != 0 { + avoidDevs = append(avoidDevs, virtDevIsolatedCpus) + } for _, blnDef := range blnDefs { for _, closeDev := range blnDef.PreferCloseToDevices { if _, ok := devDefClose[closeDev]; !ok { @@ -1361,6 +1380,7 @@ func (p *balloons) shareIdleCpus(addCpus, removeCpus cpuset.CPUSet) []*Balloon { } } } + addCpus = addCpus.Difference(p.options.System.Isolated()) if addCpus.Size() > 0 { for blnIdx, bln := range p.balloons { topoLevel := bln.Def.ShareIdleCpusInSame diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index f2a1d3d44..6babf6d1c 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -195,6 +195,10 @@ spec: items: type: string type: array + preferIsolCpus: + default: false + description: 'PreferIsolCpus: prefer kernel isolated CPUs' + type: boolean preferNewBalloons: description: |- PreferNewBalloons: prefer creating new balloons over adding diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index f2a1d3d44..6babf6d1c 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -195,6 +195,10 @@ spec: items: type: string type: array + preferIsolCpus: + default: false + description: 'PreferIsolCpus: prefer kernel isolated CPUs' + type: boolean preferNewBalloons: description: |- PreferNewBalloons: prefer creating new
balloons over adding diff --git a/deployment/helm/balloons/values.yaml b/deployment/helm/balloons/values.yaml index 922211ddd..a73691dbe 100644 --- a/deployment/helm/balloons/values.yaml +++ b/deployment/helm/balloons/values.yaml @@ -21,6 +21,7 @@ config: minBalloons: 0 allocatorPriority: normal shareIdleCPUsInSame: system + preferIsolCpus: false reservedPoolNamespaces: - kube-system log: @@ -30,6 +31,8 @@ config: instrumentation: reportPeriod: 60s samplingRatePerMillion: 0 + httpEndpoint: :8891 + prometheusExport: true # configGroupLabel: config.nri/group diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go index ed44769cc..f5b73c17e 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go @@ -216,6 +216,9 @@ type BalloonDef struct { // TODO: PreferFarFromDevices is considered too untested for usage. Hence, // for the time being we prevent its usage through CRDs. PreferFarFromDevices []string `json:"-"` + // PreferIsolCpus: prefer kernel isolated CPUs + // +kubebuilder:default:=false + PreferIsolCpus bool `json:"preferIsolCpus,omitempty"` } // String stringifies a BalloonDef