From c98ff9713af8ef55d1ce60f85f195fa7154e8f85 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Wed, 13 Mar 2024 00:03:53 +0200 Subject: [PATCH] WiP: topology-aware: support for CPU allocator priorities. Add support for configurable default and annotated per-container CPU priority preferences. These determine the preferred priority for CPUs when doing fully or partially exclusive CPU allocation. Priorities are calculated for such allocations and passed on to the CPU allocator which then tries to fulfill these preferences. It should now be possible to configure the policy to allocate (exclusive) E-cores by default and P-cores to containers which are annotated so, or to do it the other way around. Signed-off-by: Krisztian Litkey --- .../topology-aware/policy/pod-preferences.go | 66 ++++++++++++++----- .../topology-aware/policy/resources.go | 40 +++++++++-- .../policy/topology-aware-policy.go | 4 +- .../config.nri_topologyawarepolicies.yaml | 13 ++++ .../config.nri_topologyawarepolicies.yaml | 13 ++++ .../resmgr/policy/topologyaware/config.go | 32 +++++++++ 6 files changed, 147 insertions(+), 21 deletions(-) diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go index af1bb1b90..ce35215c9 100644 --- a/cmd/plugins/topology-aware/policy/pod-preferences.go +++ b/cmd/plugins/topology-aware/policy/pod-preferences.go @@ -42,6 +42,8 @@ const ( keyColdStartPreference = "cold-start" // annotation key for reserved pools keyReservedCPUsPreference = "prefer-reserved-cpus" + // annotation key for CPU Priority preference + keyCpuPriorityPreference = "prefer-cpu-priority" // effective annotation key for isolated CPU preference preferIsolatedCPUsKey = keyIsolationPreference + "." + kubernetes.ResmgrKeyNamespace @@ -53,6 +55,8 @@ const ( preferColdStartKey = keyColdStartPreference + "." + kubernetes.ResmgrKeyNamespace // annotation key for reserved pools preferReservedCPUsKey = keyReservedCPUsPreference + "." 
+ kubernetes.ResmgrKeyNamespace + // effective annotation key for CPU priority preference + preferCpuPriorityKey = keyCpuPriorityPreference + "." + kubernetes.ResmgrKeyNamespace ) // cpuClass is a type of CPU to allocate @@ -153,6 +157,36 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool) return preference, true } +// cpuPrioPreference returns the CPU priority preference for the given container +// and whether the container was explicitly annotated with this setting. +func cpuPrioPreference(pod cache.Pod, container cache.Container, fallback cpuPrio) (cpuPrio, bool) { + key := preferCpuPriorityKey + value, ok := pod.GetEffectiveAnnotation(key, container.GetName()) + + if !ok { + prio := fallback + log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), prio) + return prio, false + } + + if value == "default" { + prio := defaultPrio + log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), prio) + return prio, true + } + + prio, ok := cpuPrioByName[value] + if !ok { + log.Error("%s: invalid CPU priority preference %q", container.PrettyName(), value) + prio := fallback + log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), prio) + return prio, false + } + + log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), prio) + return prio, true +} + // memoryTypePreference returns what type of memory should be allocated for the container. // // If the effective annotations are not found, this function falls back to @@ -370,7 +404,7 @@ func checkReservedCPUsAnnotations(c cache.Container) (bool, bool) { // 2. fraction: amount of fractional CPU in milli-CPU // 3. isolate: (bool) whether to prefer isolated full CPUs // 4. cpuType: (cpuClass) class of CPU to allocate (reserved vs. 
normal) -func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass) { +func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass, cpuPrio) { // // CPU allocation preferences for a container consist of // @@ -439,20 +473,21 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in request := reqs.Requests[corev1.ResourceCPU] qosClass := pod.GetQOSClass() fraction := int(request.MilliValue()) + prio := defaultPrio // ignored for fractional allocations // easy cases: kube-system namespace, Burstable or BestEffort QoS class containers preferReserved, explicitReservation := checkReservedCPUsAnnotations(container) switch { case container.PreserveCpuResources(): - return 0, fraction, false, cpuPreserve + return 0, fraction, false, cpuPreserve, prio case preferReserved == true: - return 0, fraction, false, cpuReserved + return 0, fraction, false, cpuReserved, prio case checkReservedPoolNamespaces(namespace) && !explicitReservation: - return 0, fraction, false, cpuReserved + return 0, fraction, false, cpuReserved, prio case qosClass == corev1.PodQOSBurstable: - return 0, fraction, false, cpuNormal + return 0, fraction, false, cpuNormal, prio case qosClass == corev1.PodQOSBestEffort: - return 0, 0, false, cpuNormal + return 0, 0, false, cpuNormal, prio } // complex case: Guaranteed QoS class containers @@ -460,39 +495,40 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in fraction = fraction % 1000 preferIsolated, explicitIsolated := isolatedCPUsPreference(pod, container) preferShared, explicitShared := sharedCPUsPreference(pod, container) + prio, _ = cpuPrioPreference(pod, container, defaultPrio) // ignored for fractional allocations switch { // sub-core CPU request case cores == 0: - return 0, fraction, false, cpuNormal + return 0, fraction, false, cpuNormal, prio // 1 <= CPU request < 2 case cores < 2: // fractional allocation, 
potentially mixed if fraction > 0 { if preferShared { - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } - return cores, fraction, preferIsolated, cpuNormal + return cores, fraction, preferIsolated, cpuNormal, prio } // non-fractional allocation if preferShared && explicitShared { - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } - return cores, fraction, preferIsolated, cpuNormal + return cores, fraction, preferIsolated, cpuNormal, prio // CPU request >= 2 default: // fractional allocation, only mixed if explicitly annotated as unshared if fraction > 0 { if !preferShared && explicitShared { - return cores, fraction, preferIsolated && explicitIsolated, cpuNormal + return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio } - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } // non-fractional allocation if preferShared && explicitShared { - return 0, 1000 * cores, false, cpuNormal + return 0, 1000 * cores, false, cpuNormal, prio } - return cores, fraction, preferIsolated && explicitIsolated, cpuNormal + return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio } } diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go index b2e4ef210..3c5121cc5 100644 --- a/cmd/plugins/topology-aware/policy/resources.go +++ b/cmd/plugins/topology-aware/policy/resources.go @@ -28,6 +28,28 @@ import ( idset "github.com/intel/goresctrl/pkg/utils" ) +type ( + cpuPrio = cpuallocator.CPUPriority +) + +const ( + highPrio = cpuallocator.PriorityHigh + normalPrio = cpuallocator.PriorityNormal + lowPrio = cpuallocator.PriorityLow + nonePrio = cpuallocator.PriorityNone +) + +var ( + defaultPrio = nonePrio + + cpuPrioByName = map[string]cpuPrio{ + "high": highPrio, + "normal": normalPrio, + "low": lowPrio, + "none": nonePrio, 
+ } +) + // Supply represents avaialbe CPU and memory capacity of a node. type Supply interface { // GetNode returns the node supplying this capacity. @@ -95,6 +117,8 @@ type Request interface { String() string // CPUType returns the type of requested CPU. CPUType() cpuClass + // CPUPrio returns the preferred priority of requested CPU. + CPUPrio() cpuPrio // SetCPUType sets the type of requested CPU. SetCPUType(cpuType cpuClass) // FullCPUs return the number of full CPUs requested. @@ -223,6 +247,7 @@ type request struct { fraction int // amount of fractional CPU requested isolate bool // prefer isolated exclusive CPUs cpuType cpuClass // preferred CPU type (normal, reserved) + prio cpuPrio // CPU priority preference, ignored for fraction requests memReq uint64 // memory request memLim uint64 // memory limit @@ -575,7 +600,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { // allocate isolated exclusive CPUs or slice them off the sharable set switch { case full > 0 && cs.isolated.Size() >= full && cr.isolate: - exclusive, err = cs.takeCPUs(&cs.isolated, nil, full) + exclusive, err = cs.takeCPUs(&cs.isolated, nil, full, cr.CPUPrio()) if err != nil { return nil, policyError("internal error: "+ "%s: can't take %d exclusive isolated CPUs from %s: %v", @@ -583,7 +608,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { } case full > 0 && cs.AllocatableSharedCPU() > 1000*full: - exclusive, err = cs.takeCPUs(&cs.sharable, nil, full) + exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio()) if err != nil { return nil, policyError("internal error: "+ "%s: can't take %d exclusive CPUs from %s: %v", @@ -764,8 +789,8 @@ func (cs *supply) ReserveMemory(g Grant) error { } // takeCPUs takes up to cnt CPUs from a given CPU set to another. 
-func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int) (cpuset.CPUSet, error) { - cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, cpuallocator.PriorityHigh) +func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int, prio cpuPrio) (cpuset.CPUSet, error) { + cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, prio) if err != nil { return cset, err } @@ -942,7 +967,7 @@ func (cs *supply) DumpMemoryState(prefix string) { // newRequest creates a new request for the given container. func newRequest(container cache.Container) Request { pod, _ := container.GetPod() - full, fraction, isolate, cpuType := cpuAllocationPreferences(pod, container) + full, fraction, isolate, cpuType, prio := cpuAllocationPreferences(pod, container) req, lim, mtype := memoryAllocationPreference(pod, container) coldStart := time.Duration(0) @@ -984,6 +1009,7 @@ func newRequest(container cache.Container) Request { memLim: lim, memType: mtype, coldStart: coldStart, + prio: prio, } } @@ -1019,6 +1045,10 @@ func (cr *request) CPUType() cpuClass { return cr.cpuType } +func (cr *request) CPUPrio() cpuPrio { + return cr.prio +} + // SetCPUType sets the requested type of CPU for the grant. 
func (cr *request) SetCPUType(cpuType cpuClass) { cr.cpuType = cpuType diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go index e25984c2f..c3a5d2181 100644 --- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go +++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go @@ -425,6 +425,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error { opt = cfg p.cfg = cfg + defaultPrio = cfg.DefaultCPUPriority.Value() if err := p.initialize(); err != nil { *p = savedPolicy @@ -435,6 +436,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error { if err := grant.RefetchNodes(); err != nil { *p = savedPolicy opt = p.cfg + defaultPrio = p.cfg.DefaultCPUPriority.Value() return policyError("failed to reconfigure: %v", err) } } @@ -523,7 +525,7 @@ func (p *policy) checkConstraints() error { // Use CpuAllocator to pick reserved CPUs among // allowed ones. Because using those CPUs is allowed, // they remain (they are put back) in the allowed set. - cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, cpuallocator.PriorityNormal) + cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, normalPrio) p.allowed = p.allowed.Union(cset) if err != nil { log.Fatal("cannot reserve %dm CPUs for ReservedResources from AvailableResources: %s", qty.MilliValue(), err) diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index af7faacee..caffebaeb 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -94,6 +94,19 @@ spec: - classes type: object type: object + defaultCPUPriority: + default: none + description: |- + DefaultCPUPriority (high, normal, low, none) + This parameter is passed to CPU allocator when allocating CPUs exclusively. 
+ If a container is not annotated otherwise, this is the requested priority + from the allocator. + enum: + - high + - normal + - low + - none + type: string instrumentation: description: Config provides runtime configuration for instrumentation. properties: diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index af7faacee..caffebaeb 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -94,6 +94,19 @@ spec: - classes type: object type: object + defaultCPUPriority: + default: none + description: |- + DefaultCPUPriority (high, normal, low, none) + This parameter is passed to CPU allocator when allocating CPUs exclusively. + If a container is not annotated otherwise, this is the requested priority + from the allocator. + enum: + - high + - normal + - low + - none + type: string instrumentation: description: Config provides runtime configuration for instrumentation. 
properties: diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go index 5adfca087..feafe17dc 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go @@ -15,7 +15,10 @@ package topologyaware import ( + "strings" + policy "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy" + "github.com/containers/nri-plugins/pkg/cpuallocator" ) type ( @@ -33,6 +36,27 @@ const ( AmountCPUSet = policy.AmountCPUSet ) +type CPUPriority string + +const ( + PriorityHigh CPUPriority = "high" + PriorityNormal CPUPriority = "normal" + PriorityLow CPUPriority = "low" + PriorityNone CPUPriority = "none" +) + +func (p CPUPriority) Value() cpuallocator.CPUPriority { + switch strings.ToLower(string(p)) { + case string(PriorityHigh): + return cpuallocator.PriorityHigh + case string(PriorityNormal): + return cpuallocator.PriorityNormal + case string(PriorityLow): + return cpuallocator.PriorityLow + } + return cpuallocator.PriorityNone +} + // +k8s:deepcopy-gen=true // +optional type Config struct { @@ -77,4 +101,11 @@ type Config struct { // of it. // +kubebuilder:validation:Required ReservedResources Constraints `json:"reservedResources"` + // DefaultCPUPriority (high, normal, low, none) + // This parameter is passed to CPU allocator when allocating CPUs exclusively. + // If a container is not annotated otherwise, this is the requested priority + // from the allocator. + // +kubebuilder:validation:Enum=high;normal;low;none + // +kubebuilder:default=none + DefaultCPUPriority CPUPriority `json:"defaultCPUPriority,omitempty"` }