From 27bc9065bc89dfa300699b4c116de300102fd69b Mon Sep 17 00:00:00 2001 From: David Ortiz Date: Wed, 4 Sep 2024 17:08:17 +0200 Subject: [PATCH] [clusteragent/admission/mutate] Mark injected volumes as safe to evict for the k8s cluster autoscaler (#28993) --- .../agent_sidecar/agent_sidecar_test.go | 7 +++ .../mutate/agent_sidecar/providers.go | 4 ++ .../mutate/agent_sidecar/providers_test.go | 12 +++++ .../auto_instrumentation_test.go | 4 ++ .../mutate/autoinstrumentation/mutators.go | 2 + .../admission/mutate/common/common.go | 32 +++++++++++++ .../admission/mutate/common/common_test.go | 47 +++++++++++++++++++ .../admission/mutate/config/config.go | 4 ++ .../admission/mutate/config/config_test.go | 1 + .../cwsinstrumentation/cws_instrumentation.go | 2 + .../cws_instrumentation_test.go | 1 + ...s-for-k8s-autoscaler-d96fa7357c4be699.yaml | 12 +++++ test/new-e2e/tests/containers/k8s_test.go | 14 +++++- 13 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 releasenotes-dca/notes/configure-volumes-for-k8s-autoscaler-d96fa7357c4be699.yaml diff --git a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go index efdcf52d7de17..87b537b208324 100644 --- a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go +++ b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go @@ -17,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + mutatecommon "github.com/DataDog/datadog-agent/pkg/clusteragent/admission/mutate/common" configmock "github.com/DataDog/datadog-agent/pkg/config/mock" apicommon "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver/common" "github.com/DataDog/datadog-agent/pkg/util/pointer" @@ -180,6 +181,9 @@ func TestInjectAgentSidecar(t *testing.T) { return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod-name", + Annotations: map[string]string{ + mutatecommon.K8sAutoscalerSafeToEvictVolumesAnnotation: "ddsockets", + }, }, Spec: corev1.PodSpec{ ShareProcessNamespace: pointer.Ptr(true), @@ -297,6 +301,9 @@ func TestInjectAgentSidecar(t *testing.T) { return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod-name", + Annotations: map[string]string{ + mutatecommon.K8sAutoscalerSafeToEvictVolumesAnnotation: "ddsockets", + }, }, Spec: corev1.PodSpec{ ShareProcessNamespace: pointer.Ptr(true), diff --git a/pkg/clusteragent/admission/mutate/agent_sidecar/providers.go b/pkg/clusteragent/admission/mutate/agent_sidecar/providers.go index c4a2efea3341c..8e8d424e17eef 100644 --- a/pkg/clusteragent/admission/mutate/agent_sidecar/providers.go +++ b/pkg/clusteragent/admission/mutate/agent_sidecar/providers.go @@ -92,6 +92,10 @@ func applyFargateOverrides(pod *corev1.Pod) (bool, error) { volume, volumeMount := socketsVolume() injected := common.InjectVolume(pod, volume, volumeMount) + if injected { + common.MarkVolumeAsSafeToEvictForAutoscaler(pod, volume.Name) + } + mutated = mutated || injected // ShareProcessNamespace is required for the process collection feature diff --git a/pkg/clusteragent/admission/mutate/agent_sidecar/providers_test.go b/pkg/clusteragent/admission/mutate/agent_sidecar/providers_test.go index a277a52f6d888..ee8ceb1a544f5 100644 --- a/pkg/clusteragent/admission/mutate/agent_sidecar/providers_test.go +++ b/pkg/clusteragent/admission/mutate/agent_sidecar/providers_test.go @@ -14,7 +14,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + mutatecommon "github.com/DataDog/datadog-agent/pkg/clusteragent/admission/mutate/common" configmock "github.com/DataDog/datadog-agent/pkg/config/mock" "github.com/DataDog/datadog-agent/pkg/util/pointer" ) @@ -99,6 +101,11 @@ func TestApplyProviderOverrides(t *testing.T) { }, }, expectedPodAfterOverride: &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{ + Annotations: map[string]string{ + mutatecommon.K8sAutoscalerSafeToEvictVolumesAnnotation: "ddsockets", + }, + }, Spec: corev1.PodSpec{ ShareProcessNamespace: pointer.Ptr(true), Containers: []corev1.Container{ @@ -203,6 +210,11 @@ func TestApplyProviderOverrides(t *testing.T) { }, }, expectedPodAfterOverride: &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{ + Annotations: map[string]string{ + mutatecommon.K8sAutoscalerSafeToEvictVolumesAnnotation: "ddsockets", + }, + }, Spec: corev1.PodSpec{ ShareProcessNamespace: pointer.Ptr(true), Containers: []corev1.Container{ diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go index 1722cb5cb0832..d7b217f755020 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go @@ -262,6 +262,10 @@ func TestInjectAutoInstruConfigV2(t *testing.T) { require.Equal(t, etcVolume.Name, tt.pod.Spec.Volumes[1].Name, "expected datadog-etc volume to be injected") + volumesMarkedAsSafeToEvict := strings.Split(tt.pod.Annotations[common.K8sAutoscalerSafeToEvictVolumesAnnotation], ",") + require.Contains(t, volumesMarkedAsSafeToEvict, volumeName, "expected volume %s to be marked as safe to evict", volumeName) + require.Contains(t, volumesMarkedAsSafeToEvict, etcVolume.Name, "expected volume %s to be marked as safe to evict", etcVolume.Name) + require.Equal(t, len(tt.libInfo.libs)+1, len(tt.pod.Spec.InitContainers), "expected there to be one more than the number of libs to inject for init containers") diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/mutators.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/mutators.go index d113de8cf95e1..ecd3119fd6b0b 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/mutators.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/mutators.go @@ -101,6 +101,8 @@ func (v volume) mount(mount corev1.VolumeMount) volumeMount { // mutatePod implements podMutator for volume. func (v volume) mutatePod(pod *corev1.Pod) error { + common.MarkVolumeAsSafeToEvictForAutoscaler(pod, v.Name) + vol := v.Volume for idx, i := range pod.Spec.Volumes { if i.Name == v.Volume.Name { diff --git a/pkg/clusteragent/admission/mutate/common/common.go b/pkg/clusteragent/admission/mutate/common/common.go index 0a08038db49fc..ba60a05f472b8 100644 --- a/pkg/clusteragent/admission/mutate/common/common.go +++ b/pkg/clusteragent/admission/mutate/common/common.go @@ -11,7 +11,9 @@ package common import ( "encoding/json" "fmt" + "slices" "strconv" + "strings" "github.com/wI2L/jsondiff" corev1 "k8s.io/api/core/v1" @@ -22,6 +24,10 @@ import ( "github.com/DataDog/datadog-agent/pkg/util/log" ) +// K8sAutoscalerSafeToEvictVolumesAnnotation is the annotation used by the +// Kubernetes cluster-autoscaler to mark a volume as safe to evict +const K8sAutoscalerSafeToEvictVolumesAnnotation = "cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes" + // MutationFunc is a function that mutates a pod type MutationFunc func(pod *corev1.Pod, ns string, cl dynamic.Interface) (bool, error) @@ -181,3 +187,29 @@ func ContainerRegistry(specificConfigOpt string) string { return config.Datadog().GetString("admission_controller.container_registry") } + +// MarkVolumeAsSafeToEvictForAutoscaler adds the Kubernetes cluster-autoscaler +// annotation to the given pod, marking the specified local volume as safe to +// evict. This annotation allows the cluster-autoscaler to evict pods with the +// local volume mounted, enabling the node to scale down if necessary. +// This function will not add the volume to the annotation if it is already +// there. +// Ref: https://github.com/kubernetes/autoscaler/blob/cluster-autoscaler-release-1.31/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node +func MarkVolumeAsSafeToEvictForAutoscaler(pod *corev1.Pod, volumeNameToAdd string) { + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + + currentVolumes := pod.Annotations[K8sAutoscalerSafeToEvictVolumesAnnotation] + var volumeList []string + if currentVolumes != "" { + volumeList = strings.Split(currentVolumes, ",") + } + + if slices.Contains(volumeList, volumeNameToAdd) { + return // Volume already in the list, no need to add + } + + volumeList = append(volumeList, volumeNameToAdd) + pod.Annotations[K8sAutoscalerSafeToEvictVolumesAnnotation] = strings.Join(volumeList, ",") +} diff --git a/pkg/clusteragent/admission/mutate/common/common_test.go b/pkg/clusteragent/admission/mutate/common/common_test.go index 1a5606f33f886..6423a17759e9d 100644 --- a/pkg/clusteragent/admission/mutate/common/common_test.go +++ b/pkg/clusteragent/admission/mutate/common/common_test.go @@ -218,3 +218,50 @@ func Test_injectVolume(t *testing.T) { }) } } + +func TestMarkVolumeAsSafeToEvictForAutoscaler(t *testing.T) { + tests := []struct { + name string + currentSafeToEvictAnnotationValue string + volumeToAdd string + expectedNewSafeToEvictAnnotationValue string + }{ + { + name: "the annotation is not set", + currentSafeToEvictAnnotationValue: "", + volumeToAdd: "datadog", + expectedNewSafeToEvictAnnotationValue: "datadog", + }, + { + name: "the annotation is already set", + currentSafeToEvictAnnotationValue: "someVolume1,someVolume2", + volumeToAdd: "datadog", + expectedNewSafeToEvictAnnotationValue: "someVolume1,someVolume2,datadog", + }, + { + name: "the annotation is already set and the volume is already in the list", + currentSafeToEvictAnnotationValue: "someVolume1,someVolume2", + volumeToAdd: "someVolume2", + expectedNewSafeToEvictAnnotationValue: "someVolume1,someVolume2", + }, + } + + for _, test := range tests { + t.Run(test.name, func(_ *testing.T) { + annotations := map[string]string{} + if test.currentSafeToEvictAnnotationValue != "" { + annotations["cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes"] = test.currentSafeToEvictAnnotationValue + } + pod := FakePodWithAnnotations(annotations) + + MarkVolumeAsSafeToEvictForAutoscaler(pod, test.volumeToAdd) + + assert.Equal( + t, + test.expectedNewSafeToEvictAnnotationValue, + pod.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes"], + ) + }) + } + +} diff --git a/pkg/clusteragent/admission/mutate/config/config.go b/pkg/clusteragent/admission/mutate/config/config.go index 4da8680f2a2ae..0d43bdfb3c229 100644 --- a/pkg/clusteragent/admission/mutate/config/config.go +++ b/pkg/clusteragent/admission/mutate/config/config.go @@ -186,6 +186,10 @@ func (w *Webhook) inject(pod *corev1.Pod, _ string, _ dynamic.Interface) (bool, case socket: volume, volumeMount := buildVolume(DatadogVolumeName, config.Datadog().GetString("admission_controller.inject_config.socket_path"), true) injectedVol := common.InjectVolume(pod, volume, volumeMount) + if injectedVol { + common.MarkVolumeAsSafeToEvictForAutoscaler(pod, DatadogVolumeName) + } + injectedEnv := common.InjectEnv(pod, traceURLSocketEnvVar) injectedEnv = common.InjectEnv(pod, dogstatsdURLSocketEnvVar) || injectedEnv injectedConfig = injectedEnv || injectedVol diff --git a/pkg/clusteragent/admission/mutate/config/config_test.go b/pkg/clusteragent/admission/mutate/config/config_test.go index 049d103cffab9..c8dd5437edf85 100644 --- a/pkg/clusteragent/admission/mutate/config/config_test.go +++ b/pkg/clusteragent/admission/mutate/config/config_test.go @@ -315,6 +315,7 @@ func TestInjectSocket(t *testing.T) { assert.Equal(t, pod.Spec.Volumes[0].Name, "datadog") assert.Equal(t, pod.Spec.Volumes[0].VolumeSource.HostPath.Path, "/var/run/datadog") assert.Equal(t, *pod.Spec.Volumes[0].VolumeSource.HostPath.Type, corev1.HostPathDirectoryOrCreate) + assert.Equal(t, "datadog", pod.Annotations[mutatecommon.K8sAutoscalerSafeToEvictVolumesAnnotation]) } func TestInjectSocketWithConflictingVolumeAndInitContainer(t *testing.T) { diff --git a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go index b361c31e3f1ad..86c6c3ecf30c2 100644 --- a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go +++ b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go @@ -714,6 +714,8 @@ func injectCWSVolume(pod *corev1.Pod) { Name: cwsVolumeName, VolumeSource: volumeSource, }) + + common.MarkVolumeAsSafeToEvictForAutoscaler(pod, cwsVolumeName) } func injectCWSVolumeMount(container *corev1.Container) { diff --git a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation_test.go b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation_test.go index 2d3d1fa578eef..dcaecf230eae7 100644 --- a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation_test.go +++ b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation_test.go @@ -875,6 +875,7 @@ func Test_injectCWSPodInstrumentation(t *testing.T) { require.NotNil(t, annotations, "failed to annotate pod") if annotations != nil { require.Equal(t, cwsInstrumentationPodAnotationReady, annotations[cwsInstrumentationPodAnotationStatus], "CWS instrumentation annotation is missing") + require.Equal(t, cwsVolumeName, annotations[common.K8sAutoscalerSafeToEvictVolumesAnnotation], "CWS instrumentation volume should be marked as safe to evict") } } else { testNoInjectedCWSVolume(t, tt.args.pod) diff --git a/releasenotes-dca/notes/configure-volumes-for-k8s-autoscaler-d96fa7357c4be699.yaml b/releasenotes-dca/notes/configure-volumes-for-k8s-autoscaler-d96fa7357c4be699.yaml new file mode 100644 index 0000000000000..3a189e5474685 --- /dev/null +++ b/releasenotes-dca/notes/configure-volumes-for-k8s-autoscaler-d96fa7357c4be699.yaml @@ -0,0 +1,12 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +fixes: + - | + Fixed an issue that prevented the Kubernetes autoscaler from evicting pods + injected by the Admission Controller. diff --git a/test/new-e2e/tests/containers/k8s_test.go b/test/new-e2e/tests/containers/k8s_test.go index 06d0915cf268e..af2f9917a2f9e 100644 --- a/test/new-e2e/tests/containers/k8s_test.go +++ b/test/new-e2e/tests/containers/k8s_test.go @@ -983,8 +983,13 @@ func (suite *k8sSuite) testAdmissionControllerPod(namespace string, name string, } } + volumesMarkedAsSafeToEvict := strings.Split( + pod.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes"], ",", + ) + if suite.Contains(hostPathVolumes, "datadog") { suite.Equal("/var/run/datadog", hostPathVolumes["datadog"].Path) + suite.Contains(volumesMarkedAsSafeToEvict, "datadog") } volumeMounts := make(map[string][]string) @@ -1006,7 +1011,14 @@ func (suite *k8sSuite) testAdmissionControllerPod(namespace string, name string, } } - suite.Contains(emptyDirVolumes, "datadog-auto-instrumentation") + if suite.Contains(emptyDirVolumes, "datadog-auto-instrumentation") { + suite.Contains(volumesMarkedAsSafeToEvict, "datadog-auto-instrumentation") + } + + if suite.Contains(emptyDirVolumes, "datadog-auto-instrumentation-etc") { + suite.Contains(volumesMarkedAsSafeToEvict, "datadog-auto-instrumentation-etc") + } + if suite.Contains(volumeMounts, "datadog-auto-instrumentation") { suite.ElementsMatch([]string{ "/opt/datadog-packages/datadog-apm-inject",