diff --git a/test/extended/node/README.md b/test/extended/node/README.md index 4d0f1f7abd0c..7121036760db 100644 --- a/test/extended/node/README.md +++ b/test/extended/node/README.md @@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features. - **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling - **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides - **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format +- **node_e2e/probe_termination.go** - Probe-level terminationGracePeriodSeconds (OCP-44493) - Tests configurable termination grace period for liveness and startup probes. Includes 3 test cases: probe-level config for liveness probe, probe-level config for startup probe, and fallback to pod-level config when probe-level is not set [Lifecycle:informing] ## Directory Structure diff --git a/test/extended/node/node_e2e/node.go b/test/extended/node/node_e2e/node.go index 5f43c93e20af..c6dc56ee8eaf 100644 --- a/test/extended/node/node_e2e/node.go +++ b/test/extended/node/node_e2e/node.go @@ -164,6 +164,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", e2e.Logf("/dev/fuse mount output: %s", output) o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod") }) + }) // author: asahay@redhat.com diff --git a/test/extended/node/node_e2e/probe_termination.go b/test/extended/node/node_e2e/probe_termination.go new file mode 100644 index 000000000000..1e8a2e8224a3 --- /dev/null +++ b/test/extended/node/node_e2e/probe_termination.go @@ -0,0 +1,265 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/wait"
+	e2e "k8s.io/kubernetes/test/e2e/framework"
+	"k8s.io/utils/ptr"
+
+	exutil "github.com/openshift/origin/test/extended/util"
+)
+
+// Probe configuration tests (OCP-44493): each case creates a pod whose only
+// container runs `sleep` and carries an HTTP probe with FailureThreshold=1,
+// waits for the kubelet to report the probe-triggered kill and the following
+// restart, and asserts that the gap between the two events matches the
+// termination grace period that should apply (probe-level when set, otherwise
+// the pod-level value of 60s).
+var _ = g.Describe("[sig-node] Probe configuration", func() {
+	var (
+		oc = exutil.NewCLIWithoutNamespace("probe-termination")
+	)
+
+	//author: bgudi@redhat.com
+	g.It("[OTP] Liveness probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
+		ctx := context.Background()
+
+		oc.SetupProject()
+		namespace := oc.Namespace()
+
+		g.By("Create pod with liveness probe having probe-level terminationGracePeriodSeconds=10s")
+		pod := newProbeTerminationPod(namespace, "liveness-probe-level", "test")
+		pod.Spec.Containers[0].LivenessProbe = newFailingHTTPProbe(ptr.To[int64](10))
+
+		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")
+
+		g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
+		timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-probe-level", "test", 10)
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
+		o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum")
+		o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum")
+	})
+
+	//author: bgudi@redhat.com
+	g.It("[OTP] Startup probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
+		ctx := context.Background()
+
+		oc.SetupProject()
+		namespace := oc.Namespace()
+
+		g.By("Create pod with startup probe having probe-level terminationGracePeriodSeconds=10s")
+		pod := newProbeTerminationPod(namespace, "startup-probe-level", "teststartup")
+		pod.Spec.Containers[0].StartupProbe = newFailingHTTPProbe(ptr.To[int64](10))
+
+		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")
+
+		g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
+		timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "startup-probe-level", "teststartup", 10)
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
+		o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum")
+		o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum")
+	})
+
+	//author: bgudi@redhat.com
+	g.It("[OTP] Liveness probe should fall back to pod-level terminationGracePeriodSeconds when probe-level is not set [OCP-44493]", ote.Informing(), func() {
+		ctx := context.Background()
+
+		oc.SetupProject()
+		namespace := oc.Namespace()
+
+		g.By("Create pod with liveness probe without probe-level terminationGracePeriodSeconds")
+		pod := newProbeTerminationPod(namespace, "liveness-pod-level", "test")
+		// No probe-level TerminationGracePeriodSeconds - should use pod-level (60s)
+		pod.Spec.Containers[0].LivenessProbe = newFailingHTTPProbe(nil)
+
+		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe-level termination")
+
+		g.By("Verify pod-level terminationGracePeriodSeconds is used (60s)")
+		timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-pod-level", "test", 60)
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
+		o.Expect(timeDiff).To(o.BeNumerically(">=", 60-3), "time difference is less than expected minimum")
+		o.Expect(timeDiff).To(o.BeNumerically("<=", 60+10), "time difference is greater than expected maximum")
+	})
+})
+
+// newProbeTerminationPod returns the pod shared by the probe termination tests:
+// a single container named containerName that only runs `sleep`, a pod-level
+// terminationGracePeriodSeconds of 60, and a security context that runs as
+// non-root with the RuntimeDefault seccomp profile, no privilege escalation and
+// all capabilities dropped. No probe is attached; callers set LivenessProbe or
+// StartupProbe on Spec.Containers[0].
+func newProbeTerminationPod(namespace, podName, containerName string) *corev1.Pod {
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      podName,
+			Namespace: namespace,
+		},
+		Spec: corev1.PodSpec{
+			TerminationGracePeriodSeconds: ptr.To[int64](60),
+			SecurityContext: &corev1.PodSecurityContext{
+				RunAsNonRoot: ptr.To(true),
+				SeccompProfile: &corev1.SeccompProfile{
+					Type: corev1.SeccompProfileTypeRuntimeDefault,
+				},
+			},
+			Containers: []corev1.Container{
+				{
+					Name:  containerName,
+					Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
+					SecurityContext: &corev1.SecurityContext{
+						AllowPrivilegeEscalation: ptr.To(false),
+						Capabilities: &corev1.Capabilities{
+							Drop: []corev1.Capability{"ALL"},
+						},
+					},
+					// The command replaces the image entrypoint with a plain sleep,
+					// so the HTTP probe on port 8080 is expected to fail.
+					Command: []string{"sh", "-c", "sleep 100000000"},
+					Ports: []corev1.ContainerPort{
+						{ContainerPort: 8080},
+					},
+				},
+			},
+		},
+	}
+}
+
+// newFailingHTTPProbe returns an HTTP GET probe for /healthz on port 8080 with
+// FailureThreshold=1 and PeriodSeconds=60. terminationGracePeriodSeconds is
+// stored as the probe-level grace period; pass nil to leave it unset so the
+// pod-level value applies.
+func newFailingHTTPProbe(terminationGracePeriodSeconds *int64) *corev1.Probe {
+	return &corev1.Probe{
+		ProbeHandler: corev1.ProbeHandler{
+			HTTPGet: &corev1.HTTPGetAction{
+				Path: "/healthz",
+				Port: intstr.FromInt(8080),
+			},
+		},
+		FailureThreshold:              1,
+		PeriodSeconds:                 60,
+		TerminationGracePeriodSeconds: terminationGracePeriodSeconds,
+	}
+}
+
+// latestProbeKillingEvent returns the "Killing" event with the most recent
+// LastTimestamp whose message mentions containerName, "failed" and "probe",
+// or nil when no such event is present.
+func latestProbeKillingEvent(events []corev1.Event, containerName string) *corev1.Event {
+	var latest *corev1.Event
+	for i := range events {
+		event := &events[i]
+		if event.Reason != "Killing" {
+			continue
+		}
+		if !strings.Contains(event.Message, containerName) ||
+			!strings.Contains(event.Message, "failed") ||
+			!strings.Contains(event.Message, "probe") {
+			continue
+		}
+		if latest == nil || event.LastTimestamp.Time.After(latest.LastTimestamp.Time) {
+			latest = event
+		}
+	}
+	return latest
+}
+
+// earliestStartedEventAfter returns the "Started" event whose most recent
+// occurrence is strictly later than after, preferring the earliest such event,
+// or nil when there is none.
+//
+// LastTimestamp is used rather than FirstTimestamp because the event recorder
+// can fold a repeated identical event into the existing object (Count > 1): in
+// that case FirstTimestamp stays at the container's initial start and only
+// LastTimestamp reflects the restart. For a non-aggregated event the two are
+// equal.
+func earliestStartedEventAfter(events []corev1.Event, after time.Time) *corev1.Event {
+	var earliest *corev1.Event
+	for i := range events {
+		event := &events[i]
+		if event.Reason != "Started" || !strings.Contains(event.Message, "Started container") {
+			continue
+		}
+		if !event.LastTimestamp.Time.After(after) {
+			continue
+		}
+		if earliest == nil || event.LastTimestamp.Time.Before(earliest.LastTimestamp.Time) {
+			earliest = event
+		}
+	}
+	return earliest
+}
+
+// verifyProbeTermination waits for a probe-triggered kill of containerName in
+// pod podName and for the container start that follows it, and returns the
+// number of whole seconds between the two events.
+//
+// Events for the pod are listed every 10 seconds for up to 5 minutes; list
+// errors are logged and retried. The kill is located first and the restart is
+// then searched relative to it, so the result does not depend on the order in
+// which the API returns events. expectedTerminationSec is used for logging
+// only; callers assert on the returned value. A non-nil error means the poll
+// ended (timeout or context cancellation) before both events were observed.
+func verifyProbeTermination(ctx context.Context, oc *exutil.CLI, namespace, podName, containerName string, expectedTerminationSec int) (int, error) {
+	var timeDiff int
+	err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
+		// Get events using the Events API
+		events, err := oc.KubeClient().CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
+			FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", podName),
+		})
+		if err != nil {
+			// Treat list failures as transient and try again on the next poll.
+			e2e.Logf("Error getting events: %v", err)
+			return false, nil
+		}
+
+		// Find the most recent probe failure first, then the restart that follows it.
+		killingEvent := latestProbeKillingEvent(events.Items, containerName)
+		var startedEvent *corev1.Event
+		if killingEvent != nil {
+			startedEvent = earliestStartedEventAfter(events.Items, killingEvent.LastTimestamp.Time)
+		}
+		if killingEvent == nil || startedEvent == nil {
+			e2e.Logf("Waiting for probe failure (Killing) and container restart (Started) events")
+			return false, nil
+		}
+
+		e2e.Logf("Killing event: %s at %v", killingEvent.Message, killingEvent.LastTimestamp)
+		e2e.Logf("Started event: %s at %v", startedEvent.Message, startedEvent.LastTimestamp)
+
+		// Calculate time difference in seconds
+		timeDiff = int(startedEvent.LastTimestamp.Sub(killingEvent.LastTimestamp.Time).Seconds())
+		e2e.Logf("Time difference: %d seconds (expected: about %d seconds)", timeDiff, expectedTerminationSec)
+
+		return true, nil
+	})
+	if err != nil {
+		return 0, fmt.Errorf("waiting for probe kill and restart events of pod %s/%s: %w", namespace, podName, err)
+	}
+	return timeDiff, nil
+}