Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/extended/node/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
- **node_e2e/node.go** - Tests probe-level terminationGracePeriodSeconds for liveness and startup probes, including fallback to the pod-level value when the probe-level one is unset (OCP-44493) [Lifecycle:informing]

## Directory Structure

Expand Down
282 changes: 282 additions & 0 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@ import (
"context"
"fmt"
"path/filepath"
"strconv"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/origin/test/extended/imagepolicy"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
utilrand "k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
Expand Down Expand Up @@ -164,6 +168,284 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
e2e.Logf("/dev/fuse mount output: %s", output)
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})

//author: minmli@redhat.com
//migrated from openshift-tests-private
//automates: https://issues.redhat.com/browse/OCPBUGS-44493
g.It("[OTP] add configurable terminationGracePeriodSeconds to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
ctx := context.Background()
var err error

oc.SetupProject()
namespace := oc.Namespace()

// parseDurationToSeconds converts a compact age string made of
// <number><unit> groups (units: d, h, m, s), such as "45s", "1m30s", "2m"
// or "1h5m", into a whole number of seconds.
//
// Anything that is not a clean sequence of such groups (empty string, a bare
// number, an unknown unit, placeholders like "<unknown>") is rejected with an
// error instead of being reported as 0 seconds, so callers never compute a
// time difference from a value that was not actually parsed.
var parseDurationToSeconds = func(durationStr string) (int, error) {
	if durationStr == "" {
		return 0, fmt.Errorf("empty duration")
	}

	totalSeconds := 0
	numStart := 0 // index where the digits of the current group begin
	for i := 0; i < len(durationStr); i++ {
		c := durationStr[i]
		if c >= '0' && c <= '9' {
			continue
		}

		var unitSeconds int
		switch c {
		case 'd':
			unitSeconds = 24 * 60 * 60
		case 'h':
			unitSeconds = 60 * 60
		case 'm':
			unitSeconds = 60
		case 's':
			unitSeconds = 1
		default:
			return 0, fmt.Errorf("invalid duration %q: unexpected character %q", durationStr, c)
		}
		if i == numStart {
			return 0, fmt.Errorf("invalid duration %q: unit %q has no value", durationStr, c)
		}

		value, err := strconv.Atoi(durationStr[numStart:i])
		if err != nil {
			return 0, fmt.Errorf("invalid duration %q: %w", durationStr, err)
		}
		totalSeconds += value * unitSeconds
		numStart = i + 1
	}

	// Digits left over after the last unit (e.g. "90" or "5m3") are ambiguous.
	if numStart != len(durationStr) {
		return 0, fmt.Errorf("invalid duration %q: missing unit after %q", durationStr, durationStr[numStart:])
	}
	return totalSeconds, nil
}

// Helper to verify probe termination period
verifyProbeTermination := func(podName, containerName string, expectedTerminationSec int) error {
return wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
podDesc, err := oc.AsAdmin().WithoutNamespace().Run("describe").Args("pod", podName, "-n", namespace).Output()
if err != nil {
e2e.Logf("Error describing pod: %v", err)
return false, nil
}

// Look for probe failure (killing) and container restart events
// Event format: "Normal Killing <time> kubelet Container <name> failed <probe> probe, will be restarted"
// Event format: "Normal Started <time> kubelet Container started"
killingLine := ""
restartLine := ""

inEvents := false
for _, line := range strings.Split(podDesc, "\n") {
if strings.Contains(line, "Events:") {
inEvents = true
continue
}
if !inEvents {
continue
}

// Look for killing event with container name
if strings.Contains(line, "Killing") && strings.Contains(line, containerName) &&
strings.Contains(line, "failed") && strings.Contains(line, "probe") &&
strings.Contains(line, "will be restarted") {
killingLine = line
}
// Look for Started event after Killing
if killingLine != "" && strings.Contains(line, "Started") && strings.Contains(line, "Container started") {
restartLine = line
break
}
}

if killingLine == "" || restartLine == "" {
e2e.Logf("Waiting for probe failure (killing) and container restart events")
return false, nil
}

e2e.Logf("Killing event: %s", killingLine)
e2e.Logf("Restart event: %s", restartLine)

// Extract timestamps (format: "1m30s" or "45s")
// Event format: "Normal Killing 2m30s kubelet Container..."
killingFields := strings.Fields(killingLine)
restartFields := strings.Fields(restartLine)
if len(killingFields) < 3 || len(restartFields) < 3 {
e2e.Logf("Unable to parse event timestamps")
return false, nil
}

killingTime := killingFields[2]
restartTime := restartFields[2]

killingSec, err := parseDurationToSeconds(killingTime)
if err != nil {
e2e.Logf("Error parsing killing time: %v", err)
return false, nil
}

restartSec, err := parseDurationToSeconds(restartTime)
if err != nil {
e2e.Logf("Error parsing restart time: %v", err)
return false, nil
}

// Time difference: killing happened earlier, restart happened later
// So we need to calculate how long between them
timeDiff := killingSec - restartSec
e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec)

// Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
if timeDiff >= (expectedTerminationSec-3) && timeDiff <= (expectedTerminationSec+10) {
e2e.Logf("Termination grace period check passed")
return true, nil
}

e2e.Logf("Time difference %d is outside expected range [%d, %d]", timeDiff, expectedTerminationSec-3, expectedTerminationSec+10)
return false, nil
})
}

g.By("Test liveness probe with probe-level terminationGracePeriodSeconds")
livenessPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "liveness-probe",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "test",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"sh", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
TerminationGracePeriodSeconds: &[]int64{10}[0],
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPod, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")

err = verifyProbeTermination("liveness-probe", "test", 10)
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe termination grace period not honored")

g.By("Test startup probe with probe-level terminationGracePeriodSeconds")
startupPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "startup-probe",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "teststartup",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"sh", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
TerminationGracePeriodSeconds: &[]int64{10}[0],
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, startupPod, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")

err = verifyProbeTermination("startup-probe", "teststartup", 10)
o.Expect(err).NotTo(o.HaveOccurred(), "startup probe termination grace period not honored")

g.By("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)")
livenessPodNoProbeTerm := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "liveness-probe-no-term",
Namespace: namespace,
},
Spec: corev1.PodSpec{
TerminationGracePeriodSeconds: &[]int64{60}[0],
SecurityContext: &corev1.PodSecurityContext{
RunAsNonRoot: &[]bool{true}[0],
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{
{
Name: "test",
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: &[]bool{false}[0],
Capabilities: &corev1.Capabilities{
Drop: []corev1.Capability{"ALL"},
},
},
Command: []string{"sh", "-c", "sleep 100000000"},
Ports: []corev1.ContainerPort{
{ContainerPort: 8080},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Port: intstr.FromInt(8080),
},
},
FailureThreshold: 1,
PeriodSeconds: 60,
// No TerminationGracePeriodSeconds - should use pod-level (60s)
},
},
},
},
}

_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPodNoProbeTerm, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe termination")

err = verifyProbeTermination("liveness-probe-no-term", "test", 60)
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe should use pod-level termination grace period when probe-level not set")
})
})

// author: asahay@redhat.com
Expand Down