-
Notifications
You must be signed in to change notification settings - Fork 226
USHIFT-6401, USHIFT-6788: Add fail-fast RBAC bootstrap hook deadlock detection #6471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ import ( | |
| "io" | ||
| "net" | ||
| "os" | ||
| "os/exec" | ||
| "path/filepath" | ||
| "strconv" | ||
| "strings" | ||
|
|
@@ -55,6 +56,17 @@ import ( | |
|
|
||
| const ( | ||
| kubeAPIStartupTimeout = 60 | ||
| // rbacHookDeadlockTimeout is the time to wait for the RBAC bootstrap hook | ||
| // before declaring a deadlock. This is shorter than kubeAPIStartupTimeout | ||
| // to allow for faster recovery. | ||
| rbacHookDeadlockTimeout = 15 | ||
| // rbacHookCheckInterval is how often to check the RBAC hook status | ||
| rbacHookPollDelayStart = 5 * time.Second | ||
| rbacHookCheckInterval = 2 | ||
| // rbacHookMaxWaitDuration is the absolute maximum time to wait for the RBAC hook | ||
| // regardless of etcd health state changes. This prevents flapping from extending | ||
| // detection indefinitely. | ||
| rbacHookMaxWaitDuration = 30 * time.Second | ||
| ) | ||
|
|
||
| var ( | ||
|
|
@@ -348,7 +360,13 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped | |
| return err | ||
| } | ||
|
|
||
| // run readiness check | ||
| // Channel to signal RBAC hook deadlock detection | ||
| rbacDeadlockDetected := make(chan struct{}) | ||
|
|
||
| // Run RBAC hook deadlock detector | ||
| go s.detectRBACHookDeadlock(ctx, restClient, rbacDeadlockDetected) | ||
|
|
||
| // Run standard readiness check | ||
| go func() { | ||
| err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout*time.Second, true, func(ctx context.Context) (bool, error) { | ||
| var status int | ||
|
|
@@ -420,7 +438,168 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped | |
| return err | ||
| case perr := <-panicChannel: | ||
| panic(perr) | ||
| case <-rbacDeadlockDetected: | ||
| klog.Error("RBAC bootstrap hook deadlock detected - restarting microshift-etcd.scope to recover") | ||
| if err := restartMicroshiftEtcdScope(); err != nil { | ||
| klog.Errorf("Failed to restart microshift-etcd.scope: %v", err) | ||
| } | ||
| return fmt.Errorf("RBAC bootstrap hook deadlock detected after %d seconds", rbacHookDeadlockTimeout) | ||
| } | ||
| } | ||
|
|
||
| // detectRBACHookDeadlock monitors the RBAC bootstrap hook status and detects deadlock conditions. | ||
| // A deadlock is detected when: | ||
| // 1. The RBAC hook is not completing (stuck in "not finished" state) | ||
| // 2. etcd is healthy and responsive | ||
| // This indicates the circular dependency where the hook waits for API server | ||
| // while API server waits for the hook. | ||
| // | ||
| // Closed upstream Kubernetes issues: | ||
| // https://github.com/kubernetes/kubernetes/issues/86715 | ||
| // https://github.com/kubernetes/kubernetes/issues/97119 | ||
| func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient rest.Interface, deadlockDetected chan<- struct{}) { | ||
| // Wait a few seconds before starting detection to allow normal startup | ||
| select { | ||
| case <-ctx.Done(): | ||
| return | ||
| case <-time.After(rbacHookPollDelayStart): | ||
| } | ||
|
|
||
| checkCount := 0 | ||
| maxChecks := int((rbacHookDeadlockTimeout - rbacHookPollDelayStart) / rbacHookCheckInterval) // Account for initial delay | ||
| // Track wall-clock deadline to prevent flapping from extending detection indefinitely | ||
| startTime := time.Now() | ||
|
|
||
| for { | ||
| // Check absolute deadline first - this cannot be reset by etcd state changes | ||
| if time.Since(startTime) >= rbacHookMaxWaitDuration { | ||
| klog.Errorf("RBAC bootstrap hook exceeded maximum wait duration of %v", rbacHookMaxWaitDuration) | ||
| // Only trigger deadlock recovery if we've confirmed the predicate enough times | ||
| if checkCount >= maxChecks { | ||
| break // Fall through to close(deadlockDetected) | ||
| } | ||
| // Timeout but not confirmed deadlock - exit without triggering recovery | ||
| return | ||
| } | ||
|
|
||
| // Check if we've confirmed deadlock enough times | ||
| if checkCount >= maxChecks { | ||
| break // Fall through to close(deadlockDetected) | ||
| } | ||
|
|
||
| select { | ||
| case <-ctx.Done(): | ||
| return | ||
| case <-time.After(rbacHookCheckInterval * time.Second): | ||
| } | ||
|
|
||
| // Check RBAC hook status | ||
| probeCtx, cancel := context.WithTimeout(ctx, time.Second) | ||
| var status int | ||
| err := restClient.Get(). | ||
| AbsPath("/readyz/poststarthook/rbac/bootstrap-roles"). | ||
| Do(probeCtx). | ||
| StatusCode(&status). | ||
| Error() | ||
| cancel() | ||
|
|
||
|
copejon marked this conversation as resolved.
|
||
| // If hook is ready, no deadlock | ||
| if err == nil && status == 200 { | ||
| klog.V(4).Info("RBAC bootstrap hook completed successfully") | ||
| return | ||
| } | ||
|
|
||
| // If RBAC probe errored, skip this iteration (don't count toward deadlock) | ||
| if err != nil { | ||
| klog.V(4).Infof("RBAC probe error (not counting toward deadlock): %v", err) | ||
| continue | ||
|
Comment on lines
+496
to
+515
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n 'k8s.io/client-go' go.mod
modcache="$(go env GOPATH 2>/dev/null)/pkg/mod"
rg -n -C4 'func \(r Result\) (Error|StatusCode)\(' "$modcache"/k8s.io/client-go@* 2>/dev/null || trueRepository: openshift/microshift Length of output: 212 🏁 Script executed: cat -n pkg/controllers/kube-apiserver.go | sed -n '490,520p'Repository: openshift/microshift Length of output: 1110 🏁 Script executed: find . -path ./deps -prune -o -type f -name "*.go" -print | xargs grep -l "type Result" 2>/dev/null | head -5Repository: openshift/microshift Length of output: 351 🏁 Script executed: # Look for the local client-go implementation
find ./deps -type f -name "*.go" -path "*client-go*" | grep -i result | head -5Repository: openshift/microshift Length of output: 698 🏁 Script executed: # Search for Result type and Error method definition in the local client-go
rg -A10 "type Result struct" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/ 2>/dev/null | head -30Repository: openshift/microshift Length of output: 1171 🏁 Script executed: rg -A5 "func \(r \*Result\) Error\(\)" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 46 🏁 Script executed: rg -A5 "func \(r \*Result\) StatusCode\(" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 46 🏁 Script executed: # Look at the Do() method to understand what happens
rg -B5 -A15 "func \(r \*Request\) Do\(ctx" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | head -50Repository: openshift/microshift Length of output: 742 🏁 Script executed: rg -A20 "func \(r \*Request\) transformResponse" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | head -60Repository: openshift/microshift Length of output: 1126 🏁 Script executed: # Search for StatusCode and Error methods more broadly
grep -n "func.*Result.*StatusCode\|func.*Result.*Error" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 158 🏁 Script executed: # Get a broader view of Result methods
rg -n "^func \(r.*Result\)" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 486 🏁 Script executed: sed -n '1433,1448p' ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 664 🏁 Script executed: sed -n '1489,1515p' ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 1043 🏁 Script executed: # Also check transformResponse to see if statusCode is populated for error responses
rg -A50 "func \(r \*Request\) transformResponse" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | grep -A50 "resp.StatusCode"Repository: openshift/microshift Length of output: 46 🏁 Script executed: # Let's see more of transformResponse to understand when err is set
sed -n '1269,1350p' ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.goRepository: openshift/microshift Length of output: 3859 🏁 Script executed: # Check where statusCode is assigned in transformResponse
rg -n "statusCode.*=" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | head -10Repository: openshift/microshift Length of output: 151 🏁 Script executed: # Search for where statusCode is assigned in transformResponse
rg -B5 -A5 "result.statusCode\|statusCode:" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | head -80Repository: openshift/microshift Length of output: 46 🏁 Script executed: # Get the full transformResponse implementation
rg -A100 "^func \(r \*Request\) transformResponse\(ctx context.Context" ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go/rest/request.go | head -150Repository: openshift/microshift Length of output: 3722 HTTP error responses should be distinguished from transport errors. When the server returns HTTP 5xx status codes, Modify the condition to skip only when there's no HTTP status code (transport/timeout error): Proposed fix- err := restClient.Get().
+ result := restClient.Get().
AbsPath("/readyz/poststarthook/rbac/bootstrap-roles").
Do(probeCtx).
- StatusCode(&status).
- Error()
+ StatusCode(&status)
+ err := result.Error()
cancel()
// If hook is ready, no deadlock
if err == nil && status == 200 {
klog.V(4).Info("RBAC bootstrap hook completed successfully")
return
}
- // If RBAC probe errored, skip this iteration (don't count toward deadlock)
- if err != nil {
+ // If no HTTP status was received, skip this iteration (transport/timeout).
+ if err != nil && status == 0 {
klog.V(4).Infof("RBAC probe error (not counting toward deadlock): %v", err)
continue
}

🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| // Hook not ready (status != 200) - check if etcd is healthy | ||
| etcdHealthy, etcdErr := isEtcdHealthy(ctx) | ||
| if etcdErr != nil { | ||
| klog.V(4).Infof("Could not check etcd health (not counting toward deadlock): %v", etcdErr) | ||
| continue | ||
| } | ||
|
|
||
| if etcdHealthy { | ||
| // Only increment when BOTH conditions are met: | ||
| // RBAC probe returned not-ready AND etcd is healthy | ||
| checkCount++ | ||
| klog.Warningf("RBAC bootstrap hook not ready (check %d/%d, elapsed %v), but etcd is healthy - potential deadlock", | ||
| checkCount, maxChecks, time.Since(startTime).Round(time.Second)) | ||
| } else { | ||
| // etcd not healthy - not a deadlock, just waiting for etcd | ||
| klog.V(4).Infof("RBAC hook waiting, etcd not yet healthy (check %d/%d)", checkCount, maxChecks) | ||
| // Reset counter since this isn't a deadlock condition | ||
| // Note: wall-clock deadline (startTime) is NOT reset - flapping cannot extend indefinitely | ||
| checkCount = 0 | ||
| } | ||
|
copejon marked this conversation as resolved.
|
||
| } | ||
|
|
||
| // Only reached when checkCount >= maxChecks (deadlock confirmed) | ||
| klog.Errorf("RBAC bootstrap hook deadlock confirmed after %v: etcd healthy but hook not completing", | ||
| time.Since(startTime).Round(time.Second)) | ||
| close(deadlockDetected) | ||
| } | ||
|
|
||
| // isEtcdHealthy checks if etcd is responsive by attempting to connect and get status. | ||
| func isEtcdHealthy(ctx context.Context) (bool, error) { | ||
| certsDir := cryptomaterial.CertsDirectory(config.DataDir) | ||
| etcdAPIServerClientCertDir := cryptomaterial.EtcdAPIServerClientCertDir(certsDir) | ||
|
|
||
| tlsInfo := transport.TLSInfo{ | ||
| CertFile: cryptomaterial.ClientCertPath(etcdAPIServerClientCertDir), | ||
| KeyFile: cryptomaterial.ClientKeyPath(etcdAPIServerClientCertDir), | ||
| TrustedCAFile: cryptomaterial.CACertPath(cryptomaterial.EtcdSignerDir(certsDir)), | ||
| } | ||
| tlsConfig, err := tlsInfo.ClientConfig() | ||
| if err != nil { | ||
| return false, fmt.Errorf("failed to create TLS config: %w", err) | ||
| } | ||
|
|
||
| // Use a short timeout for health check | ||
| checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second) | ||
| defer cancel() | ||
|
|
||
| client, err := clientv3.New(clientv3.Config{ | ||
| Endpoints: []string{"https://localhost:2379"}, | ||
| DialTimeout: 1 * time.Second, | ||
| TLS: tlsConfig, | ||
| Context: checkCtx, | ||
| }) | ||
| if err != nil { | ||
| return false, fmt.Errorf("failed to create etcd client: %w", err) | ||
| } | ||
| defer func() { _ = client.Close() }() | ||
|
|
||
| _, err = client.Status(checkCtx, "localhost:2379") | ||
| if err != nil { | ||
| return false, nil // etcd not healthy, but not an error condition | ||
| } | ||
|
|
||
| return true, nil | ||
| } | ||
|
|
||
| // restartMicroshiftEtcdScope restarts the microshift-etcd.scope to recover from deadlock. | ||
| // This forces a clean restart of etcd which can help break the circular dependency. | ||
| func restartMicroshiftEtcdScope() error { | ||
| klog.Info("Stopping microshift-etcd.scope for recovery") | ||
|
|
||
| // Set a timeout in case systemd or DBus stalls and the fail-fast recovery path hangs and Run never returns | ||
| cmdCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) | ||
| defer cancel() | ||
|
|
||
| stopCmd := exec.CommandContext(cmdCtx, "systemctl", "stop", "microshift-etcd.scope") | ||
| if out, err := stopCmd.CombinedOutput(); err != nil { | ||
| return fmt.Errorf("failed to stop microshift-etcd.scope: %w, output: %s", err, string(out)) | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| } | ||
|
|
||
| // Wait briefly for cleanup | ||
| time.Sleep(1 * time.Second) | ||
|
|
||
| klog.Info("microshift-etcd.scope stopped - MicroShift will restart") | ||
| return nil | ||
| } | ||
|
|
||
| func discoverEtcdServers(ctx context.Context, kubeconfigPath string) ([]string, error) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: openshift/microshift
Length of output: 1035
Fix type mismatch in RBAC timing constants.
Lines 62 and 65 define integer values but are used in Duration arithmetic at line 469, producing
`maxChecks = -2,499,999,992`. This causes the deadlock check to trigger immediately, breaking RBAC bootstrap detection.

Add `* time.Second` to lines 62 and 65:

    rbacHookDeadlockTimeout = 15 * time.Second
    rbacHookCheckInterval   = 2 * time.Second

Then update line 446 to use the `%s` format verb instead of `%d seconds`, and remove the `* time.Second` conversion from line 493, since `rbacHookCheckInterval` becomes a `time.Duration`.

🤖 Prompt for AI Agents