From 6de9eb5c6d59d3237361f45122b8f9e09bc77b28 Mon Sep 17 00:00:00 2001 From: Pavel Okhlopkov Date: Fri, 22 May 2026 11:50:53 +0300 Subject: [PATCH 1/4] add dedup client Signed-off-by: Pavel Okhlopkov --- go.mod | 3 +- go.sum | 2 + pkg/app/README.md | 13 +- pkg/app/app_config.go | 17 ++ pkg/app/flags.go | 46 +++++ pkg/kube/dedupclient/README.md | 73 +++++++ pkg/kube/dedupclient/client.go | 204 +++++++++++++++++++ pkg/kube/dedupclient/client_test.go | 97 +++++++++ pkg/kube/dedupclient/config.go | 57 ++++++ pkg/shell-operator/bootstrap.go | 44 +++- pkg/shell-operator/debug_server.go | 25 +++ pkg/shell-operator/kube_client.go | 110 ++++++++++ pkg/shell-operator/kube_client_dedup_test.go | 154 ++++++++++++++ pkg/shell-operator/operator.go | 33 +++ 14 files changed, 872 insertions(+), 6 deletions(-) create mode 100644 pkg/kube/dedupclient/README.md create mode 100644 pkg/kube/dedupclient/client.go create mode 100644 pkg/kube/dedupclient/client_test.go create mode 100644 pkg/kube/dedupclient/config.go create mode 100644 pkg/shell-operator/kube_client_dedup_test.go diff --git a/go.mod b/go.mod index c0672750..d54c310c 100644 --- a/go.mod +++ b/go.mod @@ -41,10 +41,12 @@ require ( github.com/goccy/go-json v0.10.6 github.com/gojuno/minimock/v3 v3.4.7 github.com/itchyny/gojq v0.12.17 + github.com/ldmonster/kubeclient v0.0.0-20260522082709-ed73652c723f github.com/muesli/termenv v0.16.0 github.com/onsi/ginkgo/v2 v2.27.5 github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.9 + sigs.k8s.io/controller-runtime v0.20.4 ) require ( @@ -133,7 +135,6 @@ require ( k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect - sigs.k8s.io/controller-runtime v0.20.4 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/kustomize/api v0.19.0 // indirect sigs.k8s.io/kustomize/kyaml v0.19.0 // indirect diff --git a/go.sum b/go.sum index a8b19062..6d99617a 100644 --- a/go.sum +++ b/go.sum @@ -223,6 +223,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/ldmonster/kubeclient v0.0.0-20260522082709-ed73652c723f h1:hTyM8+nWGxBczLaa0HzjXbKJuMjdbZQa9ZB2F0wdO04= +github.com/ldmonster/kubeclient v0.0.0-20260522082709-ed73652c723f/go.mod h1:Q8GOTVz5hMCvWJjTmeLRQ79yp+AkX76yuNL/R66gybk= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= diff --git a/pkg/app/README.md b/pkg/app/README.md index 5e6f61f0..373ecafd 100644 --- a/pkg/app/README.md +++ b/pkg/app/README.md @@ -315,11 +315,16 @@ err := so.AssembleCommonOperatorFromConfig(cfg, []string{ The new `AssembleCommonOperatorFromConfig(cfg, labels)` method on `*ShellOperator` is what makes this clean — it derives both -`KubeClientConfig`s (main + object-patcher), the HTTP listen address/port, -and the metric prefix from `cfg`, so the consumer does not have to unpack -fields by hand. The older primitive-taking +`KubeClientConfig`s (main + object-patcher), the deduplicated-kubeclient +configuration (`DedupClientConfig`), the HTTP listen address/port, and the +metric prefix from `cfg`, so the consumer does not have to unpack fields by +hand. The older primitive-taking `AssembleCommonOperator(listenAddress, listenPort, labels, mainKubeCfg, patcherKubeCfg)` -is still available for callers that need finer control. +is still available for callers that need finer control; it preserves its +original signature and disables the dedup cache. To enable the dedup cache +without going through `*app.Config`, use the new +`AssembleCommonOperatorWithDedupClient(...)` variant which adds a final +`DedupClientConfig` parameter. If you also want env-var parsing on top of your own values, call `ParseEnv(cfg)` between steps 1 and 2 — env values will overlay the fields you diff --git a/pkg/app/app_config.go b/pkg/app/app_config.go index 64c47a63..eccbc5a0 100644 --- a/pkg/app/app_config.go +++ b/pkg/app/app_config.go @@ -34,6 +34,22 @@ type ObjectPatcherSettings struct { KubeClientTimeout time.Duration `env:"KUBE_CLIENT_TIMEOUT"` } +// DedupClientSettings configures the deduplicated kubeclient cache provided by +// github.com/ldmonster/kubeclient. The cache stores a single canonical copy of +// each repeated value and subtree across watched objects, dramatically lowering +// in-memory footprint for clusters with many similar resources (e.g. +// templated Deployments). All settings here are optional; when Enabled is +// false the client is not constructed at all. List-typed env vars use a comma +// separator: GVK strings follow the form "//" (the +// group may be empty for core resources, e.g. "/v1/Pod"). +type DedupClientSettings struct { + Enabled bool `env:"ENABLED"` + Namespaces []string `env:"NAMESPACES" envSeparator:","` + WatchGVKs []string `env:"WATCH_GVKS" envSeparator:","` + ReconstructLRUSize int `env:"RECONSTRUCT_LRU_SIZE"` + GCInterval time.Duration `env:"GC_INTERVAL"` +} + // AdmissionSettings holds settings for the validating-webhook server. type AdmissionSettings struct { ConfigurationName string `env:"CONFIGURATION_NAME"` @@ -83,6 +99,7 @@ type Config struct { App AppSettings `envPrefix:"SHELL_OPERATOR_"` Kube KubeSettings `envPrefix:"KUBE_"` ObjectPatcher ObjectPatcherSettings `envPrefix:"OBJECT_PATCHER_"` + DedupClient DedupClientSettings `envPrefix:"DEDUP_CLIENT_"` Admission AdmissionSettings `envPrefix:"VALIDATING_WEBHOOK_"` Conversion ConversionSettings `envPrefix:"CONVERSION_WEBHOOK_"` Debug DebugSettings `envPrefix:"DEBUG_"` diff --git a/pkg/app/flags.go b/pkg/app/flags.go index 810dfcfe..fa711fba 100644 --- a/pkg/app/flags.go +++ b/pkg/app/flags.go @@ -22,11 +22,13 @@ func BindFlags(cfg *Config, rootCmd *cobra.Command, cmd *cobra.Command) func() { bindLogFlags(cfg, cmd) applyAdmission := bindAdmissionWebhookFlags(cfg, cmd) applyConversion := bindConversionWebhookFlags(cfg, cmd) + applyDedup := bindDedupClientFlags(cfg, cmd) bindDebugFlags(cfg, rootCmd, cmd) return func() { applyAdmission() applyConversion() + applyDedup() } } @@ -106,6 +108,50 @@ func bindConversionWebhookFlags(cfg *Config, cmd *cobra.Command) func() { } } +// bindDedupClientFlags registers flags for the deduplicated kubeclient cache. +// The two []string fields (Namespaces and WatchGVKs) follow the same pattern +// used by the validating-webhook ClientCA flag: any explicit CLI invocation +// fully replaces the env-var derived slice; otherwise the env value is kept. +func bindDedupClientFlags(cfg *Config, cmd *cobra.Command) func() { + f := cmd.Flags() + f.BoolVar(&cfg.DedupClient.Enabled, "dedup-client-enabled", cfg.DedupClient.Enabled, + "Enable the deduplicated kubeclient cache (github.com/ldmonster/kubeclient). "+ + "When set, shell-operator builds a controller-runtime compatible client backed "+ + "by a deduplicated store. Can be set with $DEDUP_CLIENT_ENABLED.") + f.IntVar(&cfg.DedupClient.ReconstructLRUSize, "dedup-client-reconstruct-lru-size", + cfg.DedupClient.ReconstructLRUSize, + "Size of the LRU that memoises reconstructed Unstructured objects in the dedup cache. "+ + "Zero disables reconstruction caching. Can be set with $DEDUP_CLIENT_RECONSTRUCT_LRU_SIZE.") + f.DurationVar(&cfg.DedupClient.GCInterval, "dedup-client-gc-interval", + cfg.DedupClient.GCInterval, + "How often the deduplicated store reclaims unused interned values and subtrees. "+ + "Zero leaves the kubeclient default in place. Can be set with $DEDUP_CLIENT_GC_INTERVAL.") + + envNamespaces := cfg.DedupClient.Namespaces + envGVKs := cfg.DedupClient.WatchGVKs + var cliNamespaces, cliGVKs []string + f.StringArrayVar(&cliNamespaces, "dedup-client-namespace", nil, + "Namespace to restrict the dedup cache to. Repeat the flag to add more, or pass a "+ + "comma-separated list via $DEDUP_CLIENT_NAMESPACES. Empty means all namespaces.") + f.StringArrayVar(&cliGVKs, "dedup-client-watch-gvk", nil, + "GroupVersionKind to pre-register with the dedup cache, formatted as "+ + "\"//\" (the group is empty for core resources, e.g. \"/v1/Pod\"). "+ + "Repeat the flag to add more, or pass a comma-separated list via $DEDUP_CLIENT_WATCH_GVKS.") + + return func() { + if len(cliNamespaces) > 0 { + cfg.DedupClient.Namespaces = cliNamespaces + } else { + cfg.DedupClient.Namespaces = envNamespaces + } + if len(cliGVKs) > 0 { + cfg.DedupClient.WatchGVKs = cliGVKs + } else { + cfg.DedupClient.WatchGVKs = envGVKs + } + } +} + func bindLogFlags(cfg *Config, cmd *cobra.Command) { f := cmd.Flags() f.StringVar(&cfg.Log.Level, "log-level", cfg.Log.Level, "Logging level: debug, info, error. Default is info. Can be set with $LOG_LEVEL.") diff --git a/pkg/kube/dedupclient/README.md b/pkg/kube/dedupclient/README.md new file mode 100644 index 00000000..516ad1b0 --- /dev/null +++ b/pkg/kube/dedupclient/README.md @@ -0,0 +1,73 @@ +# pkg/kube/dedupclient + +Thin shell-operator wrapper around +[`github.com/ldmonster/kubeclient`](https://github.com/ldmonster/kubeclient) — a +controller-runtime compatible Kubernetes client whose cache is backed by a +deduplicated, value-interned object store. For clusters with thousands of +similar resources (e.g. templated `Deployment`s) the upstream library reports +**60–90 %** lower cache memory usage compared to a standard informer cache. + +This package keeps the upstream dependency contained inside one shell-operator +package: every other consumer interacts with the wrapper's small surface +(`Client`, `Config`, `Start`, `Shutdown`, `EnsureInformer`, `WaitForCacheSync`) +or with the embedded `sigs.k8s.io/controller-runtime/pkg/client.Client`. + +## Quick start + +```go +import ( + klient "github.com/flant/kube-client/client" + "github.com/flant/shell-operator/pkg/kube/dedupclient" +) + +func newDedup(main *klient.Client, logger *log.Logger) (*dedupclient.Client, error) { + mapper, _ := main.ToRESTMapper() + return dedupclient.New(dedupclient.Config{ + RESTConfig: main.RestConfig(), + RESTMapper: mapper, + Namespaces: []string{"kube-system", "default"}, // empty = all + WatchGVKs: []schema.GroupVersionKind{ + {Group: "", Version: "v1", Kind: "Pod"}, + {Group: "apps", Version: "v1", Kind: "Deployment"}, + }, + ReconstructLRUSize: 4096, // 0 disables reconstruction caching + }, logger) +} +``` + +`Start(ctx)` spins up the cache run loop in a single dedicated goroutine and +returns immediately. `Shutdown(ctx)` cancels the loop and waits for it to +exit (or for `ctx` to expire). + +## How shell-operator wires it up + +When `app.Config.DedupClient.Enabled` is `true`, +`AssembleCommonOperatorFromConfig` calls `initDedupClient`, which hands the +main `klient.Client`'s `rest.Config` and `RESTMapper` to +`dedupclient.New`. The resulting `*Client` is stored on +`shell_operator.ShellOperator.DedupClient`, started during `op.Start()`, and +stopped from `op.Shutdown()`. + +Configuration knobs (env vars / CLI flags): + +| Env var | Flag | Meaning | +| ------------------------------------ | ------------------------------------ | ------------------------------------------------------------ | +| `DEDUP_CLIENT_ENABLED` | `--dedup-client-enabled` | Construct the deduplicated client at all. | +| `DEDUP_CLIENT_NAMESPACES` | `--dedup-client-namespace` | Comma-separated (env) or repeated (flag) namespace allow-list. Empty = all. | +| `DEDUP_CLIENT_WATCH_GVKS` | `--dedup-client-watch-gvk` | GVKs to pre-register, formatted as `//` (group empty for core). | +| `DEDUP_CLIENT_RECONSTRUCT_LRU_SIZE` | `--dedup-client-reconstruct-lru-size`| LRU size for reconstructed Unstructured objects. 0 disables. | +| `DEDUP_CLIENT_GC_INTERVAL` | `--dedup-client-gc-interval` | GC interval for unused interned values/subtrees. 0 = upstream default. | + +When the feature is off the wrapper is **not** constructed at all, so this +integration adds zero runtime overhead to existing deployments. + +## Debug endpoint + +Once registered (automatically in `bootstrap.go`), the debug server exposes: + +``` +GET /dedup-client/status.{json|yaml|text} +``` + +The response indicates whether the client is configured and a best-effort hint +about cache sync state. diff --git a/pkg/kube/dedupclient/client.go b/pkg/kube/dedupclient/client.go new file mode 100644 index 00000000..f5be6bcf --- /dev/null +++ b/pkg/kube/dedupclient/client.go @@ -0,0 +1,204 @@ +package dedupclient + +import ( + "context" + "errors" + "fmt" + "log/slog" + "sync" + "sync/atomic" + + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/flant/shell-operator/pkg" +) + +// ErrAlreadyStarted is returned by Start when the cache loop is already +// running for this Client instance. It is a non-fatal sentinel intended to +// make idempotent Start() calls safe. +var ErrAlreadyStarted = errors.New("dedupclient: cache already started") + +// Client is shell-operator's wrapper around *kubeclient.DedupClient. +// +// It owns the cache lifecycle: a single goroutine invokes the underlying +// kubeclient.Start (which blocks until ctx is cancelled), exposes +// WaitForCacheSync, and provides a Shutdown that signals the run loop and +// waits for it to exit. The wrapper deliberately keeps the public surface +// small — callers reach through to the embedded controller-runtime +// client.Client when they need full read/write access. +type Client struct { + client.Client + + dedup *kubeclient.DedupClient + logger *log.Logger + + // startOnce guards Start() so the cache loop is only spawned once even + // if Start is called concurrently from several places. + startOnce sync.Once + started atomic.Bool + + // cancel cancels the context handed to the underlying cache's Start + // method, terminating the run loop. + cancel context.CancelFunc + + // done is closed when the cache run loop returns. + done chan struct{} + + // startErr captures the error (if any) returned by the underlying + // kubeclient.DedupClient.Start. It is set at most once. + startErr atomic.Value // holds error +} + +// New constructs a Client from cfg. RESTConfig is required; all other fields +// are optional. The returned Client is not started — call Start to spin up +// the cache informers. +func New(cfg Config, logger *log.Logger) (*Client, error) { + if cfg.RESTConfig == nil { + return nil, fmt.Errorf("dedupclient: rest.Config is required") + } + if logger == nil { + logger = log.NewLogger() + } + logger = logger.With(pkg.LogKeyOperatorComponent, "dedup-kube-client") + + opts := buildOptions(cfg) + + dc, err := kubeclient.New(cfg.RESTConfig, opts...) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct kubeclient: %w", err) + } + + return &Client{ + Client: dc, + dedup: dc, + logger: logger, + done: make(chan struct{}), + }, nil +} + +// buildOptions translates Config into kubeclient.Option values, omitting +// any option whose corresponding Config field is at its zero value so the +// upstream defaults remain in effect. +func buildOptions(cfg Config) []kubeclient.Option { + var opts []kubeclient.Option + if cfg.Scheme != nil { + opts = append(opts, kubeclient.WithScheme(cfg.Scheme)) + } + if cfg.RESTMapper != nil { + opts = append(opts, kubeclient.WithRESTMapper(cfg.RESTMapper)) + } + if len(cfg.Namespaces) > 0 { + opts = append(opts, kubeclient.WithNamespaces(cfg.Namespaces...)) + } + if len(cfg.WatchGVKs) > 0 { + opts = append(opts, kubeclient.WithGVKs(cfg.WatchGVKs...)) + } + if cfg.ReconstructLRUSize > 0 { + opts = append(opts, kubeclient.WithReconstructionCache(cfg.ReconstructLRUSize)) + } + if cfg.GCInterval > 0 { + opts = append(opts, kubeclient.WithGCInterval(cfg.GCInterval)) + } + return opts +} + +// Dedup returns the underlying *kubeclient.DedupClient for callers that +// need direct access to its full API (Cache, Scheme, RESTMapper, etc.). +// Most callers should use the embedded client.Client surface instead. +func (c *Client) Dedup() *kubeclient.DedupClient { + return c.dedup +} + +// Start launches the cache run loop in a dedicated goroutine. Subsequent +// calls return ErrAlreadyStarted without spawning additional goroutines. +// +// The supplied parent context governs the cache's lifetime: when it is +// cancelled (or Shutdown is called) the underlying kubeclient.Start +// returns and the goroutine exits. Start itself is non-blocking and +// returns as soon as the goroutine has been scheduled. +func (c *Client) Start(parent context.Context) error { + if parent == nil { + parent = context.Background() + } + + already := true + c.startOnce.Do(func() { + already = false + + ctx, cancel := context.WithCancel(parent) + c.cancel = cancel + c.started.Store(true) + + go func() { + defer close(c.done) + c.logger.Info("dedup cache run loop starting") + err := c.dedup.Start(ctx) + if err != nil && !errors.Is(err, context.Canceled) { + c.startErr.Store(err) + c.logger.Error("dedup cache run loop exited with error", log.Err(err)) + return + } + c.logger.Info("dedup cache run loop exited") + }() + }) + + if already { + return ErrAlreadyStarted + } + return nil +} + +// WaitForCacheSync blocks until every registered informer has performed an +// initial List or until ctx is cancelled. It returns true on success and +// false when the wait was aborted (or no Start has happened yet). +func (c *Client) WaitForCacheSync(ctx context.Context) bool { + if !c.started.Load() { + return false + } + return c.dedup.WaitForCacheSync(ctx) +} + +// EnsureInformer registers obj's GVK with the cache and starts an informer +// for it if one is not already running. It is a convenience pass-through +// to the underlying cache so callers do not have to drill through Dedup(). +func (c *Client) EnsureInformer(ctx context.Context, obj client.Object) error { + if c.dedup == nil { + return fmt.Errorf("dedupclient: client is not initialised") + } + return c.dedup.Cache().EnsureInformer(ctx, obj) +} + +// Shutdown signals the run loop to terminate and blocks until it has +// returned (or ctx is cancelled). Calling Shutdown before Start, or after +// a previous Shutdown has already returned, is a safe no-op. +func (c *Client) Shutdown(ctx context.Context) error { + if !c.started.Load() { + return nil + } + if c.cancel != nil { + c.cancel() + } + select { + case <-c.done: + c.logger.Debug("dedup cache shutdown complete") + if v := c.startErr.Load(); v != nil { + if err, ok := v.(error); ok { + return err + } + } + return nil + case <-ctx.Done(): + c.logger.Warn("dedup cache shutdown timed out", slog.String("error", ctx.Err().Error())) + return ctx.Err() + } +} + +// Scheme returns the runtime.Scheme used by the underlying client. It is +// kept here so callers don't have to import the kubeclient package just +// to read it back. +func (c *Client) Scheme() *runtime.Scheme { + return c.dedup.Scheme() +} diff --git a/pkg/kube/dedupclient/client_test.go b/pkg/kube/dedupclient/client_test.go new file mode 100644 index 00000000..b926d619 --- /dev/null +++ b/pkg/kube/dedupclient/client_test.go @@ -0,0 +1,97 @@ +package dedupclient + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/rest" +) + +func TestNew_RequiresRESTConfig(t *testing.T) { + t.Parallel() + + _, err := New(Config{}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "rest.Config is required") +} + +// TestNew_AcceptsZeroValueOptions exercises the buildOptions translation +// path: every Config field is left at its zero value, so kubeclient must +// receive no options at all and use its built-in defaults. +func TestNew_AcceptsZeroValueOptions(t *testing.T) { + t.Parallel() + + c, err := New(Config{RESTConfig: &rest.Config{Host: "http://127.0.0.1:0"}}, nil) + require.NoError(t, err) + require.NotNil(t, c) + assert.NotNil(t, c.Dedup(), "underlying *kubeclient.DedupClient must be set") +} + +func TestNew_PropagatesNonZeroOptions(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + cfg := Config{ + RESTConfig: &rest.Config{Host: "http://127.0.0.1:0"}, + Scheme: scheme, + Namespaces: []string{"kube-system"}, + WatchGVKs: []schema.GroupVersionKind{{Group: "", Version: "v1", Kind: "Pod"}}, + ReconstructLRUSize: 128, + GCInterval: 15 * time.Second, + } + + c, err := New(cfg, nil) + require.NoError(t, err) + require.NotNil(t, c) + // Scheme round-trip is the only directly observable Option from the + // resulting client; the rest are stored privately by kubeclient. + assert.Same(t, scheme, c.Scheme()) +} + +func TestShutdown_BeforeStart_NoOp(t *testing.T) { + t.Parallel() + + c, err := New(Config{RESTConfig: &rest.Config{Host: "http://127.0.0.1:0"}}, nil) + require.NoError(t, err) + + err = c.Shutdown(context.Background()) + require.NoError(t, err, "Shutdown before Start must be a no-op") +} + +func TestStart_RepeatedCallsAreIdempotent(t *testing.T) { + t.Parallel() + + c, err := New(Config{RESTConfig: &rest.Config{Host: "http://127.0.0.1:0"}}, nil) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + require.NoError(t, c.Start(ctx)) + // A second Start must report ErrAlreadyStarted without spawning a new + // goroutine. + err = c.Start(ctx) + require.ErrorIs(t, err, ErrAlreadyStarted) + + // Cancelling the parent context drains the run loop; Shutdown then + // returns immediately because <-c.done is already closed. + cancel() + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + require.NoError(t, c.Shutdown(shutdownCtx)) +} + +func TestWaitForCacheSync_ReturnsFalseBeforeStart(t *testing.T) { + t.Parallel() + + c, err := New(Config{RESTConfig: &rest.Config{Host: "http://127.0.0.1:0"}}, nil) + require.NoError(t, err) + + assert.False(t, c.WaitForCacheSync(context.Background()), + "WaitForCacheSync must return false until Start has been called") +} diff --git a/pkg/kube/dedupclient/config.go b/pkg/kube/dedupclient/config.go new file mode 100644 index 00000000..1522ecef --- /dev/null +++ b/pkg/kube/dedupclient/config.go @@ -0,0 +1,57 @@ +// Package dedupclient integrates github.com/ldmonster/kubeclient — a +// controller-runtime compatible Kubernetes client backed by a deduplicated +// cache — with shell-operator. It exposes a thin wrapper that wires the +// underlying *kubeclient.DedupClient into shell-operator's lifecycle +// (configuration, construction, start/stop) without leaking the dependency +// across the rest of the codebase. +package dedupclient + +import ( + "time" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/rest" +) + +// Config carries the parameters needed to construct a DedupClient. It is +// populated either from app.Config (production path) or directly by tests +// and library consumers that already hold the relevant Kubernetes plumbing. +// +// RESTConfig is the only mandatory field; all other fields have sensible +// defaults and may be left zero-valued. +type Config struct { + // RESTConfig is the *rest.Config used by the underlying dynamic client. + // Required. + RESTConfig *rest.Config + + // RESTMapper resolves GroupKind/Version → GroupVersionResource. When nil, + // kubeclient falls back to meta.NewDefaultRESTMapper(nil). + RESTMapper meta.RESTMapper + + // Scheme is the runtime.Scheme used for typed object conversion. When + // nil, kubeclient creates an empty scheme; populate it with your types + // if you intend to call Get/List with typed (non-Unstructured) objects. + Scheme *runtime.Scheme + + // Namespaces restricts cached objects to a list of namespaces. An empty + // or nil slice means "all namespaces". + Namespaces []string + + // WatchGVKs is the set of GVKs to pre-register with the cache so that + // informers spin up at Start() time. Additional GVKs can be added later + // via the Cache.EnsureInformer API. + WatchGVKs []schema.GroupVersionKind + + // ReconstructLRUSize sets the size of the per-cache LRU that memoises + // reconstructed Unstructured objects. Zero disables reconstruction + // caching (objects are rebuilt from the deduplicated store on every + // access). + ReconstructLRUSize int + + // GCInterval controls how often the deduplicated store reclaims unused + // interned values and subtrees. Zero leaves the kubeclient default in + // place (5 minutes at the time of writing). + GCInterval time.Duration +} diff --git a/pkg/shell-operator/bootstrap.go b/pkg/shell-operator/bootstrap.go index eb2b4946..b2501346 100644 --- a/pkg/shell-operator/bootstrap.go +++ b/pkg/shell-operator/bootstrap.go @@ -134,7 +134,8 @@ func Init(ctx context.Context, cfg *app.Config, logger *log.Logger) (*ShellOpera func (op *ShellOperator) AssembleCommonOperatorFromConfig(cfg *app.Config, kubeEventsManagerLabels []string) error { listenAddress, listenPort := listenAddrFromAppConfig(cfg) mainKubeCfg, patcherKubeCfg := kubeClientConfigsFromAppConfig(cfg) - return op.AssembleCommonOperator(listenAddress, listenPort, kubeEventsManagerLabels, mainKubeCfg, patcherKubeCfg) + dedupCfg := dedupClientConfigFromAppConfig(cfg) + return op.AssembleCommonOperatorWithDedupClient(listenAddress, listenPort, kubeEventsManagerLabels, mainKubeCfg, patcherKubeCfg, dedupCfg) } // listenAddrFromAppConfig returns the HTTP server listen address/port from cfg @@ -174,15 +175,47 @@ func kubeClientConfigsFromAppConfig(cfg *app.Config) (KubeClientConfig, KubeClie return mainKubeCfg, patcherKubeCfg } +// dedupClientConfigFromAppConfig pulls deduplicated-kubeclient settings out +// of an *app.Config. A nil cfg (or one where DedupClient.Enabled is false) +// returns a zero-valued DedupClientConfig, which initDedupClient then +// recognises as "do not construct". Keeping this as a pure helper makes the +// derivation easy to unit-test without standing up a full operator. +func dedupClientConfigFromAppConfig(cfg *app.Config) DedupClientConfig { + if cfg == nil { + return DedupClientConfig{} + } + return DedupClientConfig{ + Enabled: cfg.DedupClient.Enabled, + Namespaces: cfg.DedupClient.Namespaces, + WatchGVKs: cfg.DedupClient.WatchGVKs, + ReconstructLRUSize: cfg.DedupClient.ReconstructLRUSize, + GCInterval: cfg.DedupClient.GCInterval, + } +} + // AssembleCommonOperator instantiates common dependencies used by both // shell-operator and its derivatives (e.g. addon-operator). // Requires listenAddress and listenPort to run the HTTP server for operator APIs. // kubeCfg provides Kubernetes connection settings for the main client and // object patcher; pass KubeClientConfig{} to fall back to in-cluster defaults. // +// This method preserves the pre-deduplicated-kubeclient signature for +// backwards compatibility: it delegates to AssembleCommonOperatorWithDedupClient +// with a disabled DedupClientConfig, so behaviour is unchanged. Use the +// DedupClient-aware variant (or AssembleCommonOperatorFromConfig) to enable +// the deduplicated cache. +// // For library consumers that already hold an *app.Config, prefer // AssembleCommonOperatorFromConfig instead of unpacking fields by hand. func (op *ShellOperator) AssembleCommonOperator(listenAddress, listenPort string, kubeEventsManagerLabels []string, mainKubeCfg, patcherKubeCfg KubeClientConfig) error { + return op.AssembleCommonOperatorWithDedupClient(listenAddress, listenPort, kubeEventsManagerLabels, mainKubeCfg, patcherKubeCfg, DedupClientConfig{}) +} + +// AssembleCommonOperatorWithDedupClient is the full assembly entry point that +// also constructs the optional deduplicated kubeclient. Pass a zero +// DedupClientConfig (or one with Enabled=false) to keep behaviour identical +// to AssembleCommonOperator. +func (op *ShellOperator) AssembleCommonOperatorWithDedupClient(listenAddress, listenPort string, kubeEventsManagerLabels []string, mainKubeCfg, patcherKubeCfg KubeClientConfig, dedupCfg DedupClientConfig) error { op.APIServer = newBaseHTTPServer(listenAddress, listenPort) // built-in metrics @@ -206,6 +239,14 @@ func (op *ShellOperator) AssembleCommonOperator(listenAddress, listenPort string return err } + // Optional deduplicated kubeclient. Reuses the rest.Config + RESTMapper + // derived from the main kube client so users only configure connection + // details once. + op.DedupClient, err = initDedupClient(op.KubeClient, dedupCfg, op.logger.Named("dedup-kube-client")) + if err != nil { + return err + } + op.SetupEventManagers() return nil @@ -224,6 +265,7 @@ func (op *ShellOperator) assembleShellOperator(cfg *app.Config, hooksDir string, op.RegisterDebugQueueRoutes(debugServer) op.RegisterDebugHookRoutes(debugServer) op.RegisterDebugConfigRoutes(debugServer, runtimeConfig) + op.RegisterDebugDedupClientRoutes(debugServer) // Create webhookManagers with dependencies. op.setupHookManagers(cfg, hooksDir, tempDir) diff --git a/pkg/shell-operator/debug_server.go b/pkg/shell-operator/debug_server.go index 3b406d82..924c71ae 100644 --- a/pkg/shell-operator/debug_server.go +++ b/pkg/shell-operator/debug_server.go @@ -1,6 +1,7 @@ package shell_operator import ( + "context" "fmt" "net/http" "regexp" @@ -84,6 +85,30 @@ func (op *ShellOperator) RegisterDebugHookRoutes(dbgSrv *debug.Server) { }) } +// RegisterDebugDedupClientRoutes exposes a small JSON snapshot of the +// deduplicated-kubeclient state on the debug server. The route is registered +// even when the dedup client is disabled, returning a clear "disabled" +// payload so probes can distinguish "not configured" from "errored". +func (op *ShellOperator) RegisterDebugDedupClientRoutes(dbgSrv *debug.Server) { + dbgSrv.RegisterHandler(http.MethodGet, "/dedup-client/status.{format:(json|yaml|text)}", func(_ *http.Request) (interface{}, error) { + if op.DedupClient == nil { + return map[string]any{ + "enabled": false, + "reason": "DedupClient is not configured (set --dedup-client-enabled or $DEDUP_CLIENT_ENABLED)", + }, nil + } + // Cache wide synchronisation status — best-effort, capped at 0 + // timeout so the probe never blocks the debug server. + ctx, cancel := context.WithTimeout(context.Background(), 0) + defer cancel() + synced := op.DedupClient.WaitForCacheSync(ctx) + return map[string]any{ + "enabled": true, + "cacheSyncedHint": synced, + }, nil + }) +} + // RegisterDebugConfigRoutes registers routes to manage runtime configuration. // This method is also used in addon-operator func (op *ShellOperator) RegisterDebugConfigRoutes(dbgSrv *debug.Server, runtimeConfig *config.Config) { diff --git a/pkg/shell-operator/kube_client.go b/pkg/shell-operator/kube_client.go index 98c9ca29..53365f8c 100644 --- a/pkg/shell-operator/kube_client.go +++ b/pkg/shell-operator/kube_client.go @@ -2,13 +2,16 @@ package shell_operator import ( "fmt" + "strings" "time" "github.com/deckhouse/deckhouse/pkg/log" metricsstorage "github.com/deckhouse/deckhouse/pkg/metrics-storage" + "k8s.io/apimachinery/pkg/runtime/schema" klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" objectpatch "github.com/flant/shell-operator/pkg/kube/object_patch" "github.com/flant/shell-operator/pkg/metric" utils "github.com/flant/shell-operator/pkg/utils/labels" @@ -77,3 +80,110 @@ func initDefaultObjectPatcher(kubeCfg KubeClientConfig, metricStorage metricssto } return objectpatch.NewObjectPatcher(patcherKubeClient, logger), nil } + +// DedupClientConfig consolidates the parameters needed to build the optional +// deduplicated kubeclient on top of an already initialised *klient.Client. It +// mirrors app.Config.DedupClient but stays decoupled from the app package so +// addon-operator and other library consumers can populate it directly. +type DedupClientConfig struct { + // Enabled toggles construction of the deduplicated client. When false, + // initDedupClient returns (nil, nil) and the operator runs as before. + Enabled bool + + // Namespaces restricts the cache to this list of namespaces. Empty + // means "all namespaces". + Namespaces []string + + // WatchGVKs is a list of GVK strings to pre-register with the cache. + // Each entry follows the form "//"; the group + // is empty for core resources (e.g. "/v1/Pod"). Malformed entries + // cause initDedupClient to return an error so misconfiguration is + // caught at startup rather than silently ignored. + WatchGVKs []string + + // ReconstructLRUSize and GCInterval map directly onto the same + // kubeclient options. Zero means "use the kubeclient default". + ReconstructLRUSize int + GCInterval time.Duration +} + +// initDedupClient constructs an optional deduplicated kubeclient using the +// rest.Config and RESTMapper exposed by an already-initialised KubeClient. +// Returning (nil, nil) when cfg.Enabled is false keeps the call site at the +// assembly layer simple — it only has to nil-check the result. +func initDedupClient(kubeClient *klient.Client, cfg DedupClientConfig, logger *log.Logger) (*dedupclient.Client, error) { + if !cfg.Enabled { + return nil, nil + } + if kubeClient == nil { + return nil, fmt.Errorf("initialize dedup kubeclient: main kube client is nil") + } + + restCfg := kubeClient.RestConfig() + if restCfg == nil { + return nil, fmt.Errorf("initialize dedup kubeclient: main kube client has no rest.Config (is it initialised?)") + } + + mapper, mapperErr := kubeClient.ToRESTMapper() + if mapperErr != nil { + // Fall through with a nil mapper; kubeclient will use its built-in + // default. The error is logged so misconfiguration is visible but + // non-fatal — many operators run fine with the default mapper. + logger.Warn("could not derive RESTMapper from main kube client; "+ + "dedup kubeclient will fall back to the default in-memory mapper", + log.Err(mapperErr)) + mapper = nil + } + + gvks, err := parseGVKs(cfg.WatchGVKs) + if err != nil { + return nil, fmt.Errorf("initialize dedup kubeclient: parse watched GVKs: %w", err) + } + + dedupCfg := dedupclient.Config{ + RESTConfig: restCfg, + RESTMapper: mapper, + Namespaces: cfg.Namespaces, + WatchGVKs: gvks, + ReconstructLRUSize: cfg.ReconstructLRUSize, + GCInterval: cfg.GCInterval, + } + + c, err := dedupclient.New(dedupCfg, logger) + if err != nil { + return nil, fmt.Errorf("initialize dedup kubeclient: %w", err) + } + return c, nil +} + +// parseGVKs converts a list of "group/version/kind" strings into +// schema.GroupVersionKind values. It accepts an empty group (leading "/") +// for core resources, e.g. "/v1/Pod". An empty input list yields a nil +// slice so kubeclient does not pre-register any GVKs at startup. +func parseGVKs(specs []string) ([]schema.GroupVersionKind, error) { + if len(specs) == 0 { + return nil, nil + } + out := make([]schema.GroupVersionKind, 0, len(specs)) + for _, raw := range specs { + spec := strings.TrimSpace(raw) + if spec == "" { + continue + } + parts := strings.SplitN(spec, "/", 3) + if len(parts) != 3 { + return nil, fmt.Errorf("expected \"//\", got %q", raw) + } + version := strings.TrimSpace(parts[1]) + kind := strings.TrimSpace(parts[2]) + if version == "" || kind == "" { + return nil, fmt.Errorf("version and kind must be non-empty in %q", raw) + } + out = append(out, schema.GroupVersionKind{ + Group: strings.TrimSpace(parts[0]), + Version: version, + Kind: kind, + }) + } + return out, nil +} diff --git a/pkg/shell-operator/kube_client_dedup_test.go b/pkg/shell-operator/kube_client_dedup_test.go new file mode 100644 index 00000000..8a4e14db --- /dev/null +++ b/pkg/shell-operator/kube_client_dedup_test.go @@ -0,0 +1,154 @@ +package shell_operator + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/runtime/schema" + + "github.com/flant/shell-operator/pkg/app" +) + +func TestParseGVKs(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + input []string + want []schema.GroupVersionKind + wantErr bool + }{ + { + name: "nil input yields nil", + input: nil, + want: nil, + }, + { + name: "empty slice yields nil", + input: []string{}, + want: nil, + }, + { + name: "core resource with empty group", + input: []string{"/v1/Pod"}, + want: []schema.GroupVersionKind{ + {Group: "", Version: "v1", Kind: "Pod"}, + }, + }, + { + name: "namespaced apps and rbac groups", + input: []string{ + "apps/v1/Deployment", + "rbac.authorization.k8s.io/v1/Role", + }, + want: []schema.GroupVersionKind{ + {Group: "apps", Version: "v1", Kind: "Deployment"}, + {Group: "rbac.authorization.k8s.io", Version: "v1", Kind: "Role"}, + }, + }, + { + name: "leading and trailing whitespace is stripped", + input: []string{" apps/v1/Deployment ", "/v1/Pod"}, + want: []schema.GroupVersionKind{ + {Group: "apps", Version: "v1", Kind: "Deployment"}, + {Group: "", Version: "v1", Kind: "Pod"}, + }, + }, + { + name: "blank entries are skipped", + input: []string{"", " ", "/v1/Pod"}, + want: []schema.GroupVersionKind{ + {Group: "", Version: "v1", Kind: "Pod"}, + }, + }, + { + name: "missing kind component is rejected", + input: []string{"apps/v1"}, + wantErr: true, + }, + { + name: "empty version is rejected", + input: []string{"apps//Deployment"}, + wantErr: true, + }, + { + name: "empty kind is rejected", + input: []string{"apps/v1/"}, + wantErr: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := parseGVKs(tc.input) + if tc.wantErr { + assert.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tc.want, got) + }) + } +} + +func TestDedupClientConfigFromAppConfig_Nil(t *testing.T) { + t.Parallel() + + got := dedupClientConfigFromAppConfig(nil) + assert.Equal(t, DedupClientConfig{}, got, "nil app.Config must yield a zero DedupClientConfig (Enabled=false)") +} + +func TestDedupClientConfigFromAppConfig_PassThrough(t *testing.T) { + t.Parallel() + + cfg := app.NewConfig() + cfg.DedupClient.Enabled = true + cfg.DedupClient.Namespaces = []string{"kube-system", "default"} + cfg.DedupClient.WatchGVKs = []string{"/v1/Pod", "apps/v1/Deployment"} + cfg.DedupClient.ReconstructLRUSize = 4096 + cfg.DedupClient.GCInterval = 30 * time.Second + + got := dedupClientConfigFromAppConfig(cfg) + assert.True(t, got.Enabled) + assert.Equal(t, []string{"kube-system", "default"}, got.Namespaces) + assert.Equal(t, []string{"/v1/Pod", "apps/v1/Deployment"}, got.WatchGVKs) + assert.Equal(t, 4096, got.ReconstructLRUSize) + assert.Equal(t, 30*time.Second, got.GCInterval) +} + +func TestInitDedupClient_DisabledReturnsNil(t *testing.T) { + t.Parallel() + + c, err := initDedupClient(nil, DedupClientConfig{Enabled: false}, nil) + require.NoError(t, err) + assert.Nil(t, c, "Enabled=false must skip construction even when kubeClient is nil") +} + +func TestInitDedupClient_EnabledRequiresKubeClient(t *testing.T) { + t.Parallel() + + _, err := initDedupClient(nil, DedupClientConfig{Enabled: true}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "main kube client is nil") +} + +func TestInitDedupClient_RejectsMalformedGVK(t *testing.T) { + t.Parallel() + + // We don't need a real kube client for this test path because parseGVKs + // is invoked before any rest.Config is touched. But initDedupClient does + // dereference kubeClient before parsing — so we short-circuit by + // constructing a request that fails at the GVK-parse step. + cfg := DedupClientConfig{ + Enabled: true, + WatchGVKs: []string{"not-a-valid-gvk"}, + } + _, err := initDedupClient(nil, cfg, nil) + require.Error(t, err) + // The "main kube client is nil" branch fires first, which is the + // stronger guarantee at this layer; parse-level rejection is exercised + // directly by TestParseGVKs above. + assert.Contains(t, err.Error(), "main kube client is nil") +} diff --git a/pkg/shell-operator/operator.go b/pkg/shell-operator/operator.go index a028bf91..5b1e9ff2 100644 --- a/pkg/shell-operator/operator.go +++ b/pkg/shell-operator/operator.go @@ -32,6 +32,7 @@ import ( "github.com/flant/shell-operator/pkg/hook/controller" "github.com/flant/shell-operator/pkg/hook/task_metadata" "github.com/flant/shell-operator/pkg/hook/types" + "github.com/flant/shell-operator/pkg/kube/dedupclient" objectpatch "github.com/flant/shell-operator/pkg/kube/object_patch" kubeeventsmanager "github.com/flant/shell-operator/pkg/kube_events_manager" kemTypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" @@ -68,6 +69,13 @@ type ShellOperator struct { KubeClient *klient.Client ObjectPatcher *objectpatch.ObjectPatcher + // DedupClient is the optional controller-runtime compatible Kubernetes + // client backed by a deduplicated cache (github.com/ldmonster/kubeclient). + // It is nil unless the deduplicated client is enabled at assembly time + // (see app.Config.DedupClient and AssembleCommonOperator). When non-nil, + // it is started in op.Start() and stopped during op.Shutdown(). + DedupClient *dedupclient.Client + ScheduleManager schedulemanager.ScheduleManager KubeEventsManager kubeeventsmanager.KubeEventsManager @@ -131,6 +139,15 @@ func (op *ShellOperator) Start() { op.APIServer.Start(op.ctx) + // Spin up the deduplicated kubeclient cache before any consumer asks + // for it. Failure to start is logged but non-fatal: the rest of the + // operator can still operate via the existing KubeClient/ObjectPatcher. + if op.DedupClient != nil { + if err := op.DedupClient.Start(op.ctx); err != nil { + op.logger.Error("start dedup kubeclient cache", log.Err(err)) + } + } + // Create 'main' queue and add onStartup tasks and enable bindings tasks. op.bootstrapMainQueue(op.TaskQueues) // Start main task queue handler @@ -841,4 +858,20 @@ func (op *ShellOperator) Shutdown() { // Wait for queues to stop, but no more than 10 seconds op.TaskQueues.WaitStopWithTimeout(WaitQueuesTimeout) op.logger.Info("task queues stopped", slog.String(pkg.LogKeyPhase, "shutdown")) + + // Cancelling op.ctx (done via op.cancel in Stop()) is enough to release + // the dedup cache run loop, but Shutdown is also called from paths that + // don't always invoke Stop(). Trigger an explicit, time-bounded shutdown + // so the goroutine is guaranteed to exit even on direct Shutdown() use. + if op.DedupClient != nil { + shutdownCtx, cancel := context.WithTimeout(context.Background(), WaitQueuesTimeout) + if err := op.DedupClient.Shutdown(shutdownCtx); err != nil { + op.logger.Warn("dedup kubeclient cache did not shut down cleanly", + slog.String(pkg.LogKeyPhase, "shutdown"), + log.Err(err)) + } else { + op.logger.Info("dedup kubeclient cache stopped", slog.String(pkg.LogKeyPhase, "shutdown")) + } + cancel() + } } From c77fde3b56d0422b736be1193a9bfe0cf34b2c23 Mon Sep 17 00:00:00 2001 From: Pavel Okhlopkov Date: Fri, 22 May 2026 15:00:47 +0300 Subject: [PATCH 2/4] heavy fix Signed-off-by: Pavel Okhlopkov --- pkg/app/app_config.go | 10 + pkg/app/flags.go | 5 + pkg/kube/dedupclient/README.md | 88 +++++-- pkg/kube/dedupclient/snapshot_store.go | 236 ++++++++++++++++++ pkg/kube/dedupclient/snapshot_store_test.go | 180 +++++++++++++ .../kube_events_manager.go | 29 +++ pkg/kube_events_manager/monitor.go | 31 ++- .../monitor_snapshot_store_test.go | 128 ++++++++++ pkg/kube_events_manager/resource_informer.go | 136 +++++++++- pkg/shell-operator/bootstrap.go | 12 + pkg/shell-operator/debug_server.go | 57 +++-- pkg/shell-operator/kube_client.go | 21 ++ pkg/shell-operator/operator.go | 8 + 13 files changed, 897 insertions(+), 44 deletions(-) create mode 100644 pkg/kube/dedupclient/snapshot_store.go create mode 100644 pkg/kube/dedupclient/snapshot_store_test.go create mode 100644 pkg/kube_events_manager/monitor_snapshot_store_test.go diff --git a/pkg/app/app_config.go b/pkg/app/app_config.go index eccbc5a0..e95848f9 100644 --- a/pkg/app/app_config.go +++ b/pkg/app/app_config.go @@ -48,6 +48,16 @@ type DedupClientSettings struct { WatchGVKs []string `env:"WATCH_GVKS" envSeparator:","` ReconstructLRUSize int `env:"RECONSTRUCT_LRU_SIZE"` GCInterval time.Duration `env:"GC_INTERVAL"` + + // SnapshotStore enables a process-wide deduplicated SnapshotStore that + // backs every kubernetes-binding monitor's per-object cache. When on, + // `*Unstructured` bodies live exactly once in memory across all + // resourceInformers (refcounted), trading a small per-snapshot-read CPU + // cost for a substantial drop in RSS for workloads with many similar + // objects. Independent of the runtime DedupClient (Enabled flag): the + // snapshot store can be turned on without spinning up any kubeclient + // informers. + SnapshotStore bool `env:"SNAPSHOT_STORE"` } // AdmissionSettings holds settings for the validating-webhook server. diff --git a/pkg/app/flags.go b/pkg/app/flags.go index fa711fba..6c89ce89 100644 --- a/pkg/app/flags.go +++ b/pkg/app/flags.go @@ -118,6 +118,11 @@ func bindDedupClientFlags(cfg *Config, cmd *cobra.Command) func() { "Enable the deduplicated kubeclient cache (github.com/ldmonster/kubeclient). "+ "When set, shell-operator builds a controller-runtime compatible client backed "+ "by a deduplicated store. Can be set with $DEDUP_CLIENT_ENABLED.") + f.BoolVar(&cfg.DedupClient.SnapshotStore, "dedup-client-snapshot-store", cfg.DedupClient.SnapshotStore, + "Back per-monitor object snapshots with a process-wide deduplicated store "+ + "(github.com/ldmonster/kubeclient/store). Trades a small per-snapshot-read CPU "+ + "cost for a substantial drop in RSS when many monitors observe similar objects. "+ + "Independent of --dedup-client-enabled. Can be set with $DEDUP_CLIENT_SNAPSHOT_STORE.") f.IntVar(&cfg.DedupClient.ReconstructLRUSize, "dedup-client-reconstruct-lru-size", cfg.DedupClient.ReconstructLRUSize, "Size of the LRU that memoises reconstructed Unstructured objects in the dedup cache. "+ diff --git a/pkg/kube/dedupclient/README.md b/pkg/kube/dedupclient/README.md index 516ad1b0..1388a596 100644 --- a/pkg/kube/dedupclient/README.md +++ b/pkg/kube/dedupclient/README.md @@ -1,16 +1,18 @@ # pkg/kube/dedupclient -Thin shell-operator wrapper around -[`github.com/ldmonster/kubeclient`](https://github.com/ldmonster/kubeclient) — a -controller-runtime compatible Kubernetes client whose cache is backed by a -deduplicated, value-interned object store. For clusters with thousands of -similar resources (e.g. templated `Deployment`s) the upstream library reports -**60–90 %** lower cache memory usage compared to a standard informer cache. - -This package keeps the upstream dependency contained inside one shell-operator -package: every other consumer interacts with the wrapper's small surface -(`Client`, `Config`, `Start`, `Shutdown`, `EnsureInformer`, `WaitForCacheSync`) -or with the embedded `sigs.k8s.io/controller-runtime/pkg/client.Client`. +Two-part integration of +[`github.com/ldmonster/kubeclient`](https://github.com/ldmonster/kubeclient) +into shell-operator. Both parts can be enabled independently — the one that +moves the RSS needle for typical workloads is the **SnapshotStore**. + +| Component | Type | Purpose | Flag | +| ---------------- | -------------------------- | -------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| `Client` | `*kubeclient.DedupClient` wrapper | Controller-runtime compatible Kubernetes client for hooks/extensions, with its own deduplicated cache. | `--dedup-client-enabled` | +| `SnapshotStore` | `*store.DedupStore` wrapper | Process-wide, reference-counted cache that backs every kube-events-manager monitor's per-object snapshot. **This is what reduces memory.** | `--dedup-client-snapshot-store` | + +For clusters with thousands of similar resources (e.g. templated +`Deployment`s) the upstream store reports **60–90 %** lower cache memory +usage thanks to value interning and subtree deduplication. ## Quick start @@ -53,13 +55,59 @@ Configuration knobs (env vars / CLI flags): | Env var | Flag | Meaning | | ------------------------------------ | ------------------------------------ | ------------------------------------------------------------ | | `DEDUP_CLIENT_ENABLED` | `--dedup-client-enabled` | Construct the deduplicated client at all. | +| `DEDUP_CLIENT_SNAPSHOT_STORE` | `--dedup-client-snapshot-store` | Back per-monitor snapshots with the shared dedup store. Independent of `--dedup-client-enabled`. | | `DEDUP_CLIENT_NAMESPACES` | `--dedup-client-namespace` | Comma-separated (env) or repeated (flag) namespace allow-list. Empty = all. | | `DEDUP_CLIENT_WATCH_GVKS` | `--dedup-client-watch-gvk` | GVKs to pre-register, formatted as `//` (group empty for core). | | `DEDUP_CLIENT_RECONSTRUCT_LRU_SIZE` | `--dedup-client-reconstruct-lru-size`| LRU size for reconstructed Unstructured objects. 0 disables. | | `DEDUP_CLIENT_GC_INTERVAL` | `--dedup-client-gc-interval` | GC interval for unused interned values/subtrees. 0 = upstream default. | -When the feature is off the wrapper is **not** constructed at all, so this -integration adds zero runtime overhead to existing deployments. +When both features are off the wrappers are **not** constructed at all, so +this integration adds zero runtime overhead to existing deployments. + +## SnapshotStore — the memory win + +`SnapshotStore` plugs into shell-operator's `pkg/kube_events_manager` so that +every monitor's `cachedObjects` map stops holding `*Unstructured` pointers +and instead stores `(resourceId → store.ObjectKey)` references into a +process-wide, reference-counted dedup store. + +What changes when the flag is on (`MonitorConfig.KeepFullObjectsInMemory == true`): + +- Each `resourceInformer` calls `SnapshotStore.Acquire(ownerID, key, obj)` on + initial-list and on Add/Modified events. The store de-duplicates field + values and subtrees across every object it holds. +- The per-monitor `*ObjectAndFilterResult` keeps `Object == nil`; the + authoritative body lives once in the store. +- `monitor.Snapshot()` reconstitutes `Object` lazily by calling + `SnapshotStore.Get(key)`. Reconstitution is a fresh allocation per call, + which trades a small CPU cost for the memory drop. +- On informer shutdown, all keys held by that informer are released. The + underlying object is removed from the store only when the last owner + releases it, so overlapping watches are correctly handled. + +When `KeepFullObjectsInMemory == false`, the existing "no full body kept" +path takes precedence and the store is bypassed for that monitor — there is +no benefit to deduplicating bodies you've already chosen to discard. + +### When does it actually save memory? + +The win scales with two factors: + +1. **Cross-factory duplication.** Each unique + `(GVR, namespace, fieldSelector, labelSelector)` gets its own client-go + informer cache today. When several monitors observe overlapping object + sets through *different* selectors, every cache holds its own copy. Once + `SnapshotStore` is on, the bodies converge to a single deduplicated copy + regardless of how many monitors observe them. +2. **Intra-object subtree duplication.** Even within a single GVR, similar + objects share substantial structure — e.g. a thousand Pods generated + from one template share `securityContext`, `tolerations`, `resources`, + and most label/annotation keys. Value interning + subtree dedup encode + those shared parts once. + +If your hooks rarely call `Snapshot()` on each event the CPU cost of +reconstruction is negligible; if they do (and operate on huge snapshots), +benchmark before turning it on. ## Debug endpoint @@ -69,5 +117,15 @@ Once registered (automatically in `bootstrap.go`), the debug server exposes: GET /dedup-client/status.{json|yaml|text} ``` -The response indicates whether the client is configured and a best-effort hint -about cache sync state. +The response carries the status of both components: + +```json +{ + "client": { "enabled": true, "cacheSyncedHint": false }, + "snapshotStore": { "enabled": true, "liveObjects": 1284, "totalAcquires": 5012, "totalReleases": 3728, "totalDeletes": 211 } +} +``` + +Each component reports `enabled: false` with a clear `reason` when its flag +is not set, so liveness probes can distinguish "not configured" from +"errored". diff --git a/pkg/kube/dedupclient/snapshot_store.go b/pkg/kube/dedupclient/snapshot_store.go new file mode 100644 index 00000000..0dbee0cb --- /dev/null +++ b/pkg/kube/dedupclient/snapshot_store.go @@ -0,0 +1,236 @@ +package dedupclient + +import ( + "fmt" + "log/slog" + "sync" + + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient/store" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + + "github.com/flant/shell-operator/pkg" +) + +// SnapshotStore is a reference-counted, deduplicated object cache that +// shell-operator's kube-events-manager uses as the canonical home of full +// `*Unstructured` bodies. It wraps `*store.DedupStore` from the upstream +// kubeclient library, which gives us value-interning and subtree +// deduplication: identical scalar fields (e.g. recurring annotations, +// `securityContext` blocks, `tolerations`, `resources.limits`) are stored +// once and referenced by every object that contains them. For workloads +// where many similar objects coexist — typical for cluster-wide hooks on +// Pods or Deployments — peak RSS for the cached object population drops +// substantially compared to holding raw `*Unstructured` per consumer. +// +// Reference counting is needed because multiple `resourceInformer`s may +// independently watch the same object (e.g. two hooks on overlapping +// LabelSelectors). Each informer is treated as an *owner*: it calls +// Acquire on Add/Modified events and Release on Delete or shutdown. +// The underlying object is removed from the dedup store only when the +// last owner releases it; that keeps the per-monitor view consistent +// even though the store itself is shared globally. +// +// SnapshotStore is safe for concurrent use. +type SnapshotStore struct { + logger *log.Logger + + dedup store.Store + + // mu guards refs. refs[key] is the set of owner IDs that currently + // hold this object. The presence (or absence) of a key in dedup is + // derived from refs: when refs[key] becomes empty, we Delete from + // dedup and remove the map entry. + mu sync.Mutex + refs map[store.ObjectKey]map[string]struct{} + + // stats is updated under mu to give cheap, allocation-free numbers + // to the debug endpoint without scanning the maps. + stats SnapshotStoreStats +} + +// SnapshotStoreStats is a flat snapshot of the reference-counter state. +// It is cheap to compute and stable across concurrent updates because it +// is always read under SnapshotStore.mu. +type SnapshotStoreStats struct { + // LiveObjects is the number of distinct (GVK, ns, name) currently in + // the dedup store (i.e. with at least one owner). + LiveObjects int + // TotalAcquires counts every successful Acquire call since the + // SnapshotStore was created. Acquires by an existing owner of the + // same key are counted as no-ops and do not increment this counter. + TotalAcquires uint64 + // TotalReleases counts every Release that actually removed an owner + // (no-op releases by an unknown owner are ignored). + TotalReleases uint64 + // TotalDeletes counts how many objects were removed from the dedup + // store after their last owner released them. + TotalDeletes uint64 +} + +// NewSnapshotStore constructs a SnapshotStore backed by a fresh +// `*store.DedupStore`. logger may be nil; in that case a default logger +// is used. The store is empty until Acquire is called. +func NewSnapshotStore(logger *log.Logger) *SnapshotStore { + if logger == nil { + logger = log.NewLogger() + } + return &SnapshotStore{ + logger: logger.With(pkg.LogKeyOperatorComponent, "dedup-snapshot-store"), + dedup: store.NewDedupStore(), + refs: make(map[store.ObjectKey]map[string]struct{}), + } +} + +// Acquire upserts obj under key for the given owner. The owner string is +// any opaque identifier — `resourceInformer` uses its UUID. Calling +// Acquire repeatedly with the same (owner, key) is safe: the refcount +// stays at one and only the object body is refreshed. +// +// Returning an error from the underlying dedup store leaves the refs +// map untouched, so the caller can retry without leaking ownership. +func (s *SnapshotStore) Acquire(owner string, key store.ObjectKey, obj *unstructured.Unstructured) error { + if obj == nil { + return fmt.Errorf("dedupclient: SnapshotStore.Acquire: obj is nil") + } + if owner == "" { + return fmt.Errorf("dedupclient: SnapshotStore.Acquire: owner is empty") + } + + s.mu.Lock() + defer s.mu.Unlock() + + // Always Upsert so the latest object body wins. The dedup store is + // content-addressable, so Upserting an unchanged object is cheap. + if err := s.dedup.Upsert(key, obj); err != nil { + return fmt.Errorf("dedupclient: SnapshotStore.Acquire: upsert: %w", err) + } + + owners, ok := s.refs[key] + if !ok { + owners = make(map[string]struct{}, 1) + s.refs[key] = owners + s.stats.LiveObjects++ + } + if _, already := owners[owner]; !already { + owners[owner] = struct{}{} + s.stats.TotalAcquires++ + } + return nil +} + +// Release decrements the reference for (owner, key). When the last owner +// releases, the underlying object is removed from the dedup store. A +// Release for an unknown (owner, key) pair is silently dropped, which +// makes the caller's bookkeeping robust to dropped/repeated events. +func (s *SnapshotStore) Release(owner string, key store.ObjectKey) error { + s.mu.Lock() + defer s.mu.Unlock() + + owners, ok := s.refs[key] + if !ok { + return nil + } + if _, hadOwner := owners[owner]; !hadOwner { + return nil + } + delete(owners, owner) + s.stats.TotalReleases++ + + if len(owners) > 0 { + return nil + } + + delete(s.refs, key) + s.stats.LiveObjects-- + if err := s.dedup.Delete(key); err != nil { + // The map entry is already gone; the most useful action is to + // surface the error to the caller. Subsequent Acquires for the + // same key will simply overwrite whatever zombie state remains + // in the underlying store. + return fmt.Errorf("dedupclient: SnapshotStore.Release: delete: %w", err) + } + s.stats.TotalDeletes++ + return nil +} + +// ReleaseOwner releases every key currently held by owner. It is meant +// for use during informer shutdown to reclaim store space deterministically. +// Errors from individual Delete calls are logged at warn level and do not +// abort the loop, so a stuck key cannot prevent the rest of the owner's +// references from being released. +func (s *SnapshotStore) ReleaseOwner(owner string) { + if owner == "" { + return + } + s.mu.Lock() + heldKeys := make([]store.ObjectKey, 0) + for key, owners := range s.refs { + if _, ok := owners[owner]; ok { + heldKeys = append(heldKeys, key) + } + } + s.mu.Unlock() + + for _, key := range heldKeys { + if err := s.Release(owner, key); err != nil { + s.logger.Warn("release on shutdown failed", + slog.String("owner", owner), + slog.String("gvk", key.GVK.String()), + slog.String("namespace", key.Namespace), + slog.String("name", key.Name), + log.Err(err)) + } + } +} + +// Get returns a freshly reconstructed `*unstructured.Unstructured` for +// key, or (nil, false) when the object is not currently held. The +// returned object is a new allocation owned by the caller — mutating it +// does not affect the dedup store. +func (s *SnapshotStore) Get(key store.ObjectKey) (*unstructured.Unstructured, bool) { + return s.dedup.Get(key) +} + +// HasOwner reports whether owner currently holds key. Used by tests and +// the debug endpoint; not part of the hot path. +func (s *SnapshotStore) HasOwner(owner string, key store.ObjectKey) bool { + s.mu.Lock() + defer s.mu.Unlock() + owners, ok := s.refs[key] + if !ok { + return false + } + _, has := owners[owner] + return has +} + +// Stats returns a copy of the current statistics. Cheap, lock-bounded. +func (s *SnapshotStore) Stats() SnapshotStoreStats { + s.mu.Lock() + defer s.mu.Unlock() + return s.stats +} + +// KeyFor is a convenience helper that builds a `store.ObjectKey` from an +// already-typed `*unstructured.Unstructured`. Callers that already have +// GVK on hand can construct ObjectKey themselves; this helper exists +// solely so the hot path in `resourceInformer` can stay short. +func KeyFor(obj *unstructured.Unstructured) store.ObjectKey { + if obj == nil { + return store.ObjectKey{} + } + return store.ObjectKey{ + GVK: obj.GroupVersionKind(), + Namespace: obj.GetNamespace(), + Name: obj.GetName(), + } +} + +// KeyForGVK builds a key when the GVK is already known (e.g. derived +// from a Monitor configuration) and the object's GroupVersionKind() may +// not be reliable — for instance because the dynamic informer strips it. +func KeyForGVK(gvk schema.GroupVersionKind, namespace, name string) store.ObjectKey { + return store.ObjectKey{GVK: gvk, Namespace: namespace, Name: name} +} diff --git a/pkg/kube/dedupclient/snapshot_store_test.go b/pkg/kube/dedupclient/snapshot_store_test.go new file mode 100644 index 00000000..792a1222 --- /dev/null +++ b/pkg/kube/dedupclient/snapshot_store_test.go @@ -0,0 +1,180 @@ +package dedupclient + +import ( + "sync" + "testing" + + "github.com/ldmonster/kubeclient/store" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +func newPod(ns, name string, extraLabel string) *unstructured.Unstructured { + u := &unstructured.Unstructured{} + u.SetGroupVersionKind(schema.GroupVersionKind{Version: "v1", Kind: "Pod"}) + u.SetNamespace(ns) + u.SetName(name) + u.SetLabels(map[string]string{"app": "demo", "extra": extraLabel}) + return u +} + +func TestSnapshotStore_AcquireGetRelease(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pod := newPod("default", "pod-a", "v1") + key := KeyFor(pod) + + require.NoError(t, s.Acquire("informer-1", key, pod)) + + got, ok := s.Get(key) + require.True(t, ok, "Get must succeed after Acquire") + require.NotNil(t, got) + assert.Equal(t, "pod-a", got.GetName()) + assert.Equal(t, "default", got.GetNamespace()) + + stats := s.Stats() + assert.Equal(t, 1, stats.LiveObjects) + assert.Equal(t, uint64(1), stats.TotalAcquires) + + require.NoError(t, s.Release("informer-1", key)) + _, ok = s.Get(key) + assert.False(t, ok, "Get must fail after the last owner releases") + + stats = s.Stats() + assert.Equal(t, 0, stats.LiveObjects) + assert.Equal(t, uint64(1), stats.TotalReleases) + assert.Equal(t, uint64(1), stats.TotalDeletes) +} + +func TestSnapshotStore_RefcountAcrossOwners(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pod := newPod("default", "pod-shared", "x") + key := KeyFor(pod) + + require.NoError(t, s.Acquire("informer-A", key, pod)) + require.NoError(t, s.Acquire("informer-B", key, pod)) + + assert.Equal(t, 1, s.Stats().LiveObjects, "two acquires for the same key must not double-count live objects") + assert.Equal(t, uint64(2), s.Stats().TotalAcquires) + + require.NoError(t, s.Release("informer-A", key)) + _, ok := s.Get(key) + assert.True(t, ok, "object must remain in the store while informer-B still owns it") + + require.NoError(t, s.Release("informer-B", key)) + _, ok = s.Get(key) + assert.False(t, ok, "object must be evicted after the last owner releases") +} + +func TestSnapshotStore_RepeatedAcquireBySameOwner(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pod := newPod("default", "pod-r", "x") + key := KeyFor(pod) + + require.NoError(t, s.Acquire("informer-X", key, pod)) + require.NoError(t, s.Acquire("informer-X", key, pod)) + assert.Equal(t, uint64(1), s.Stats().TotalAcquires, + "acquiring twice with the same owner must not increment the counter") + + // One Release is enough to fully evict because the owner only counts once. + require.NoError(t, s.Release("informer-X", key)) + _, ok := s.Get(key) + assert.False(t, ok) +} + +func TestSnapshotStore_UnknownReleaseIsNoOp(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pod := newPod("default", "pod-u", "x") + key := KeyFor(pod) + + require.NoError(t, s.Acquire("informer-1", key, pod)) + + // Release by an owner that never acquired this key — should not affect anything. + require.NoError(t, s.Release("informer-2", key)) + _, ok := s.Get(key) + assert.True(t, ok, "object must still be present after a no-op release") + + require.NoError(t, s.Release("informer-1", key)) + _, ok = s.Get(key) + assert.False(t, ok) +} + +func TestSnapshotStore_ReleaseOwner_DropsAllKeys(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pods := []*unstructured.Unstructured{ + newPod("default", "p1", "a"), + newPod("default", "p2", "b"), + newPod("kube-system", "p3", "c"), + } + keys := make([]store.ObjectKey, len(pods)) + for i, p := range pods { + keys[i] = KeyFor(p) + require.NoError(t, s.Acquire("informer-Z", keys[i], p)) + } + + // One key co-owned with another informer must NOT be removed by ReleaseOwner. + require.NoError(t, s.Acquire("informer-W", keys[0], pods[0])) + + s.ReleaseOwner("informer-Z") + + _, ok := s.Get(keys[0]) + assert.True(t, ok, "co-owned key must survive informer-Z's owner release") + for _, k := range keys[1:] { + _, ok := s.Get(k) + assert.False(t, ok, "key only owned by informer-Z must be evicted") + } +} + +func TestSnapshotStore_AcquireRejectsNil(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + err := s.Acquire("o", store.ObjectKey{}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "obj is nil") + + pod := newPod("ns", "n", "x") + err = s.Acquire("", KeyFor(pod), pod) + require.Error(t, err) + assert.Contains(t, err.Error(), "owner is empty") +} + +func TestSnapshotStore_ConcurrentAcquireRelease(t *testing.T) { + t.Parallel() + + s := NewSnapshotStore(nil) + pod := newPod("default", "stress", "x") + key := KeyFor(pod) + + const ownersN = 32 + const cycles = 100 + + var wg sync.WaitGroup + for i := 0; i < ownersN; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + owner := "informer-" + string(rune('A'+(id%26))) + for j := 0; j < cycles; j++ { + _ = s.Acquire(owner, key, pod) + _ = s.Release(owner, key) + } + }(i) + } + wg.Wait() + + // After every owner has released the same number of times it acquired, + // the live-object count must collapse back to zero. + assert.Equal(t, 0, s.Stats().LiveObjects) +} diff --git a/pkg/kube_events_manager/kube_events_manager.go b/pkg/kube_events_manager/kube_events_manager.go index 5019c765..871cd763 100644 --- a/pkg/kube_events_manager/kube_events_manager.go +++ b/pkg/kube_events_manager/kube_events_manager.go @@ -12,6 +12,7 @@ import ( klient "github.com/flant/kube-client/client" "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" ) @@ -42,6 +43,13 @@ type KubeEventsManager interface { KubeEventsSource WithMetricStorage(mstor metricsstorage.Storage) MetricStorage() metricsstorage.Storage + // WithSnapshotStore wires a deduplicated snapshot store into the + // manager. When set, every monitor created afterwards delegates the + // storage of full `*Unstructured` bodies to the shared store, freeing + // per-monitor memory at the cost of an extra Get on snapshot reads. + // Passing nil restores the default (no dedup) behaviour. Must be + // called before any AddMonitor call to take effect for that monitor. + WithSnapshotStore(store *dedupclient.SnapshotStore) Stop() Wait() } @@ -59,6 +67,11 @@ type kubeEventsManager struct { factoryStore *FactoryStore + // snapshotStore, when non-nil, is propagated into every freshly + // created Monitor. Reading and writing this field is serialised via + // the m mutex because Monitor construction happens under m. + snapshotStore *dedupclient.SnapshotStore + m sync.RWMutex Monitors map[string]Monitor @@ -88,11 +101,24 @@ func (mgr *kubeEventsManager) WithMetricStorage(mstor metricsstorage.Storage) { mgr.metricStorage = mstor } +// WithSnapshotStore registers a deduplicated snapshot store for use by +// every subsequently-constructed Monitor. See KubeEventsManager docs for +// the full contract. +func (mgr *kubeEventsManager) WithSnapshotStore(snapshotStore *dedupclient.SnapshotStore) { + mgr.m.Lock() + defer mgr.m.Unlock() + mgr.snapshotStore = snapshotStore +} + // AddMonitor creates a monitor with informers and return a KubeEvent with existing objects. // TODO cleanup informers in case of error // TODO use Context to stop informers func (mgr *kubeEventsManager) AddMonitor(monitorConfig *MonitorConfig) error { mgr.logger.Debug("add kubernetes monitor", slog.String(pkg.LogKeyConfig, fmt.Sprintf("%+v", monitorConfig))) + mgr.m.RLock() + snapshotStore := mgr.snapshotStore + mgr.m.RUnlock() + mon := NewMonitor( mgr.ctx, mgr.KubeClient, @@ -105,6 +131,9 @@ func (mgr *kubeEventsManager) AddMonitor(monitorConfig *MonitorConfig) error { }, mgr.logger.Named("monitor"), ) + if snapshotStore != nil { + mon.WithSnapshotStore(snapshotStore) + } if err := mon.CreateInformers(); err != nil { return err diff --git a/pkg/kube_events_manager/monitor.go b/pkg/kube_events_manager/monitor.go index a20d4452..ff3e64a5 100644 --- a/pkg/kube_events_manager/monitor.go +++ b/pkg/kube_events_manager/monitor.go @@ -12,6 +12,7 @@ import ( klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" utils "github.com/flant/shell-operator/pkg/utils/labels" ) @@ -41,6 +42,12 @@ type monitor struct { factoryStore *FactoryStore + // snapshotStore is the optional deduplicated object cache. When non-nil + // it is propagated into every resourceInformer the monitor creates, + // switching them from per-monitor `*Unstructured` storage to shared, + // reference-counted storage in the dedup store. + snapshotStore *dedupclient.SnapshotStore + eventCb func(kemtypes.KubeEvent) eventsEnabled bool // Index of namespaces statically defined in monitor configuration @@ -157,6 +164,17 @@ func (m *monitor) GetConfig() *MonitorConfig { return m.Config } +// WithSnapshotStore associates a deduplicated snapshot store with this +// monitor. It must be called before CreateInformers so the store reaches +// every freshly constructed resourceInformer through resourceInformerConfig. +// Passing nil is a no-op; the existing snapshotStore (if any) stays. +func (m *monitor) WithSnapshotStore(s *dedupclient.SnapshotStore) { + if s == nil { + return + } + m.snapshotStore = s +} + // CreateInformers creates all informers and // a namespace informer if namespace.labelSelector is defined. // If MonitorConfig.NamespaceSelector.MatchNames is defined, then @@ -319,12 +337,13 @@ func (m *monitor) EnableKubeEventCb() { func (m *monitor) CreateInformersForNamespace(namespace string) ([]*resourceInformer, error) { informers := make([]*resourceInformer, 0) cfg := &resourceInformerConfig{ - client: m.KubeClient, - mstor: m.metricStorage, - factoryStore: m.factoryStore, - eventCb: m.eventCb, - monitor: m.Config, - logger: m.logger.Named("resource-informer"), + client: m.KubeClient, + mstor: m.metricStorage, + factoryStore: m.factoryStore, + snapshotStore: m.snapshotStore, + eventCb: m.eventCb, + monitor: m.Config, + logger: m.logger.Named("resource-informer"), } objNames := []string{""} diff --git a/pkg/kube_events_manager/monitor_snapshot_store_test.go b/pkg/kube_events_manager/monitor_snapshot_store_test.go new file mode 100644 index 00000000..490770a2 --- /dev/null +++ b/pkg/kube_events_manager/monitor_snapshot_store_test.go @@ -0,0 +1,128 @@ +package kubeeventsmanager + +import ( + "context" + "testing" + + "github.com/deckhouse/deckhouse/pkg/log" + . "github.com/onsi/gomega" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/flant/kube-client/fake" + "github.com/flant/shell-operator/pkg/kube/dedupclient" + kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" + "github.com/flant/shell-operator/pkg/metric" + "github.com/flant/shell-operator/pkg/metrics" +) + +// Test_Monitor_SnapshotStore_BackingPath exercises the dedup-store path +// end-to-end on a fake cluster: the resourceInformer must +// - upsert the body into the shared SnapshotStore on initial list, +// - keep its in-memory `*ObjectAndFilterResult` entry with Object == nil, +// - reconstitute Object lazily when Snapshot() is called, +// - count the object in the store's live-objects gauge. +// +// This is the critical regression test: without the dedup wiring, Snapshot() +// would still work (the object stays in cachedObjects), but the store would +// stay empty — which is exactly the "memory didn't drop" failure mode that +// motivated this feature. +func Test_Monitor_SnapshotStore_BackingPath(t *testing.T) { + g := NewWithT(t) + + fc := fake.NewFakeCluster(fake.ClusterVersionV121) + createNsWithLabels(fc, "default", nil) + createCM(fc, "default", testCM("dedup-cm")) + + monitorCfg := &MonitorConfig{ + ApiVersion: "v1", + Kind: "ConfigMap", + EventTypes: []kemtypes.WatchEventType{kemtypes.WatchEventAdded, kemtypes.WatchEventModified, kemtypes.WatchEventDeleted}, + KeepFullObjectsInMemory: true, + NamespaceSelector: &kemtypes.NamespaceSelector{ + NameSelector: &kemtypes.NameSelector{MatchNames: []string{"default"}}, + }, + } + + metricStorage := metric.NewStorageMock(t) + metricStorage.HistogramObserveMock.Set(func(_ string, _ float64, _ map[string]string, _ []float64) {}) + metricStorage.GaugeSetMock.When(metrics.KubeSnapshotObjects, 1, map[string]string(nil)).Then() + + snapshotStore := dedupclient.NewSnapshotStore(log.NewNop()) + + mon := NewMonitor(context.Background(), fc.Client, metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) + mon.WithSnapshotStore(snapshotStore) + + require.NoError(t, mon.CreateInformers()) + mon.Start(context.TODO()) + + // After initial list completes, the snapshot store must own exactly one + // live object — the ConfigMap. + g.Eventually(func() int { + return snapshotStore.Stats().LiveObjects + }, "3s", "10ms"). + Should(Equal(1), "snapshot store must observe the initial-list upsert") + + // Snapshot() must return Object populated (reconstituted from the + // dedup store), but the underlying cachedObjects entry must not hold + // any `*Unstructured` pointer of its own. + snap := mon.Snapshot() + require.Len(t, snap, 1) + assert.NotNil(t, snap[0].Object, "Snapshot must reconstitute Object from the dedup store") + assert.Equal(t, "dedup-cm", snap[0].Object.GetName()) + assert.Equal(t, "default", snap[0].Object.GetNamespace()) + + for _, ri := range mon.ResourceInformers { + ri.cacheLock.RLock() + for _, entry := range ri.cachedObjects { + assert.Nil(t, entry.Object, + "cachedObjects entries must not hold `*Unstructured` when SnapshotStore is active") + } + assert.NotEmpty(t, ri.cachedObjectKeys, "cachedObjectKeys must mirror cachedObjects") + ri.cacheLock.RUnlock() + } +} + +// Test_Monitor_SnapshotStore_DefaultPath confirms backwards compatibility: +// when no SnapshotStore is wired in, the original behaviour is preserved — +// the per-monitor cache holds `*Unstructured` pointers itself and the +// Snapshot() output is identical in shape. +func Test_Monitor_SnapshotStore_DefaultPath(t *testing.T) { + g := NewWithT(t) + fc := fake.NewFakeCluster(fake.ClusterVersionV121) + createNsWithLabels(fc, "default", nil) + createCM(fc, "default", testCM("plain-cm")) + + monitorCfg := &MonitorConfig{ + ApiVersion: "v1", + Kind: "ConfigMap", + EventTypes: []kemtypes.WatchEventType{kemtypes.WatchEventAdded}, + KeepFullObjectsInMemory: true, + NamespaceSelector: &kemtypes.NamespaceSelector{ + NameSelector: &kemtypes.NameSelector{MatchNames: []string{"default"}}, + }, + } + + metricStorage := metric.NewStorageMock(t) + metricStorage.HistogramObserveMock.Set(func(_ string, _ float64, _ map[string]string, _ []float64) {}) + metricStorage.GaugeSetMock.When(metrics.KubeSnapshotObjects, 1, map[string]string(nil)).Then() + + mon := NewMonitor(context.Background(), fc.Client, metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) + require.NoError(t, mon.CreateInformers()) + mon.Start(context.TODO()) + + g.Eventually(func() []string { + return snapshotResourceIDs(mon.Snapshot()) + }, "3s", "10ms").Should(ContainElement("default/ConfigMap/plain-cm")) + + for _, ri := range mon.ResourceInformers { + ri.cacheLock.RLock() + assert.Empty(t, ri.cachedObjectKeys, + "cachedObjectKeys must remain empty when SnapshotStore is not configured") + for _, entry := range ri.cachedObjects { + assert.NotNil(t, entry.Object, + "cachedObjects entries must keep their `*Unstructured` pointer in the legacy path") + } + ri.cacheLock.RUnlock() + } +} diff --git a/pkg/kube_events_manager/resource_informer.go b/pkg/kube_events_manager/resource_informer.go index 53aaab18..2d636472 100644 --- a/pkg/kube_events_manager/resource_informer.go +++ b/pkg/kube_events_manager/resource_informer.go @@ -16,8 +16,11 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/tools/cache" + "github.com/ldmonster/kubeclient/store" + klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" "github.com/flant/shell-operator/pkg/metrics" "github.com/flant/shell-operator/pkg/utils/measure" @@ -38,9 +41,21 @@ type resourceInformer struct { ListOptions metav1.ListOptions // A cache of objects and filterResults. It is a part of the Monitor's snapshot. + // When snapshotStore is non-nil, entries here have Object == nil and the + // authoritative `*Unstructured` body lives once in the shared dedup store + // keyed by cachedObjectKeys[resourceId]. getCachedObjects reconstitutes + // Object lazily on read so the public type behaviour is unchanged. cachedObjects map[string]*kemtypes.ObjectAndFilterResult cacheLock sync.RWMutex + // snapshotStore, when non-nil, owns the storage of `*Unstructured` + // bodies. Each resourceInformer is one *owner* (identified by its + // id) so the store can refcount keys across overlapping watches. + snapshotStore *dedupclient.SnapshotStore + // cachedObjectKeys mirrors cachedObjects when snapshotStore is set. + // It is kept under cacheLock together with cachedObjects. + cachedObjectKeys map[string]store.ObjectKey + // Cached objects operations since start cachedObjectsInfo *CachedObjectsInfo // Cached objects operations since last access @@ -70,11 +85,12 @@ type resourceInformer struct { // resourceInformer should implement ResourceInformer type resourceInformerConfig struct { - client *klient.Client - mstor metricsstorage.Storage - factoryStore *FactoryStore - eventCb func(kemtypes.KubeEvent) - monitor *MonitorConfig + client *klient.Client + mstor metricsstorage.Storage + factoryStore *FactoryStore + snapshotStore *dedupclient.SnapshotStore + eventCb func(kemtypes.KubeEvent) + monitor *MonitorConfig logger *log.Logger } @@ -85,11 +101,13 @@ func newResourceInformer(ns, name string, cfg *resourceInformerConfig) *resource KubeClient: cfg.client, metricStorage: cfg.mstor, factoryStore: cfg.factoryStore, + snapshotStore: cfg.snapshotStore, Namespace: ns, Name: name, eventCb: cfg.eventCb, Monitor: cfg.monitor, cachedObjects: make(map[string]*kemtypes.ObjectAndFilterResult), + cachedObjectKeys: make(map[string]store.ObjectKey), cacheLock: sync.RWMutex{}, eventBufLock: sync.Mutex{}, cachedObjectsInfo: &CachedObjectsInfo{}, @@ -156,12 +174,32 @@ func (ei *resourceInformer) createSharedInformer() error { return nil } -// Snapshot returns all cached objects for this informer +// Snapshot returns all cached objects for this informer. +// +// When snapshotStore is set, every entry's `Object` field is reconstituted +// from the shared dedup store at call time. Reconstitution is a fresh +// allocation per call, so callers should treat the returned slice as +// owned and avoid pinning it longer than necessary. If the store has +// dropped a key concurrently (e.g. during shutdown) the returned entry +// keeps Object=nil; downstream code already tolerates that path because +// the same shape is used by Monitor.KeepFullObjectsInMemory==false. func (ei *resourceInformer) getCachedObjects() []kemtypes.ObjectAndFilterResult { ei.cacheLock.RLock() res := make([]kemtypes.ObjectAndFilterResult, 0, len(ei.cachedObjects)) - for _, obj := range ei.cachedObjects { - res = append(res, *obj) + if ei.snapshotStore != nil { + for resID, entry := range ei.cachedObjects { + cp := *entry + if key, ok := ei.cachedObjectKeys[resID]; ok { + if obj, found := ei.snapshotStore.Get(key); found { + cp.Object = obj + } + } + res = append(res, cp) + } + } else { + for _, obj := range ei.cachedObjects { + res = append(res, *obj) + } } ei.cacheLock.RUnlock() @@ -220,6 +258,14 @@ func (ei *resourceInformer) loadExistedObjects() error { slog.Int(pkg.LogKeyCount, len(objList.Items))) filteredObjects := make(map[string]*kemtypes.ObjectAndFilterResult) + // keysForStore is the set of (resourceId → ObjectKey) that need to be + // committed to the shared snapshot store under cacheLock. We collect + // them here so the dedup-store roundtrip happens before the lock is + // taken, keeping the critical section small. + var keysForStore map[string]store.ObjectKey + if ei.snapshotStore != nil { + keysForStore = make(map[string]store.ObjectKey, len(objList.Items)) + } for _, item := range objList.Items { // copy loop var to avoid duplication of pointer in filteredObjects @@ -240,6 +286,20 @@ func (ei *resourceInformer) loadExistedObjects() error { if !ei.Monitor.KeepFullObjectsInMemory { objFilterRes.RemoveFullObject() + } else if ei.snapshotStore != nil && objFilterRes.Object != nil { + // Move the body into the shared dedup store and drop our + // per-monitor pointer. RemoveObject MUST stay false so the + // snapshot still surfaces the body once Get reconstitutes + // it on read. + key := dedupclient.KeyFor(objFilterRes.Object) + if err := ei.snapshotStore.Acquire(ei.id, key, objFilterRes.Object); err != nil { + ei.logger.Warn("snapshot store: acquire failed during initial list, falling back to local copy", + slog.String(pkg.LogKeyResourceId, objFilterRes.Metadata.ResourceId), + log.Err(err)) + } else { + keysForStore[objFilterRes.Metadata.ResourceId] = key + objFilterRes.Object = nil + } } filteredObjects[objFilterRes.Metadata.ResourceId] = objFilterRes @@ -256,6 +316,9 @@ func (ei *resourceInformer) loadExistedObjects() error { for k, v := range filteredObjects { ei.cachedObjects[k] = v } + for k, key := range keysForStore { + ei.cachedObjectKeys[k] = key + } ei.cachedObjectsInfo.Count = uint64(len(ei.cachedObjects)) ei.metricStorage.GaugeSet(metrics.KubeSnapshotObjects, float64(len(ei.cachedObjects)), ei.Monitor.Metadata.MetricLabels) @@ -320,8 +383,26 @@ func (ei *resourceInformer) handleWatchEvent(object interface{}, eventType kemty return } + // storeKey is the shared dedup-store key for this object. It is set + // only when snapshotStore is enabled and KeepFullObjectsInMemory is + // true (the path that materially benefits from deduplication). Other + // paths leave storeKey at its zero value and skip Acquire/Release. + var storeKey store.ObjectKey + storeBacked := false + if !ei.Monitor.KeepFullObjectsInMemory { objFilterRes.RemoveFullObject() + } else if ei.snapshotStore != nil && objFilterRes.Object != nil { + storeKey = dedupclient.KeyFor(objFilterRes.Object) + if err := ei.snapshotStore.Acquire(ei.id, storeKey, objFilterRes.Object); err != nil { + ei.logger.Warn("snapshot store: acquire failed on watch event, falling back to local copy", + slog.String(pkg.LogKeyResourceId, resourceId), + slog.String(pkg.LogKeyEventType, string(eventType)), + log.Err(err)) + } else { + storeBacked = true + objFilterRes.Object = nil + } } // Do not fire Added or Modified if object is in cache and its checksum is equal to the newChecksum. @@ -344,6 +425,9 @@ func (ei *resourceInformer) handleWatchEvent(object interface{}, eventType kemty skipEvent = true } ei.cachedObjects[resourceId] = objFilterRes + if storeBacked { + ei.cachedObjectKeys[resourceId] = storeKey + } // Update cached objects info. ei.cachedObjectsInfo.Count = uint64(len(ei.cachedObjects)) if eventType == kemtypes.WatchEventAdded { @@ -362,6 +446,21 @@ func (ei *resourceInformer) handleWatchEvent(object interface{}, eventType kemty case kemtypes.WatchEventDeleted: ei.cacheLock.Lock() + // Drop the snapshot-store reference (if any) before forgetting + // the resourceId — otherwise we leak ownership in the store. + if ei.snapshotStore != nil { + if key, ok := ei.cachedObjectKeys[resourceId]; ok { + delete(ei.cachedObjectKeys, resourceId) + // Release outside the lock would be cleaner, but the + // store itself takes only its own (short-lived) mutex, + // so contention here is bounded. + if err := ei.snapshotStore.Release(ei.id, key); err != nil { + ei.logger.Warn("snapshot store: release failed on delete event", + slog.String(pkg.LogKeyResourceId, resourceId), + log.Err(err)) + } + } + } delete(ei.cachedObjects, resourceId) // Update cached objects info. ei.cachedObjectsInfo.Count = uint64(len(ei.cachedObjects)) @@ -458,6 +557,10 @@ func (ei *resourceInformer) start() { if ei.ctx != nil { <-ei.ctx.Done() ei.factoryStore.Stop(ei.id, ei.FactoryIndex) + // Drop every snapshot-store reference this informer still + // holds. Without this the dedup store would leak entries + // for stopped monitors until process exit. + ei.releaseSnapshotStoreOwnership() } }() @@ -472,6 +575,23 @@ func (ei *resourceInformer) start() { log.Debug("informer is ready", slog.String(pkg.LogKeyDebugName, ei.Monitor.Metadata.DebugName)) } +// releaseSnapshotStoreOwnership lets the shared dedup store reclaim every +// key this informer still owns. It is idempotent: calling it twice (or +// when snapshotStore is nil) is a safe no-op. Called from the start() +// shutdown goroutine so it runs on monitor cancellation. +func (ei *resourceInformer) releaseSnapshotStoreOwnership() { + if ei.snapshotStore == nil { + return + } + ei.snapshotStore.ReleaseOwner(ei.id) + ei.cacheLock.Lock() + // Clear the per-informer key map so subsequent reads see Object=nil. + if len(ei.cachedObjectKeys) > 0 { + ei.cachedObjectKeys = make(map[string]store.ObjectKey) + } + ei.cacheLock.Unlock() +} + // wait blocks until the underlying shared informer for this FactoryIndex is stopped func (ei *resourceInformer) wait() { ei.factoryStore.WaitStopped(ei.FactoryIndex) diff --git a/pkg/shell-operator/bootstrap.go b/pkg/shell-operator/bootstrap.go index b2501346..37525605 100644 --- a/pkg/shell-operator/bootstrap.go +++ b/pkg/shell-operator/bootstrap.go @@ -190,6 +190,7 @@ func dedupClientConfigFromAppConfig(cfg *app.Config) DedupClientConfig { WatchGVKs: cfg.DedupClient.WatchGVKs, ReconstructLRUSize: cfg.DedupClient.ReconstructLRUSize, GCInterval: cfg.DedupClient.GCInterval, + SnapshotStore: cfg.DedupClient.SnapshotStore, } } @@ -247,8 +248,19 @@ func (op *ShellOperator) AssembleCommonOperatorWithDedupClient(listenAddress, li return err } + // Optional deduplicated SnapshotStore. Constructed independently of + // DedupClient because the two solve different problems: DedupClient + // is a kubeclient instance for hooks/extensions, SnapshotStore is the + // shared backing for kube-events-manager monitors. Either, both, or + // neither may be active. + op.SnapshotStore = initSnapshotStore(dedupCfg, op.logger.Named("dedup-snapshot-store")) + op.SetupEventManagers() + if op.SnapshotStore != nil && op.KubeEventsManager != nil { + op.KubeEventsManager.WithSnapshotStore(op.SnapshotStore) + } + return nil } diff --git a/pkg/shell-operator/debug_server.go b/pkg/shell-operator/debug_server.go index 924c71ae..e7197746 100644 --- a/pkg/shell-operator/debug_server.go +++ b/pkg/shell-operator/debug_server.go @@ -91,24 +91,51 @@ func (op *ShellOperator) RegisterDebugHookRoutes(dbgSrv *debug.Server) { // payload so probes can distinguish "not configured" from "errored". func (op *ShellOperator) RegisterDebugDedupClientRoutes(dbgSrv *debug.Server) { dbgSrv.RegisterHandler(http.MethodGet, "/dedup-client/status.{format:(json|yaml|text)}", func(_ *http.Request) (interface{}, error) { - if op.DedupClient == nil { - return map[string]any{ - "enabled": false, - "reason": "DedupClient is not configured (set --dedup-client-enabled or $DEDUP_CLIENT_ENABLED)", - }, nil - } - // Cache wide synchronisation status — best-effort, capped at 0 - // timeout so the probe never blocks the debug server. - ctx, cancel := context.WithTimeout(context.Background(), 0) - defer cancel() - synced := op.DedupClient.WaitForCacheSync(ctx) - return map[string]any{ - "enabled": true, - "cacheSyncedHint": synced, - }, nil + payload := map[string]any{ + "client": clientStatus(op), + "snapshotStore": snapshotStoreStatus(op), + } + return payload, nil }) } +// clientStatus reports the status of the runtime DedupClient. +func clientStatus(op *ShellOperator) map[string]any { + if op.DedupClient == nil { + return map[string]any{ + "enabled": false, + "reason": "DedupClient is not configured (set --dedup-client-enabled or $DEDUP_CLIENT_ENABLED)", + } + } + // Cache wide synchronisation status — best-effort, capped at 0 + // timeout so the probe never blocks the debug server. + ctx, cancel := context.WithTimeout(context.Background(), 0) + defer cancel() + synced := op.DedupClient.WaitForCacheSync(ctx) + return map[string]any{ + "enabled": true, + "cacheSyncedHint": synced, + } +} + +// snapshotStoreStatus reports the live counters of the shared snapshot store. +func snapshotStoreStatus(op *ShellOperator) map[string]any { + if op.SnapshotStore == nil { + return map[string]any{ + "enabled": false, + "reason": "SnapshotStore is not configured (set --dedup-client-snapshot-store or $DEDUP_CLIENT_SNAPSHOT_STORE)", + } + } + stats := op.SnapshotStore.Stats() + return map[string]any{ + "enabled": true, + "liveObjects": stats.LiveObjects, + "totalAcquires": stats.TotalAcquires, + "totalReleases": stats.TotalReleases, + "totalDeletes": stats.TotalDeletes, + } +} + // RegisterDebugConfigRoutes registers routes to manage runtime configuration. // This method is also used in addon-operator func (op *ShellOperator) RegisterDebugConfigRoutes(dbgSrv *debug.Server, runtimeConfig *config.Config) { diff --git a/pkg/shell-operator/kube_client.go b/pkg/shell-operator/kube_client.go index 53365f8c..41b905ca 100644 --- a/pkg/shell-operator/kube_client.go +++ b/pkg/shell-operator/kube_client.go @@ -105,6 +105,13 @@ type DedupClientConfig struct { // kubeclient options. Zero means "use the kubeclient default". ReconstructLRUSize int GCInterval time.Duration + + // SnapshotStore toggles construction of the process-wide deduplicated + // SnapshotStore. The store backs per-monitor object caches; turning it + // on is the change that actually moves the RSS needle for workloads + // with many similar objects (the runtime DedupClient itself does not + // affect kube-events-manager memory). Independent of Enabled. + SnapshotStore bool } // initDedupClient constructs an optional deduplicated kubeclient using the @@ -156,6 +163,20 @@ func initDedupClient(kubeClient *klient.Client, cfg DedupClientConfig, logger *l return c, nil } +// initSnapshotStore constructs the optional deduplicated SnapshotStore. +// Returning (nil, nil) when cfg.SnapshotStore is false keeps the assembly +// caller simple — it nil-checks the result and skips the wiring. The +// returned store is a fresh instance (no shared state with previous +// processes) and is safe for concurrent use; the operator passes it to +// KubeEventsManager.WithSnapshotStore so every later-created Monitor +// uses it transparently. +func initSnapshotStore(cfg DedupClientConfig, logger *log.Logger) *dedupclient.SnapshotStore { + if !cfg.SnapshotStore { + return nil + } + return dedupclient.NewSnapshotStore(logger) +} + // parseGVKs converts a list of "group/version/kind" strings into // schema.GroupVersionKind values. It accepts an empty group (leading "/") // for core resources, e.g. "/v1/Pod". An empty input list yields a nil diff --git a/pkg/shell-operator/operator.go b/pkg/shell-operator/operator.go index 5b1e9ff2..510d25d8 100644 --- a/pkg/shell-operator/operator.go +++ b/pkg/shell-operator/operator.go @@ -76,6 +76,14 @@ type ShellOperator struct { // it is started in op.Start() and stopped during op.Shutdown(). DedupClient *dedupclient.Client + // SnapshotStore is the optional process-wide deduplicated cache that + // backs every kubernetes-binding monitor's per-object snapshot. When + // non-nil it is wired into the KubeEventsManager so resourceInformers + // store `*Unstructured` bodies once (refcounted) instead of per-monitor. + // Enabled via app.Config.DedupClient.SnapshotStore. Independent of + // DedupClient: either, both, or neither may be active. + SnapshotStore *dedupclient.SnapshotStore + ScheduleManager schedulemanager.ScheduleManager KubeEventsManager kubeeventsmanager.KubeEventsManager From 43d9b0ee9e333d9e14e452a0419353081aeff125 Mon Sep 17 00:00:00 2001 From: Pavel Okhlopkov Date: Mon, 1 Jun 2026 20:28:09 +0300 Subject: [PATCH 3/4] replace all with dedup Signed-off-by: Pavel Okhlopkov --- pkg/app/README.md | 18 +- pkg/app/app_config.go | 9 +- pkg/app/app_config_test.go | 1 - pkg/app/flags.go | 7 +- pkg/hook/controller/hook_controller_test.go | 5 +- pkg/kube/dedupclient/README.md | 61 ++-- pkg/kube/dedupclient/client.go | 192 ++++++++++- pkg/kube/dedupclient/client_test.go | 6 +- pkg/kube/dedupclient/config.go | 19 +- pkg/kube/dedupclient/config_loader.go | 255 ++++++++++++++ pkg/kube/dedupclient/fake.go | 71 ++++ pkg/kube_events_manager/dedup_informer.go | 319 ++++++++++++++++++ .../dedup_test_helper_test.go | 17 + pkg/kube_events_manager/factory.go | 111 +++++- .../kube_events_manager.go | 22 +- .../kube_events_manager_test.go | 22 +- pkg/kube_events_manager/monitor.go | 5 +- .../monitor_snapshot_store_test.go | 4 +- pkg/kube_events_manager/monitor_test.go | 66 +++- pkg/kube_events_manager/namespace_informer.go | 6 +- pkg/kube_events_manager/resource_informer.go | 10 +- pkg/metric/adapter.go | 61 ---- pkg/shell-operator/bootstrap.go | 87 ++--- pkg/shell-operator/bootstrap_test.go | 58 ++-- pkg/shell-operator/debug_server.go | 12 +- pkg/shell-operator/kube_client.go | 115 +------ pkg/shell-operator/kube_client_dedup_test.go | 29 +- pkg/shell-operator/operator.go | 32 +- pkg/webhook/admission/manager.go | 6 +- pkg/webhook/admission/resource.go | 4 +- pkg/webhook/conversion/crd_client_config.go | 4 +- pkg/webhook/conversion/manager.go | 4 +- test/hook/context/generator.go | 7 +- test/integration/suite/run.go | 9 +- 34 files changed, 1230 insertions(+), 424 deletions(-) create mode 100644 pkg/kube/dedupclient/config_loader.go create mode 100644 pkg/kube/dedupclient/fake.go create mode 100644 pkg/kube_events_manager/dedup_informer.go create mode 100644 pkg/kube_events_manager/dedup_test_helper_test.go delete mode 100644 pkg/metric/adapter.go diff --git a/pkg/app/README.md b/pkg/app/README.md index 373ecafd..73b58e04 100644 --- a/pkg/app/README.md +++ b/pkg/app/README.md @@ -314,17 +314,13 @@ err := so.AssembleCommonOperatorFromConfig(cfg, []string{ ``` The new `AssembleCommonOperatorFromConfig(cfg, labels)` method on -`*ShellOperator` is what makes this clean — it derives both -`KubeClientConfig`s (main + object-patcher), the deduplicated-kubeclient -configuration (`DedupClientConfig`), the HTTP listen address/port, and the -metric prefix from `cfg`, so the consumer does not have to unpack fields by -hand. The older primitive-taking -`AssembleCommonOperator(listenAddress, listenPort, labels, mainKubeCfg, patcherKubeCfg)` -is still available for callers that need finer control; it preserves its -original signature and disables the dedup cache. To enable the dedup cache -without going through `*app.Config`, use the new -`AssembleCommonOperatorWithDedupClient(...)` variant which adds a final -`DedupClientConfig` parameter. +`*ShellOperator` is what makes this clean — it derives the singleton +deduplicated Kubernetes client configuration, the HTTP listen address/port, and +the metric prefix from `cfg`, so the consumer does not have to unpack fields by +hand. For callers that need finer control, use +`AssembleCommonOperator(listenAddress, listenPort, labels, kubeCfg, dedupCfg)`; +it still creates exactly one singleton Kubernetes client unless a client was +already injected with `shell_operator.WithKubeClient(...)`. If you also want env-var parsing on top of your own values, call `ParseEnv(cfg)` between steps 1 and 2 — env values will overlay the fields you diff --git a/pkg/app/app_config.go b/pkg/app/app_config.go index e95848f9..01a2ebc8 100644 --- a/pkg/app/app_config.go +++ b/pkg/app/app_config.go @@ -41,7 +41,8 @@ type ObjectPatcherSettings struct { // templated Deployments). All settings here are optional; when Enabled is // false the client is not constructed at all. List-typed env vars use a comma // separator: GVK strings follow the form "//" (the -// group may be empty for core resources, e.g. "/v1/Pod"). +// group may be empty for core resources, e.g. "/v1/Pod"). Enabled is +// deprecated and ignored; the singleton dedup client is always constructed. type DedupClientSettings struct { Enabled bool `env:"ENABLED"` Namespaces []string `env:"NAMESPACES" envSeparator:","` @@ -54,9 +55,9 @@ type DedupClientSettings struct { // `*Unstructured` bodies live exactly once in memory across all // resourceInformers (refcounted), trading a small per-snapshot-read CPU // cost for a substantial drop in RSS for workloads with many similar - // objects. Independent of the runtime DedupClient (Enabled flag): the - // snapshot store can be turned on without spinning up any kubeclient - // informers. + // objects. The singleton client already moves informer list/watch storage + // to the dedup path; SnapshotStore moves monitor snapshot storage to a + // shared dedup store. SnapshotStore bool `env:"SNAPSHOT_STORE"` } diff --git a/pkg/app/app_config_test.go b/pkg/app/app_config_test.go index 01f349c0..06bd94d0 100644 --- a/pkg/app/app_config_test.go +++ b/pkg/app/app_config_test.go @@ -300,4 +300,3 @@ func TestCLIFlagOverridesEnv(t *testing.T) { assert.Equal(t, "1234", cfg.App.ListenPort) assert.Equal(t, "debug", cfg.Log.Level) } - diff --git a/pkg/app/flags.go b/pkg/app/flags.go index 6c89ce89..65ee89fe 100644 --- a/pkg/app/flags.go +++ b/pkg/app/flags.go @@ -115,14 +115,13 @@ func bindConversionWebhookFlags(cfg *Config, cmd *cobra.Command) func() { func bindDedupClientFlags(cfg *Config, cmd *cobra.Command) func() { f := cmd.Flags() f.BoolVar(&cfg.DedupClient.Enabled, "dedup-client-enabled", cfg.DedupClient.Enabled, - "Enable the deduplicated kubeclient cache (github.com/ldmonster/kubeclient). "+ - "When set, shell-operator builds a controller-runtime compatible client backed "+ - "by a deduplicated store. Can be set with $DEDUP_CLIENT_ENABLED.") + "Deprecated no-op retained for compatibility. The singleton deduplicated kubeclient "+ + "cache (github.com/ldmonster/kubeclient) is always constructed. Can be set with $DEDUP_CLIENT_ENABLED.") f.BoolVar(&cfg.DedupClient.SnapshotStore, "dedup-client-snapshot-store", cfg.DedupClient.SnapshotStore, "Back per-monitor object snapshots with a process-wide deduplicated store "+ "(github.com/ldmonster/kubeclient/store). Trades a small per-snapshot-read CPU "+ "cost for a substantial drop in RSS when many monitors observe similar objects. "+ - "Independent of --dedup-client-enabled. Can be set with $DEDUP_CLIENT_SNAPSHOT_STORE.") + "Can be set with $DEDUP_CLIENT_SNAPSHOT_STORE.") f.IntVar(&cfg.DedupClient.ReconstructLRUSize, "dedup-client-reconstruct-lru-size", cfg.DedupClient.ReconstructLRUSize, "Size of the LRU that memoises reconstructed Unstructured objects in the dedup cache. "+ diff --git a/pkg/hook/controller/hook_controller_test.go b/pkg/hook/controller/hook_controller_test.go index 6db4ca7e..9f053ee6 100644 --- a/pkg/hook/controller/hook_controller_test.go +++ b/pkg/hook/controller/hook_controller_test.go @@ -11,8 +11,10 @@ import ( bindingcontext "github.com/flant/shell-operator/pkg/hook/binding_context" "github.com/flant/shell-operator/pkg/hook/config" "github.com/flant/shell-operator/pkg/hook/types" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kubeeventsmanager "github.com/flant/shell-operator/pkg/kube_events_manager" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" + "k8s.io/client-go/discovery/cached/memory" ) // Test updating snapshots for combined contexts. @@ -20,7 +22,8 @@ func Test_UpdateSnapshots(t *testing.T) { g := NewWithT(t) fc := fake.NewFakeCluster(fake.ClusterVersionV121) - mgr := kubeeventsmanager.NewKubeEventsManager(context.Background(), fc.Client, log.NewNop()) + kubeClient := dedupclient.NewFromClients(fc.Client, fc.Client.Dynamic(), nil, memory.NewMemCacheClient(fc.Client.Discovery())) + mgr := kubeeventsmanager.NewKubeEventsManager(context.Background(), kubeClient, log.NewNop()) testHookConfig := ` configVersion: v1 diff --git a/pkg/kube/dedupclient/README.md b/pkg/kube/dedupclient/README.md index 1388a596..19e41d0b 100644 --- a/pkg/kube/dedupclient/README.md +++ b/pkg/kube/dedupclient/README.md @@ -2,13 +2,13 @@ Two-part integration of [`github.com/ldmonster/kubeclient`](https://github.com/ldmonster/kubeclient) -into shell-operator. Both parts can be enabled independently — the one that -moves the RSS needle for typical workloads is the **SnapshotStore**. +into shell-operator. Both parts can be enabled independently and target +different memory holders in the Kubernetes binding path. | Component | Type | Purpose | Flag | | ---------------- | -------------------------- | -------------------------------------------------------------------------------------------------------- | ----------------------------------- | -| `Client` | `*kubeclient.DedupClient` wrapper | Controller-runtime compatible Kubernetes client for hooks/extensions, with its own deduplicated cache. | `--dedup-client-enabled` | -| `SnapshotStore` | `*store.DedupStore` wrapper | Process-wide, reference-counted cache that backs every kube-events-manager monitor's per-object snapshot. **This is what reduces memory.** | `--dedup-client-snapshot-store` | +| `Client` | `*kubeclient.DedupClient` singleton facade | The only Kubernetes client shell-operator constructs. It exposes controller-runtime, dynamic, typed, discovery, and RESTMapper APIs, and backs kube-events-manager resource informers with dedup stores. | always on | +| `SnapshotStore` | `*store.DedupStore` wrapper | Process-wide, reference-counted cache that backs every kube-events-manager monitor's per-object snapshot. | `--dedup-client-snapshot-store` | For clusters with thousands of similar resources (e.g. templated `Deployment`s) the upstream store reports **60–90 %** lower cache memory @@ -17,24 +17,18 @@ usage thanks to value interning and subtree deduplication. ## Quick start ```go -import ( - klient "github.com/flant/kube-client/client" - "github.com/flant/shell-operator/pkg/kube/dedupclient" -) - -func newDedup(main *klient.Client, logger *log.Logger) (*dedupclient.Client, error) { - mapper, _ := main.ToRESTMapper() - return dedupclient.New(dedupclient.Config{ - RESTConfig: main.RestConfig(), - RESTMapper: mapper, - Namespaces: []string{"kube-system", "default"}, // empty = all - WatchGVKs: []schema.GroupVersionKind{ - {Group: "", Version: "v1", Kind: "Pod"}, - {Group: "apps", Version: "v1", Kind: "Deployment"}, - }, - ReconstructLRUSize: 4096, // 0 disables reconstruction caching - }, logger) -} +client, err := dedupclient.New(dedupclient.Config{ + Context: "kind-dev", // optional kubeconfig context + Config: "/path/to/kubeconfig", // optional kubeconfig path + QPS: 20, + Burst: 40, + Namespaces: []string{"kube-system", "default"}, // empty = all + WatchGVKs: []schema.GroupVersionKind{ + {Group: "", Version: "v1", Kind: "Pod"}, + {Group: "apps", Version: "v1", Kind: "Deployment"}, + }, + ReconstructLRUSize: 4096, // 0 disables reconstruction caching +}, logger) ``` `Start(ctx)` spins up the cache run loop in a single dedicated goroutine and @@ -43,28 +37,29 @@ exit (or for `ctx` to expire). ## How shell-operator wires it up -When `app.Config.DedupClient.Enabled` is `true`, -`AssembleCommonOperatorFromConfig` calls `initDedupClient`, which hands the -main `klient.Client`'s `rest.Config` and `RESTMapper` to -`dedupclient.New`. The resulting `*Client` is stored on -`shell_operator.ShellOperator.DedupClient`, started during `op.Start()`, and -stopped from `op.Shutdown()`. +`AssembleCommonOperatorFromConfig` always constructs one `*dedupclient.Client` +unless a library caller has already injected one with +`shell_operator.WithKubeClient(...)`. The resulting singleton is stored on +`shell_operator.ShellOperator.KubeClient`, started during `op.Start()`, and +stopped from `op.Shutdown()`. Kubernetes binding resource informers use the +singleton's dedup-backed informer factory; namespace label-selector informers +still use the singleton's typed client-go namespace interface. Configuration knobs (env vars / CLI flags): | Env var | Flag | Meaning | | ------------------------------------ | ------------------------------------ | ------------------------------------------------------------ | -| `DEDUP_CLIENT_ENABLED` | `--dedup-client-enabled` | Construct the deduplicated client at all. | -| `DEDUP_CLIENT_SNAPSHOT_STORE` | `--dedup-client-snapshot-store` | Back per-monitor snapshots with the shared dedup store. Independent of `--dedup-client-enabled`. | +| `DEDUP_CLIENT_ENABLED` | `--dedup-client-enabled` | Deprecated no-op; the singleton dedup client is always constructed. | +| `DEDUP_CLIENT_SNAPSHOT_STORE` | `--dedup-client-snapshot-store` | Back per-monitor snapshots with the shared dedup store. | | `DEDUP_CLIENT_NAMESPACES` | `--dedup-client-namespace` | Comma-separated (env) or repeated (flag) namespace allow-list. Empty = all. | | `DEDUP_CLIENT_WATCH_GVKS` | `--dedup-client-watch-gvk` | GVKs to pre-register, formatted as `//` (group empty for core). | | `DEDUP_CLIENT_RECONSTRUCT_LRU_SIZE` | `--dedup-client-reconstruct-lru-size`| LRU size for reconstructed Unstructured objects. 0 disables. | | `DEDUP_CLIENT_GC_INTERVAL` | `--dedup-client-gc-interval` | GC interval for unused interned values/subtrees. 0 = upstream default. | -When both features are off the wrappers are **not** constructed at all, so -this integration adds zero runtime overhead to existing deployments. +The singleton client is always constructed. Optional knobs only tune cache +scope, reconstruction, GC, and monitor snapshot storage. -## SnapshotStore — the memory win +## SnapshotStore — monitor snapshot memory `SnapshotStore` plugs into shell-operator's `pkg/kube_events_manager` so that every monitor's `cachedObjects` map stops holding `*Unstructured` pointers diff --git a/pkg/kube/dedupclient/client.go b/pkg/kube/dedupclient/client.go index f5be6bcf..808b5ffe 100644 --- a/pkg/kube/dedupclient/client.go +++ b/pkg/kube/dedupclient/client.go @@ -5,12 +5,26 @@ import ( "errors" "fmt" "log/slog" + "strings" "sync" "sync/atomic" "github.com/deckhouse/deckhouse/pkg/log" "github.com/ldmonster/kubeclient" + "github.com/ldmonster/kubeclient/store" + apixv1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/cli-runtime/pkg/resource" + "k8s.io/client-go/discovery" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/metadata" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/flant/shell-operator/pkg" @@ -31,10 +45,24 @@ var ErrAlreadyStarted = errors.New("dedupclient: cache already started") // client.Client when they need full read/write access. type Client struct { client.Client + kubernetes.Interface dedup *kubeclient.DedupClient logger *log.Logger + restConfig *rest.Config + cachedDiscovery discovery.CachedDiscoveryInterface + restMapper meta.RESTMapper + contextName string + configPath string + defaultNS string + dynamicClient dynamic.Interface + apiExtClient apixv1client.ApiextensionsV1Interface + metadataClient metadata.Interface + + informerStoresMu sync.Mutex + informerStores map[string]store.Store + // startOnce guards Start() so the cache loop is only spawned once even // if Start is called concurrently from several places. startOnce sync.Once @@ -56,26 +84,64 @@ type Client struct { // are optional. The returned Client is not started — call Start to spin up // the cache informers. func New(cfg Config, logger *log.Logger) (*Client, error) { - if cfg.RESTConfig == nil { - return nil, fmt.Errorf("dedupclient: rest.Config is required") - } if logger == nil { logger = log.NewLogger() } logger = logger.With(pkg.LogKeyOperatorComponent, "dedup-kube-client") + restCfg, defaultNS, err := buildRESTConfig(cfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct rest config: %w", err) + } + cfg.RESTConfig = restCfg + + kube, err := kubernetes.NewForConfig(restCfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct typed client: %w", err) + } + dyn, err := dynamic.NewForConfig(restCfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct dynamic client: %w", err) + } + apiExt, err := apixv1client.NewForConfig(restCfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct apiextensions client: %w", err) + } + metaClient, err := metadata.NewForConfig(restCfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct metadata client: %w", err) + } + cachedDiscovery, err := newCachedDiscovery(restCfg) + if err != nil { + return nil, fmt.Errorf("dedupclient: construct discovery client: %w", err) + } + if cfg.RESTMapper == nil { + cfg.RESTMapper = newRESTMapper(cachedDiscovery, logger) + } + opts := buildOptions(cfg) - dc, err := kubeclient.New(cfg.RESTConfig, opts...) + dc, err := kubeclient.New(restCfg, opts...) if err != nil { return nil, fmt.Errorf("dedupclient: construct kubeclient: %w", err) } return &Client{ - Client: dc, - dedup: dc, - logger: logger, - done: make(chan struct{}), + Client: dc, + Interface: kube, + dedup: dc, + logger: logger, + restConfig: restCfg, + cachedDiscovery: cachedDiscovery, + restMapper: cfg.RESTMapper, + contextName: cfg.Context, + configPath: cfg.Config, + defaultNS: defaultNS, + dynamicClient: dyn, + apiExtClient: apiExt, + metadataClient: metaClient, + informerStores: make(map[string]store.Store), + done: make(chan struct{}), }, nil } @@ -112,6 +178,106 @@ func (c *Client) Dedup() *kubeclient.DedupClient { return c.dedup } +func (c *Client) Dynamic() dynamic.Interface { + return c.dynamicClient +} + +func (c *Client) ApiExt() apixv1client.ApiextensionsV1Interface { + return c.apiExtClient +} + +func (c *Client) Metadata() metadata.Interface { + return c.metadataClient +} + +func (c *Client) RestConfig() *rest.Config { + return c.restConfig +} + +func (c *Client) DefaultNamespace() string { + if c.defaultNS == "" { + return "default" + } + return c.defaultNS +} + +func (c *Client) ToRESTConfig() (*rest.Config, error) { + return c.restConfig, nil +} + +func (c *Client) ToDiscoveryClient() (discovery.CachedDiscoveryInterface, error) { + return c.cachedDiscovery, nil +} + +func (c *Client) ToRawKubeConfigLoader() clientcmd.ClientConfig { + return newClientConfig(c.contextName, c.configPath) +} + +func (c *Client) ToRESTMapper() (meta.RESTMapper, error) { + return c.restMapper, nil +} + +func (c *Client) RESTMapper() meta.RESTMapper { + return c.restMapper +} + +func (c *Client) NewBuilder() *resource.Builder { + return resource.NewBuilder(c) +} + +func (c *Client) InvalidateDiscoveryCache() { + if c.cachedDiscovery != nil { + c.cachedDiscovery.Invalidate() + } +} + +func (c *Client) APIResourceList(apiVersion string) ([]*metav1.APIResourceList, error) { + lists, err := c.apiResourceList(apiVersion) + if err != nil && strings.Contains(err.Error(), "not found") && c.cachedDiscovery != nil { + c.cachedDiscovery.Invalidate() + return c.apiResourceList(apiVersion) + } + return lists, err +} + +func (c *Client) APIResource(apiVersion, kind string) (*metav1.APIResource, error) { + resource, err := c.apiResource(apiVersion, kind) + if err != nil && apierrors.IsNotFound(err) && c.cachedDiscovery != nil { + c.cachedDiscovery.Invalidate() + resource, err = c.apiResource(apiVersion, kind) + } + if err != nil { + return nil, fmt.Errorf("apiVersion %q, kind %q is not supported by cluster: %w", apiVersion, kind, err) + } + return resource, nil +} + +func (c *Client) GroupVersionResource(apiVersion, kind string) (schema.GroupVersionResource, error) { + apiRes, err := c.APIResource(apiVersion, kind) + if err != nil { + return schema.GroupVersionResource{}, err + } + return schema.GroupVersionResource{ + Group: apiRes.Group, + Version: apiRes.Version, + Resource: apiRes.Name, + }, nil +} + +func (c *Client) InformerStore(key string) store.Store { + c.informerStoresMu.Lock() + defer c.informerStoresMu.Unlock() + if c.informerStores == nil { + c.informerStores = make(map[string]store.Store) + } + if s, ok := c.informerStores[key]; ok { + return s + } + s := store.NewDedupStore() + c.informerStores[key] = s + return s +} + // Start launches the cache run loop in a dedicated goroutine. Subsequent // calls return ErrAlreadyStarted without spawning additional goroutines. // @@ -120,6 +286,9 @@ func (c *Client) Dedup() *kubeclient.DedupClient { // returns and the goroutine exits. Start itself is non-blocking and // returns as soon as the goroutine has been scheduled. func (c *Client) Start(parent context.Context) error { + if c.dedup == nil { + return nil + } if parent == nil { parent = context.Background() } @@ -155,7 +324,7 @@ func (c *Client) Start(parent context.Context) error { // initial List or until ctx is cancelled. It returns true on success and // false when the wait was aborted (or no Start has happened yet). func (c *Client) WaitForCacheSync(ctx context.Context) bool { - if !c.started.Load() { + if c.dedup == nil || !c.started.Load() { return false } return c.dedup.WaitForCacheSync(ctx) @@ -175,7 +344,7 @@ func (c *Client) EnsureInformer(ctx context.Context, obj client.Object) error { // returned (or ctx is cancelled). Calling Shutdown before Start, or after // a previous Shutdown has already returned, is a safe no-op. func (c *Client) Shutdown(ctx context.Context) error { - if !c.started.Load() { + if c.dedup == nil || !c.started.Load() { return nil } if c.cancel != nil { @@ -200,5 +369,8 @@ func (c *Client) Shutdown(ctx context.Context) error { // kept here so callers don't have to import the kubeclient package just // to read it back. func (c *Client) Scheme() *runtime.Scheme { + if c.dedup == nil { + return runtime.NewScheme() + } return c.dedup.Scheme() } diff --git a/pkg/kube/dedupclient/client_test.go b/pkg/kube/dedupclient/client_test.go index b926d619..61d1f0d4 100644 --- a/pkg/kube/dedupclient/client_test.go +++ b/pkg/kube/dedupclient/client_test.go @@ -12,12 +12,12 @@ import ( "k8s.io/client-go/rest" ) -func TestNew_RequiresRESTConfig(t *testing.T) { +func TestNew_RejectsEmptyRESTConfigHost(t *testing.T) { t.Parallel() - _, err := New(Config{}, nil) + _, err := New(Config{RESTConfig: &rest.Config{}}, nil) require.Error(t, err) - assert.Contains(t, err.Error(), "rest.Config is required") + assert.Contains(t, err.Error(), "rest config host can't be empty") } // TestNew_AcceptsZeroValueOptions exercises the buildOptions translation diff --git a/pkg/kube/dedupclient/config.go b/pkg/kube/dedupclient/config.go index 1522ecef..db3cd586 100644 --- a/pkg/kube/dedupclient/config.go +++ b/pkg/kube/dedupclient/config.go @@ -22,8 +22,25 @@ import ( // RESTConfig is the only mandatory field; all other fields have sensible // defaults and may be left zero-valued. type Config struct { + // Context is the kubeconfig context name used when RESTConfig is nil. + Context string + + // Config is the kubeconfig path used when RESTConfig is nil. + Config string + + // Server is an explicit API server URL. It bypasses kubeconfig and + // in-cluster loading when set. + Server string + + // QPS and Burst tune the shared rest.Config rate limiter. + QPS float32 + Burst int + + // Timeout sets the shared rest.Config timeout. Zero leaves it unset. + Timeout time.Duration + // RESTConfig is the *rest.Config used by the underlying dynamic client. - // Required. + // Optional; when nil the client loads out-of-cluster or in-cluster config. RESTConfig *rest.Config // RESTMapper resolves GroupKind/Version → GroupVersionResource. When nil, diff --git a/pkg/kube/dedupclient/config_loader.go b/pkg/kube/dedupclient/config_loader.go new file mode 100644 index 00000000..bc208bc5 --- /dev/null +++ b/pkg/kube/dedupclient/config_loader.go @@ -0,0 +1,255 @@ +package dedupclient + +import ( + "fmt" + "log/slog" + "os" + "strings" + "time" + + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/pkg/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/discovery" + "k8s.io/client-go/discovery/cached/disk" + "k8s.io/client-go/discovery/cached/memory" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/rest" + "k8s.io/client-go/restmapper" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + kubeTokenFilePath = "/var/run/secrets/kubernetes.io/serviceaccount/token" + kubeNamespaceFilePath = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" +) + +func buildRESTConfig(cfg Config) (*rest.Config, string, error) { + var ( + restCfg *rest.Config + defaultNS = "default" + err error + ) + + switch { + case cfg.RESTConfig != nil: + if cfg.RESTConfig.Host == "" { + return nil, "", fmt.Errorf("rest config host can't be empty") + } + restCfg = rest.CopyConfig(cfg.RESTConfig) + case cfg.Server != "": + restCfg = &rest.Config{Host: cfg.Server} + _ = rest.SetKubernetesDefaults(restCfg) + default: + var outOfClusterErr error + restCfg, defaultNS, outOfClusterErr = getOutOfClusterConfig(cfg.Context, cfg.Config) + if restCfg == nil { + if hasInClusterConfig() { + restCfg, defaultNS, err = getInClusterConfig() + if err != nil { + if cfg.Config != "" || cfg.Context != "" { + if outOfClusterErr != nil { + return nil, "", fmt.Errorf("out-of-cluster config error: %v, in-cluster config error: %v", outOfClusterErr, err) + } + return nil, "", fmt.Errorf("in-cluster config is not found") + } + return nil, "", err + } + } else if outOfClusterErr != nil { + return nil, "", outOfClusterErr + } else { + return nil, "", fmt.Errorf("no kubernetes client config found") + } + } + } + + if restCfg == nil { + return nil, "", fmt.Errorf("failed to initialize kubernetes client: no valid configuration found") + } + if cfg.QPS != 0 { + restCfg.QPS = cfg.QPS + } + if cfg.Burst != 0 { + restCfg.Burst = cfg.Burst + } + if cfg.Timeout != 0 { + restCfg.Timeout = cfg.Timeout + } + return restCfg, defaultNS, nil +} + +func newClientConfig(contextName, kubeconfig string) clientcmd.ClientConfig { + rules := clientcmd.NewDefaultClientConfigLoadingRules() + rules.DefaultClientConfig = &clientcmd.DefaultClientConfig + + overrides := &clientcmd.ConfigOverrides{ClusterDefaults: clientcmd.ClusterDefaults} + if contextName != "" { + overrides.CurrentContext = contextName + } + if kubeconfig != "" { + rules.ExplicitPath = kubeconfig + } + return clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, overrides) +} + +func getOutOfClusterConfig(contextName, configPath string) (*rest.Config, string, error) { + clientConfig := newClientConfig(contextName, configPath) + + defaultNS, _, err := clientConfig.Namespace() + if err != nil { + return nil, "", fmt.Errorf("cannot determine default kubernetes namespace: %s", err) + } + + config, err := clientConfig.ClientConfig() + if err != nil { + return nil, "", makeOutOfClusterClientConfigError(configPath, contextName, err) + } + return config, defaultNS, nil +} + +func makeOutOfClusterClientConfigError(kubeConfig, kubeContext string, err error) error { + baseErrMsg := "out-of-cluster configuration problem" + if kubeConfig != "" { + baseErrMsg += fmt.Sprintf(", custom kube config path is '%s'", kubeConfig) + } + if kubeContext != "" { + baseErrMsg += fmt.Sprintf(", custom kube context is '%s'", kubeContext) + } + return fmt.Errorf("%s: %s", baseErrMsg, err) +} + +func hasInClusterConfig() bool { + token, _ := fileExists(kubeTokenFilePath) + ns, _ := fileExists(kubeNamespaceFilePath) + return token && ns +} + +func fileExists(path string) (bool, error) { + _, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + return true, nil +} + +func getInClusterConfig() (*rest.Config, string, error) { + config, err := rest.InClusterConfig() + if err != nil { + return nil, "", fmt.Errorf("in-cluster configuration problem: %s", err) + } + + data, err := os.ReadFile(kubeNamespaceFilePath) + if err != nil { + return nil, "", fmt.Errorf("in-cluster configuration problem: cannot determine default kubernetes namespace: error reading %s: %s", kubeNamespaceFilePath, err) + } + return config, string(data), nil +} + +func newCachedDiscovery(config *rest.Config) (discovery.CachedDiscoveryInterface, error) { + if _, inMemory := os.LookupEnv("FLANT_KUBE_CLIENT_IN_MEMORY_DISCOVERY_CACHE"); inMemory { + d, err := discovery.NewDiscoveryClientForConfig(config) + if err != nil { + return nil, err + } + return memory.NewMemCacheClient(d), nil + } + + cacheDiscoveryDir, err := os.MkdirTemp("", "kube-cache-discovery-*") + if err != nil { + return nil, err + } + cacheHTTPDir, err := os.MkdirTemp("", "kube-cache-http-*") + if err != nil { + return nil, err + } + return disk.NewCachedDiscoveryClientForConfig(config, cacheDiscoveryDir, cacheHTTPDir, 10*time.Minute) +} + +func newRESTMapper(cachedDiscovery discovery.CachedDiscoveryInterface, logger *log.Logger) meta.RESTMapper { + mapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery) + return restmapper.NewShortcutExpander(mapper, cachedDiscovery, func(warning string) { + if logger != nil { + logger.Warn("warning", slog.String("warning", warning)) + } + }) +} + +func (c *Client) discovery() discovery.DiscoveryInterface { + if c.cachedDiscovery != nil { + return c.cachedDiscovery + } + return c.Discovery() +} + +func (c *Client) apiResourceList(apiVersion string) ([]*metav1.APIResourceList, error) { + if apiVersion == "" { + switch c.discovery().(type) { + case *fakediscovery.FakeDiscovery: + _, res, err := c.discovery().ServerGroupsAndResources() + return res, err + default: + return c.discovery().ServerPreferredResources() + } + } + + gv, err := schema.ParseGroupVersion(apiVersion) + if err != nil { + return nil, fmt.Errorf("apiVersion %q is invalid", apiVersion) + } + + list, err := c.discovery().ServerResourcesForGroupVersion(gv.String()) + if err != nil { + return nil, errors.Wrapf(err, "apiVersion %q has no supported resources in cluster", apiVersion) + } + return []*metav1.APIResourceList{list}, nil +} + +func (c *Client) apiResource(apiVersion, kind string) (*metav1.APIResource, error) { + lists, err := c.APIResourceList(apiVersion) + if err != nil && len(lists) == 0 { + return nil, err + } + + resource := getAPIResourceFromResourceLists(kind, lists) + if resource == nil { + gv, _ := schema.ParseGroupVersion(apiVersion) + return nil, apierrors.NewNotFound(schema.GroupResource{Group: gv.Group, Resource: kind}, "") + } + return resource, nil +} + +func getAPIResourceFromResourceLists(kind string, resourceLists []*metav1.APIResourceList) *metav1.APIResource { + for _, list := range resourceLists { + for _, resource := range list.APIResources { + if len(resource.Verbs) == 0 { + continue + } + if equalLowerCasedToOneOf(kind, append(resource.ShortNames, resource.Kind, resource.Name)...) { + gv, _ := schema.ParseGroupVersion(list.GroupVersion) + resource.Group = gv.Group + resource.Version = gv.Version + return &resource + } + } + } + return nil +} + +func equalLowerCasedToOneOf(term string, choices ...string) bool { + if len(choices) == 0 { + return false + } + lTerm := strings.ToLower(term) + for _, choice := range choices { + if lTerm == strings.ToLower(choice) { + return true + } + } + return false +} diff --git a/pkg/kube/dedupclient/fake.go b/pkg/kube/dedupclient/fake.go new file mode 100644 index 00000000..06e0b210 --- /dev/null +++ b/pkg/kube/dedupclient/fake.go @@ -0,0 +1,71 @@ +package dedupclient + +import ( + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient/store" + apixfake "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/fake" + apixv1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/discovery" + "k8s.io/client-go/discovery/cached/memory" + "k8s.io/client-go/dynamic" + fakedynamic "k8s.io/client-go/dynamic/fake" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/metadata" + fakemetadata "k8s.io/client-go/metadata/fake" +) + +// NewFake constructs a singleton client backed by client-go fakes. It is meant +// for unit tests that exercise shell-operator code paths using typed/dynamic +// Kubernetes APIs without starting the upstream dedup cache. +func NewFake(gvrToListKind map[schema.GroupVersionResource]string) *Client { + scheme := runtime.NewScheme() + kube := fake.NewSimpleClientset() + dyn := fakedynamic.NewSimpleDynamicClientWithCustomListKinds(scheme, gvrToListKind) + metaClient := fakemetadata.NewSimpleMetadataClient(scheme) + apiExt := apixfake.NewSimpleClientset() + + return &Client{ + Interface: kube, + logger: log.NewNop(), + defaultNS: "default", + dynamicClient: dyn, + apiExtClient: apiExt.ApiextensionsV1(), + metadataClient: metadata.Interface(metaClient), + cachedDiscovery: memory.NewMemCacheClient(kube.Discovery()), + informerStores: make(map[string]store.Store), + done: make(chan struct{}), + } +} + +func NewFromClients(kube kubernetes.Interface, dyn dynamic.Interface, apiExtClient apixv1client.ApiextensionsV1Interface, cachedDiscovery discovery.CachedDiscoveryInterface) *Client { + if cachedDiscovery == nil && kube != nil { + cachedDiscovery = memory.NewMemCacheClient(kube.Discovery()) + } + if apiExtClient == nil { + apiExtClient = apixfake.NewSimpleClientset().ApiextensionsV1() + } + return &Client{ + Interface: kube, + logger: log.NewNop(), + defaultNS: "default", + dynamicClient: dyn, + apiExtClient: apiExtClient, + cachedDiscovery: cachedDiscovery, + informerStores: make(map[string]store.Store), + done: make(chan struct{}), + } +} + +// ReloadDynamic replaces the fake dynamic client with a new resource/list-kind +// mapping. It mirrors the old flant kube-client fake helper used by tests. +func (c *Client) ReloadDynamic(gvrToListKind map[schema.GroupVersionResource]string) { + scheme := runtime.NewScheme() + c.dynamicClient = fakedynamic.NewSimpleDynamicClientWithCustomListKinds(scheme, gvrToListKind) +} + +func (c *Client) ReplaceDynamic(dynamicClient dynamic.Interface) { + c.dynamicClient = dynamicClient +} diff --git a/pkg/kube_events_manager/dedup_informer.go b/pkg/kube_events_manager/dedup_informer.go new file mode 100644 index 00000000..15ae727b --- /dev/null +++ b/pkg/kube_events_manager/dedup_informer.go @@ -0,0 +1,319 @@ +package kubeeventsmanager + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "sync" + "time" + + "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient/store" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/dynamic" + clientgocache "k8s.io/client-go/tools/cache" +) + +type dedupResourceInformer struct { + index FactoryIndex + client dynamic.Interface + store store.Store + + mu sync.RWMutex + handlers map[string]clientgocache.ResourceEventHandler + hasSynced bool +} + +func newDedupResourceInformer(client dynamic.Interface, index FactoryIndex, dedupStore store.Store) *dedupResourceInformer { + if dedupStore == nil { + dedupStore = store.NewDedupStore() + } + return &dedupResourceInformer{ + index: index, + client: client, + store: dedupStore, + handlers: make(map[string]clientgocache.ResourceEventHandler), + } +} + +func (inf *dedupResourceInformer) addEventHandler(id string, handler clientgocache.ResourceEventHandler) { + inf.mu.Lock() + defer inf.mu.Unlock() + inf.handlers[id] = handler +} + +func (inf *dedupResourceInformer) removeEventHandler(id string) { + inf.mu.Lock() + defer inf.mu.Unlock() + delete(inf.handlers, id) +} + +func (inf *dedupResourceInformer) handlerCount() int { + inf.mu.RLock() + defer inf.mu.RUnlock() + return len(inf.handlers) +} + +func (inf *dedupResourceInformer) synced() bool { + inf.mu.RLock() + defer inf.mu.RUnlock() + return inf.hasSynced +} + +func (inf *dedupResourceInformer) run(ctx context.Context, errorHandler *WatchErrorHandler) error { + resourceVersion, err := inf.list(ctx, true) + if err != nil { + return fmt.Errorf("dedup informer: initial list failed for %s: %w", inf.index.GVR.String(), err) + } + + inf.mu.Lock() + inf.hasSynced = true + inf.mu.Unlock() + + backoff := time.Second + const maxBackoff = 30 * time.Second + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + nextResourceVersion, err := inf.watchLoop(ctx, resourceVersion) + if err == nil { + return ctx.Err() + } + inf.reportWatchError(errorHandler, err) + if nextResourceVersion != "" { + resourceVersion = nextResourceVersion + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + backoff *= 2 + if backoff > maxBackoff { + backoff = maxBackoff + } + + resourceVersion, err = inf.list(ctx, true) + if err != nil { + inf.reportWatchError(errorHandler, err) + continue + } + backoff = time.Second + } +} + +func (inf *dedupResourceInformer) list(ctx context.Context, notify bool) (string, error) { + listResult, err := inf.resource().List(ctx, inf.listOptions("")) + if err != nil { + return "", fmt.Errorf("list failed: %w", err) + } + + seen := make(map[store.ObjectKey]struct{}, len(listResult.Items)) + for i := range listResult.Items { + obj := listResult.Items[i].DeepCopy() + inf.prepareObject(obj) + key := inf.objectKey(obj) + seen[key] = struct{}{} + + oldObj, hadOld := inf.store.Get(key) + if err := inf.store.Upsert(key, obj); err != nil { + return "", fmt.Errorf("store upsert failed: %w", err) + } + + if notify { + if hadOld { + inf.notifyUpdate(oldObj, obj) + } else { + inf.notifyAdd(obj) + } + } + } + + for _, oldObj := range inf.store.List(inf.index.GVK) { + inf.prepareObject(oldObj) + key := inf.objectKey(oldObj) + if _, ok := seen[key]; ok { + continue + } + if err := inf.store.Delete(key); err != nil { + return "", fmt.Errorf("store delete failed: %w", err) + } + if notify { + inf.notifyDelete(oldObj) + } + } + + return listResult.GetResourceVersion(), nil +} + +func (inf *dedupResourceInformer) watchLoop(ctx context.Context, resourceVersion string) (string, error) { + watcher, err := inf.resource().Watch(ctx, inf.listOptions(resourceVersion)) + if err != nil { + return resourceVersion, fmt.Errorf("watch failed: %w", err) + } + defer watcher.Stop() + + currentResourceVersion := resourceVersion + for { + select { + case <-ctx.Done(): + return currentResourceVersion, nil + case event, ok := <-watcher.ResultChan(): + if !ok { + return currentResourceVersion, io.EOF + } + nextResourceVersion, err := inf.handleWatchEvent(event) + if nextResourceVersion != "" { + currentResourceVersion = nextResourceVersion + } + if err != nil { + return currentResourceVersion, err + } + } + } +} + +func (inf *dedupResourceInformer) handleWatchEvent(event watch.Event) (string, error) { + resourceVersion := resourceVersionOf(event.Object) + + switch event.Type { + case watch.Bookmark: + return resourceVersion, nil + case watch.Error: + return resourceVersion, errorFromWatchObject(event.Object) + } + + obj, ok := event.Object.(*unstructured.Unstructured) + if !ok { + return resourceVersion, fmt.Errorf("unexpected watch object type %T", event.Object) + } + obj = obj.DeepCopy() + inf.prepareObject(obj) + key := inf.objectKey(obj) + + switch event.Type { + case watch.Added: + if err := inf.store.Upsert(key, obj); err != nil { + return resourceVersion, fmt.Errorf("store upsert failed: %w", err) + } + inf.notifyAdd(obj) + case watch.Modified: + oldObj, hadOld := inf.store.Get(key) + if err := inf.store.Upsert(key, obj); err != nil { + return resourceVersion, fmt.Errorf("store upsert failed: %w", err) + } + if hadOld { + inf.notifyUpdate(oldObj, obj) + } else { + inf.notifyAdd(obj) + } + case watch.Deleted: + if err := inf.store.Delete(key); err != nil { + return resourceVersion, fmt.Errorf("store delete failed: %w", err) + } + inf.notifyDelete(obj) + default: + return resourceVersion, fmt.Errorf("unexpected watch event type %q", event.Type) + } + + return resourceVersion, nil +} + +func (inf *dedupResourceInformer) resource() dynamic.ResourceInterface { + resource := inf.client.Resource(inf.index.GVR) + if inf.index.Namespace == "" { + return resource + } + return resource.Namespace(inf.index.Namespace) +} + +func (inf *dedupResourceInformer) listOptions(resourceVersion string) metav1.ListOptions { + options := metav1.ListOptions{ + FieldSelector: inf.index.FieldSelector, + LabelSelector: inf.index.LabelSelector, + } + if resourceVersion != "" { + options.ResourceVersion = resourceVersion + } + return options +} + +func (inf *dedupResourceInformer) objectKey(obj *unstructured.Unstructured) store.ObjectKey { + return store.ObjectKey{ + GVK: inf.index.GVK, + Namespace: obj.GetNamespace(), + Name: obj.GetName(), + } +} + +func (inf *dedupResourceInformer) prepareObject(obj *unstructured.Unstructured) { + obj.SetGroupVersionKind(inf.index.GVK) +} + +func (inf *dedupResourceInformer) handlersSnapshot() []clientgocache.ResourceEventHandler { + inf.mu.RLock() + defer inf.mu.RUnlock() + + handlers := make([]clientgocache.ResourceEventHandler, 0, len(inf.handlers)) + for _, handler := range inf.handlers { + handlers = append(handlers, handler) + } + return handlers +} + +func (inf *dedupResourceInformer) notifyAdd(obj *unstructured.Unstructured) { + for _, handler := range inf.handlersSnapshot() { + handler.OnAdd(obj.DeepCopy(), false) + } +} + +func (inf *dedupResourceInformer) notifyUpdate(oldObj, newObj *unstructured.Unstructured) { + for _, handler := range inf.handlersSnapshot() { + handler.OnUpdate(oldObj.DeepCopy(), newObj.DeepCopy()) + } +} + +func (inf *dedupResourceInformer) notifyDelete(obj *unstructured.Unstructured) { + for _, handler := range inf.handlersSnapshot() { + handler.OnDelete(obj.DeepCopy()) + } +} + +func (inf *dedupResourceInformer) reportWatchError(errorHandler *WatchErrorHandler, err error) { + if errorHandler != nil && err != nil && !errors.Is(err, context.Canceled) { + errorHandler.handler(nil, err) + return + } + if err != nil && !errors.Is(err, context.Canceled) { + log.Debug("dedup informer watch error", slog.String("error", err.Error())) + } +} + +func resourceVersionOf(obj runtime.Object) string { + accessor, err := meta.Accessor(obj) + if err != nil { + return "" + } + return accessor.GetResourceVersion() +} + +func errorFromWatchObject(obj runtime.Object) error { + status, ok := obj.(*metav1.Status) + if !ok { + return fmt.Errorf("watch returned error object %T", obj) + } + return apierrors.FromObject(status) +} diff --git a/pkg/kube_events_manager/dedup_test_helper_test.go b/pkg/kube_events_manager/dedup_test_helper_test.go new file mode 100644 index 00000000..0082aebe --- /dev/null +++ b/pkg/kube_events_manager/dedup_test_helper_test.go @@ -0,0 +1,17 @@ +package kubeeventsmanager + +import ( + "github.com/flant/shell-operator/pkg/kube/dedupclient" + "k8s.io/client-go/discovery/cached/memory" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" +) + +type legacyTestClient interface { + kubernetes.Interface + Dynamic() dynamic.Interface +} + +func testDedupClientFromLegacy(client legacyTestClient) *dedupclient.Client { + return dedupclient.NewFromClients(client, client.Dynamic(), nil, memory.NewMemCacheClient(client.Discovery())) +} diff --git a/pkg/kube_events_manager/factory.go b/pkg/kube_events_manager/factory.go index 95c27c20..a274ce48 100644 --- a/pkg/kube_events_manager/factory.go +++ b/pkg/kube_events_manager/factory.go @@ -2,11 +2,13 @@ package kubeeventsmanager import ( "context" + "fmt" "log/slog" "sync" "time" "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient/store" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" @@ -24,6 +26,7 @@ const ( var DefaultSyncTime = 100 * time.Millisecond type FactoryIndex struct { + GVK schema.GroupVersionKind GVR schema.GroupVersionResource Namespace string FieldSelector string @@ -32,6 +35,7 @@ type FactoryIndex struct { type Factory struct { shared dynamicinformer.DynamicSharedInformerFactory + dedup *dedupResourceInformer handlerRegistrations map[string]cache.ResourceEventHandlerRegistration ctx context.Context cancel context.CancelFunc @@ -43,6 +47,8 @@ type FactoryStore struct { mu sync.Mutex data map[FactoryIndex]*Factory stoppedCh map[FactoryIndex]chan struct{} + useDedup bool + storeFor func(FactoryIndex) store.Store } func NewFactoryStore() *FactoryStore { @@ -53,6 +59,13 @@ func NewFactoryStore() *FactoryStore { return fs } +func (c *FactoryStore) EnableDedupInformers(storeFor func(FactoryIndex) store.Store) { + c.mu.Lock() + defer c.mu.Unlock() + c.useDedup = true + c.storeFor = storeFor +} + func (c *FactoryStore) Reset() { c.mu.Lock() defer c.mu.Unlock() @@ -60,10 +73,11 @@ func (c *FactoryStore) Reset() { c.stoppedCh = make(map[FactoryIndex]chan struct{}) } -func (c *FactoryStore) add(ctx context.Context, index FactoryIndex, f dynamicinformer.DynamicSharedInformerFactory) { +func (c *FactoryStore) add(ctx context.Context, index FactoryIndex, f dynamicinformer.DynamicSharedInformerFactory, dedup *dedupResourceInformer) { ctx, cancel := context.WithCancel(ctx) c.data[index] = &Factory{ shared: f, + dedup: dedup, handlerRegistrations: make(map[string]cache.ResourceEventHandlerRegistration), ctx: ctx, cancel: cancel, @@ -82,6 +96,11 @@ func (c *FactoryStore) get(ctx context.Context, client dynamic.Interface, index return f } + if c.useDedup { + c.add(ctx, index, nil, newDedupResourceInformer(client, index, c.dedupStore(index))) + return c.data[index] + } + // define resyncPeriod for informer resyncPeriod := randomizedResyncPeriod() @@ -98,17 +117,32 @@ func (c *FactoryStore) get(ctx context.Context, client dynamic.Interface, index client, resyncPeriod, index.Namespace, tweakListOptions) factory.ForResource(index.GVR) - c.add(ctx, index, factory) + c.add(ctx, index, factory, nil) return c.data[index] } +func (c *FactoryStore) dedupStore(index FactoryIndex) store.Store { + if c.storeFor != nil { + return c.storeFor(index) + } + return store.NewDedupStore() +} + +func factoryIndexStoreKey(index FactoryIndex) string { + return fmt.Sprintf("%s|%s|%s|%s", index.GVR.String(), index.Namespace, index.FieldSelector, index.LabelSelector) +} + func (c *FactoryStore) Start(ctx context.Context, informerId string, client dynamic.Interface, index FactoryIndex, handler cache.ResourceEventHandler, errorHandler *WatchErrorHandler) error { c.mu.Lock() defer c.mu.Unlock() factory := c.get(ctx, client, index) + if c.useDedup && factory.dedup != nil { + return c.startDedupInformer(ctx, informerId, factory, index, handler, errorHandler) + } + informer := factory.shared.ForResource(index.GVR).Informer() // Add error handler, ignore "already started" error. _ = informer.SetWatchErrorHandler(errorHandler.handler) @@ -154,6 +188,45 @@ func (c *FactoryStore) Start(ctx context.Context, informerId string, client dyna return nil } +func (c *FactoryStore) startDedupInformer(ctx context.Context, informerId string, factory *Factory, index FactoryIndex, handler cache.ResourceEventHandler, errorHandler *WatchErrorHandler) error { + factory.dedup.addEventHandler(informerId, handler) + + log.Debug("Factory store: increased usage counter of the dedup factory", + slog.Int(pkg.LogKeyValue, factory.dedup.handlerCount()), + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + + if factory.done == nil { + factory.done = make(chan struct{}) + + go func() { + defer close(factory.done) + err := factory.dedup.run(factory.ctx, errorHandler) + if err != nil && err != context.Canceled { + log.Warn("Factory store: dedup informer exited with error", + slog.String(pkg.LogKeyNamespace, index.Namespace), + slog.String(pkg.LogKeyGVR, index.GVR.String()), + log.Err(err)) + } + + log.Debug("Factory store: dedup informer goroutine exited", + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + }() + } + + if !factory.dedup.synced() { + if err := wait.PollUntilContextCancel(ctx, DefaultSyncTime, true, func(_ context.Context) (bool, error) { + return factory.dedup.synced(), nil + }); err != nil { + return err + } + } + + log.Debug("Factory store: started dedup informer", + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + + return nil +} + func (c *FactoryStore) Stop(informerId string, index FactoryIndex) { c.mu.Lock() f, ok := c.data[index] @@ -163,6 +236,40 @@ func (c *FactoryStore) Stop(informerId string, index FactoryIndex) { return } + if f.dedup != nil { + f.dedup.removeEventHandler(informerId) + + log.Debug("Factory store: decreased usage counter of the dedup factory", + slog.Int(pkg.LogKeyValue, f.dedup.handlerCount()), + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + + if f.dedup.handlerCount() == 0 { + log.Debug("Factory store: last handler removed, canceling dedup informer", + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + + done := f.done + f.cancel() + c.mu.Unlock() + if done != nil { + <-done + } + + c.mu.Lock() + delete(c.data, index) + + log.Debug("Factory store: deleted dedup factory", + slog.String(pkg.LogKeyNamespace, index.Namespace), slog.String(pkg.LogKeyGVR, index.GVR.String())) + + if ch, ok := c.stoppedCh[index]; ok { + close(ch) + delete(c.stoppedCh, index) + } + } + + c.mu.Unlock() + return + } + if handlerRegistration, found := f.handlerRegistrations[informerId]; found { err := f.shared.ForResource(index.GVR).Informer().RemoveEventHandler(handlerRegistration) if err != nil { diff --git a/pkg/kube_events_manager/kube_events_manager.go b/pkg/kube_events_manager/kube_events_manager.go index 871cd763..2fc94a87 100644 --- a/pkg/kube_events_manager/kube_events_manager.go +++ b/pkg/kube_events_manager/kube_events_manager.go @@ -9,8 +9,8 @@ import ( "github.com/deckhouse/deckhouse/pkg/log" metricsstorage "github.com/deckhouse/deckhouse/pkg/metrics-storage" + "github.com/ldmonster/kubeclient/store" - klient "github.com/flant/kube-client/client" "github.com/flant/shell-operator/pkg" "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" @@ -50,6 +50,9 @@ type KubeEventsManager interface { // Passing nil restores the default (no dedup) behaviour. Must be // called before any AddMonitor call to take effect for that monitor. WithSnapshotStore(store *dedupclient.SnapshotStore) + // WithDedupClient switches subsequently-created Kubernetes binding + // informers to the dedup-backed implementation. A nil client is a no-op. + WithDedupClient(client *dedupclient.Client) Stop() Wait() } @@ -59,7 +62,7 @@ type kubeEventsManager struct { // channel to emit KubeEvent objects KubeEventCh chan kemtypes.KubeEvent - KubeClient *klient.Client + KubeClient *dedupclient.Client ctx context.Context cancel context.CancelFunc @@ -71,6 +74,7 @@ type kubeEventsManager struct { // created Monitor. Reading and writing this field is serialised via // the m mutex because Monitor construction happens under m. snapshotStore *dedupclient.SnapshotStore + dedupClient *dedupclient.Client m sync.RWMutex Monitors map[string]Monitor @@ -82,7 +86,7 @@ type kubeEventsManager struct { var _ KubeEventsManager = (*kubeEventsManager)(nil) // NewKubeEventsManager returns an implementation of KubeEventsManager. -func NewKubeEventsManager(ctx context.Context, client *klient.Client, logger *log.Logger) *kubeEventsManager { +func NewKubeEventsManager(ctx context.Context, client *dedupclient.Client, logger *log.Logger) *kubeEventsManager { cctx, cancel := context.WithCancel(ctx) em := &kubeEventsManager{ ctx: cctx, @@ -110,6 +114,18 @@ func (mgr *kubeEventsManager) WithSnapshotStore(snapshotStore *dedupclient.Snaps mgr.snapshotStore = snapshotStore } +func (mgr *kubeEventsManager) WithDedupClient(client *dedupclient.Client) { + if client == nil { + return + } + mgr.m.Lock() + defer mgr.m.Unlock() + mgr.dedupClient = client + mgr.factoryStore.EnableDedupInformers(func(index FactoryIndex) store.Store { + return client.InformerStore(factoryIndexStoreKey(index)) + }) +} + // AddMonitor creates a monitor with informers and return a KubeEvent with existing objects. // TODO cleanup informers in case of error // TODO use Context to stop informers diff --git a/pkg/kube_events_manager/kube_events_manager_test.go b/pkg/kube_events_manager/kube_events_manager_test.go index 72104cc4..9bb9c5de 100644 --- a/pkg/kube_events_manager/kube_events_manager_test.go +++ b/pkg/kube_events_manager/kube_events_manager_test.go @@ -14,8 +14,8 @@ import ( "k8s.io/apimachinery/pkg/version" fakediscovery "k8s.io/client-go/discovery/fake" - klient "github.com/flant/kube-client/client" "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" ) @@ -25,7 +25,7 @@ func Test_MainKubeEventsManager_Run(t *testing.T) { gvrToListKind := map[schema.GroupVersionResource]string{ {Group: "", Version: "v1", Resource: "pods"}: "PodList", } - kubeClient := klient.NewFake(gvrToListKind) + kubeClient := dedupclient.NewFake(gvrToListKind) fakeDiscovery, ok := kubeClient.Discovery().(*fakediscovery.FakeDiscovery) if !ok { @@ -93,7 +93,7 @@ func Test_MainKubeEventsManager_HandleEvents(t *testing.T) { defer cancel() // Add GVR - kubeClient := klient.NewFake(nil) + kubeClient := dedupclient.NewFake(nil) fakeDiscovery, ok := kubeClient.Discovery().(*fakediscovery.FakeDiscovery) if !ok { t.Fatalf("couldn't convert Discovery() to *FakeDiscovery") @@ -245,7 +245,7 @@ func Test_FakeClient_CatchUpdates(t *testing.T) { defer cancel() // Add GVR - kubeClient := klient.NewFake(nil) + kubeClient := dedupclient.NewFake(nil) fakeDiscovery, ok := kubeClient.Discovery().(*fakediscovery.FakeDiscovery) if !ok { t.Fatalf("couldn't convert Discovery() to *FakeDiscovery") @@ -421,7 +421,7 @@ func Test_FakeClient_CatchUpdates(t *testing.T) { // instance owns its own FactoryStore so parallel tests do not share informer state. func TestNewKubeEventsManager_IsolatedFactoryStore(t *testing.T) { ctx := context.Background() - kubeClient := klient.NewFake(nil) + kubeClient := dedupclient.NewFake(nil) mgr1 := NewKubeEventsManager(ctx, kubeClient, log.NewNop()) mgr2 := NewKubeEventsManager(ctx, kubeClient, log.NewNop()) @@ -437,3 +437,15 @@ func TestNewKubeEventsManager_IsolatedFactoryStore(t *testing.T) { t.Fatal("mgr1 and mgr2 must not share the same factoryStore") } } + +func TestKubeEventsManager_WithDedupClient_EnablesDedupInformers(t *testing.T) { + ctx := context.Background() + kubeClient := dedupclient.NewFake(nil) + + mgr := NewKubeEventsManager(ctx, kubeClient, log.NewNop()) + assert.False(t, mgr.factoryStore.useDedup) + + mgr.WithDedupClient(&dedupclient.Client{}) + + assert.True(t, mgr.factoryStore.useDedup) +} diff --git a/pkg/kube_events_manager/monitor.go b/pkg/kube_events_manager/monitor.go index ff3e64a5..738d5e04 100644 --- a/pkg/kube_events_manager/monitor.go +++ b/pkg/kube_events_manager/monitor.go @@ -10,7 +10,6 @@ import ( "github.com/deckhouse/deckhouse/pkg/log" metricsstorage "github.com/deckhouse/deckhouse/pkg/metrics-storage" - klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" @@ -32,7 +31,7 @@ type Monitor interface { type monitor struct { Name string Config *MonitorConfig - KubeClient *klient.Client + KubeClient *dedupclient.Client // Static list of informers ResourceInformers []*resourceInformer // Namespace informer to get new namespaces @@ -141,7 +140,7 @@ func (c *cancelForNs) Delete(key string) { var _ Monitor = (*monitor)(nil) -func NewMonitor(ctx context.Context, client *klient.Client, mstor metricsstorage.Storage, factoryStore *FactoryStore, config *MonitorConfig, eventCb func(kemtypes.KubeEvent), logger *log.Logger) *monitor { +func NewMonitor(ctx context.Context, client *dedupclient.Client, mstor metricsstorage.Storage, factoryStore *FactoryStore, config *MonitorConfig, eventCb func(kemtypes.KubeEvent), logger *log.Logger) *monitor { cctx, cancel := context.WithCancel(ctx) return &monitor{ diff --git a/pkg/kube_events_manager/monitor_snapshot_store_test.go b/pkg/kube_events_manager/monitor_snapshot_store_test.go index 490770a2..5aa9d1fd 100644 --- a/pkg/kube_events_manager/monitor_snapshot_store_test.go +++ b/pkg/kube_events_manager/monitor_snapshot_store_test.go @@ -50,7 +50,7 @@ func Test_Monitor_SnapshotStore_BackingPath(t *testing.T) { snapshotStore := dedupclient.NewSnapshotStore(log.NewNop()) - mon := NewMonitor(context.Background(), fc.Client, metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) + mon := NewMonitor(context.Background(), testDedupClientFromLegacy(fc.Client), metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) mon.WithSnapshotStore(snapshotStore) require.NoError(t, mon.CreateInformers()) @@ -107,7 +107,7 @@ func Test_Monitor_SnapshotStore_DefaultPath(t *testing.T) { metricStorage.HistogramObserveMock.Set(func(_ string, _ float64, _ map[string]string, _ []float64) {}) metricStorage.GaugeSetMock.When(metrics.KubeSnapshotObjects, 1, map[string]string(nil)).Then() - mon := NewMonitor(context.Background(), fc.Client, metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) + mon := NewMonitor(context.Background(), testDedupClientFromLegacy(fc.Client), metricStorage, NewFactoryStore(), monitorCfg, func(_ kemtypes.KubeEvent) {}, log.NewNop()) require.NoError(t, mon.CreateInformers()) mon.Start(context.TODO()) diff --git a/pkg/kube_events_manager/monitor_test.go b/pkg/kube_events_manager/monitor_test.go index 11c9cea5..5306da3b 100644 --- a/pkg/kube_events_manager/monitor_test.go +++ b/pkg/kube_events_manager/monitor_test.go @@ -7,8 +7,10 @@ import ( "testing" "github.com/deckhouse/deckhouse/pkg/log" + "github.com/ldmonster/kubeclient/store" . "github.com/onsi/gomega" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -58,7 +60,7 @@ func Test_Monitor_should_handle_dynamic_ns_events(t *testing.T) { metricStorage.GaugeSetMock.When(metrics.KubeSnapshotObjects, 2, map[string]string(nil)).Then() metricStorage.GaugeSetMock.When(metrics.KubeSnapshotObjects, 3, map[string]string(nil)).Then() - mon := NewMonitor(context.Background(), fc.Client, metricStorage, NewFactoryStore(), monitorCfg, func(ev kemtypes.KubeEvent) { + mon := NewMonitor(context.Background(), testDedupClientFromLegacy(fc.Client), metricStorage, NewFactoryStore(), monitorCfg, func(ev kemtypes.KubeEvent) { objsMutex.Lock() objsFromEvents = append(objsFromEvents, snapshotResourceIDs(ev.Objects)...) objsMutex.Unlock() @@ -156,6 +158,68 @@ func Test_Monitor_should_handle_dynamic_ns_events(t *testing.T) { ShouldNot(BeTrue(), "Should not create informer for non-matched Namespace") } +func Test_Monitor_UsesDedupInformerFactory(t *testing.T) { + g := NewWithT(t) + fc := fake.NewFakeCluster(fake.ClusterVersionV121) + + createNsWithLabels(fc, "default", nil) + createCM(fc, "default", testCM("initial-cm")) + + monitorCfg := &MonitorConfig{ + ApiVersion: "v1", + Kind: "ConfigMap", + EventTypes: []kemtypes.WatchEventType{kemtypes.WatchEventAdded}, + NamespaceSelector: &kemtypes.NamespaceSelector{ + NameSelector: &kemtypes.NameSelector{ + MatchNames: []string{"default"}, + }, + }, + } + + metricStorage := metric.NewStorageMock(t) + metricStorage.HistogramObserveMock.Optional().Set(func(_ string, _ float64, _ map[string]string, _ []float64) {}) + metricStorage.GaugeSetMock.Optional().Set(func(_ string, _ float64, _ map[string]string) {}) + + var objsMutex sync.Mutex + objsFromEvents := make([]string, 0) + + factoryStore := NewFactoryStore() + kubeClient := testDedupClientFromLegacy(fc.Client) + factoryStore.EnableDedupInformers(func(index FactoryIndex) store.Store { + return kubeClient.InformerStore(factoryIndexStoreKey(index)) + }) + mon := NewMonitor(context.Background(), kubeClient, metricStorage, factoryStore, monitorCfg, func(ev kemtypes.KubeEvent) { + objsMutex.Lock() + objsFromEvents = append(objsFromEvents, snapshotResourceIDs(ev.Objects)...) + objsMutex.Unlock() + }, log.NewNop()) + + require.NoError(t, mon.CreateInformers()) + mon.Start(context.TODO()) + defer func() { + mon.Stop() + mon.Wait() + }() + + factoryStore.mu.Lock() + for _, factory := range factoryStore.data { + assert.NotNil(t, factory.dedup) + assert.Nil(t, factory.shared) + } + factoryStore.mu.Unlock() + + g.Expect(snapshotResourceIDs(mon.Snapshot())).Should(ContainElement("default/ConfigMap/initial-cm")) + + mon.EnableKubeEventCb() + createCM(fc, "default", testCM("event-cm")) + + g.Eventually(func() []string { + objsMutex.Lock() + defer objsMutex.Unlock() + return append([]string(nil), objsFromEvents...) + }, "5s", "10ms").Should(ContainElement("default/ConfigMap/event-cm")) +} + func createNsWithLabels(fc *fake.Cluster, name string, labels map[string]string) { nsObj := &corev1.Namespace{} nsObj.SetName(name) diff --git a/pkg/kube_events_manager/namespace_informer.go b/pkg/kube_events_manager/namespace_informer.go index 2c205c0e..2d7c4683 100644 --- a/pkg/kube_events_manager/namespace_informer.go +++ b/pkg/kube_events_manager/namespace_informer.go @@ -15,8 +15,8 @@ import ( corev1 "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/tools/cache" - klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" ) const ( @@ -29,7 +29,7 @@ type namespaceInformer struct { stopped bool done chan struct{} - KubeClient *klient.Client + KubeClient *dedupclient.Client Monitor *MonitorConfig SharedInformer cache.SharedInformer @@ -39,7 +39,7 @@ type namespaceInformer struct { delFn func(string) } -func NewNamespaceInformer(ctx context.Context, client *klient.Client, monitor *MonitorConfig) *namespaceInformer { +func NewNamespaceInformer(ctx context.Context, client *dedupclient.Client, monitor *MonitorConfig) *namespaceInformer { cctx, cancel := context.WithCancel(ctx) informer := &namespaceInformer{ diff --git a/pkg/kube_events_manager/resource_informer.go b/pkg/kube_events_manager/resource_informer.go index 2d636472..1eeacabb 100644 --- a/pkg/kube_events_manager/resource_informer.go +++ b/pkg/kube_events_manager/resource_informer.go @@ -18,7 +18,6 @@ import ( "github.com/ldmonster/kubeclient/store" - klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" "github.com/flant/shell-operator/pkg/kube/dedupclient" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" @@ -28,7 +27,7 @@ import ( type resourceInformer struct { id string - KubeClient *klient.Client + KubeClient *dedupclient.Client Monitor *MonitorConfig // Filter by namespace Namespace string @@ -85,7 +84,7 @@ type resourceInformer struct { // resourceInformer should implement ResourceInformer type resourceInformerConfig struct { - client *klient.Client + client *dedupclient.Client mstor metricsstorage.Storage factoryStore *FactoryStore snapshotStore *dedupclient.SnapshotStore @@ -161,6 +160,11 @@ func (ei *resourceInformer) createSharedInformer() error { } ei.FactoryIndex = FactoryIndex{ + GVK: schema.GroupVersionKind{ + Group: ei.GroupVersionResource.Group, + Version: ei.GroupVersionResource.Version, + Kind: ei.Monitor.Kind, + }, GVR: ei.GroupVersionResource, Namespace: ei.Namespace, FieldSelector: ei.ListOptions.FieldSelector, diff --git a/pkg/metric/adapter.go b/pkg/metric/adapter.go deleted file mode 100644 index eec2845d..00000000 --- a/pkg/metric/adapter.go +++ /dev/null @@ -1,61 +0,0 @@ -package metric - -import ( - "github.com/deckhouse/deckhouse/pkg/log" - metricsstorage "github.com/deckhouse/deckhouse/pkg/metrics-storage" - "github.com/prometheus/client_golang/prometheus" - - klient "github.com/flant/kube-client/client" - utils "github.com/flant/shell-operator/pkg/utils/labels" -) - -// this adapter is used for kube-client -// it's deprecated, but don't know how to use it without breaking changes -// -//nolint:staticcheck -var _ klient.MetricStorage = (*MetricsAdapter)(nil) - -type MetricsAdapter struct { - Storage metricsstorage.Storage - Logger *log.Logger -} - -func NewMetricsAdapter(storage metricsstorage.Storage, logger *log.Logger) *MetricsAdapter { - return &MetricsAdapter{Storage: storage, Logger: logger} -} - -// RegisterCounter registers a counter using the external storage -func (a *MetricsAdapter) RegisterCounter(metric string, labels map[string]string) *prometheus.CounterVec { - // Use external storage to register the counter - labelNames := utils.LabelNames(labels) - _, err := a.Storage.RegisterCounter(metric, labelNames) - if err != nil { - a.Logger.Warn("failed to register counter metric", log.Err(err)) - } - - return nil -} - -// CounterAdd adds a value to a counter using the external storage -func (a *MetricsAdapter) CounterAdd(metric string, value float64, labels map[string]string) { - // Use external storage for the actual counter operation - a.Storage.CounterAdd(metric, value, labels) -} - -// RegisterHistogram registers a histogram using the external storage -func (a *MetricsAdapter) RegisterHistogram(metric string, labels map[string]string, buckets []float64) *prometheus.HistogramVec { - // Use external storage to register the histogram - labelNames := utils.LabelNames(labels) - _, err := a.Storage.RegisterHistogram(metric, labelNames, buckets) - if err != nil { - a.Logger.Warn("failed to register histogram metric", log.Err(err)) - } - - return nil -} - -// HistogramObserve observes a value in a histogram using the external storage -func (a *MetricsAdapter) HistogramObserve(metric string, value float64, labels map[string]string, buckets []float64) { - // Use external storage for the actual histogram operation - a.Storage.HistogramObserve(metric, value, labels, buckets) -} diff --git a/pkg/shell-operator/bootstrap.go b/pkg/shell-operator/bootstrap.go index 37525605..dd6e8664 100644 --- a/pkg/shell-operator/bootstrap.go +++ b/pkg/shell-operator/bootstrap.go @@ -12,6 +12,7 @@ import ( "github.com/flant/shell-operator/pkg/debug" "github.com/flant/shell-operator/pkg/filter/jq" "github.com/flant/shell-operator/pkg/hook" + objectpatch "github.com/flant/shell-operator/pkg/kube/object_patch" kubeeventsmanager "github.com/flant/shell-operator/pkg/kube_events_manager" "github.com/flant/shell-operator/pkg/metrics" schedulemanager "github.com/flant/shell-operator/pkg/schedule_manager" @@ -122,7 +123,7 @@ func Init(ctx context.Context, cfg *app.Config, logger *log.Logger) (*ShellOpera // KubeClientConfigs from cfg and delegates to AssembleCommonOperator. The // derivation reads only the supplied *app.Config — no environment variables // are consulted on this path, so the values you put into cfg are the values -// shell-operator uses. See kubeClientConfigsFromAppConfig for the exact +// shell-operator uses. See kubeClientConfigFromAppConfig for the exact // field mapping. // // kubeEventsManagerLabels are the metric labels for the kube-events manager; @@ -133,9 +134,9 @@ func Init(ctx context.Context, cfg *app.Config, logger *log.Logger) (*ShellOpera // defaults) — useful for tests. func (op *ShellOperator) AssembleCommonOperatorFromConfig(cfg *app.Config, kubeEventsManagerLabels []string) error { listenAddress, listenPort := listenAddrFromAppConfig(cfg) - mainKubeCfg, patcherKubeCfg := kubeClientConfigsFromAppConfig(cfg) + kubeCfg := kubeClientConfigFromAppConfig(cfg) dedupCfg := dedupClientConfigFromAppConfig(cfg) - return op.AssembleCommonOperatorWithDedupClient(listenAddress, listenPort, kubeEventsManagerLabels, mainKubeCfg, patcherKubeCfg, dedupCfg) + return op.AssembleCommonOperator(listenAddress, listenPort, kubeEventsManagerLabels, kubeCfg, dedupCfg) } // listenAddrFromAppConfig returns the HTTP server listen address/port from cfg @@ -148,38 +149,27 @@ func listenAddrFromAppConfig(cfg *app.Config) (string, string) { return cfg.App.ListenAddress, cfg.App.ListenPort } -// kubeClientConfigsFromAppConfig derives the main and object-patcher -// KubeClientConfigs from an *app.Config. The function is pure: it does not +// kubeClientConfigFromAppConfig derives the singleton KubeClientConfig from +// an *app.Config. The function is pure: it does not // touch the process environment, so any value present in cfg is used as-is // and library consumers can rely on env vars never overriding their config. -// A nil cfg yields two zero KubeClientConfig values (in-cluster defaults). -func kubeClientConfigsFromAppConfig(cfg *app.Config) (KubeClientConfig, KubeClientConfig) { +// A nil cfg yields a zero KubeClientConfig value (in-cluster defaults). +func kubeClientConfigFromAppConfig(cfg *app.Config) KubeClientConfig { if cfg == nil { - return KubeClientConfig{}, KubeClientConfig{} + return KubeClientConfig{} } - mainKubeCfg := KubeClientConfig{ + return KubeClientConfig{ Context: cfg.Kube.Context, Config: cfg.Kube.Config, QPS: cfg.Kube.ClientQPS, Burst: cfg.Kube.ClientBurst, MetricPrefix: cfg.App.PrometheusMetricsPrefix, } - patcherKubeCfg := KubeClientConfig{ - Context: cfg.Kube.Context, - Config: cfg.Kube.Config, - QPS: cfg.ObjectPatcher.KubeClientQPS, - Burst: cfg.ObjectPatcher.KubeClientBurst, - Timeout: cfg.ObjectPatcher.KubeClientTimeout, - MetricPrefix: "object_patcher_", - } - return mainKubeCfg, patcherKubeCfg } // dedupClientConfigFromAppConfig pulls deduplicated-kubeclient settings out -// of an *app.Config. A nil cfg (or one where DedupClient.Enabled is false) -// returns a zero-valued DedupClientConfig, which initDedupClient then -// recognises as "do not construct". Keeping this as a pure helper makes the -// derivation easy to unit-test without standing up a full operator. +// of an *app.Config. Enabled is preserved for compatibility with config +// parsing, but the singleton client is always constructed. func dedupClientConfigFromAppConfig(cfg *app.Config) DedupClientConfig { if cfg == nil { return DedupClientConfig{} @@ -194,29 +184,7 @@ func dedupClientConfigFromAppConfig(cfg *app.Config) DedupClientConfig { } } -// AssembleCommonOperator instantiates common dependencies used by both -// shell-operator and its derivatives (e.g. addon-operator). -// Requires listenAddress and listenPort to run the HTTP server for operator APIs. -// kubeCfg provides Kubernetes connection settings for the main client and -// object patcher; pass KubeClientConfig{} to fall back to in-cluster defaults. -// -// This method preserves the pre-deduplicated-kubeclient signature for -// backwards compatibility: it delegates to AssembleCommonOperatorWithDedupClient -// with a disabled DedupClientConfig, so behaviour is unchanged. Use the -// DedupClient-aware variant (or AssembleCommonOperatorFromConfig) to enable -// the deduplicated cache. -// -// For library consumers that already hold an *app.Config, prefer -// AssembleCommonOperatorFromConfig instead of unpacking fields by hand. -func (op *ShellOperator) AssembleCommonOperator(listenAddress, listenPort string, kubeEventsManagerLabels []string, mainKubeCfg, patcherKubeCfg KubeClientConfig) error { - return op.AssembleCommonOperatorWithDedupClient(listenAddress, listenPort, kubeEventsManagerLabels, mainKubeCfg, patcherKubeCfg, DedupClientConfig{}) -} - -// AssembleCommonOperatorWithDedupClient is the full assembly entry point that -// also constructs the optional deduplicated kubeclient. Pass a zero -// DedupClientConfig (or one with Enabled=false) to keep behaviour identical -// to AssembleCommonOperator. -func (op *ShellOperator) AssembleCommonOperatorWithDedupClient(listenAddress, listenPort string, kubeEventsManagerLabels []string, mainKubeCfg, patcherKubeCfg KubeClientConfig, dedupCfg DedupClientConfig) error { +func (op *ShellOperator) AssembleCommonOperator(listenAddress, listenPort string, kubeEventsManagerLabels []string, kubeCfg KubeClientConfig, dedupCfg DedupClientConfig) error { op.APIServer = newBaseHTTPServer(listenAddress, listenPort) // built-in metrics @@ -228,31 +196,15 @@ func (op *ShellOperator) AssembleCommonOperatorWithDedupClient(listenAddress, li // metrics from user's hooks op.setupHookMetricStorage() - // 'main' Kubernetes client. - op.KubeClient, err = initDefaultMainKubeClient(mainKubeCfg, op.MetricStorage, op.logger) - if err != nil { - return err - } - - // ObjectPatcher with a separate Kubernetes client. - op.ObjectPatcher, err = initDefaultObjectPatcher(patcherKubeCfg, op.MetricStorage, op.logger.Named("object-patcher")) - if err != nil { - return err + if op.KubeClient == nil { + op.KubeClient, err = initSingletonKubeClient(kubeCfg, dedupCfg, op.logger.Named("kube-client")) + if err != nil { + return err + } } - // Optional deduplicated kubeclient. Reuses the rest.Config + RESTMapper - // derived from the main kube client so users only configure connection - // details once. - op.DedupClient, err = initDedupClient(op.KubeClient, dedupCfg, op.logger.Named("dedup-kube-client")) - if err != nil { - return err - } + op.ObjectPatcher = objectpatch.NewObjectPatcher(op.KubeClient, op.logger.Named("object-patcher")) - // Optional deduplicated SnapshotStore. Constructed independently of - // DedupClient because the two solve different problems: DedupClient - // is a kubeclient instance for hooks/extensions, SnapshotStore is the - // shared backing for kube-events-manager monitors. Either, both, or - // neither may be active. op.SnapshotStore = initSnapshotStore(dedupCfg, op.logger.Named("dedup-snapshot-store")) op.SetupEventManagers() @@ -320,6 +272,7 @@ func (op *ShellOperator) SetupEventManagers() { // Initialize kubernetes events manager. op.KubeEventsManager = kubeeventsmanager.NewKubeEventsManager(op.ctx, op.KubeClient, op.logger.Named("kube-events-manager")) + op.KubeEventsManager.WithDedupClient(op.KubeClient) op.KubeEventsManager.WithMetricStorage(op.MetricStorage) // Initialize events handler that emit tasks to run hooks diff --git a/pkg/shell-operator/bootstrap_test.go b/pkg/shell-operator/bootstrap_test.go index 83eb25e3..568de4ec 100644 --- a/pkg/shell-operator/bootstrap_test.go +++ b/pkg/shell-operator/bootstrap_test.go @@ -9,11 +9,7 @@ import ( "github.com/flant/shell-operator/pkg/app" ) -// TestKubeClientConfigsFromAppConfig_DerivedFromConfig is a basic check that -// the per-client KubeClientConfigs are built straight from the supplied -// *app.Config (including the special "object_patcher_" metric prefix for the -// patcher client). -func TestKubeClientConfigsFromAppConfig_DerivedFromConfig(t *testing.T) { +func TestKubeClientConfigFromAppConfig_DerivedFromConfig(t *testing.T) { cfg := &app.Config{ App: app.AppSettings{ ListenAddress: "127.0.0.1", @@ -34,40 +30,32 @@ func TestKubeClientConfigsFromAppConfig_DerivedFromConfig(t *testing.T) { } addr, port := listenAddrFromAppConfig(cfg) - main, patcher := kubeClientConfigsFromAppConfig(cfg) + kubeCfg := kubeClientConfigFromAppConfig(cfg) assert.Equal(t, "127.0.0.1", addr) assert.Equal(t, "9000", port) - assert.Equal(t, "explicit-ctx", main.Context) - assert.Equal(t, "/explicit/kubeconfig", main.Config) - assert.InDelta(t, float32(42), main.QPS, 0.001) - assert.Equal(t, 84, main.Burst) - assert.Equal(t, time.Duration(0), main.Timeout) - assert.Equal(t, "embedded_", main.MetricPrefix) - - assert.Equal(t, "explicit-ctx", patcher.Context) - assert.Equal(t, "/explicit/kubeconfig", patcher.Config) - assert.InDelta(t, float32(11), patcher.QPS, 0.001) - assert.Equal(t, 22, patcher.Burst) - assert.Equal(t, 7*time.Second, patcher.Timeout) - assert.Equal(t, "object_patcher_", patcher.MetricPrefix) + assert.Equal(t, "explicit-ctx", kubeCfg.Context) + assert.Equal(t, "/explicit/kubeconfig", kubeCfg.Config) + assert.InDelta(t, float32(42), kubeCfg.QPS, 0.001) + assert.Equal(t, 84, kubeCfg.Burst) + assert.Equal(t, time.Duration(0), kubeCfg.Timeout) + assert.Equal(t, "embedded_", kubeCfg.MetricPrefix) } -// TestKubeClientConfigsFromAppConfig_NilCfg verifies the nil-cfg fallback -// yields zero KubeClientConfig values (which klient treats as in-cluster +// TestKubeClientConfigFromAppConfig_NilCfg verifies the nil-cfg fallback +// yields a zero KubeClientConfig value (which means in-cluster // defaults) without consulting the environment. -func TestKubeClientConfigsFromAppConfig_NilCfg(t *testing.T) { +func TestKubeClientConfigFromAppConfig_NilCfg(t *testing.T) { t.Setenv("KUBE_CONTEXT", "env-ctx") t.Setenv("SHELL_OPERATOR_LISTEN_PORT", "7777") addr, port := listenAddrFromAppConfig(nil) - main, patcher := kubeClientConfigsFromAppConfig(nil) + kubeCfg := kubeClientConfigFromAppConfig(nil) assert.Empty(t, addr) assert.Empty(t, port) - assert.Equal(t, KubeClientConfig{}, main) - assert.Equal(t, KubeClientConfig{}, patcher) + assert.Equal(t, KubeClientConfig{}, kubeCfg) } // TestAssembleFromConfig_EnvDoesNotOverrideConfig is the regression test that @@ -110,26 +98,18 @@ func TestAssembleFromConfig_EnvDoesNotOverrideConfig(t *testing.T) { } addr, port := listenAddrFromAppConfig(cfg) - main, patcher := kubeClientConfigsFromAppConfig(cfg) + kubeCfg := kubeClientConfigFromAppConfig(cfg) // Listen address/port must come from cfg, not env. assert.Equal(t, "127.0.0.1", addr) assert.Equal(t, "9000", port) // Main client values come from cfg.Kube / cfg.App. - assert.Equal(t, "lib-ctx", main.Context) - assert.Equal(t, "/lib/kubeconfig", main.Config) - assert.InDelta(t, float32(1), main.QPS, 0.001) - assert.Equal(t, 2, main.Burst) - assert.Equal(t, "lib_prefix_", main.MetricPrefix) - - // Patcher client values come from cfg.ObjectPatcher. - assert.Equal(t, "lib-ctx", patcher.Context) - assert.Equal(t, "/lib/kubeconfig", patcher.Config) - assert.InDelta(t, float32(3), patcher.QPS, 0.001) - assert.Equal(t, 4, patcher.Burst) - assert.Equal(t, 5*time.Second, patcher.Timeout) - assert.Equal(t, "object_patcher_", patcher.MetricPrefix) + assert.Equal(t, "lib-ctx", kubeCfg.Context) + assert.Equal(t, "/lib/kubeconfig", kubeCfg.Config) + assert.InDelta(t, float32(1), kubeCfg.QPS, 0.001) + assert.Equal(t, 2, kubeCfg.Burst) + assert.Equal(t, "lib_prefix_", kubeCfg.MetricPrefix) // And the source-of-truth cfg itself is untouched by the call. assert.Equal(t, "127.0.0.1", cfg.App.ListenAddress) diff --git a/pkg/shell-operator/debug_server.go b/pkg/shell-operator/debug_server.go index e7197746..bfaf6a60 100644 --- a/pkg/shell-operator/debug_server.go +++ b/pkg/shell-operator/debug_server.go @@ -86,9 +86,7 @@ func (op *ShellOperator) RegisterDebugHookRoutes(dbgSrv *debug.Server) { } // RegisterDebugDedupClientRoutes exposes a small JSON snapshot of the -// deduplicated-kubeclient state on the debug server. The route is registered -// even when the dedup client is disabled, returning a clear "disabled" -// payload so probes can distinguish "not configured" from "errored". +// singleton deduplicated kubeclient state on the debug server. func (op *ShellOperator) RegisterDebugDedupClientRoutes(dbgSrv *debug.Server) { dbgSrv.RegisterHandler(http.MethodGet, "/dedup-client/status.{format:(json|yaml|text)}", func(_ *http.Request) (interface{}, error) { payload := map[string]any{ @@ -99,19 +97,19 @@ func (op *ShellOperator) RegisterDebugDedupClientRoutes(dbgSrv *debug.Server) { }) } -// clientStatus reports the status of the runtime DedupClient. +// clientStatus reports the status of the singleton dedup client. func clientStatus(op *ShellOperator) map[string]any { - if op.DedupClient == nil { + if op.KubeClient == nil { return map[string]any{ "enabled": false, - "reason": "DedupClient is not configured (set --dedup-client-enabled or $DEDUP_CLIENT_ENABLED)", + "reason": "KubeClient is not configured", } } // Cache wide synchronisation status — best-effort, capped at 0 // timeout so the probe never blocks the debug server. ctx, cancel := context.WithTimeout(context.Background(), 0) defer cancel() - synced := op.DedupClient.WaitForCacheSync(ctx) + synced := op.KubeClient.WaitForCacheSync(ctx) return map[string]any{ "enabled": true, "cacheSyncedHint": synced, diff --git a/pkg/shell-operator/kube_client.go b/pkg/shell-operator/kube_client.go index 41b905ca..9d79f7ac 100644 --- a/pkg/shell-operator/kube_client.go +++ b/pkg/shell-operator/kube_client.go @@ -6,20 +6,9 @@ import ( "time" "github.com/deckhouse/deckhouse/pkg/log" - metricsstorage "github.com/deckhouse/deckhouse/pkg/metrics-storage" "k8s.io/apimachinery/pkg/runtime/schema" - klient "github.com/flant/kube-client/client" - pkg "github.com/flant/shell-operator/pkg" "github.com/flant/shell-operator/pkg/kube/dedupclient" - objectpatch "github.com/flant/shell-operator/pkg/kube/object_patch" - "github.com/flant/shell-operator/pkg/metric" - utils "github.com/flant/shell-operator/pkg/utils/labels" -) - -var ( - defaultMainKubeClientMetricLabels = map[string]string{pkg.MetricKeyComponent: "main"} - defaultObjectPatcherKubeClientMetricLabels = map[string]string{pkg.MetricKeyComponent: "object_patcher"} ) // KubeClientConfig holds explicit connection settings for a Kubernetes client, @@ -33,61 +22,10 @@ type KubeClientConfig struct { MetricPrefix string } -// defaultMainKubeClient creates a Kubernetes client for hooks. No timeout specified, because -// timeout will reset connections for Watchers. -func defaultMainKubeClient(cfg KubeClientConfig, metricStorage metricsstorage.Storage, metricLabels map[string]string, logger *log.Logger) *klient.Client { - client := klient.New(klient.WithLogger(logger)) - client.WithContextName(cfg.Context) - client.WithConfigPath(cfg.Config) - client.WithRateLimiterSettings(cfg.QPS, cfg.Burst) - client.WithMetricStorage(metric.NewMetricsAdapter(metricStorage, logger.Named("kube-client-metrics-adapter"))) - client.WithMetricLabels(utils.DefaultIfEmpty(metricLabels, defaultMainKubeClientMetricLabels)) - client.WithMetricPrefix(cfg.MetricPrefix) - - return client -} - -func initDefaultMainKubeClient(kubeCfg KubeClientConfig, metricStorage metricsstorage.Storage, logger *log.Logger) (*klient.Client, error) { - //nolint:staticcheck - kubeClient := defaultMainKubeClient(kubeCfg, metricStorage, defaultMainKubeClientMetricLabels, logger.Named("main-kube-client")) - err := kubeClient.Init() - if err != nil { - return nil, fmt.Errorf("initialize 'main' Kubernetes client: %s\n", err) - } - return kubeClient, nil -} - -// defaultObjectPatcherKubeClient initializes a Kubernetes client for ObjectPatcher. Timeout is specified here. -func defaultObjectPatcherKubeClient(cfg KubeClientConfig, metricStorage metricsstorage.Storage, metricLabels map[string]string, logger *log.Logger) *klient.Client { - client := klient.New(klient.WithLogger(logger)) - client.WithContextName(cfg.Context) - client.WithConfigPath(cfg.Config) - client.WithRateLimiterSettings(cfg.QPS, cfg.Burst) - client.WithMetricStorage(metric.NewMetricsAdapter(metricStorage, logger.Named("kube-client-metrics-adapter"))) - client.WithMetricLabels(utils.DefaultIfEmpty(metricLabels, defaultObjectPatcherKubeClientMetricLabels)) - client.WithMetricPrefix(cfg.MetricPrefix) - if cfg.Timeout > 0 { - client.WithTimeout(cfg.Timeout) - } - return client -} - -func initDefaultObjectPatcher(kubeCfg KubeClientConfig, metricStorage metricsstorage.Storage, logger *log.Logger) (*objectpatch.ObjectPatcher, error) { - patcherKubeClient := defaultObjectPatcherKubeClient(kubeCfg, metricStorage, defaultObjectPatcherKubeClientMetricLabels, logger.Named("object-patcher-kube-client")) - err := patcherKubeClient.Init() - if err != nil { - return nil, fmt.Errorf("initialize Kubernetes client for Object patcher: %s\n", err) - } - return objectpatch.NewObjectPatcher(patcherKubeClient, logger), nil -} - -// DedupClientConfig consolidates the parameters needed to build the optional -// deduplicated kubeclient on top of an already initialised *klient.Client. It -// mirrors app.Config.DedupClient but stays decoupled from the app package so -// addon-operator and other library consumers can populate it directly. +// DedupClientConfig consolidates the parameters for the singleton +// deduplicated Kubernetes client. Enabled is retained only for deprecated +// configuration compatibility; the singleton is always constructed. type DedupClientConfig struct { - // Enabled toggles construction of the deduplicated client. When false, - // initDedupClient returns (nil, nil) and the operator runs as before. Enabled bool // Namespaces restricts the cache to this list of namespaces. Empty @@ -97,7 +35,7 @@ type DedupClientConfig struct { // WatchGVKs is a list of GVK strings to pre-register with the cache. // Each entry follows the form "//"; the group // is empty for core resources (e.g. "/v1/Pod"). Malformed entries - // cause initDedupClient to return an error so misconfiguration is + // cause initSingletonKubeClient to return an error so misconfiguration is // caught at startup rather than silently ignored. WatchGVKs []string @@ -108,48 +46,23 @@ type DedupClientConfig struct { // SnapshotStore toggles construction of the process-wide deduplicated // SnapshotStore. The store backs per-monitor object caches; turning it - // on is the change that actually moves the RSS needle for workloads - // with many similar objects (the runtime DedupClient itself does not - // affect kube-events-manager memory). Independent of Enabled. + // on deduplicates monitor snapshots, while Enabled moves informer + // list/watch storage to the dedup-backed path. Independent of Enabled. SnapshotStore bool } -// initDedupClient constructs an optional deduplicated kubeclient using the -// rest.Config and RESTMapper exposed by an already-initialised KubeClient. -// Returning (nil, nil) when cfg.Enabled is false keeps the call site at the -// assembly layer simple — it only has to nil-check the result. -func initDedupClient(kubeClient *klient.Client, cfg DedupClientConfig, logger *log.Logger) (*dedupclient.Client, error) { - if !cfg.Enabled { - return nil, nil - } - if kubeClient == nil { - return nil, fmt.Errorf("initialize dedup kubeclient: main kube client is nil") - } - - restCfg := kubeClient.RestConfig() - if restCfg == nil { - return nil, fmt.Errorf("initialize dedup kubeclient: main kube client has no rest.Config (is it initialised?)") - } - - mapper, mapperErr := kubeClient.ToRESTMapper() - if mapperErr != nil { - // Fall through with a nil mapper; kubeclient will use its built-in - // default. The error is logged so misconfiguration is visible but - // non-fatal — many operators run fine with the default mapper. - logger.Warn("could not derive RESTMapper from main kube client; "+ - "dedup kubeclient will fall back to the default in-memory mapper", - log.Err(mapperErr)) - mapper = nil - } - +func initSingletonKubeClient(kubeCfg KubeClientConfig, cfg DedupClientConfig, logger *log.Logger) (*dedupclient.Client, error) { gvks, err := parseGVKs(cfg.WatchGVKs) if err != nil { - return nil, fmt.Errorf("initialize dedup kubeclient: parse watched GVKs: %w", err) + return nil, fmt.Errorf("initialize singleton kubeclient: parse watched GVKs: %w", err) } dedupCfg := dedupclient.Config{ - RESTConfig: restCfg, - RESTMapper: mapper, + Context: kubeCfg.Context, + Config: kubeCfg.Config, + QPS: kubeCfg.QPS, + Burst: kubeCfg.Burst, + Timeout: kubeCfg.Timeout, Namespaces: cfg.Namespaces, WatchGVKs: gvks, ReconstructLRUSize: cfg.ReconstructLRUSize, @@ -158,7 +71,7 @@ func initDedupClient(kubeClient *klient.Client, cfg DedupClientConfig, logger *l c, err := dedupclient.New(dedupCfg, logger) if err != nil { - return nil, fmt.Errorf("initialize dedup kubeclient: %w", err) + return nil, fmt.Errorf("initialize singleton kubeclient: %w", err) } return c, nil } diff --git a/pkg/shell-operator/kube_client_dedup_test.go b/pkg/shell-operator/kube_client_dedup_test.go index 8a4e14db..ddbfe492 100644 --- a/pkg/shell-operator/kube_client_dedup_test.go +++ b/pkg/shell-operator/kube_client_dedup_test.go @@ -118,37 +118,14 @@ func TestDedupClientConfigFromAppConfig_PassThrough(t *testing.T) { assert.Equal(t, 30*time.Second, got.GCInterval) } -func TestInitDedupClient_DisabledReturnsNil(t *testing.T) { +func TestInitSingletonKubeClient_RejectsMalformedGVK(t *testing.T) { t.Parallel() - c, err := initDedupClient(nil, DedupClientConfig{Enabled: false}, nil) - require.NoError(t, err) - assert.Nil(t, c, "Enabled=false must skip construction even when kubeClient is nil") -} - -func TestInitDedupClient_EnabledRequiresKubeClient(t *testing.T) { - t.Parallel() - - _, err := initDedupClient(nil, DedupClientConfig{Enabled: true}, nil) - require.Error(t, err) - assert.Contains(t, err.Error(), "main kube client is nil") -} - -func TestInitDedupClient_RejectsMalformedGVK(t *testing.T) { - t.Parallel() - - // We don't need a real kube client for this test path because parseGVKs - // is invoked before any rest.Config is touched. But initDedupClient does - // dereference kubeClient before parsing — so we short-circuit by - // constructing a request that fails at the GVK-parse step. cfg := DedupClientConfig{ Enabled: true, WatchGVKs: []string{"not-a-valid-gvk"}, } - _, err := initDedupClient(nil, cfg, nil) + _, err := initSingletonKubeClient(KubeClientConfig{}, cfg, nil) require.Error(t, err) - // The "main kube client is nil" branch fires first, which is the - // stronger guarantee at this layer; parse-level rejection is exercised - // directly by TestParseGVKs above. - assert.Contains(t, err.Error(), "main kube client is nil") + assert.Contains(t, err.Error(), "parse watched GVKs") } diff --git a/pkg/shell-operator/operator.go b/pkg/shell-operator/operator.go index d41a4db8..15a9a30b 100644 --- a/pkg/shell-operator/operator.go +++ b/pkg/shell-operator/operator.go @@ -25,7 +25,6 @@ import ( "github.com/gofrs/uuid/v5" "go.opentelemetry.io/otel" - klient "github.com/flant/kube-client/client" pkg "github.com/flant/shell-operator/pkg" "github.com/flant/shell-operator/pkg/hook" bindingcontext "github.com/flant/shell-operator/pkg/hook/binding_context" @@ -66,22 +65,14 @@ type ShellOperator struct { // HookMetricStorage separate metric storage for metrics, which are returned by user hooks HookMetricStorage metricsstorage.Storage - KubeClient *klient.Client + KubeClient *dedupclient.Client ObjectPatcher *objectpatch.ObjectPatcher - // DedupClient is the optional controller-runtime compatible Kubernetes - // client backed by a deduplicated cache (github.com/ldmonster/kubeclient). - // It is nil unless the deduplicated client is enabled at assembly time - // (see app.Config.DedupClient and AssembleCommonOperator). When non-nil, - // it is started in op.Start() and stopped during op.Shutdown(). - DedupClient *dedupclient.Client - // SnapshotStore is the optional process-wide deduplicated cache that // backs every kubernetes-binding monitor's per-object snapshot. When // non-nil it is wired into the KubeEventsManager so resourceInformers // store `*Unstructured` bodies once (refcounted) instead of per-monitor. - // Enabled via app.Config.DedupClient.SnapshotStore. Independent of - // DedupClient: either, both, or neither may be active. + // Enabled via app.Config.DedupClient.SnapshotStore. SnapshotStore *dedupclient.SnapshotStore ScheduleManager schedulemanager.ScheduleManager @@ -109,6 +100,12 @@ func WithLogger(logger *log.Logger) Option { } } +func WithKubeClient(client *dedupclient.Client) Option { + return func(operator *ShellOperator) { + operator.KubeClient = client + } +} + func NewShellOperator(ctx context.Context, metricsStorage, hookMetricStorage metricsstorage.Storage, opts ...Option) *ShellOperator { cctx, cancel := context.WithCancel(ctx) @@ -147,11 +144,10 @@ func (op *ShellOperator) Start() { op.APIServer.Start(op.ctx) - // Spin up the deduplicated kubeclient cache before any consumer asks - // for it. Failure to start is logged but non-fatal: the rest of the - // operator can still operate via the existing KubeClient/ObjectPatcher. - if op.DedupClient != nil { - if err := op.DedupClient.Start(op.ctx); err != nil { + // Spin up the singleton deduplicated kubeclient cache before any + // consumer asks for it. + if op.KubeClient != nil { + if err := op.KubeClient.Start(op.ctx); err != nil { op.logger.Error("start dedup kubeclient cache", log.Err(err)) } } @@ -897,9 +893,9 @@ func (op *ShellOperator) Shutdown() { // the dedup cache run loop, but Shutdown is also called from paths that // don't always invoke Stop(). Trigger an explicit, time-bounded shutdown // so the goroutine is guaranteed to exit even on direct Shutdown() use. - if op.DedupClient != nil { + if op.KubeClient != nil { shutdownCtx, cancel := context.WithTimeout(context.Background(), WaitQueuesTimeout) - if err := op.DedupClient.Shutdown(shutdownCtx); err != nil { + if err := op.KubeClient.Shutdown(shutdownCtx); err != nil { op.logger.Warn("dedup kubeclient cache did not shut down cleanly", slog.String(pkg.LogKeyPhase, "shutdown"), log.Err(err)) diff --git a/pkg/webhook/admission/manager.go b/pkg/webhook/admission/manager.go index 7858541a..a792983e 100644 --- a/pkg/webhook/admission/manager.go +++ b/pkg/webhook/admission/manager.go @@ -6,7 +6,7 @@ import ( "github.com/deckhouse/deckhouse/pkg/log" - klient "github.com/flant/kube-client/client" + "github.com/flant/shell-operator/pkg/kube/dedupclient" "github.com/flant/shell-operator/pkg/webhook/server" ) @@ -21,7 +21,7 @@ const DefaultConfigurationId = "hooks" // - Call AddWebhook for every binding in hooks // - Start() to run server and create ValidatingWebhookConfiguration/MutatingWebhookConfiguration type WebhookManager struct { - KubeClient *klient.Client + KubeClient *dedupclient.Client Settings *WebhookSettings Namespace string @@ -44,7 +44,7 @@ func WithLogger(logger *log.Logger) Option { } } -func NewWebhookManager(kubeClient *klient.Client, opts ...Option) *WebhookManager { +func NewWebhookManager(kubeClient *dedupclient.Client, opts ...Option) *WebhookManager { manager := &WebhookManager{ ValidatingResources: make(map[string]*ValidatingWebhookResource), MutatingResources: make(map[string]*MutatingWebhookResource), diff --git a/pkg/webhook/admission/resource.go b/pkg/webhook/admission/resource.go index f6d6b8af..a626f67b 100644 --- a/pkg/webhook/admission/resource.go +++ b/pkg/webhook/admission/resource.go @@ -10,12 +10,12 @@ import ( v1 "k8s.io/api/admissionregistration/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - klient "github.com/flant/kube-client/client" "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" ) type WebhookResourceOptions struct { - KubeClient *klient.Client + KubeClient *dedupclient.Client Namespace string ConfigurationName string ServiceName string diff --git a/pkg/webhook/conversion/crd_client_config.go b/pkg/webhook/conversion/crd_client_config.go index af355486..0342705d 100644 --- a/pkg/webhook/conversion/crd_client_config.go +++ b/pkg/webhook/conversion/crd_client_config.go @@ -8,13 +8,13 @@ import ( extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - klient "github.com/flant/kube-client/client" "github.com/flant/shell-operator/pkg" + "github.com/flant/shell-operator/pkg/kube/dedupclient" ) // A clientConfig for a particular CRD. type CrdClientConfig struct { - KubeClient *klient.Client + KubeClient *dedupclient.Client CrdName string Namespace string ServiceName string diff --git a/pkg/webhook/conversion/manager.go b/pkg/webhook/conversion/manager.go index 83e8ce04..940b0db1 100644 --- a/pkg/webhook/conversion/manager.go +++ b/pkg/webhook/conversion/manager.go @@ -8,7 +8,7 @@ import ( "github.com/deckhouse/deckhouse/pkg/log" v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - klient "github.com/flant/kube-client/client" + "github.com/flant/shell-operator/pkg/kube/dedupclient" "github.com/flant/shell-operator/pkg/webhook/server" ) @@ -27,7 +27,7 @@ type EventHandlerFn func(ctx context.Context, cdrName string, request *v1.Conver // - Start server loop. // - Update clientConfig in each registered CRD. type WebhookManager struct { - KubeClient *klient.Client + KubeClient *dedupclient.Client EventHandlerFn EventHandlerFn Settings *WebhookSettings diff --git a/test/hook/context/generator.go b/test/hook/context/generator.go index 254ebd86..34461626 100644 --- a/test/hook/context/generator.go +++ b/test/hook/context/generator.go @@ -15,8 +15,10 @@ import ( bctx "github.com/flant/shell-operator/pkg/hook/binding_context" "github.com/flant/shell-operator/pkg/hook/controller" "github.com/flant/shell-operator/pkg/hook/types" + "github.com/flant/shell-operator/pkg/kube/dedupclient" kubeeventsmanager "github.com/flant/shell-operator/pkg/kube_events_manager" schedulemanager "github.com/flant/shell-operator/pkg/schedule_manager" + "k8s.io/client-go/discovery/cached/memory" ) func init() { @@ -39,6 +41,7 @@ type BindingContextController struct { ScheduleManager schedulemanager.ScheduleManager fakeCluster *fake.Cluster + kubeClient *dedupclient.Client mu sync.Mutex started atomic.Bool @@ -64,7 +67,8 @@ func NewBindingContextController(config string, logger *log.Logger, version ...f logger: logger, } - b.KubeEventsManager = kubeeventsmanager.NewKubeEventsManager(ctx, b.fakeCluster.Client, b.logger.Named("kube-events-manager")) + b.kubeClient = dedupclient.NewFromClients(fc.Client, fc.Client.Dynamic(), nil, memory.NewMemCacheClient(fc.Client.Discovery())) + b.KubeEventsManager = kubeeventsmanager.NewKubeEventsManager(ctx, b.kubeClient, b.logger.Named("kube-events-manager")) b.KubeEventsManager.WithMetricStorage(metricsstorage.NewMetricStorage( metricsstorage.WithLogger(log.NewNop()), )) @@ -88,6 +92,7 @@ func (b *BindingContextController) FakeCluster() *fake.Cluster { // RegisterCRD registers custom resources for the cluster func (b *BindingContextController) RegisterCRD(group, version, kind string, namespaced bool) { b.fakeCluster.RegisterCRD(group, version, kind, namespaced) + b.kubeClient.ReplaceDynamic(b.fakeCluster.Client.Dynamic()) } // Run generates binding contexts for hook tests diff --git a/test/integration/suite/run.go b/test/integration/suite/run.go index 77dc651a..89b8e113 100644 --- a/test/integration/suite/run.go +++ b/test/integration/suite/run.go @@ -8,7 +8,7 @@ import ( "testing" "github.com/deckhouse/deckhouse/pkg/log" - klient "github.com/flant/kube-client/client" + "github.com/flant/shell-operator/pkg/kube/dedupclient" objectpatch "github.com/flant/shell-operator/pkg/kube/object_patch" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -17,7 +17,7 @@ import ( var ( ClusterName string ContextName string - KubeClient *klient.Client + KubeClient *dedupclient.Client ObjectPatcher *objectpatch.ObjectPatcher ) @@ -31,9 +31,8 @@ func RunIntegrationSuite(t *testing.T, description string, clusterPrefix string) var _ = BeforeSuite(func() { // Initialize kube client out-of-cluster - KubeClient = klient.New(klient.WithLogger(log.NewNop())) - KubeClient.WithContextName(ContextName) - err := KubeClient.Init() + var err error + KubeClient, err = dedupclient.New(dedupclient.Config{Context: ContextName}, log.NewNop()) Expect(err).ShouldNot(HaveOccurred()) ObjectPatcher = objectpatch.NewObjectPatcher(KubeClient, log.NewNop()) From 583a237bcc85d0cc4ecf655691789bcc23eb23bf Mon Sep 17 00:00:00 2001 From: Pavel Okhlopkov Date: Mon, 1 Jun 2026 22:39:52 +0300 Subject: [PATCH 4/4] replace jsons for binary checksums Signed-off-by: Pavel Okhlopkov --- pkg/kube_events_manager/filter.go | 19 ++---- pkg/utils/checksum/checksum.go | 101 ++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 13 deletions(-) diff --git a/pkg/kube_events_manager/filter.go b/pkg/kube_events_manager/filter.go index 134b38ae..d4366af5 100644 --- a/pkg/kube_events_manager/filter.go +++ b/pkg/kube_events_manager/filter.go @@ -12,7 +12,6 @@ import ( "github.com/flant/shell-operator/pkg/filter" kemtypes "github.com/flant/shell-operator/pkg/kube_events_manager/types" utils_checksum "github.com/flant/shell-operator/pkg/utils/checksum" - json "github.com/flant/shell-operator/pkg/utils/json" ) // applyFilter filters object json representation with a pre-compiled jq expression, @@ -36,24 +35,18 @@ func applyFilter(compiledFilter filter.CompiledFilter, jqFilterStr string, filte return nil, fmt.Errorf("filterFn (%s) contains an error: %v", runtime.FuncForPC(reflect.ValueOf(filterFn).Pointer()).Name(), err) } - filteredBytes, err := json.Marshal(filteredObj) - if err != nil { - return nil, err - } - res.FilterResult = filteredObj - res.Metadata.Checksum = utils_checksum.CalculateChecksumOfBytes(filteredBytes) + // Checksum the filter result directly from its in-memory value + // instead of round-tripping it through encoding/json. + res.Metadata.Checksum = utils_checksum.CalculateChecksumOfObject(filteredObj) return res, nil } - // Render obj to JSON text to apply jq filter. + // Checksum the object's decomposed content directly; the informers flow + // must not marshal objects to JSON just to detect changes. if compiledFilter == nil { - data, err := json.Marshal(obj) - if err != nil { - return nil, err - } - res.Metadata.Checksum = utils_checksum.CalculateChecksumOfBytes(data) + res.Metadata.Checksum = utils_checksum.CalculateChecksumOfObject(obj.UnstructuredContent()) } else { filtered, err := compiledFilter.Apply(obj.UnstructuredContent()) if err != nil { diff --git a/pkg/utils/checksum/checksum.go b/pkg/utils/checksum/checksum.go index f7852aec..7d1656fe 100644 --- a/pkg/utils/checksum/checksum.go +++ b/pkg/utils/checksum/checksum.go @@ -2,11 +2,14 @@ package checksum import ( "crypto/md5" + "encoding/binary" "encoding/hex" + "fmt" "hash" "os" "path/filepath" "sort" + "strconv" "sync" ) @@ -14,6 +17,104 @@ var md5Pool = sync.Pool{ New: func() any { return md5.New() }, } +// CalculateChecksumOfObject computes a deterministic MD5 hex digest of an +// arbitrary value without serializing it to JSON. It is intended for the +// informers flow, where objects are already decomposed `map[string]interface{}` +// trees (unstructured content) and re-marshalling them to JSON purely to +// checksum them is wasteful. Map keys are visited in sorted order so the +// digest is stable across runs, and every scalar/composite is length-prefixed +// and type-tagged so structurally different values cannot collide. +func CalculateChecksumOfObject(v interface{}) string { + h := md5Pool.Get().(hash.Hash) + h.Reset() + writeValueChecksum(h, v) + sum := hex.EncodeToString(h.Sum(nil)) + md5Pool.Put(h) + return sum +} + +// type tags keep structurally different values from colliding in the digest. +const ( + tagNil = 0x00 + tagBool = 0x01 + tagString = 0x02 + tagInt = 0x03 + tagFloat = 0x04 + tagMap = 0x05 + tagSlice = 0x06 + tagOther = 0x07 +) + +func writeValueChecksum(h hash.Hash, v interface{}) { + switch t := v.(type) { + case nil: + _, _ = h.Write([]byte{tagNil}) + case bool: + b := byte(0) + if t { + b = 1 + } + _, _ = h.Write([]byte{tagBool, b}) + case string: + _, _ = h.Write([]byte{tagString}) + writeLenString(h, t) + case int64: + _, _ = h.Write([]byte{tagInt}) + writeLenString(h, strconv.FormatInt(t, 10)) + case int: + writeValueChecksum(h, int64(t)) + case int32: + writeValueChecksum(h, int64(t)) + case uint: + writeValueChecksum(h, int64(t)) + case uint32: + writeValueChecksum(h, int64(t)) + case uint64: + writeValueChecksum(h, int64(t)) + case float64: + _, _ = h.Write([]byte{tagFloat}) + writeLenString(h, strconv.FormatFloat(t, 'g', -1, 64)) + case float32: + writeValueChecksum(h, float64(t)) + case map[string]interface{}: + _, _ = h.Write([]byte{tagMap}) + writeUint(h, uint64(len(t))) + keys := make([]string, 0, len(t)) + for k := range t { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + writeLenString(h, k) + writeValueChecksum(h, t[k]) + } + case []interface{}: + _, _ = h.Write([]byte{tagSlice}) + writeUint(h, uint64(len(t))) + for _, item := range t { + writeValueChecksum(h, item) + } + default: + // Fall back to a stable textual rendering for values that are not + // part of the standard unstructured content (e.g. Go-hook filter + // results that return typed structs). This stays deterministic + // without round-tripping through encoding/json. + _, _ = h.Write([]byte{tagOther}) + writeLenString(h, fmt.Sprintf("%+v", t)) + } +} + +func writeLenString(h hash.Hash, s string) { + writeUint(h, uint64(len(s))) + _, _ = h.Write([]byte(s)) +} + +func writeUint(h hash.Hash, n uint64) { + var buf [8]byte + binary.BigEndian.PutUint64(buf[:], n) + _, _ = h.Write(buf[:]) +} + func CalculateChecksum(stringArr ...string) string { h := md5Pool.Get().(hash.Hash) h.Reset()