From 90dac58111cc342af3ae52c52b8864eccd854a48 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 11 May 2026 11:48:13 -0700 Subject: [PATCH 01/73] Port channel send/wait helpers to TestEnv (#10159) ## What changed? Port `WaitForChannel` and `SendToChannel` to TestEnv. ## Why? Safer / simpler. --- tests/activity_api_pause_test.go | 12 ++++++------ tests/activity_api_reset_test.go | 10 +++++----- tests/activity_api_update_test.go | 4 ++-- tests/nexus_workflow_test.go | 4 ++-- tests/testcore/functional_test_base.go | 2 ++ tests/testcore/test_env.go | 20 ++++++++++++++++++++ 6 files changed, 37 insertions(+), 15 deletions(-) diff --git a/tests/activity_api_pause_test.go b/tests/activity_api_pause_test.go index 2cffca6e895..63b6fabbb63 100644 --- a/tests/activity_api_pause_test.go +++ b/tests/activity_api_pause_test.go @@ -62,7 +62,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WhileRunning() { activityFunction := func() (string, error) { startedActivityCount.Add(1) if startedActivityCount.Load() == 1 { - env.WaitForChannel(ctx, activityPausedCn) + env.WaitForChannel(activityPausedCn) return "", activityErr } return "done!", nil @@ -115,7 +115,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WhileRunning() { }, 5*time.Second, 500*time.Millisecond) // unblock the activity - env.SendToChannel(ctx, activityPausedCn) + env.SendToChannel(activityPausedCn) // make sure activity is paused on server and completed on the worker s.EventuallyWithT(func(t *assert.CollectT) { description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) @@ -208,7 +208,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_IncreaseAttemptsO activityFunction := func() (string, error) { startedActivityCount.Add(1) if startedActivityCount.Load() == 1 { - env.WaitForChannel(ctx, activityPausedCn) + env.WaitForChannel(activityPausedCn) return "", activityErr } if shouldSucceed.Load() { @@ -264,7 
+264,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_IncreaseAttemptsO }, 5*time.Second, 500*time.Millisecond) // End the activity - env.SendToChannel(ctx, activityPausedCn) + env.SendToChannel(activityPausedCn) s.EventuallyWithT(func(t *assert.CollectT) { description, err := env.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) @@ -559,7 +559,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WithReset() { activityErr := errors.New("bad-luck-please-retry") return "", activityErr } - env.WaitForChannel(ctx, activityCompleteCn) + env.WaitForChannel(activityCompleteCn) return "done!", nil } @@ -632,7 +632,7 @@ func (s *ActivityAPIPauseClientTestSuite) TestActivityPauseApi_WithReset() { }, 5*time.Second, 100*time.Millisecond) // let activity finish - env.SendToChannel(ctx, activityCompleteCn) + env.SendToChannel(activityCompleteCn) // wait for workflow to finish var out string diff --git a/tests/activity_api_reset_test.go b/tests/activity_api_reset_test.go index 0e96bfa0010..b6d656d0219 100644 --- a/tests/activity_api_reset_test.go +++ b/tests/activity_api_reset_test.go @@ -87,7 +87,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_AfterRetry() { return "", activityErr } - env.WaitForChannel(ctx, activityCompleteCh) + env.WaitForChannel(activityCompleteCh) return "done!", nil } @@ -160,7 +160,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_WhileRunning() { var startedActivityCount atomic.Int32 activityFunction := func() (string, error) { startedActivityCount.Add(1) - env.WaitForChannel(ctx, activityCompleteCh) + env.WaitForChannel(activityCompleteCh) return "done!", nil } @@ -242,7 +242,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityResetApi_InRetry() { return "", activityErr } - env.WaitForChannel(ctx, activityCompleteCh) + env.WaitForChannel(activityCompleteCh) return "done!", nil } @@ -322,7 +322,7 @@ func (s *ActivityApiResetClientTestSuite) 
TestActivityResetApi_KeepPaused() { return "", activityErr } - env.WaitForChannel(ctx, activityCompleteCh) + env.WaitForChannel(activityCompleteCh) return "done!", nil } @@ -462,7 +462,7 @@ func (s *ActivityApiResetClientTestSuite) TestActivityReset_HeartbeatDetails() { return "", errors.New("bad-luck-please-retry") } // not the first iteration - env.WaitForChannel(ctx, activityCompleteCh) + env.WaitForChannel(activityCompleteCh) for activityShouldFinish.Load() == false { activity.RecordHeartbeat(ctx, "second") time.Sleep(time.Second) //nolint:forbidigo diff --git a/tests/activity_api_update_test.go b/tests/activity_api_update_test.go index 13fd9a5e1b7..0afcdd54ef4 100644 --- a/tests/activity_api_update_test.go +++ b/tests/activity_api_update_test.go @@ -82,7 +82,7 @@ func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ChangeRetryInte return "", activityErr } - env.WaitForChannel(ctx, activityUpdated) + env.WaitForChannel(activityUpdated) return "done!", nil } @@ -327,7 +327,7 @@ func (s *ActivityAPIUpdateClientTestSuite) TestActivityUpdateApi_ResetDefaultOpt return "", activityErr } - env.WaitForChannel(ctx, activityUpdated) + env.WaitForChannel(activityUpdated) return "done!", nil } diff --git a/tests/nexus_workflow_test.go b/tests/nexus_workflow_test.go index 7699750fd17..a17dacbfb9c 100644 --- a/tests/nexus_workflow_test.go +++ b/tests/nexus_workflow_test.go @@ -1408,8 +1408,8 @@ func (s *NexusWorkflowTestSuite) TestNexusOperationCancelBeforeStarted_Cancelati require.NotNil(t, desc.PendingNexusOperations[0].CancellationInfo) }, time.Second*10, time.Millisecond*100) - env.SendToChannel(ctx, canStartCh) - env.WaitForChannel(ctx, cancelSentCh) + env.SendToChannel(canStartCh) + env.WaitForChannel(cancelSentCh) // Terminate the workflow for good measure. 
err = env.SdkClient().TerminateWorkflow(ctx, run.GetID(), run.GetRunID(), "test") diff --git a/tests/testcore/functional_test_base.go b/tests/testcore/functional_test_base.go index e45eaacc8f6..7d6832dc500 100644 --- a/tests/testcore/functional_test_base.go +++ b/tests/testcore/functional_test_base.go @@ -693,6 +693,7 @@ func (s *FunctionalTestBase) RunTestWithMatchingBehavior(subtest func()) { } } +// Deprecated: use (*TestEnv).WaitForChannel instead. func (s *FunctionalTestBase) WaitForChannel(ctx context.Context, ch chan struct{}) { s.T().Helper() select { @@ -702,6 +703,7 @@ func (s *FunctionalTestBase) WaitForChannel(ctx context.Context, ch chan struct{ } } +// Deprecated: use (*TestEnv).SendToChannel instead. func (s *FunctionalTestBase) SendToChannel(ctx context.Context, ch chan struct{}) { s.T().Helper() select { diff --git a/tests/testcore/test_env.go b/tests/testcore/test_env.go index ae332faaa81..7570481e67a 100644 --- a/tests/testcore/test_env.go +++ b/tests/testcore/test_env.go @@ -310,6 +310,26 @@ func (e *TestEnv) Context() context.Context { return e.ctx } +// WaitForChannel waits for ch to receive using the TestEnv context. +func (e *TestEnv) WaitForChannel(ch <-chan struct{}) { + e.t.Helper() + select { + case <-ch: + case <-e.ctx.Done(): + e.FailNow("context timeout while waiting for channel") + } +} + +// SendToChannel sends to ch using the TestEnv context. +func (e *TestEnv) SendToChannel(ch chan<- struct{}) { + e.t.Helper() + select { + case ch <- struct{}{}: + case <-e.ctx.Done(): + e.FailNow("context timeout while sending to channel") + } +} + // SdkClient returns the SDK client. It is lazily initialized on the first call. 
func (e *TestEnv) SdkClient() sdkclient.Client { e.sdkClientOnce.Do(func() { From 55562689b7449db19fb883179d8e81db00e95911 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 10:26:30 -0700 Subject: [PATCH 02/73] Dispatch cancel command to worker for standalone activities When a standalone activity's cancellation is requested or it is terminated while running on a worker, proactively dispatch a cancel command via the Nexus worker commands control queue. This avoids relying on the worker to discover cancellation only through heartbeat responses. Changes: - Add worker_control_task_queue field to ActivityAttemptState proto - Store control queue from poll request in TransitionStarted - Add CancelCommandDispatchTask side-effect task - Schedule dispatch task on cancel request and terminate - Dispatch cancel command via Nexus to matching service Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity.go | 50 +++++ chasm/lib/activity/activity_tasks.go | 181 ++++++++++++++++++ chasm/lib/activity/fx.go | 1 + .../gen/activitypb/v1/activity_state.pb.go | 20 +- .../gen/activitypb/v1/tasks.go-helpers.pb.go | 37 ++++ .../activity/gen/activitypb/v1/tasks.pb.go | 46 ++++- chasm/lib/activity/library.go | 33 ++-- .../activity/proto/v1/activity_state.proto | 4 + chasm/lib/activity/proto/v1/tasks.proto | 4 + chasm/lib/activity/statemachine.go | 1 + 10 files changed, 357 insertions(+), 20 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index df45a9ac490..afb3a676946 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -201,6 +201,28 @@ func (a *Activity) createAddActivityTaskRequest(ctx chasm.Context, namespaceID s }, nil } +// buildCancelCommandTaskToken builds the serialized task token for a cancel command. +// This token matches what the worker received when the activity was dispatched. 
+func (a *Activity) buildCancelCommandTaskToken(ctx chasm.Context, activityRef chasm.ComponentRef) ([]byte, error) { + componentRefBytes, err := ctx.Ref(a) + if err != nil { + return nil, err + } + + attempt := a.LastAttempt.Get(ctx) + key := ctx.ExecutionKey() + + token := &tokenspb.Task{ + NamespaceId: key.NamespaceID, + ActivityId: key.BusinessID, + ActivityType: a.GetActivityType().GetName(), + Attempt: attempt.GetCount(), + ComponentRef: componentRefBytes, + } + + return token.Marshal() +} + // HandleStarted updates the activity on recording activity task started and populates the response. func (a *Activity) HandleStarted(ctx chasm.MutableContext, request *historyservice.RecordActivityTaskStartedRequest) ( *historyservice.RecordActivityTaskStartedResponse, error, @@ -505,6 +527,13 @@ func (a *Activity) Terminate( return chasm.TerminateComponentResponse{}, nil } + // If the activity is running on a worker, proactively notify the worker via Nexus. + // Must be done before the transition since it checks current status. + if a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_STARTED || + a.GetStatus() == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED { + a.addCancelCommandDispatchTask(ctx) + } + metricsHandler, err := a.enrichMetricsHandler(ctx, metrics.ActivityTerminatedScope) if err != nil { return chasm.TerminateComponentResponse{}, err @@ -527,6 +556,23 @@ func (a *Activity) getOrCreateLastHeartbeat(ctx chasm.MutableContext) *activityp return heartbeat } +// addCancelCommandDispatchTask schedules a side-effect task to dispatch a cancel command to the +// worker via the Nexus worker commands control queue. No-op if the worker doesn't support worker +// commands (i.e., has no control queue). 
+func (a *Activity) addCancelCommandDispatchTask(ctx chasm.MutableContext) { + controlQueue := a.LastAttempt.Get(ctx).GetWorkerControlTaskQueue() + if controlQueue == "" { + return + } + ctx.AddTask( + a, + chasm.TaskAttributes{ + Destination: controlQueue, + }, + &activitypb.CancelCommandDispatchTask{}, + ) +} + func (a *Activity) handleCancellationRequested(ctx chasm.MutableContext, request *activitypb.RequestCancelActivityExecutionRequest) ( *activitypb.RequestCancelActivityExecutionResponse, error, ) { @@ -551,6 +597,10 @@ func (a *Activity) handleCancellationRequested(ctx chasm.MutableContext, request return nil, err } + if !isCancelImmediately { + a.addCancelCommandDispatchTask(ctx) + } + if isCancelImmediately { details := &commonpb.Payloads{ Payloads: []*commonpb.Payload{ diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index e22b2f586a6..1339a2ab930 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -2,14 +2,30 @@ package activity import ( "context" + "errors" + "fmt" + "time" + "github.com/nexus-rpc/sdk-go/nexus" + commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" + nexuspb "go.temporal.io/api/nexus/v1" + workerservicepb "go.temporal.io/api/nexusservices/workerservice/v1" + taskqueuepb "go.temporal.io/api/taskqueue/v1" + workerpb "go.temporal.io/api/worker/v1" + "go.temporal.io/sdk/temporal" + "go.temporal.io/server/api/matchingservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity/gen/activitypb/v1" + "go.temporal.io/server/common/debug" + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/util" + "go.temporal.io/server/service/history/configs" "go.uber.org/fx" + "google.golang.org/protobuf/proto" ) type activityDispatchTaskHandlerOptions struct { @@ -277,3 +293,168 @@ func (h 
*heartbeatTimeoutTaskHandler) Execute( fromStatus: activity.GetStatus(), }) } + +// cancelCommandDispatchTaskHandler dispatches a cancel command to the worker via the Nexus +// worker commands control queue. This is a best-effort mechanism — the activity will eventually +// time out if the worker doesn't respond. +type cancelCommandDispatchTaskHandler struct { + chasm.SideEffectTaskHandlerBase[*activitypb.CancelCommandDispatchTask] + opts cancelCommandDispatchTaskHandlerOptions +} + +type cancelCommandDispatchTaskHandlerOptions struct { + fx.In + + MatchingClient resource.MatchingClient + Config *configs.Config + MetricsHandler metrics.Handler + Logger log.Logger +} + +func newCancelCommandDispatchTaskHandler(opts cancelCommandDispatchTaskHandlerOptions) *cancelCommandDispatchTaskHandler { + return &cancelCommandDispatchTaskHandler{opts: opts} +} + +func (h *cancelCommandDispatchTaskHandler) Validate( + _ chasm.Context, + activity *Activity, + _ chasm.TaskAttributes, + _ *activitypb.CancelCommandDispatchTask, +) (bool, error) { + // Valid if the activity is in a state where it has been requested to cancel or terminated + // (meaning it was running on a worker when the cancel/terminate was issued). + return activity.Status == activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED || + activity.Status == activitypb.ACTIVITY_EXECUTION_STATUS_TERMINATED, nil +} + +const ( + cancelCommandDispatchTimeout = time.Second * 10 * debug.TimeoutMultiplier + cancelCommandDispatchMaxAttempt = 3 + + workerCommandsServiceName = "temporal.api.nexusservices.workerservice.v1.WorkerService" + workerCommandsOperationName = "ExecuteCommands" +) + +func (h *cancelCommandDispatchTaskHandler) Execute( + ctx context.Context, + activityRef chasm.ComponentRef, + taskAttrs chasm.TaskAttributes, + _ *activitypb.CancelCommandDispatchTask, +) error { + if !h.opts.Config.EnableCancelActivityWorkerCommand() { + return nil + } + + // Read the activity to build the task token for the cancel command. 
+ taskToken, err := chasm.ReadComponent( + ctx, + activityRef, + (*Activity).buildCancelCommandTaskToken, + activityRef, + ) + if err != nil { + return err + } + + command := &workerpb.WorkerCommand{ + Type: &workerpb.WorkerCommand_CancelActivity{ + CancelActivity: &workerpb.CancelActivityCommand{ + TaskToken: taskToken, + }, + }, + } + + return h.dispatchToWorker(ctx, activityRef.NamespaceID, taskAttrs.Destination, []*workerpb.WorkerCommand{command}) +} + +func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( + ctx context.Context, + namespaceID string, + controlQueue string, + commands []*workerpb.WorkerCommand, +) error { + ctx, cancel := context.WithTimeout(ctx, cancelCommandDispatchTimeout) + defer cancel() + + request := &workerservicepb.ExecuteCommandsRequest{ + Commands: commands, + } + requestData, err := proto.Marshal(request) + if err != nil { + return fmt.Errorf("failed to encode worker commands request: %w", err) + } + requestPayload := &commonpb.Payload{ + Metadata: map[string][]byte{ + "encoding": []byte("binary/protobuf"), + }, + Data: requestData, + } + + nexusRequest := &nexuspb.Request{ + Header: map[string]string{}, + Variant: &nexuspb.Request_StartOperation{ + StartOperation: &nexuspb.StartOperationRequest{ + Service: workerCommandsServiceName, + Operation: workerCommandsOperationName, + Payload: requestPayload, + }, + }, + } + + resp, err := h.opts.MatchingClient.DispatchNexusTask(ctx, &matchingservice.DispatchNexusTaskRequest{ + NamespaceId: namespaceID, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: controlQueue, + Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + }, + Request: nexusRequest, + }) + if err != nil { + h.opts.Logger.Warn("Failed to dispatch cancel command", + tag.NewStringTag("control_queue", controlQueue), + tag.Error(err)) + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("rpc_error")) + return err + } + + nexusErr := cancelCommandDispatchResponseToError(resp) + if nexusErr == nil { + 
metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("success")) + return nil + } + + // Non-retryable errors are dropped — the activity will eventually time out. + var handlerErr *nexus.HandlerError + if errors.As(nexusErr, &handlerErr) && !handlerErr.Retryable() { + h.opts.Logger.Error("Cancel command non-retryable error", + tag.NewStringTag("control_queue", controlQueue), + tag.Error(nexusErr)) + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("non_retryable_error")) + return nil + } + + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("transport_error")) + return nexusErr +} + +// cancelCommandDispatchResponseToError converts a DispatchNexusTaskResponse into a Go error. +func cancelCommandDispatchResponseToError(resp *matchingservice.DispatchNexusTaskResponse) error { + switch t := resp.GetOutcome().(type) { + case *matchingservice.DispatchNexusTaskResponse_Failure: + return temporal.GetDefaultFailureConverter().FailureToError(t.Failure) + case *matchingservice.DispatchNexusTaskResponse_RequestTimeout: + return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeUpstreamTimeout, "upstream timeout") + case *matchingservice.DispatchNexusTaskResponse_Response: + startResp := t.Response.GetStartOperation() + switch startResp.GetVariant().(type) { + case *nexuspb.StartOperationResponse_SyncSuccess, *nexuspb.StartOperationResponse_AsyncSuccess: + return nil + case *nexuspb.StartOperationResponse_Failure: + return temporal.GetDefaultFailureConverter().FailureToError(startResp.GetFailure()) + default: + return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "unknown start operation response") + } + default: + return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "empty or unknown dispatch outcome") + } +} diff --git a/chasm/lib/activity/fx.go b/chasm/lib/activity/fx.go index 905042382c2..0639862b381 100644 --- a/chasm/lib/activity/fx.go +++ 
b/chasm/lib/activity/fx.go @@ -12,6 +12,7 @@ var HistoryModule = fx.Module( fx.Provide( ConfigProvider, newActivityDispatchTaskHandler, + newCancelCommandDispatchTaskHandler, newScheduleToStartTimeoutTaskHandler, newScheduleToCloseTimeoutTaskHandler, newStartToCloseTimeoutTaskHandler, diff --git a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go index 3e95ee84f59..90034d167b6 100644 --- a/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/activity_state.pb.go @@ -456,8 +456,11 @@ type ActivityAttemptState struct { // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in // case of implicit retries. StartRequestId string `protobuf:"bytes,9,opt,name=start_request_id,json=startRequestId,proto3" json:"start_request_id,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // The worker's control task queue for sending commands (e.g. cancel) via Nexus. + // Set when the worker reports it during poll. Empty if the worker doesn't support worker commands. + WorkerControlTaskQueue string `protobuf:"bytes,10,opt,name=worker_control_task_queue,json=workerControlTaskQueue,proto3" json:"worker_control_task_queue,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ActivityAttemptState) Reset() { @@ -553,6 +556,13 @@ func (x *ActivityAttemptState) GetStartRequestId() string { return "" } +func (x *ActivityAttemptState) GetWorkerControlTaskQueue() string { + if x != nil { + return x.WorkerControlTaskQueue + } + return "" +} + type ActivityHeartbeatState struct { state protoimpl.MessageState `protogen:"open.v1"` // Details provided in the last recorded activity heartbeat. 
@@ -934,7 +944,7 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x06reason\x18\x04 \x01(\tR\x06reason\"7\n" + "\x16ActivityTerminateState\x12\x1d\n" + "\n" + - "request_id\x18\x01 \x01(\tR\trequestId\"\xe8\x05\n" + + "request_id\x18\x01 \x01(\tR\trequestId\"\xa3\x06\n" + "\x14ActivityAttemptState\x12\x14\n" + "\x05count\x18\x01 \x01(\x05R\x05count\x12O\n" + "\x16current_retry_interval\x18\x02 \x01(\v2\x19.google.protobuf.DurationR\x14currentRetryInterval\x12=\n" + @@ -944,7 +954,9 @@ const file_temporal_server_chasm_lib_activity_proto_v1_activity_state_proto_rawD "\x05stamp\x18\x06 \x01(\x05R\x05stamp\x120\n" + "\x14last_worker_identity\x18\a \x01(\tR\x12lastWorkerIdentity\x12k\n" + "\x17last_deployment_version\x18\b \x01(\v23.temporal.api.deployment.v1.WorkerDeploymentVersionR\x15lastDeploymentVersion\x12(\n" + - "\x10start_request_id\x18\t \x01(\tR\x0estartRequestId\x1a\x80\x01\n" + + "\x10start_request_id\x18\t \x01(\tR\x0estartRequestId\x129\n" + + "\x19worker_control_task_queue\x18\n" + + " \x01(\tR\x16workerControlTaskQueue\x1a\x80\x01\n" + "\x12LastFailureDetails\x12.\n" + "\x04time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\x04time\x12:\n" + "\afailure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\afailure\"\xc9\x01\n" + diff --git a/chasm/lib/activity/gen/activitypb/v1/tasks.go-helpers.pb.go b/chasm/lib/activity/gen/activitypb/v1/tasks.go-helpers.pb.go index d7628a6e9e6..a4173d9659f 100644 --- a/chasm/lib/activity/gen/activitypb/v1/tasks.go-helpers.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/tasks.go-helpers.pb.go @@ -189,3 +189,40 @@ func (this *HeartbeatTimeoutTask) Equal(that interface{}) bool { return proto.Equal(this, that1) } + +// Marshal an object of type CancelCommandDispatchTask to the protobuf v3 wire format +func (val *CancelCommandDispatchTask) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type CancelCommandDispatchTask from the protobuf v3 wire 
format +func (val *CancelCommandDispatchTask) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *CancelCommandDispatchTask) Size() int { + return proto.Size(val) +} + +// Equal returns whether two CancelCommandDispatchTask values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *CancelCommandDispatchTask) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *CancelCommandDispatchTask + switch t := that.(type) { + case *CancelCommandDispatchTask: + that1 = t + case CancelCommandDispatchTask: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go b/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go index 796574e7db2..23fc96a8db5 100644 --- a/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go +++ b/chasm/lib/activity/gen/activitypb/v1/tasks.pb.go @@ -239,6 +239,44 @@ func (x *HeartbeatTimeoutTask) GetStamp() int32 { return 0 } +// CancelCommandDispatchTask is a side-effect task that dispatches a cancel command to the worker +// via the Nexus worker commands control queue. 
+type CancelCommandDispatchTask struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *CancelCommandDispatchTask) Reset() { + *x = CancelCommandDispatchTask{} + mi := &file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *CancelCommandDispatchTask) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CancelCommandDispatchTask) ProtoMessage() {} + +func (x *CancelCommandDispatchTask) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CancelCommandDispatchTask.ProtoReflect.Descriptor instead. +func (*CancelCommandDispatchTask) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDescGZIP(), []int{5} +} + var File_temporal_server_chasm_lib_activity_proto_v1_tasks_proto protoreflect.FileDescriptor const file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDesc = "" + @@ -252,7 +290,8 @@ const file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDesc = "" "\x17StartToCloseTimeoutTask\x12\x14\n" + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\",\n" + "\x14HeartbeatTimeoutTask\x12\x14\n" + - "\x05stamp\x18\x01 \x01(\x05R\x05stampBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" + "\x05stamp\x18\x01 \x01(\x05R\x05stamp\"\x1b\n" + + "\x19CancelCommandDispatchTaskBDZBgo.temporal.io/server/chasm/lib/activity/gen/activitypb;activitypbb\x06proto3" var ( file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDescOnce sync.Once @@ -266,13 +305,14 @@ func 
file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDescGZIP() return file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDescData } -var file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_msgTypes = make([]protoimpl.MessageInfo, 6) var file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_goTypes = []any{ (*ActivityDispatchTask)(nil), // 0: temporal.server.chasm.lib.activity.proto.v1.ActivityDispatchTask (*ScheduleToStartTimeoutTask)(nil), // 1: temporal.server.chasm.lib.activity.proto.v1.ScheduleToStartTimeoutTask (*ScheduleToCloseTimeoutTask)(nil), // 2: temporal.server.chasm.lib.activity.proto.v1.ScheduleToCloseTimeoutTask (*StartToCloseTimeoutTask)(nil), // 3: temporal.server.chasm.lib.activity.proto.v1.StartToCloseTimeoutTask (*HeartbeatTimeoutTask)(nil), // 4: temporal.server.chasm.lib.activity.proto.v1.HeartbeatTimeoutTask + (*CancelCommandDispatchTask)(nil), // 5: temporal.server.chasm.lib.activity.proto.v1.CancelCommandDispatchTask } var file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_depIdxs = []int32{ 0, // [0:0] is the sub-list for method output_type @@ -293,7 +333,7 @@ func file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDesc), len(file_temporal_server_chasm_lib_activity_proto_v1_tasks_proto_rawDesc)), NumEnums: 0, - NumMessages: 5, + NumMessages: 6, NumExtensions: 0, NumServices: 0, }, diff --git a/chasm/lib/activity/library.go b/chasm/lib/activity/library.go index 83e3d9067af..cb128c20111 100644 --- a/chasm/lib/activity/library.go +++ b/chasm/lib/activity/library.go @@ -77,17 +77,19 @@ func (l *componentOnlyLibrary) Components() []*chasm.RegistrableComponent { type library struct { componentOnlyLibrary - handler 
*handler - activityDispatchTaskHandler *activityDispatchTaskHandler - scheduleToStartTimeoutTaskHandler *scheduleToStartTimeoutTaskHandler - scheduleToCloseTimeoutTaskHandler *scheduleToCloseTimeoutTaskHandler - startToCloseTimeoutTaskHandler *startToCloseTimeoutTaskHandler - heartbeatTimeoutTaskHandler *heartbeatTimeoutTaskHandler + handler *handler + activityDispatchTaskHandler *activityDispatchTaskHandler + cancelCommandDispatchTaskHandler *cancelCommandDispatchTaskHandler + scheduleToStartTimeoutTaskHandler *scheduleToStartTimeoutTaskHandler + scheduleToCloseTimeoutTaskHandler *scheduleToCloseTimeoutTaskHandler + startToCloseTimeoutTaskHandler *startToCloseTimeoutTaskHandler + heartbeatTimeoutTaskHandler *heartbeatTimeoutTaskHandler } func newLibrary( handler *handler, activityDispatchTaskHandler *activityDispatchTaskHandler, + cancelCommandDispatchTaskHandler *cancelCommandDispatchTaskHandler, scheduleToStartTimeoutTaskHandler *scheduleToStartTimeoutTaskHandler, scheduleToCloseTimeoutTaskHandler *scheduleToCloseTimeoutTaskHandler, startToCloseTimeoutTaskHandler *startToCloseTimeoutTaskHandler, @@ -96,13 +98,14 @@ func newLibrary( namespaceRegistry namespace.Registry, ) *library { return &library{ - componentOnlyLibrary: *newComponentOnlyLibrary(config, namespaceRegistry), - handler: handler, - activityDispatchTaskHandler: activityDispatchTaskHandler, - scheduleToStartTimeoutTaskHandler: scheduleToStartTimeoutTaskHandler, - scheduleToCloseTimeoutTaskHandler: scheduleToCloseTimeoutTaskHandler, - startToCloseTimeoutTaskHandler: startToCloseTimeoutTaskHandler, - heartbeatTimeoutTaskHandler: heartbeatTimeoutTaskHandler, + componentOnlyLibrary: *newComponentOnlyLibrary(config, namespaceRegistry), + handler: handler, + activityDispatchTaskHandler: activityDispatchTaskHandler, + cancelCommandDispatchTaskHandler: cancelCommandDispatchTaskHandler, + scheduleToStartTimeoutTaskHandler: scheduleToStartTimeoutTaskHandler, + scheduleToCloseTimeoutTaskHandler: 
scheduleToCloseTimeoutTaskHandler, + startToCloseTimeoutTaskHandler: startToCloseTimeoutTaskHandler, + heartbeatTimeoutTaskHandler: heartbeatTimeoutTaskHandler, } } @@ -132,5 +135,9 @@ func (l *library) Tasks() []*chasm.RegistrableTask { "heartbeatTimer", l.heartbeatTimeoutTaskHandler, ), + chasm.NewRegistrableSideEffectTask( + "cancelCommandDispatch", + l.cancelCommandDispatchTaskHandler, + ), } } diff --git a/chasm/lib/activity/proto/v1/activity_state.proto b/chasm/lib/activity/proto/v1/activity_state.proto index 931afb0b881..7519ff46be0 100644 --- a/chasm/lib/activity/proto/v1/activity_state.proto +++ b/chasm/lib/activity/proto/v1/activity_state.proto @@ -155,6 +155,10 @@ message ActivityAttemptState { // The request ID that came from matching's RecordActivityTaskStarted API call. Used to make this API idempotent in // case of implicit retries. string start_request_id = 9; + + // The worker's control task queue for sending commands (e.g. cancel) via Nexus. + // Set when the worker reports it during poll. Empty if the worker doesn't support worker commands. + string worker_control_task_queue = 10; } message ActivityHeartbeatState { diff --git a/chasm/lib/activity/proto/v1/tasks.proto b/chasm/lib/activity/proto/v1/tasks.proto index 9a1996e3dd2..70dd3ea992a 100644 --- a/chasm/lib/activity/proto/v1/tasks.proto +++ b/chasm/lib/activity/proto/v1/tasks.proto @@ -26,3 +26,7 @@ message HeartbeatTimeoutTask { // The current stamp for this activity execution. Used for task validation. See also [ActivityAttemptState]. int32 stamp = 1; } + +// CancelCommandDispatchTask is a side-effect task that dispatches a cancel command to the worker +// via the Nexus worker commands control queue. 
+message CancelCommandDispatchTask {} diff --git a/chasm/lib/activity/statemachine.go b/chasm/lib/activity/statemachine.go index b594e56a6d1..5cd7e9c8a86 100644 --- a/chasm/lib/activity/statemachine.go +++ b/chasm/lib/activity/statemachine.go @@ -146,6 +146,7 @@ var TransitionStarted = chasm.NewTransition( attempt.StartedTime = timestamppb.New(ctx.Now(a)) attempt.StartRequestId = request.GetRequestId() attempt.LastWorkerIdentity = request.GetPollRequest().GetIdentity() + attempt.WorkerControlTaskQueue = request.GetPollRequest().GetWorkerControlTaskQueue() if versionDirective := request.GetVersionDirective().GetDeploymentVersion(); versionDirective != nil { attempt.LastDeploymentVersion = &deploymentpb.WorkerDeploymentVersion{ BuildId: versionDirective.GetBuildId(), From cd991a370d59a188a06cb16d62b542bf07c5ef06 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 10:31:32 -0700 Subject: [PATCH 03/73] Add unit tests for standalone activity cancel command dispatch Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity_test.go | 212 ++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/chasm/lib/activity/activity_test.go b/chasm/lib/activity/activity_test.go index b614c3eba7a..f7797afe6e2 100644 --- a/chasm/lib/activity/activity_test.go +++ b/chasm/lib/activity/activity_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/require" commonpb "go.temporal.io/api/common/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" + "go.temporal.io/api/workflowservice/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity/gen/activitypb/v1" @@ -305,3 +306,214 @@ func TestContextMetadata(t *testing.T) { require.Nil(t, md) }) } + +func TestTransitionStartedStoresWorkerControlTaskQueue(t *testing.T) { + testTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + ctx := &chasm.MockMutableContext{ + MockContext: chasm.MockContext{ + HandleNow: func(chasm.Component) 
time.Time { return testTime }, + HandleExecutionKey: func() chasm.ExecutionKey { + return chasm.ExecutionKey{BusinessID: "test-activity-id", RunID: "test-run-id"} + }, + }, + } + + attemptState := &activitypb.ActivityAttemptState{Count: 1, Stamp: 1} + a := &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-type"}, + Status: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, + StartToCloseTimeout: durationpb.New(3 * time.Minute), + }, + LastAttempt: chasm.NewDataField(ctx, attemptState), + RequestData: chasm.NewDataField(ctx, &activitypb.ActivityRequestData{}), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + + request := &historyservice.RecordActivityTaskStartedRequest{ + Stamp: 1, + RequestId: "req-1", + PollRequest: &workflowservice.PollActivityTaskQueueRequest{ + WorkerControlTaskQueue: "test-control-queue", + }, + } + + _, err := a.HandleStarted(ctx, request) + require.NoError(t, err) + require.Equal(t, "test-control-queue", a.LastAttempt.Get(ctx).GetWorkerControlTaskQueue()) +} + +func TestCancelRequestDispatchesCancelCommand(t *testing.T) { + testTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + + testCases := []struct { + name string + activityStatus activitypb.ActivityExecutionStatus + controlQueue string + expectDispatchTask bool + }{ + { + name: "started with control queue dispatches cancel task", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + controlQueue: "test-control-queue", + expectDispatchTask: true, + }, + { + name: "started without control queue does not dispatch", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + controlQueue: "", + expectDispatchTask: false, + }, + { + name: "scheduled cancels immediately, no dispatch", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + controlQueue: "", + expectDispatchTask: false, + }, + } + + for _, tc := range testCases { 
+ t.Run(tc.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + nsRegistry := namespace.NewMockRegistry(ctrl) + nsRegistry.EXPECT().GetNamespaceName(gomock.Any()).Return(namespace.Name("test-ns"), nil).AnyTimes() + + ctx := &chasm.MockMutableContext{ + MockContext: chasm.MockContext{ + HandleNow: func(chasm.Component) time.Time { return testTime }, + GoCtx: context.WithValue(context.Background(), ctxKeyActivityContext, &activityContext{ + config: &Config{ + BreakdownMetricsByTaskQueue: dynamicconfig.GetBoolPropertyFnFilteredByTaskQueue(true), + }, + namespaceRegistry: nsRegistry, + }), + }, + } + + a := &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-type"}, + Status: tc.activityStatus, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, + ScheduleToCloseTimeout: durationpb.New(10 * time.Minute), + StartToCloseTimeout: durationpb.New(3 * time.Minute), + }, + LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{ + Count: 1, + Stamp: 1, + WorkerControlTaskQueue: tc.controlQueue, + }), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + + req := &activitypb.RequestCancelActivityExecutionRequest{ + FrontendRequest: &workflowservice.RequestCancelActivityExecutionRequest{ + RequestId: "cancel-req-1", + Identity: "test-identity", + }, + } + _, err := a.handleCancellationRequested(ctx, req) + require.NoError(t, err) + + hasCancelTask := false + for _, task := range ctx.Tasks { + if _, ok := task.Payload.(*activitypb.CancelCommandDispatchTask); ok { + hasCancelTask = true + require.Equal(t, tc.controlQueue, task.Attributes.Destination) + } + } + require.Equal(t, tc.expectDispatchTask, hasCancelTask, + "expected dispatch task: %v, but found: %v", tc.expectDispatchTask, hasCancelTask) + }) + } +} + +func TestTerminateDispatchesCancelCommand(t *testing.T) { + testTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + + testCases := []struct { + 
name string + activityStatus activitypb.ActivityExecutionStatus + controlQueue string + expectDispatchTask bool + }{ + { + name: "started with control queue dispatches cancel task", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + controlQueue: "test-control-queue", + expectDispatchTask: true, + }, + { + name: "cancel_requested with control queue dispatches cancel task", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_CANCEL_REQUESTED, + controlQueue: "test-control-queue", + expectDispatchTask: true, + }, + { + name: "started without control queue does not dispatch", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_STARTED, + controlQueue: "", + expectDispatchTask: false, + }, + { + name: "scheduled does not dispatch", + activityStatus: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + controlQueue: "", + expectDispatchTask: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + nsRegistry := namespace.NewMockRegistry(ctrl) + nsRegistry.EXPECT().GetNamespaceName(gomock.Any()).Return(namespace.Name("test-ns"), nil).AnyTimes() + + ctx := &chasm.MockMutableContext{ + MockContext: chasm.MockContext{ + HandleNow: func(chasm.Component) time.Time { return testTime }, + GoCtx: context.WithValue(context.Background(), ctxKeyActivityContext, &activityContext{ + config: &Config{ + BreakdownMetricsByTaskQueue: dynamicconfig.GetBoolPropertyFnFilteredByTaskQueue(true), + }, + namespaceRegistry: nsRegistry, + }), + }, + } + + a := &Activity{ + ActivityState: &activitypb.ActivityState{ + ActivityType: &commonpb.ActivityType{Name: "test-type"}, + Status: tc.activityStatus, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, + ScheduleToCloseTimeout: durationpb.New(10 * time.Minute), + StartToCloseTimeout: durationpb.New(3 * time.Minute), + }, + LastAttempt: chasm.NewDataField(ctx, &activitypb.ActivityAttemptState{ + Count: 1, + Stamp: 1, + 
WorkerControlTaskQueue: tc.controlQueue, + }), + Outcome: chasm.NewDataField(ctx, &activitypb.ActivityOutcome{}), + } + + _, err := a.Terminate(ctx, chasm.TerminateComponentRequest{ + Reason: "test terminate", + }) + require.NoError(t, err) + + hasCancelTask := false + for _, task := range ctx.Tasks { + if _, ok := task.Payload.(*activitypb.CancelCommandDispatchTask); ok { + hasCancelTask = true + require.Equal(t, tc.controlQueue, task.Attributes.Destination) + } + } + require.Equal(t, tc.expectDispatchTask, hasCancelTask, + "expected dispatch task: %v, but found: %v", tc.expectDispatchTask, hasCancelTask) + }) + } +} From ae47ada3a65f2007a7333ba36863f99f1d77f61b Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 10:49:35 -0700 Subject: [PATCH 04/73] Use TASK_QUEUE_KIND_WORKER_COMMANDS for cancel command dispatch Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity_tasks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index 1339a2ab930..9c222346a0e 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -405,7 +405,7 @@ func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( NamespaceId: namespaceID, TaskQueue: &taskqueuepb.TaskQueue{ Name: controlQueue, - Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + Kind: enumspb.TASK_QUEUE_KIND_WORKER_COMMANDS, }, Request: nexusRequest, }) From 94a9a6d2213bb279cb5a6306a840569ccd536485 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 11:05:15 -0700 Subject: [PATCH 05/73] Remove unused constant and add TODO for duplicated dispatch helper Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity_tasks.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index 9c222346a0e..c6d5ababb0d 100644 --- a/chasm/lib/activity/activity_tasks.go +++ 
b/chasm/lib/activity/activity_tasks.go @@ -328,8 +328,7 @@ func (h *cancelCommandDispatchTaskHandler) Validate( } const ( - cancelCommandDispatchTimeout = time.Second * 10 * debug.TimeoutMultiplier - cancelCommandDispatchMaxAttempt = 3 + cancelCommandDispatchTimeout = time.Second * 10 * debug.TimeoutMultiplier workerCommandsServiceName = "temporal.api.nexusservices.workerservice.v1.WorkerService" workerCommandsOperationName = "ExecuteCommands" @@ -438,6 +437,7 @@ func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( } // cancelCommandDispatchResponseToError converts a DispatchNexusTaskResponse into a Go error. +// TODO: consolidate with service/history.dispatchResponseToError into a shared package. func cancelCommandDispatchResponseToError(resp *matchingservice.DispatchNexusTaskResponse) error { switch t := resp.GetOutcome().(type) { case *matchingservice.DispatchNexusTaskResponse_Failure: From a1a2d84c8e37ea8d96e204989f2f17b9b7d31a28 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 11:28:50 -0700 Subject: [PATCH 06/73] Add e2e tests for cancel command dispatch on standalone activities Tests both cancel-request and terminate paths: start activity with worker control queue, trigger cancellation/termination, verify cancel command arrives on the Nexus control queue with correct task token. 
Co-Authored-By: Claude Opus 4.6 --- tests/standalone_activity_test.go | 142 ++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 49ce3fe4dc4..c36cdd319c2 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -15,6 +15,7 @@ import ( commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" + workerservicepb "go.temporal.io/api/nexusservices/workerservice/v1" "go.temporal.io/api/operatorservice/v1" sdkpb "go.temporal.io/api/sdk/v1" "go.temporal.io/api/serviceerror" @@ -6328,3 +6329,144 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { require.Equal(t, enumspb.ACTIVITY_EXECUTION_STATUS_TIMED_OUT, descResp.GetInfo().GetStatus()) }) } + +func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) + defer cancel() + + s.OverrideDynamicConfig(dynamicconfig.EnableCancelActivityWorkerCommand, true) + + controlQueueName := s.tv.ControlQueueName(s.Namespace().String()) + + tokenSerializer := tasktoken.NewSerializer() + + // assertCancelTokenMatchesPoll verifies the cancel command's task token identifies the same + // activity as the poll response's token. The tokens won't be byte-identical because: + // 1. Matching builds poll tokens with additional fields (Clock, Version, etc.) + // 2. The ComponentRef version advances after state mutations (cancel/terminate) + // We compare the stable identity fields that the SDK uses to find the running activity. 
+ assertCancelTokenMatchesPoll := func(t *testing.T, pollToken, cancelToken []byte) { + t.Helper() + pollTask, err := tokenSerializer.Deserialize(pollToken) + require.NoError(t, err) + cancelTask, err := tokenSerializer.Deserialize(cancelToken) + require.NoError(t, err) + require.Equal(t, pollTask.GetActivityId(), cancelTask.GetActivityId()) + require.Equal(t, pollTask.GetNamespaceId(), cancelTask.GetNamespaceId()) + require.Equal(t, pollTask.GetActivityType(), cancelTask.GetActivityType()) + require.Equal(t, pollTask.GetAttempt(), cancelTask.GetAttempt()) + require.NotEmpty(t, cancelTask.GetComponentRef(), "cancel token must have a ComponentRef") + } + + // pollNexusControlQueue polls the worker commands control queue for a cancel command and + // returns the decoded ExecuteCommandsRequest. Returns nil if no task is received. + pollNexusControlQueue := func() *workerservicepb.ExecuteCommandsRequest { + pollCtx, pollCancel := context.WithTimeout(ctx, 5*time.Second) + defer pollCancel() + resp, err := s.FrontendClient().PollNexusTaskQueue(pollCtx, &workflowservice.PollNexusTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: controlQueueName, Kind: enumspb.TASK_QUEUE_KIND_WORKER_COMMANDS}, + Identity: s.tv.WorkerIdentity(), + }) + if err != nil || resp == nil || resp.Request == nil { + return nil + } + startOp := resp.Request.GetStartOperation() + if startOp == nil { + return nil + } + var executeReq workerservicepb.ExecuteCommandsRequest + if err := payload.Decode(startOp.Payload, &executeReq); err != nil { + return nil + } + return &executeReq + } + + t.Run("CancelRequest", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll with a worker control task queue so the activity stores it. 
+ pollTaskResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + }, + Identity: s.tv.WorkerIdentity(), + WorkerInstanceKey: s.tv.WorkerInstanceKey(), + WorkerControlTaskQueue: controlQueueName, + }) + require.NoError(t, err) + require.NotEmpty(t, pollTaskResp.TaskToken) + + // Request cancellation — should dispatch cancel command to the control queue. + _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Identity: "canceller", + RequestId: s.tv.RequestID(), + Reason: "test cancel", + }) + require.NoError(t, err) + + var executeReq *workerservicepb.ExecuteCommandsRequest + s.Eventually(func() bool { + executeReq = pollNexusControlQueue() + return executeReq != nil + }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue") + + require.Len(t, executeReq.Commands, 1) + cancelCmd := executeReq.Commands[0].GetCancelActivity() + require.NotNil(t, cancelCmd, "expected CancelActivity command") + assertCancelTokenMatchesPoll(t, pollTaskResp.TaskToken, cancelCmd.TaskToken) + }) + + t.Run("Terminate", func(t *testing.T) { + activityID := testcore.RandomizeStr(t.Name()) + taskQueue := testcore.RandomizeStr(t.Name()) + + startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + runID := startResp.RunId + + // Poll with a worker control task queue. 
+ pollTaskResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + }, + Identity: s.tv.WorkerIdentity(), + WorkerInstanceKey: s.tv.WorkerInstanceKey(), + WorkerControlTaskQueue: controlQueueName, + }) + require.NoError(t, err) + require.NotEmpty(t, pollTaskResp.TaskToken) + + // Terminate — should dispatch cancel command to the control queue. + _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ + Namespace: s.Namespace().String(), + ActivityId: activityID, + RunId: runID, + Reason: "test terminate", + Identity: "terminator", + }) + require.NoError(t, err) + + var executeReq *workerservicepb.ExecuteCommandsRequest + s.Eventually(func() bool { + executeReq = pollNexusControlQueue() + return executeReq != nil + }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue after terminate") + + require.Len(t, executeReq.Commands, 1) + cancelCmd := executeReq.Commands[0].GetCancelActivity() + require.NotNil(t, cancelCmd, "expected CancelActivity command") + assertCancelTokenMatchesPoll(t, pollTaskResp.TaskToken, cancelCmd.TaskToken) + }) +} From d68c034c705d50c14a1f483f26d7c24e4e14fb1a Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Mon, 11 May 2026 11:43:30 -0700 Subject: [PATCH 07/73] Use shared DispatchResponseToError from common/nexus Replace the duplicated cancelCommandDispatchResponseToError with the shared commonnexus.DispatchResponseToError now available from the merged kannan/move-dispatch-response-to-error branch. 
Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity_tasks.go | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index c6d5ababb0d..a1d59d9c66c 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -13,7 +13,6 @@ import ( workerservicepb "go.temporal.io/api/nexusservices/workerservice/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" workerpb "go.temporal.io/api/worker/v1" - "go.temporal.io/sdk/temporal" "go.temporal.io/server/api/matchingservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity/gen/activitypb/v1" @@ -21,6 +20,7 @@ import ( "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" + commonnexus "go.temporal.io/server/common/nexus" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/util" "go.temporal.io/server/service/history/configs" @@ -416,7 +416,7 @@ func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( return err } - nexusErr := cancelCommandDispatchResponseToError(resp) + nexusErr := commonnexus.DispatchResponseToError(resp) if nexusErr == nil { metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("success")) return nil @@ -435,26 +435,3 @@ func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("transport_error")) return nexusErr } - -// cancelCommandDispatchResponseToError converts a DispatchNexusTaskResponse into a Go error. -// TODO: consolidate with service/history.dispatchResponseToError into a shared package. 
-func cancelCommandDispatchResponseToError(resp *matchingservice.DispatchNexusTaskResponse) error { - switch t := resp.GetOutcome().(type) { - case *matchingservice.DispatchNexusTaskResponse_Failure: - return temporal.GetDefaultFailureConverter().FailureToError(t.Failure) - case *matchingservice.DispatchNexusTaskResponse_RequestTimeout: - return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeUpstreamTimeout, "upstream timeout") - case *matchingservice.DispatchNexusTaskResponse_Response: - startResp := t.Response.GetStartOperation() - switch startResp.GetVariant().(type) { - case *nexuspb.StartOperationResponse_SyncSuccess, *nexuspb.StartOperationResponse_AsyncSuccess: - return nil - case *nexuspb.StartOperationResponse_Failure: - return temporal.GetDefaultFailureConverter().FailureToError(startResp.GetFailure()) - default: - return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "unknown start operation response") - } - default: - return nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "empty or unknown dispatch outcome") - } -} From 808b69af6f31c1601a1f6634dd787430fd94a432 Mon Sep 17 00:00:00 2001 From: Kannan Date: Mon, 11 May 2026 13:27:21 -0700 Subject: [PATCH 08/73] Use worker_commands task queue kind when dispatching worker command (#10219) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What Change the `DispatchNexusTask` call in `workerCommandsTaskDispatcher` to use `TASK_QUEUE_KIND_WORKER_COMMANDS` instead of `TASK_QUEUE_KIND_NORMAL`. ## Why Worker commands queues use a dedicated partition type (`WorkerCommandsPartition`) with different properties than normal partitions. This new kind was introduced in https://github.com/temporalio/temporal/pull/9899. ## How did you test it? Unit test: Updated e2e test and added an assertion on the dispatched request's task queue kind. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- .../worker_commands_task_dispatcher.go | 2 +- .../worker_commands_task_dispatcher_test.go | 30 ++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/service/history/worker_commands_task_dispatcher.go b/service/history/worker_commands_task_dispatcher.go index 0d9510429c6..84b48fe3656 100644 --- a/service/history/worker_commands_task_dispatcher.go +++ b/service/history/worker_commands_task_dispatcher.go @@ -147,7 +147,7 @@ func (d *workerCommandsTaskDispatcher) dispatchToWorker( NamespaceId: task.NamespaceID, TaskQueue: &taskqueuepb.TaskQueue{ Name: task.Destination, - Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + Kind: enumspb.TASK_QUEUE_KIND_WORKER_COMMANDS, }, Request: nexusRequest, }) diff --git a/service/history/worker_commands_task_dispatcher_test.go b/service/history/worker_commands_task_dispatcher_test.go index 6b71edd6522..a2391678afb 100644 --- a/service/history/worker_commands_task_dispatcher_test.go +++ b/service/history/worker_commands_task_dispatcher_test.go @@ -7,6 +7,7 @@ import ( "github.com/nexus-rpc/sdk-go/nexus" "github.com/stretchr/testify/require" + enumspb "go.temporal.io/api/enums/v1" nexuspb "go.temporal.io/api/nexus/v1" workerpb "go.temporal.io/api/worker/v1" "go.temporal.io/sdk/temporal" @@ -141,25 +142,34 @@ func TestExecute_DispatchSuccess(t *testing.T) { logger: log.NewNoopLogger(), } - mockClient.EXPECT().DispatchNexusTask(gomock.Any(), gomock.Any()).Return( - &matchingservice.DispatchNexusTaskResponse{ - Outcome: &matchingservice.DispatchNexusTaskResponse_Response{ - Response: &nexuspb.Response{ - Variant: &nexuspb.Response_StartOperation{ - StartOperation: &nexuspb.StartOperationResponse{ - Variant: &nexuspb.StartOperationResponse_SyncSuccess{ - SyncSuccess: &nexuspb.StartOperationResponse_Sync{}, + var capturedReq *matchingservice.DispatchNexusTaskRequest + 
mockClient.EXPECT().DispatchNexusTask(gomock.Any(), gomock.Any()).DoAndReturn( + func(_ context.Context, req *matchingservice.DispatchNexusTaskRequest, _ ...any) (*matchingservice.DispatchNexusTaskResponse, error) { + capturedReq = req + return &matchingservice.DispatchNexusTaskResponse{ + Outcome: &matchingservice.DispatchNexusTaskResponse_Response{ + Response: &nexuspb.Response{ + Variant: &nexuspb.Response_StartOperation{ + StartOperation: &nexuspb.StartOperationResponse{ + Variant: &nexuspb.StartOperationResponse_SyncSuccess{ + SyncSuccess: &nexuspb.StartOperationResponse_Sync{}, + }, }, }, }, }, - }, - }, nil) + }, nil + }) task := testWorkerCommandsTask() err := d.execute(context.Background(), task, 1 /* attempt */) require.NoError(t, err) + require.NotNil(t, capturedReq) + require.Equal(t, enumspb.TASK_QUEUE_KIND_WORKER_COMMANDS, capturedReq.TaskQueue.Kind, + "dispatch request must use TASK_QUEUE_KIND_WORKER_COMMANDS, not TASK_QUEUE_KIND_NORMAL") + require.Equal(t, task.Destination, capturedReq.TaskQueue.Name) + requireMetricValue(t, capture.Snapshot(), "success") } From c7ada46ec46512f095c3f83391ced4c4979b0ed2 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Mon, 11 May 2026 14:33:57 -0700 Subject: [PATCH 09/73] `testing/testcontext` package (#10220) ## What changed? Adds a new package `testing/testcontext` for creating and managing test `context.Context`s. No behavior change expected/intended. ## Why? tl;dr: encapsulate the test context behavior and decouple it from TestEnv. Bigger picture: Right now the test context behavior is coupled to TestEnv; and legacy suites use `testcore.NewContext()`. That has served us well so far, but it's suboptimal. A test context should be available outside of TestEnv and the `tests/` package; i.e., unit and integration tests can also benefit from it. ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) --- common/testing/testcontext/context.go | 179 +++++++++++++++++++++ common/testing/testcontext/context_test.go | 126 +++++++++++++++ tests/testcore/context.go | 48 +----- tests/testcore/test_env.go | 12 +- 4 files changed, 314 insertions(+), 51 deletions(-) create mode 100644 common/testing/testcontext/context.go create mode 100644 common/testing/testcontext/context_test.go diff --git a/common/testing/testcontext/context.go b/common/testing/testcontext/context.go new file mode 100644 index 00000000000..4b881fd7b22 --- /dev/null +++ b/common/testing/testcontext/context.go @@ -0,0 +1,179 @@ +package testcontext + +import ( + "context" + "os" + "sync" + "testing" + "time" + + "go.temporal.io/server/common/debug" +) + +const defaultTimeout = 90 * time.Second + +type contextStore struct { + sync.Mutex + byTest map[*testing.T]*contextState +} + +// testContexts is process-global so repeated helpers in the same test share +// one context and one cleanup. +var testContexts = contextStore{ + byTest: make(map[*testing.T]*contextState), +} + +type config struct { + timeout time.Duration + timeoutSet bool + decorators []contextDecorator +} + +type contextDecorator struct { + key any + decorate func(context.Context) context.Context +} + +// New returns the test-scoped context for t. The context is canceled when the +// test ends or when the configured test timeout expires. +// +// The first call creates the per-test context and fixes its timeout. Later calls +// may add decorators, but an explicit different timeout fails instead of being +// silently ignored. 
+func New(t *testing.T, opts ...Option) context.Context { + t.Helper() + + cfg := config{timeout: effectiveTimeout(0)} + for _, opt := range opts { + opt(&cfg) + } + + st := getContextState(t, cfg.timeout) + st.configure(t, cfg) + return st.context() +} + +// Option configures the test-scoped context returned by [New]. +type Option func(*config) + +// WithTimeout sets a custom timeout for the test-scoped context. +func WithTimeout(timeout time.Duration) Option { + return func(cfg *config) { + if timeout <= 0 { + return + } + cfg.timeout = effectiveTimeout(timeout) + cfg.timeoutSet = true + } +} + +// WithContextDecorator applies decorator to the test-scoped context once for key. +// Reusing the same key is a no-op. +func WithContextDecorator[K comparable](key K, decorator func(context.Context) context.Context) Option { + return func(cfg *config) { + cfg.decorators = append(cfg.decorators, contextDecorator{ + key: key, + decorate: decorator, + }) + } +} + +type contextState struct { + mu sync.Mutex + ctx context.Context + cancel context.CancelFunc + timeout time.Duration + decorators map[any]struct{} +} + +func getContextState(t *testing.T, timeout time.Duration) *contextState { + t.Helper() + + testContexts.Lock() + defer testContexts.Unlock() + + if st, ok := testContexts.byTest[t]; ok { + return st + } + + ctx, cancel := context.WithTimeout(t.Context(), timeout) + st := &contextState{ + ctx: ctx, + cancel: cancel, + timeout: timeout, + decorators: make(map[any]struct{}), + } + testContexts.byTest[t] = st + + t.Cleanup(func() { + st.cancel() + testContexts.Lock() + delete(testContexts.byTest, t) + testContexts.Unlock() + if st.err() == context.DeadlineExceeded { + t.Errorf("Test exceeded timeout of %v", st.timeout) + } + }) + return st +} + +func (s *contextState) configure(t *testing.T, cfg config) { + t.Helper() + + s.mu.Lock() + defer s.mu.Unlock() + + if cfg.timeoutSet && cfg.timeout != s.timeout { + t.Fatalf("testcontext: test context already exists with 
timeout %v; cannot change it to %v", s.timeout, cfg.timeout) + } + + // Decorators may be registered by independent helpers, so apply each keyed + // decorator at most once while preserving call order. + for _, decorator := range cfg.decorators { + if decorator.key == nil { + t.Fatal("testcontext: context decorator key must not be nil") + } + if decorator.decorate == nil { + t.Fatal("testcontext: context decorator must not be nil") + } + if _, ok := s.decorators[decorator.key]; ok { + continue + } + s.ctx = decorator.decorate(s.ctx) + s.decorators[decorator.key] = struct{}{} + } +} + +func (s *contextState) context() context.Context { + s.mu.Lock() + defer s.mu.Unlock() + return s.ctx +} + +func (s *contextState) err() error { + s.mu.Lock() + defer s.mu.Unlock() + return s.ctx.Err() +} + +func effectiveTimeout(customTimeout time.Duration) (timeout time.Duration) { + defer func() { + // Build flag TEMPORAL_DEBUG applies a timeout multiplier to all test timeouts. + timeout *= debug.TimeoutMultiplier + }() + + // 1. Custom timeout (via WithTimeout option). + if customTimeout > 0 { + return customTimeout + } + + // 2. TEMPORAL_TEST_TIMEOUT environment variable. + if envTimeout := os.Getenv("TEMPORAL_TEST_TIMEOUT"); envTimeout != "" { + if dur, err := time.ParseDuration(envTimeout); err == nil && dur > 0 { + return dur + } + } + + // 3. Default 90 seconds. 
+ return defaultTimeout +} diff --git a/common/testing/testcontext/context_test.go b/common/testing/testcontext/context_test.go new file mode 100644 index 00000000000..d14db50c2e5 --- /dev/null +++ b/common/testing/testcontext/context_test.go @@ -0,0 +1,126 @@ +package testcontext + +import ( + "context" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestWithTimeout(t *testing.T) { + t.Parallel() + + ctx := New(t, WithTimeout(time.Second)) + deadline, ok := ctx.Deadline() + require.True(t, ok) + require.WithinDuration(t, time.Now().Add(time.Second), deadline, 50*time.Millisecond) +} + +func TestContextDecorators(t *testing.T) { + t.Parallel() + + t.Run("applied once across calls", func(t *testing.T) { + t.Parallel() + + type key struct{} + + var calls atomic.Int32 + decorator := func(ctx context.Context) context.Context { + calls.Add(1) + return context.WithValue(ctx, key{}, "decorated") + } + + ctx := New(t, WithContextDecorator(key{}, decorator)) + require.Equal(t, "decorated", ctx.Value(key{})) + + ctx = New(t, WithContextDecorator(key{}, decorator)) + require.Equal(t, "decorated", ctx.Value(key{})) + require.Equal(t, int32(1), calls.Load(), "decorator should only be applied once") + }) + + t.Run("applied once in single call", func(t *testing.T) { + t.Parallel() + + type key struct{} + + var calls atomic.Int32 + decorator := func(ctx context.Context) context.Context { + calls.Add(1) + return context.WithValue(ctx, key{}, "decorated") + } + + ctx := New(t, + WithContextDecorator(key{}, decorator), + WithContextDecorator(key{}, decorator), + ) + + require.Equal(t, "decorated", ctx.Value(key{})) + require.Equal(t, int32(1), calls.Load(), "decorator should only be applied once") + }) + + t.Run("multiple decorators", func(t *testing.T) { + t.Parallel() + + type key1 struct{} + type key2 struct{} + + ctx := New(t, + WithContextDecorator(key1{}, func(ctx context.Context) context.Context { + return context.WithValue(ctx, 
key1{}, "one") + }), + WithContextDecorator(key2{}, func(ctx context.Context) context.Context { + return context.WithValue(ctx, key2{}, "two") + }), + ) + + require.Equal(t, "one", ctx.Value(key1{})) + require.Equal(t, "two", ctx.Value(key2{})) + }) + + t.Run("later call decorates cached context", func(t *testing.T) { + t.Parallel() + + type key struct{} + + ctx := New(t) + require.Nil(t, ctx.Value(key{})) + + ctx = New(t, WithContextDecorator(key{}, func(ctx context.Context) context.Context { + return context.WithValue(ctx, key{}, "decorated") + })) + require.Equal(t, "decorated", ctx.Value(key{})) + }) +} + +func TestCleanupCancelsContext(t *testing.T) { + t.Parallel() + + var ctx context.Context + t.Run("subtest", func(t *testing.T) { + ctx = New(t) + require.NoError(t, ctx.Err()) + }) + require.ErrorIs(t, ctx.Err(), context.Canceled) +} + +func TestEnvTimeout(t *testing.T) { + t.Run("from env", func(t *testing.T) { + t.Setenv("TEMPORAL_TEST_TIMEOUT", "10s") + + ctx := New(t) + deadline, ok := ctx.Deadline() + require.True(t, ok) + require.WithinDuration(t, time.Now().Add(10*time.Second), deadline, 50*time.Millisecond) + }) + + t.Run("custom overrides env", func(t *testing.T) { + t.Setenv("TEMPORAL_TEST_TIMEOUT", "10s") + + ctx := New(t, WithTimeout(time.Second)) + deadline, ok := ctx.Deadline() + require.True(t, ok) + require.WithinDuration(t, time.Now().Add(time.Second), deadline, 50*time.Millisecond) + }) +} diff --git a/tests/testcore/context.go b/tests/testcore/context.go index 604c36e39f2..ec61941da6c 100644 --- a/tests/testcore/context.go +++ b/tests/testcore/context.go @@ -2,15 +2,15 @@ package testcore import ( "context" - "os" "testing" - "time" - "go.temporal.io/server/common/debug" "go.temporal.io/server/common/headers" "go.temporal.io/server/common/rpc" + "go.temporal.io/server/common/testing/testcontext" ) +type versionHeadersContextKey struct{} + // NewContext creates a context with default 90-second timeout and RPC headers. 
// // NOTE: If you're using testcore.NewEnv, you can use env.Context() directly - it already @@ -34,45 +34,13 @@ func NewContext(parent ...context.Context) context.Context { return ctx } -// calculateTimeout determines the appropriate timeout duration based on custom timeout, -// environment variable, and default values. -// -// Priority order: -// 1. Custom timeout (via WithTimeout option) -// 2. TEMPORAL_TEST_TIMEOUT environment variable (in seconds) -// 3. Default 90 seconds -func calculateTimeout(customTimeout time.Duration) time.Duration { - if customTimeout > 0 { - return customTimeout * debug.TimeoutMultiplier - } - - if envTimeout := os.Getenv("TEMPORAL_TEST_TIMEOUT"); envTimeout != "" { - if dur, err := time.ParseDuration(envTimeout); err == nil && dur > 0 { - return dur * debug.TimeoutMultiplier - } - } - - return defaultTestTimeout -} - // setupTestTimeoutWithContext creates a context that will be canceled on timeout, // and reports the timeout error during cleanup. Returns a context that tests can // use to be interrupted when timeout occurs. The context includes RPC version headers. -func setupTestTimeoutWithContext(t *testing.T, customTimeout time.Duration) context.Context { +func setupTestTimeoutWithContext(t *testing.T) context.Context { t.Helper() - - timeout := calculateTimeout(customTimeout) - ctx, cancel := context.WithTimeout(t.Context(), timeout) - ctx = headers.SetVersions(ctx) - - // Register cleanup to cancel context and check timeout. - // t.Cleanup() functions run in LIFO order, so this runs after test code. 
- t.Cleanup(func() { - cancel() - if ctx.Err() == context.DeadlineExceeded { - t.Errorf("Test exceeded timeout of %v", timeout) - } - }) - - return ctx + return testcontext.New( + t, + testcontext.WithContextDecorator(versionHeadersContextKey{}, headers.SetVersions), + ) } diff --git a/tests/testcore/test_env.go b/tests/testcore/test_env.go index 7570481e67a..405624993a6 100644 --- a/tests/testcore/test_env.go +++ b/tests/testcore/test_env.go @@ -88,7 +88,6 @@ type TestOption func(*testOptions) type testOptions struct { dedicatedCluster bool dynamicConfigSettings []dynamicConfigOverride - timeout time.Duration } type dynamicConfigOverride struct { @@ -125,15 +124,6 @@ func WithDynamicConfig(setting dynamicconfig.GenericSetting, value any) TestOpti } } -// WithTimeout sets a custom timeout for the test. The test will fail if it runs longer -// than this duration. The timeout is multiplied by debug.TimeoutMultiplier when debugging. -// The TEMPORAL_TEST_TIMEOUT environment variable can also set the default timeout in seconds. -func WithTimeout(duration time.Duration) TestOption { - return func(o *testOptions) { - o.timeout = duration - } -} - // NewEnv creates a new test environment with access to a Temporal cluster. 
func NewEnv(t *testing.T, opts ...TestOption) *TestEnv { t.Helper() @@ -187,7 +177,7 @@ func NewEnv(t *testing.T, opts ...TestOption) *TestEnv { taskPoller: taskpoller.New(t, cluster.FrontendClient(), ns.String()), t: t, tv: testvars.New(t), - ctx: setupTestTimeoutWithContext(t, options.timeout), + ctx: setupTestTimeoutWithContext(t), sdkWorkerTQ: RandomizeStr("tq-" + t.Name()), dedicatedGuard: dedicatedGuard, } From 246d7523491ede350cc56f736293cd0446f125fe Mon Sep 17 00:00:00 2001 From: Shivam <57200924+Shivs11@users.noreply.github.com> Date: Mon, 11 May 2026 17:55:52 -0400 Subject: [PATCH 10/73] Trampolining fix: Suppress targetWorkerDeploymentVersionChanged on inline WFT path (#10217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Added revision number mechanics to strengthen checks while deciding between clearing/setting the LastNotifiedTargetVersion! ## Problem - The inline WFT path in `RespondWorkflowTaskCompleted` (both `bypassTaskGeneration` at line 596 and speculative at line 722) calls `AddWorkflowTaskStartedEvent` with `targetDeploymentVersion=nil` and `targetRevisionNumber=0` because matching is never consulted on this path. - In AddWorkflowTaskStartedEvent, case 3 compared only `buildId + deploymentName`, so a `nil` target fails that case and the default fires — also corrupting `LastNotifiedTargetVersion` with `{nil, 0}`. For update- or signal-driven PINNED workflows (where the updates and the signals were buffered on workflow task completion) this manifested as a continuous CaN loop on a stable deployment. - A separate but related hazard: even on the normal poll path, a stale matching partition that happens to report `target == effective` by buildId would spuriously hit case 3 and clear legitimate notifications, silently defeating the trampolining-suppression mechanism from #9895. 
## Test plan - [x] `TestInlinePath_StableRouting_NoSpuriousFlag` — new integration test that exercises the user-reported scenario: - Sends a buffered signal during a regular WFT, completes with `ReturnNewWorkflowTask=true`. - Asserts the inline follow-up WFT's WFT-Started event has `requestId == "request-from-RespondWorkflowTaskCompleted"` (self-verification that the test actually exercises the inline code path). - Asserts `TargetWorkerDeploymentVersionChanged == false` on the inline WFT-Started event. - Confirmed to **fail** before the state machine change (test commit is on top of the fix commit) and **pass** after. - [x] All 8 related existing tests still pass: - `TestStalePartition_RevisionSuppressesTrampolining` (from #9895) - `TestPinnedCaN_NoAUOnCaN_NoInfiniteLoop` - `TestOverride_SuppressesTargetVersionChangedSignal` - `TestAutoUpgrade_SuppressesTargetVersionChangedSignal` - `TestPinnedCaN_TargetChangesAgain_SignalsTrue` - `TestRemoveOverride_ClearsDeclinedState` - `TestRetryOfDeclinedCaN_SignalsOnNewTarget` - `TestPinnedCaN_RollbackResetsDeclined` 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- > [!NOTE] > **Medium Risk** > Touches workflow-task start/versioning decision logic in history service; mistakes could suppress legitimate upgrade notifications or alter trampolining behavior, but changes are localized and covered by a new integration test. > > **Overview** > Fixes a trampolining loop where inline workflow tasks created in `RespondWorkflowTaskCompleted` could incorrectly set `targetWorkerDeploymentVersionChanged` and corrupt `LastNotifiedTargetVersion` despite not consulting matching. > > Inline/eager/synthetic `AddWorkflowTaskStartedEvent` call sites now pass `targetRevisionNumber=-1` as a sentinel, and the workflow-task state machine strengthens its decision logic by tracking the *highest seen* matching revision and refusing to clear/set notification state based on stale (older-revision) reports. 
> > Adds `TestInlinePath_StableRouting_NoSpuriousFlag` to reproduce the buffered-signal inline-WFT scenario and assert the flag remains false on stable routing. > > Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit baf0f63467506f83a4673d89a813055b3c2b1f3c. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot). --------- Co-authored-by: Claude Opus 4.7 (1M context) --- service/history/api/create_workflow_util.go | 2 +- .../api/respondworkflowtaskcompleted/api.go | 4 +- service/history/ndc/workflow_resetter.go | 2 +- .../workflow/workflow_task_state_machine.go | 22 +++- tests/versioning_3_test.go | 100 ++++++++++++++++++ 5 files changed, 123 insertions(+), 7 deletions(-) diff --git a/service/history/api/create_workflow_util.go b/service/history/api/create_workflow_util.go index c1929216554..8f783cab75b 100644 --- a/service/history/api/create_workflow_util.go +++ b/service/history/api/create_workflow_util.go @@ -113,7 +113,7 @@ func NewWorkflowWithSignal( nil, false, nil, - 0, + -1, // sentinel: eager-exec path didn't consult matching, has no routing revision ) if err != nil { // Unable to add WorkflowTaskStarted event to history diff --git a/service/history/api/respondworkflowtaskcompleted/api.go b/service/history/api/respondworkflowtaskcompleted/api.go index 978923d1c95..e0cf1db4f26 100644 --- a/service/history/api/respondworkflowtaskcompleted/api.go +++ b/service/history/api/respondworkflowtaskcompleted/api.go @@ -603,7 +603,7 @@ func (handler *WorkflowTaskCompletedHandler) Invoke( workflowLease.GetContext().UpdateRegistry(ctx), false, nil, - 0, + -1, // sentinel: inline path didn't consult matching, has no routing revision ) if err != nil { return nil, err @@ -729,7 +729,7 @@ func (handler *WorkflowTaskCompletedHandler) Invoke( workflowLease.GetContext().UpdateRegistry(ctx), false, nil, - 0, + -1, // sentinel: inline path didn't consult matching, has no routing revision ) if err != nil { 
return nil, err diff --git a/service/history/ndc/workflow_resetter.go b/service/history/ndc/workflow_resetter.go index c3ff9d17753..d0e6bbf610a 100644 --- a/service/history/ndc/workflow_resetter.go +++ b/service/history/ndc/workflow_resetter.go @@ -541,7 +541,7 @@ func (r *workflowResetterImpl) failWorkflowTask( // skipping versioning checks because this task is not actually dispatched but will fail immediately. true, nil, - 0, + -1, // sentinel: synthetic event, no routing info ) if err != nil { return err diff --git a/service/history/workflow/workflow_task_state_machine.go b/service/history/workflow/workflow_task_state_machine.go index 7d4d4d7ab00..1a52601500a 100644 --- a/service/history/workflow/workflow_task_state_machine.go +++ b/service/history/workflow/workflow_task_state_machine.go @@ -500,6 +500,16 @@ func (m *workflowTaskStateMachine) AddWorkflowTaskStartedEvent( // in that case proto getters return zero values and we correctly fall through to signal. effectiveDeploymentVersion := worker_versioning.ExternalWorkerDeploymentVersionFromDeployment(m.ms.GetEffectiveDeployment()) + // Highest revision the workflow knows about from matching, whether from a + // notification on this run (LastNotifiedTargetVersion) or carried via CaN + // (DeclinedTargetVersionUpgrade). Used to suppress stale matching reports, + // including inline WFTs in RespondWorkflowTaskCompleted which don't consult + // matching and pass the -1 sentinel revision. + highestSeenRevNumber := max( + m.ms.executionInfo.GetLastNotifiedTargetVersion().GetRevisionNumber(), + m.ms.executionInfo.GetDeclinedTargetVersionUpgrade().GetRevisionNumber(), + ) + switch { // 1. Override active — operator controls version, don't signal. Clear any stale declined/notified state so that // when/if the operator removes the override, we re-calculate the declined/notified state and appropriately fire the @@ -510,16 +520,22 @@ func (m *workflowTaskStateMachine) AddWorkflowTaskStartedEvent( // 2. 
AutoUpgrade — will transition naturally, no CaN needed. case m.ms.GetEffectiveVersioningBehavior() == enumspb.VERSIONING_BEHAVIOR_AUTO_UPGRADE: // Rest of the checks are guaranteed to have the Workflow's Effective Versioning Behavior to be Pinned in nature. - // 3. Already on target — nothing changed. Clear any stale declined/notified state. + // 3. Already on target AND partition's view is at least as fresh as what we last knew. + // The revision check prevents a stale partition (coincidentally matching by buildId) + // from wiping legitimate declined/notified state. case effectiveDeploymentVersion.GetBuildId() == targetDeploymentVersion.GetBuildId() && - effectiveDeploymentVersion.GetDeploymentName() == targetDeploymentVersion.GetDeploymentName(): - // TODO (Shivam): Revision number mechanics to strengthen this check + effectiveDeploymentVersion.GetDeploymentName() == targetDeploymentVersion.GetDeploymentName() && + targetRevisionNumber >= highestSeenRevNumber: m.ms.executionInfo.DeclinedTargetVersionUpgrade = nil m.ms.executionInfo.LastNotifiedTargetVersion = nil // 4. Previously declined upgrade — target revision is not newer than what was declined. case m.ms.executionInfo.GetDeclinedTargetVersionUpgrade() != nil && targetRevisionNumber <= m.ms.executionInfo.GetDeclinedTargetVersionUpgrade().GetRevisionNumber(): default: + // Strict `<` (not `<=`) so legitimate same-revision re-firings (e.g., transient retries, repeated updates) still fire; inline path uses revision=-1 sentinel to be caught here. + if targetRevisionNumber < highestSeenRevNumber { + break + } // Otherwise — target changed + did not decline to upgrade on CaN/retry. Signal the SDK. 
targetDeploymentVersionChanged = true m.ms.executionInfo.LastNotifiedTargetVersion = &persistencespb.LastNotifiedTargetVersion{ diff --git a/tests/versioning_3_test.go b/tests/versioning_3_test.go index 4b573009011..e7793c8f6cb 100644 --- a/tests/versioning_3_test.go +++ b/tests/versioning_3_test.go @@ -6511,6 +6511,106 @@ func (s *Versioning3Suite) TestStalePartition_RevisionSuppressesTrampolining() { }) } +// TestInlinePath_StableRouting_NoSpuriousFlag verifies that a PINNED workflow +// on stable routing does NOT receive targetWorkerDeploymentVersionChanged=true +// on WFTs created via the inline path in RespondWorkflowTaskCompleted (e.g., +// when a buffered signal arrives during WFT processing). +// +// Flow: +// 1. Start pinned workflow on v1; set v1 as current (stable routing). +// 2. Trigger a regular WFT via a first signal. +// 3. During that WFT's processing, send a second signal → gets buffered → +// server creates an inline WFT to deliver it. +// 4. Poll the inline WFT and assert: +// - requestId == "request-from-RespondWorkflowTaskCompleted" (self-check +// that we actually exercised the inline path) +// - targetWorkerDeploymentVersionChanged == false (the bug being fixed) +func (s *Versioning3Suite) TestInlinePath_StableRouting_NoSpuriousFlag() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + tv1 := testvars.New(s).WithBuildIDNumber(1) + + // Async poller for first WFT, declares pinned behavior + wftCompleted := make(chan struct{}) + s.pollWftAndHandle(tv1, false, wftCompleted, + func(task *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) { + s.NotNil(task) + return respondEmptyWft(tv1, false, vbPinned), nil + }) + + s.waitForDeploymentDataPropagation(tv1, versionStatusInactive, false, tqTypeWf) + s.setCurrentDeployment(tv1) + + runID := s.startWorkflow(tv1, nil) + execution := tv1.WithRunID(runID).WorkflowExecution() + s.WaitForChannel(ctx, 
wftCompleted) //nolint:staticcheck // SA1019: matches pattern used throughout versioning_3_test.go + s.verifyWorkflowVersioning(s.Assertions, tv1, vbPinned, tv1.Deployment(), nil, nil) + + // Trigger a regular WFT via a first signal. + _, err := s.FrontendClient().SignalWorkflowExecution(ctx, &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: tv1.WorkflowID()}, + SignalName: "first-signal", + Identity: tv1.WorkerIdentity(), + }) + s.NoError(err) + + // Process the WFT for the first signal; during processing send a second signal + // that will be buffered. Set ReturnNewWorkflowTask=true so the server takes the + // bypassTaskGeneration path and embeds the inline follow-up WFT in the response. + poller, resp := s.pollWftAndHandle(tv1, false, nil, + func(task *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) { + s.NotNil(task) + _, err := s.FrontendClient().SignalWorkflowExecution(ctx, &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: tv1.WorkflowID()}, + SignalName: "buffered-signal", + Identity: tv1.WorkerIdentity(), + }) + s.NoError(err) + reply := respondEmptyWft(tv1, false, vbPinned) + reply.ReturnNewWorkflowTask = true + return reply, nil + }) + + // The inline follow-up WFT should be embedded in the response. + s.NotNil(resp) + inlineTask := resp.GetWorkflowTask() + s.NotNil(inlineTask, "expected inline follow-up WFT in RespondWorkflowTaskCompletedResponse") + + // Self-verification: WFT-Started event from the inline path carries the marker requestId. 
+ var lastStarted *historypb.HistoryEvent + for _, e := range inlineTask.GetHistory().GetEvents() { + if e.GetEventType() == enumspb.EVENT_TYPE_WORKFLOW_TASK_STARTED { + lastStarted = e + } + } + s.NotNil(lastStarted) + s.Equal("request-from-RespondWorkflowTaskCompleted", + lastStarted.GetWorkflowTaskStartedEventAttributes().GetRequestId(), + "inline WFT-Started event should carry the marker requestId (test exercises inline path)") + // Core assertion. + s.False(lastStarted.GetWorkflowTaskStartedEventAttributes().GetTargetWorkerDeploymentVersionChanged(), + "inline WFT on stable routing must NOT fire targetWorkerDeploymentVersionChanged=true") + + // Complete the inline WFT to finish the workflow. + _, err = poller.HandleWorkflowTask(tv1, inlineTask, + func(task *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) { + return respondCompleteWorkflow(tv1, vbPinned), nil + }) + s.NoError(err) + + // Sanity: full history should have no WFT-Started event with the flag. + events := s.GetHistory(s.Namespace().String(), execution) + for _, e := range events { + if e.GetEventType() == enumspb.EVENT_TYPE_WORKFLOW_TASK_STARTED { + s.False(e.GetWorkflowTaskStartedEventAttributes().GetTargetWorkerDeploymentVersionChanged(), + "no WFT-Started event should have flag=true on stable routing (event %d)", e.GetEventId()) + } + } +} + // TestRetryOfDeclinedCaN_SignalsOnNewTarget verifies that when a CaN'd run // ,which declined to upgrade, fails and is retried by the server, the retry // run inherits NotificationSuppressedTargetVersion from the original CaN From a49d803466e2633b1bf18ad8c9f5b525f3079d72 Mon Sep 17 00:00:00 2001 From: Fred Tzeng <41805201+fretz12@users.noreply.github.com> Date: Tue, 12 May 2026 08:28:35 -0700 Subject: [PATCH 11/73] Add context.Context to callback.Validator interface (#10140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changed? 
Added `context.Context` as the first parameter to the `callback.Validator` interface and threaded it through all call sites (`WorkflowHandler`, `activity.FrontendHandler`, and their internal validation/preparation methods). The base validator ignores the context. No behavioral change. ## Why? Deployments that decorate the validator (via `fx.Decorate`) need access to the gRPC context to check caller identity — e.g. to allow internal callers (scheduler) to attach `temporal://internal` callbacks while rejecting them from external users. ## How did you test it? - [X] built - [X] run locally and tested manually - [X] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- chasm/lib/activity/frontend.go | 5 +++-- chasm/lib/activity/frontend_test.go | 5 +++-- chasm/lib/callback/validator.go | 5 +++-- chasm/lib/callback/validator_test.go | 19 ++++++++++--------- service/frontend/workflow_handler.go | 13 ++++++++----- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/chasm/lib/activity/frontend.go b/chasm/lib/activity/frontend.go index 4d08ffbbbf6..6dae15ece5b 100644 --- a/chasm/lib/activity/frontend.go +++ b/chasm/lib/activity/frontend.go @@ -95,7 +95,7 @@ func (h *frontendHandler) StartActivityExecution(ctx context.Context, req *workf return nil, err } - modifiedReq, err := h.validateAndPopulateStartRequest(req, namespaceID) + modifiedReq, err := h.validateAndPopulateStartRequest(ctx, req, namespaceID) if err != nil { return nil, err } @@ -351,6 +351,7 @@ func (h *frontendHandler) RequestCancelActivityExecution( } func (h *frontendHandler) validateAndPopulateStartRequest( + ctx context.Context, req *workflowservice.StartActivityExecutionRequest, namespaceID namespace.ID, ) (*workflowservice.StartActivityExecutionRequest, error) { @@ -405,7 +406,7 @@ func (h *frontendHandler) validateAndPopulateStartRequest( } if cbs := req.GetCompletionCallbacks(); len(cbs) > 0 { - if err := 
h.callbackValidator.Validate(req.GetNamespace(), cbs); err != nil { + if err := h.callbackValidator.Validate(ctx, req.GetNamespace(), cbs); err != nil { return nil, err } } diff --git a/chasm/lib/activity/frontend_test.go b/chasm/lib/activity/frontend_test.go index 9dc13cda79c..e692de13b84 100644 --- a/chasm/lib/activity/frontend_test.go +++ b/chasm/lib/activity/frontend_test.go @@ -1,6 +1,7 @@ package activity import ( + "context" "testing" "time" @@ -50,11 +51,11 @@ func TestRequestIdStableAcrossRetries(t *testing.T) { // validateAndPopulateStartRequest with the same request pointer. validateTwoAttempts := func(t *testing.T, req *workflowservice.StartActivityExecutionRequest) { t.Helper() - clone1, err := h.validateAndPopulateStartRequest(req, nsID) + clone1, err := h.validateAndPopulateStartRequest(context.Background(), req, nsID) require.NoError(t, err) require.NotEmpty(t, clone1.RequestId) - clone2, err := h.validateAndPopulateStartRequest(req, nsID) + clone2, err := h.validateAndPopulateStartRequest(context.Background(), req, nsID) require.NoError(t, err) require.Equal(t, clone1.RequestId, clone2.RequestId) } diff --git a/chasm/lib/callback/validator.go b/chasm/lib/callback/validator.go index bc478ff8923..d9de4ea607b 100644 --- a/chasm/lib/callback/validator.go +++ b/chasm/lib/callback/validator.go @@ -1,6 +1,7 @@ package callback import ( + "context" "fmt" "strings" @@ -12,7 +13,7 @@ import ( // Validator validates completion callbacks attached to executions (workflows and standalone activities). type Validator interface { - Validate(namespaceName string, cbs []*commonpb.Callback) error + Validate(ctx context.Context, namespaceName string, cbs []*commonpb.Callback) error } type validator struct { @@ -38,7 +39,7 @@ func NewValidator( // Validate validates completion callbacks: count, URL length, endpoint allowlist, header size, and normalizes header // keys to lowercase. 
-func (v *validator) Validate(namespaceName string, cbs []*commonpb.Callback) error { +func (v *validator) Validate(_ context.Context, namespaceName string, cbs []*commonpb.Callback) error { if len(cbs) > v.maxCallbacksPerExecution(namespaceName) { return serviceerror.NewInvalidArgumentf( "cannot attach more than %d callbacks to an execution", v.maxCallbacksPerExecution(namespaceName), diff --git a/chasm/lib/callback/validator_test.go b/chasm/lib/callback/validator_test.go index 55ae7dd5fd9..63d95d87d96 100644 --- a/chasm/lib/callback/validator_test.go +++ b/chasm/lib/callback/validator_test.go @@ -1,6 +1,7 @@ package callback import ( + "context" "regexp" "testing" @@ -31,7 +32,7 @@ func TestValidateCallbacks(t *testing.T) { }, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) require.NoError(t, err) }) @@ -46,7 +47,7 @@ func TestValidateCallbacks(t *testing.T) { {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/cb1"}}}, {Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost/cb2"}}}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Contains(t, err.Error(), "cannot attach more than 1 callbacks") @@ -66,7 +67,7 @@ func TestValidateCallbacks(t *testing.T) { }, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Contains(t, err.Error(), "url length longer than max length allowed") @@ -81,7 +82,7 @@ func TestValidateCallbacks(t *testing.T) { }, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Contains(t, err.Error(), "header size longer than max allowed 
size") @@ -96,7 +97,7 @@ func TestValidateCallbacks(t *testing.T) { }, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) require.NoError(t, err) nexus := cbs[0].GetNexus() require.Equal(t, "application/json", nexus.Header["content-type"]) @@ -119,7 +120,7 @@ func TestValidateCallbacks(t *testing.T) { }, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) var invalidArgErr *serviceerror.InvalidArgument require.ErrorAs(t, err, &invalidArgErr) require.Contains(t, err.Error(), "does not match any configured callback address") @@ -129,14 +130,14 @@ func TestValidateCallbacks(t *testing.T) { cbs := []*commonpb.Callback{ {Variant: nil}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) var unimplementedErr *serviceerror.Unimplemented require.ErrorAs(t, err, &unimplementedErr) require.Contains(t, err.Error(), "unknown callback variant") }) t.Run("EmptyCallbacksNoError", func(t *testing.T) { - err := v.Validate("ns", nil) + err := v.Validate(context.Background(), "ns", nil) require.NoError(t, err) }) @@ -146,7 +147,7 @@ func TestValidateCallbacks(t *testing.T) { Internal: &commonpb.Callback_Internal{}, }}, } - err := v.Validate("ns", cbs) + err := v.Validate(context.Background(), "ns", cbs) require.NoError(t, err) }) } diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 8d09abe0433..114655ccf32 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -546,7 +546,7 @@ func (wh *WorkflowHandler) StartWorkflowExecution( defer log.CapturePanic(wh.logger, &retError) var err error - if request, err = wh.prepareStartWorkflowRequest(request); err != nil { + if request, err = wh.prepareStartWorkflowRequest(ctx, request); err != nil { return nil, err } @@ -604,6 +604,7 @@ func (wh *WorkflowHandler) convertToStartWorkflowExecutionResponse( // Validates the request and sets default values 
where they are missing. func (wh *WorkflowHandler) prepareStartWorkflowRequest( + ctx context.Context, request *workflowservice.StartWorkflowExecutionRequest, ) (*workflowservice.StartWorkflowExecutionRequest, error) { if request == nil { @@ -681,7 +682,7 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( } if cbs := request.GetCompletionCallbacks(); len(cbs) > 0 { - if err := wh.callbackValidator.Validate(namespaceName.String(), cbs); err != nil { + if err := wh.callbackValidator.Validate(ctx, namespaceName.String(), cbs); err != nil { return nil, err } } @@ -784,7 +785,7 @@ func (wh *WorkflowHandler) ExecuteMultiOperation( return nil, errMultiOpNotStartAndUpdate } - historyReq, err := wh.convertToHistoryMultiOperationRequest(namespaceID, request) + historyReq, err := wh.convertToHistoryMultiOperationRequest(ctx, namespaceID, request) if err != nil { return nil, err } @@ -808,6 +809,7 @@ func (wh *WorkflowHandler) ExecuteMultiOperation( } func (wh *WorkflowHandler) convertToHistoryMultiOperationRequest( + ctx context.Context, namespaceID namespace.ID, request *workflowservice.ExecuteMultiOperationRequest, ) (*historyservice.ExecuteMultiOperationRequest, error) { @@ -818,7 +820,7 @@ func (wh *WorkflowHandler) convertToHistoryMultiOperationRequest( errs := make([]error, len(request.Operations)) for i, op := range request.Operations { - convertedOp, opWorkflowID, err := wh.convertToHistoryMultiOperationItem(namespaceID, namespace.Name(request.Namespace), op) + convertedOp, opWorkflowID, err := wh.convertToHistoryMultiOperationItem(ctx, namespaceID, namespace.Name(request.Namespace), op) if err != nil { hasError = true } else { @@ -849,6 +851,7 @@ func (wh *WorkflowHandler) convertToHistoryMultiOperationRequest( } func (wh *WorkflowHandler) convertToHistoryMultiOperationItem( + ctx context.Context, namespaceID namespace.ID, namespaceName namespace.Name, op *workflowservice.ExecuteMultiOperationRequest_Operation, @@ -861,7 +864,7 @@ func (wh *WorkflowHandler) 
convertToHistoryMultiOperationItem( return nil, "", errMultiOpNamespaceMismatch } var err error - if startReq, err = wh.prepareStartWorkflowRequest(startReq); err != nil { + if startReq, err = wh.prepareStartWorkflowRequest(ctx, startReq); err != nil { return nil, "", err } if len(startReq.CronSchedule) > 0 { From 7759b0d34d03a0ad8d00d34a527d5386c76251a1 Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Tue, 12 May 2026 11:54:25 -0700 Subject: [PATCH 12/73] Improve cancel command dispatch error handling Port error handling pattern from workerCommandsTaskDispatcher: distinguish UpstreamTimeout (no_poller metric), non-retryable handler errors, transport errors, and permanent worker-returned failures. Fix misleading comment on buildCancelCommandTaskToken. Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity.go | 3 ++- chasm/lib/activity/activity_tasks.go | 40 +++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/chasm/lib/activity/activity.go b/chasm/lib/activity/activity.go index afb3a676946..59f02aff550 100644 --- a/chasm/lib/activity/activity.go +++ b/chasm/lib/activity/activity.go @@ -202,7 +202,8 @@ func (a *Activity) createAddActivityTaskRequest(ctx chasm.Context, namespaceID s } // buildCancelCommandTaskToken builds the serialized task token for a cancel command. -// This token matches what the worker received when the activity was dispatched. +// This token identifies the same activity as the poll response token but is not byte-identical — +// matching builds poll tokens with additional fields (Clock, Version, etc.). 
func (a *Activity) buildCancelCommandTaskToken(ctx chasm.Context, activityRef chasm.ComponentRef) ([]byte, error) { componentRefBytes, err := ctx.Ref(a) if err != nil { diff --git a/chasm/lib/activity/activity_tasks.go b/chasm/lib/activity/activity_tasks.go index a1d59d9c66c..3396f5539dc 100644 --- a/chasm/lib/activity/activity_tasks.go +++ b/chasm/lib/activity/activity_tasks.go @@ -422,16 +422,42 @@ func (h *cancelCommandDispatchTaskHandler) dispatchToWorker( return nil } - // Non-retryable errors are dropped — the activity will eventually time out. + return h.handleDispatchError(nexusErr, controlQueue) +} + +func (h *cancelCommandDispatchTaskHandler) handleDispatchError(nexusErr error, controlQueue string) error { var handlerErr *nexus.HandlerError - if errors.As(nexusErr, &handlerErr) && !handlerErr.Retryable() { - h.opts.Logger.Error("Cancel command non-retryable error", + if errors.As(nexusErr, &handlerErr) { + // Handler-level error (transport, timeout, internal). + if handlerErr.Type == nexus.HandlerErrorTypeUpstreamTimeout { + h.opts.Logger.Warn("No worker polling control queue", + tag.NewStringTag("control_queue", controlQueue)) + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("no_poller")) + return nexusErr + } + + if !handlerErr.Retryable() { + h.opts.Logger.Error("Cancel command non-retryable handler error", + tag.NewStringTag("control_queue", controlQueue), + tag.Error(nexusErr)) + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("non_retryable_error")) + return nil + } + + h.opts.Logger.Warn("Cancel command transport failure", tag.NewStringTag("control_queue", controlQueue), tag.Error(nexusErr)) - metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("non_retryable_error")) - return nil + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("transport_error")) + return nexusErr } - 
metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("transport_error")) - return nexusErr + // Worker-returned failure (ApplicationError, CanceledError, etc.). The worker received + // and processed the request but returned an error. Permanent — the worker contract + // requires success for all defined commands, so this indicates a bug or version + // incompatibility. Retrying won't help. + h.opts.Logger.Error("Worker returned failure for cancel command", + tag.NewStringTag("control_queue", controlQueue), + tag.Error(nexusErr)) + metrics.WorkerCommandsSent.With(h.opts.MetricsHandler).Record(1, metrics.OutcomeTag("worker_error")) + return nil } From 2800c8e3cf5442e1a878f9484b77394c4016dcfa Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Tue, 12 May 2026 11:57:36 -0700 Subject: [PATCH 13/73] Fix e2e test to use env pattern and fix formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test was using s.OverrideDynamicConfig, s.FrontendClient(), s.tv etc. which don't exist on the suite — must use env from newTestEnv(). Also fix gofmt formatting in activity_test.go and library.go. 
Co-Authored-By: Claude Opus 4.6 --- chasm/lib/activity/activity_test.go | 12 ++++---- chasm/lib/activity/library.go | 30 +++++++++---------- tests/standalone_activity_test.go | 46 +++++++++++++++-------------- 3 files changed, 45 insertions(+), 43 deletions(-) diff --git a/chasm/lib/activity/activity_test.go b/chasm/lib/activity/activity_test.go index f7797afe6e2..70a39e69066 100644 --- a/chasm/lib/activity/activity_test.go +++ b/chasm/lib/activity/activity_test.go @@ -321,9 +321,9 @@ func TestTransitionStartedStoresWorkerControlTaskQueue(t *testing.T) { attemptState := &activitypb.ActivityAttemptState{Count: 1, Stamp: 1} a := &Activity{ ActivityState: &activitypb.ActivityState{ - ActivityType: &commonpb.ActivityType{Name: "test-type"}, - Status: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, - TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, + ActivityType: &commonpb.ActivityType{Name: "test-type"}, + Status: activitypb.ACTIVITY_EXECUTION_STATUS_SCHEDULED, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-queue"}, StartToCloseTimeout: durationpb.New(3 * time.Minute), }, LastAttempt: chasm.NewDataField(ctx, attemptState), @@ -348,9 +348,9 @@ func TestCancelRequestDispatchesCancelCommand(t *testing.T) { testTime := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) testCases := []struct { - name string - activityStatus activitypb.ActivityExecutionStatus - controlQueue string + name string + activityStatus activitypb.ActivityExecutionStatus + controlQueue string expectDispatchTask bool }{ { diff --git a/chasm/lib/activity/library.go b/chasm/lib/activity/library.go index cb128c20111..269648a4a0f 100644 --- a/chasm/lib/activity/library.go +++ b/chasm/lib/activity/library.go @@ -77,13 +77,13 @@ func (l *componentOnlyLibrary) Components() []*chasm.RegistrableComponent { type library struct { componentOnlyLibrary - handler *handler - activityDispatchTaskHandler *activityDispatchTaskHandler - cancelCommandDispatchTaskHandler *cancelCommandDispatchTaskHandler - 
scheduleToStartTimeoutTaskHandler *scheduleToStartTimeoutTaskHandler - scheduleToCloseTimeoutTaskHandler *scheduleToCloseTimeoutTaskHandler - startToCloseTimeoutTaskHandler *startToCloseTimeoutTaskHandler - heartbeatTimeoutTaskHandler *heartbeatTimeoutTaskHandler + handler *handler + activityDispatchTaskHandler *activityDispatchTaskHandler + cancelCommandDispatchTaskHandler *cancelCommandDispatchTaskHandler + scheduleToStartTimeoutTaskHandler *scheduleToStartTimeoutTaskHandler + scheduleToCloseTimeoutTaskHandler *scheduleToCloseTimeoutTaskHandler + startToCloseTimeoutTaskHandler *startToCloseTimeoutTaskHandler + heartbeatTimeoutTaskHandler *heartbeatTimeoutTaskHandler } func newLibrary( @@ -98,14 +98,14 @@ func newLibrary( namespaceRegistry namespace.Registry, ) *library { return &library{ - componentOnlyLibrary: *newComponentOnlyLibrary(config, namespaceRegistry), - handler: handler, - activityDispatchTaskHandler: activityDispatchTaskHandler, - cancelCommandDispatchTaskHandler: cancelCommandDispatchTaskHandler, - scheduleToStartTimeoutTaskHandler: scheduleToStartTimeoutTaskHandler, - scheduleToCloseTimeoutTaskHandler: scheduleToCloseTimeoutTaskHandler, - startToCloseTimeoutTaskHandler: startToCloseTimeoutTaskHandler, - heartbeatTimeoutTaskHandler: heartbeatTimeoutTaskHandler, + componentOnlyLibrary: *newComponentOnlyLibrary(config, namespaceRegistry), + handler: handler, + activityDispatchTaskHandler: activityDispatchTaskHandler, + cancelCommandDispatchTaskHandler: cancelCommandDispatchTaskHandler, + scheduleToStartTimeoutTaskHandler: scheduleToStartTimeoutTaskHandler, + scheduleToCloseTimeoutTaskHandler: scheduleToCloseTimeoutTaskHandler, + startToCloseTimeoutTaskHandler: startToCloseTimeoutTaskHandler, + heartbeatTimeoutTaskHandler: heartbeatTimeoutTaskHandler, } } diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index c36cdd319c2..d88ab8c9fcc 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go 
@@ -6331,13 +6331,15 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { } func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { + env := s.newTestEnv() t := s.T() ctx, cancel := context.WithTimeout(t.Context(), 30*time.Second) defer cancel() - s.OverrideDynamicConfig(dynamicconfig.EnableCancelActivityWorkerCommand, true) + env.OverrideDynamicConfig(dynamicconfig.EnableCancelActivityWorkerCommand, true) - controlQueueName := s.tv.ControlQueueName(s.Namespace().String()) + tv := env.Tv() + controlQueueName := tv.ControlQueueName(env.Namespace().String()) tokenSerializer := tasktoken.NewSerializer() @@ -6364,10 +6366,10 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { pollNexusControlQueue := func() *workerservicepb.ExecuteCommandsRequest { pollCtx, pollCancel := context.WithTimeout(ctx, 5*time.Second) defer pollCancel() - resp, err := s.FrontendClient().PollNexusTaskQueue(pollCtx, &workflowservice.PollNexusTaskQueueRequest{ - Namespace: s.Namespace().String(), + resp, err := env.FrontendClient().PollNexusTaskQueue(pollCtx, &workflowservice.PollNexusTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: controlQueueName, Kind: enumspb.TASK_QUEUE_KIND_WORKER_COMMANDS}, - Identity: s.tv.WorkerIdentity(), + Identity: tv.WorkerIdentity(), }) if err != nil || resp == nil || resp.Request == nil { return nil @@ -6387,36 +6389,36 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll with a worker control task queue so the activity stores it. 
- pollTaskResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollTaskResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{ Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL, }, - Identity: s.tv.WorkerIdentity(), - WorkerInstanceKey: s.tv.WorkerInstanceKey(), + Identity: tv.WorkerIdentity(), + WorkerInstanceKey: tv.WorkerInstanceKey(), WorkerControlTaskQueue: controlQueueName, }) require.NoError(t, err) require.NotEmpty(t, pollTaskResp.TaskToken) // Request cancellation — should dispatch cancel command to the control queue. - _, err = s.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().RequestCancelActivityExecution(ctx, &workflowservice.RequestCancelActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Identity: "canceller", - RequestId: s.tv.RequestID(), + RequestId: tv.RequestID(), Reason: "test cancel", }) require.NoError(t, err) var executeReq *workerservicepb.ExecuteCommandsRequest - s.Eventually(func() bool { + require.Eventually(t, func() bool { executeReq = pollNexusControlQueue() return executeReq != nil }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue") @@ -6431,26 +6433,26 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { activityID := testcore.RandomizeStr(t.Name()) taskQueue := testcore.RandomizeStr(t.Name()) - startResp := s.startAndValidateActivity(ctx, t, activityID, taskQueue) + startResp := env.startAndValidateActivity(ctx, t, activityID, taskQueue) runID := startResp.RunId // Poll with a worker control task queue. 
- pollTaskResp, err := s.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ - Namespace: s.Namespace().String(), + pollTaskResp, err := env.FrontendClient().PollActivityTaskQueue(ctx, &workflowservice.PollActivityTaskQueueRequest{ + Namespace: env.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{ Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL, }, - Identity: s.tv.WorkerIdentity(), - WorkerInstanceKey: s.tv.WorkerInstanceKey(), + Identity: tv.WorkerIdentity(), + WorkerInstanceKey: tv.WorkerInstanceKey(), WorkerControlTaskQueue: controlQueueName, }) require.NoError(t, err) require.NotEmpty(t, pollTaskResp.TaskToken) // Terminate — should dispatch cancel command to the control queue. - _, err = s.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().TerminateActivityExecution(ctx, &workflowservice.TerminateActivityExecutionRequest{ + Namespace: env.Namespace().String(), ActivityId: activityID, RunId: runID, Reason: "test terminate", @@ -6459,7 +6461,7 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { require.NoError(t, err) var executeReq *workerservicepb.ExecuteCommandsRequest - s.Eventually(func() bool { + require.Eventually(t, func() bool { executeReq = pollNexusControlQueue() return executeReq != nil }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue after terminate") From 0d8004bf4f05ca45f4801f320eaa7660090318c4 Mon Sep 17 00:00:00 2001 From: Alex Stanfield <13949480+chaptersix@users.noreply.github.com> Date: Tue, 12 May 2026 15:07:44 -0500 Subject: [PATCH 14/73] fix: schedule_action_delay metric and add schedule_generate_latency (#10201) ## Summary - Fix `schedule_action_delay` for CHASM schedules: `DesiredTime` is nil for most starts (only set when blocked behind overlap), causing the metric to record ~56 years (now minus epoch). 
Use `cmp.Or(start.DesiredTime, start.ActualTime)` to fall back to `ActualTime`, matching V1 behavior. - Add `schedule_generate_latency` timer metric to measure the delay between when a scheduled action was due and when the generator buffered it. Only recorded for non-manual (non-backfill) actions. --- chasm/lib/scheduler/invoker_tasks.go | 4 +++- chasm/lib/scheduler/spec_processor.go | 5 +++++ common/metrics/metric_defs.go | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/chasm/lib/scheduler/invoker_tasks.go b/chasm/lib/scheduler/invoker_tasks.go index 80162793594..19d5bc17309 100644 --- a/chasm/lib/scheduler/invoker_tasks.go +++ b/chasm/lib/scheduler/invoker_tasks.go @@ -1,6 +1,7 @@ package scheduler import ( + "cmp" "context" "errors" "fmt" @@ -602,9 +603,10 @@ func (h *InvokerExecuteTaskHandler) startWorkflow( // Record time taken from action eligible to workflow started. if !start.Manual { + desiredTime := cmp.Or(start.DesiredTime, start.ActualTime) metricsHandler. Timer(metrics.ScheduleActionDelay.Name()). - Record(actualStartTime.Sub(start.DesiredTime.AsTime())) + Record(actualStartTime.Sub(desiredTime.AsTime())) } return &schedulepb.ScheduleActionResult{ diff --git a/chasm/lib/scheduler/spec_processor.go b/chasm/lib/scheduler/spec_processor.go index 0bddb32b0ff..61585781b80 100644 --- a/chasm/lib/scheduler/spec_processor.go +++ b/chasm/lib/scheduler/spec_processor.go @@ -148,6 +148,11 @@ func (s *SpecProcessorImpl) ProcessTimeRange( continue } + if !manual { + metricsHandler.Timer(metrics.ScheduleGenerateLatency.Name()). 
+ Record(end.Sub(next.Next)) + } + if limitReached { droppedCount++ continue diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index feae0c8ef7f..cd4d2ea0b31 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -1417,6 +1417,10 @@ var ( "schedule_action_delay", WithDescription("Delay between when scheduled actions should/actually happen"), ) + ScheduleGenerateLatency = NewTimerDef( + "schedule_generate_latency", + WithDescription("Delay between when a scheduled action was due and when the generator buffered it"), + ) SchedulePayloadSize = NewCounterDef( "schedule_payload_size", WithDescription("The size in bytes of a customer payload (including action results and update signals)"), From a05038cf9993ebe043466d3597952e0e29622467 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 12 May 2026 16:43:36 -0600 Subject: [PATCH 15/73] Emit payload size for UpdateWorkflowExecution requests (#10223) ## Summary - Emit the `EventBlobSize` for `UpdateWorkflowExecution` requests, tagged with `namespace`. 
--------- Co-authored-by: Claude Opus 4.7 (1M context) --- service/frontend/workflow_handler.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 114655ccf32..c6685de8a5c 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -785,6 +785,8 @@ func (wh *WorkflowHandler) ExecuteMultiOperation( return nil, errMultiOpNotStartAndUpdate } + metrics.EventBlobSize.With(wh.metricsScope(ctx)).Record(int64(request.Operations[1].GetUpdateWorkflow().GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) + historyReq, err := wh.convertToHistoryMultiOperationRequest(ctx, namespaceID, request) if err != nil { return nil, err @@ -5304,6 +5306,7 @@ func (wh *WorkflowHandler) UpdateWorkflowExecution( metricsHandler := wh.metricsScope(ctx).WithTags(metrics.HeaderCallsiteTag("UpdateWorkflowExecution")) metrics.HeaderSize.With(metricsHandler).Record(int64(request.GetRequest().GetInput().GetHeader().Size())) + metrics.EventBlobSize.With(metricsHandler).Record(int64(request.GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) switch request.WaitPolicy.LifecycleStage { // nolint:exhaustive case enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED: From 560f8b8db9e7832da42562b6de74305af621e5ae Mon Sep 17 00:00:00 2001 From: Kannan Date: Tue, 12 May 2026 17:12:51 -0700 Subject: [PATCH 16/73] Deduplicate poll cancellation RPCs by destination host (#10230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What Deduplicates `CancelOutstandingWorkerPolls` RPCs by destination matching host during `ShutdownWorker`. Uses `Route()` on the matching client to determine which host each partition maps to, then sends only one RPC per unique host instead of one per partition. 
## Why With N partitions across H matching hosts (H << N), the current code sends N RPCs per task type when H would suffice — the RPC cancels all pollers for the `workerInstanceKey` on the target host regardless of which partition was used for routing. ## How did you test it? Unit test 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- service/frontend/workflow_handler.go | 28 +++++++++- service/frontend/workflow_handler_test.go | 64 +++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index c6685de8a5c..ae9c761fccf 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -41,6 +41,7 @@ import ( chasmscheduler "go.temporal.io/server/chasm/lib/scheduler" "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" "go.temporal.io/server/client/frontend" + matchingclient "go.temporal.io/server/client/matching" "go.temporal.io/server/common" "go.temporal.io/server/common/archiver" "go.temporal.io/server/common/archiver/provider" @@ -3150,7 +3151,6 @@ func (wh *WorkflowHandler) cancelOutstandingWorkerPolls( } } - // The partition is only used for routing; the matching engine cancels all pollers for the workerInstanceKey. tqFamily, err := tqid.NewTaskQueueFamily(namespaceID, taskQueueName) if err != nil { wh.logger.Warn("Invalid task queue name for poll cancellation.", @@ -3159,6 +3159,15 @@ func (wh *WorkflowHandler) cancelOutstandingWorkerPolls( return } + // Deduplicate partitions by destination matching host. The partition is only used for + // routing; the matching engine cancels all pollers for the workerInstanceKey on that host + // regardless of partition. Sending one RPC per host instead of one per partition reduces + // RPCs from numPartitions*taskTypes to numHosts*taskTypes. 
+ routingClient, ok := wh.matchingClient.(matchingclient.RoutingClient) + if !ok { + routingClient = nil + } + var waitGroup sync.WaitGroup var totalCancelled atomic.Int32 var failedPartitions atomic.Int32 @@ -3170,8 +3179,25 @@ func (wh *WorkflowHandler) cancelOutstandingWorkerPolls( } tq := tqFamily.TaskQueue(taskType) + // Skip partitions that route to an already-visited matching host. + seenHosts := make(map[string]bool) for partitionID := range numPartitions { partition := tq.NormalPartition(partitionID) + + if routingClient != nil { + host, err := routingClient.Route(partition) + if err != nil { + wh.logger.Warn("Failed to resolve matching host for poll cancellation dedup, sending RPC anyway.", + tag.WorkflowNamespaceID(namespaceID), + tag.WorkflowTaskQueueName(partition.RpcName()), + tag.Error(err)) + } else if seenHosts[host] { + continue + } else { + seenHosts[host] = true + } + } + waitGroup.Go(func() { resp, err := wh.matchingClient.CancelOutstandingWorkerPolls(ctx, &matchingservice.CancelOutstandingWorkerPollsRequest{ NamespaceId: namespaceID, diff --git a/service/frontend/workflow_handler_test.go b/service/frontend/workflow_handler_test.go index ccbb7c55d4c..5ed7bcba34b 100644 --- a/service/frontend/workflow_handler_test.go +++ b/service/frontend/workflow_handler_test.go @@ -63,6 +63,7 @@ import ( "go.temporal.io/server/common/tasktoken" "go.temporal.io/server/common/testing/protoassert" "go.temporal.io/server/common/testing/protorequire" + "go.temporal.io/server/common/tqid" "go.temporal.io/server/components/callbacks" "go.temporal.io/server/service/history/api" "go.temporal.io/server/service/history/tests" @@ -4340,6 +4341,58 @@ func (s *WorkflowHandlerSuite) TestShutdownWorkerWithEagerPollCancellation() { } } +func (s *WorkflowHandlerSuite) TestShutdownWorkerDeduplicatesByHost() { + // When multiple partitions route to the same matching host, only one RPC should be sent per host. 
+ config := s.newConfig() + config.EnableCancelWorkerPollsOnShutdown = dc.GetBoolPropertyFnFilteredByNamespace(true) + config.NumTaskQueueReadPartitions = dc.GetIntPropertyFnFilteredByTaskQueue(4) // 4 partitions + wh := s.getWorkflowHandler(config) + ctx := context.Background() + + stickyTaskQueue := "sticky-task-queue" + taskQueue := "my-task-queue" + workerInstanceKey := "worker-instance-123" + + // Wrap the mock matching client with a Route() that maps partitions to 2 hosts: + // root (partition 0) and partition 1 -> host-a, partitions 2 and 3 -> host-b. + routingClient := &routingMatchingClient{ + MockMatchingServiceClient: s.mockMatchingClient, + routeFn: func(p tqid.Partition) (string, error) { + if strings.Contains(p.RpcName(), "/2") || strings.Contains(p.RpcName(), "/3") { + return "host-b", nil + } + return "host-a", nil + }, + } + wh.matchingClient = routingClient + + // 4 partitions across 2 hosts x 2 task types = 4 RPCs (not 8). + s.mockMatchingClient.EXPECT().CancelOutstandingWorkerPolls(gomock.Any(), gomock.Any()). + Return(&matchingservice.CancelOutstandingWorkerPollsResponse{CancelledCount: 1}, nil). 
+ Times(4) + + s.mockNamespaceCache.EXPECT().GetNamespaceID(gomock.Eq(s.testNamespace)).Return(s.testNamespaceID, nil).AnyTimes() + + expectedForceUnloadRequest := &matchingservice.ForceUnloadTaskQueuePartitionRequest{ + NamespaceId: s.testNamespaceID.String(), + TaskQueuePartition: &taskqueuespb.TaskQueuePartition{ + TaskQueue: stickyTaskQueue, + TaskQueueType: enumspb.TASK_QUEUE_TYPE_WORKFLOW, + }, + } + s.mockMatchingClient.EXPECT().ForceUnloadTaskQueuePartition(gomock.Any(), gomock.Eq(expectedForceUnloadRequest)).Return(&matchingservice.ForceUnloadTaskQueuePartitionResponse{}, nil) + + _, err := wh.ShutdownWorker(ctx, &workflowservice.ShutdownWorkerRequest{ + Namespace: s.testNamespace.String(), + StickyTaskQueue: stickyTaskQueue, + Identity: "worker", + Reason: "graceful shutdown", + WorkerInstanceKey: workerInstanceKey, + TaskQueue: taskQueue, + }) + s.NoError(err) +} + func (s *WorkflowHandlerSuite) TestShutdownWorkerWithCancellationError() { // Verifies graceful degradation: ShutdownWorker succeeds even when poll cancellation fails. // This ensures backward compatibility during rolling upgrades. @@ -4726,3 +4779,14 @@ func (s *WorkflowHandlerSuite) TestUpdateActivityOptions_Priority() { s.ErrorContains(err, "priority key can't be negative") // NOTE: only testing a single validation scenario here; the priority validation has its own unit tests } + +// routingMatchingClient wraps a mock MatchingServiceClient to also implement matching.RoutingClient, +// allowing tests to verify host-based deduplication in cancelOutstandingWorkerPolls. 
+type routingMatchingClient struct { + *matchingservicemock.MockMatchingServiceClient + routeFn func(p tqid.Partition) (string, error) +} + +func (r *routingMatchingClient) Route(p tqid.Partition) (string, error) { + return r.routeFn(p) +} From 81535bfb50f3d7f3885706c709bdfa46e8feb29c Mon Sep 17 00:00:00 2001 From: Kannan Date: Tue, 12 May 2026 17:14:09 -0700 Subject: [PATCH 17/73] Clear StartedClock on activity retry (#10232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What Clear `StartedClock` on activity retry/pause. To do this, refactored the code that clears the per-attempt fields into a single `ClearActivityStartedState` helper, and updated all code paths. ## Why `StartedClock` is a per-attempt field introduced in #9233 to reconstruct task tokens for cancel worker commands. It was not being cleared when the activity leaves the started state (retry or pause), leaving a stale value during backoff. This can cause cancel commands to be unnecessarily dispatched for activities not currently running on any worker. ## How did you test it? 
- Unit tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- .../api/recordactivitytaskstarted/api.go | 5 +- .../workflow_task_completed_handler.go | 5 +- .../timer_queue_active_task_executor.go | 5 +- service/history/workflow/activity.go | 16 +++++-- .../history/workflow/mutable_state_impl.go | 5 +- ...utable_state_impl_restart_activity_test.go | 47 +++++++++++++++++++ 6 files changed, 65 insertions(+), 18 deletions(-) diff --git a/service/history/api/recordactivitytaskstarted/api.go b/service/history/api/recordactivitytaskstarted/api.go index 74eb393b030..25ae4803254 100644 --- a/service/history/api/recordactivitytaskstarted/api.go +++ b/service/history/api/recordactivitytaskstarted/api.go @@ -388,10 +388,7 @@ func processActivityWorkflowRules( // activity was paused, need to update activity if err := ms.UpdateActivity(ai.ScheduledEventId, func(activityInfo *persistencespb.ActivityInfo, _ historyi.MutableState) error { - activityInfo.StartedEventId = common.EmptyEventID - activityInfo.StartVersion = common.EmptyVersion - activityInfo.StartedTime = nil - activityInfo.RequestId = "" + workflow.ClearActivityStartedState(activityInfo) return nil }); err != nil { return rejectCodeUndefined, err diff --git a/service/history/api/respondworkflowtaskcompleted/workflow_task_completed_handler.go b/service/history/api/respondworkflowtaskcompleted/workflow_task_completed_handler.go index abf58eb89ff..6bd0d0222ad 100644 --- a/service/history/api/respondworkflowtaskcompleted/workflow_task_completed_handler.go +++ b/service/history/api/respondworkflowtaskcompleted/workflow_task_completed_handler.go @@ -698,9 +698,10 @@ func (handler *workflowTaskCompletedHandler) handleCommandRequestCancelActivity( handler.activityNotStartedCancelled = true } else if ai.WorkerControlTaskQueue != "" { if ai.StartedClock == nil { - // StartedClock may be nil for activities started before this feature was deployed. 
+ // StartedClock is nil when the activity is not currently running on a worker + // (e.g., in retry backoff, or started before this feature was deployed). // Skip cancel command; the activity will time out normally. - handler.logger.Info("Skipping worker cancel command: activity missing StartedClock (pre-deploy)", + handler.logger.Info("Skipping worker cancel command: activity not currently started", tag.WorkflowNamespaceID(handler.mutableState.GetWorkflowKey().NamespaceID), tag.WorkflowID(handler.mutableState.GetWorkflowKey().WorkflowID), tag.WorkflowRunID(handler.mutableState.GetWorkflowKey().RunID), diff --git a/service/history/timer_queue_active_task_executor.go b/service/history/timer_queue_active_task_executor.go index bca92e09fb3..29cedf9a92f 100644 --- a/service/history/timer_queue_active_task_executor.go +++ b/service/history/timer_queue_active_task_executor.go @@ -1026,10 +1026,7 @@ func (t *timerQueueActiveTaskExecutor) processActivityWorkflowRules( if ai.Paused { // need to update activity if err := ms.UpdateActivity(ai.ScheduledEventId, func(activityInfo *persistencespb.ActivityInfo, _ historyi.MutableState) error { - activityInfo.StartedEventId = common.EmptyEventID - activityInfo.StartVersion = common.EmptyVersion - activityInfo.StartedTime = nil - activityInfo.RequestId = "" + workflow.ClearActivityStartedState(activityInfo) return nil }); err != nil { return err diff --git a/service/history/workflow/activity.go b/service/history/workflow/activity.go index 8054f4c02ba..e4c77661b1f 100644 --- a/service/history/workflow/activity.go +++ b/service/history/workflow/activity.go @@ -60,6 +60,17 @@ func GetActivityState(ai *persistencespb.ActivityInfo) enumspb.PendingActivitySt return enumspb.PENDING_ACTIVITY_STATE_SCHEDULED } +// ClearActivityStartedState resets the per-attempt "started" fields on an ActivityInfo. +// Called when an activity leaves the started state (retry, pause, etc.) 
so that stale +// values from the previous attempt don't leak into the next one. +func ClearActivityStartedState(ai *persistencespb.ActivityInfo) { + ai.StartedEventId = common.EmptyEventID + ai.StartVersion = common.EmptyVersion + ai.RequestId = "" + ai.StartedTime = nil + ai.StartedClock = nil +} + func UpdateActivityInfoForRetries( ai *persistencespb.ActivityInfo, version int64, @@ -72,10 +83,7 @@ func UpdateActivityInfoForRetries( ai.Attempt = attempt ai.Version = version ai.ScheduledTime = nextScheduledTime - ai.StartedEventId = common.EmptyEventID - ai.StartVersion = common.EmptyVersion - ai.RequestId = "" - ai.StartedTime = nil + ClearActivityStartedState(ai) // Mark per-attempt timers for recreation. ai.TimerTaskStatus &^= TimerTaskStatusCreatedHeartbeat | TimerTaskStatusCreatedStartToClose | TimerTaskStatusCreatedScheduleToStart ai.RetryLastWorkerIdentity = ai.StartedIdentity diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index 862894e92b3..4339f4d39f3 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -6431,10 +6431,7 @@ func (ms *MutableStateImpl) RetryActivity( if ai.Paused { // need to update activity if err := ms.UpdateActivity(ai.ScheduledEventId, func(activityInfo *persistencespb.ActivityInfo, _ historyi.MutableState) error { - activityInfo.StartedEventId = common.EmptyEventID - activityInfo.StartVersion = common.EmptyVersion - activityInfo.StartedTime = nil - activityInfo.RequestId = "" + ClearActivityStartedState(activityInfo) activityInfo.RetryLastFailure = ms.truncateRetryableActivityFailure(activityFailure) activityInfo.Attempt++ if ms.config.EnableActivityRetryStampIncrement() { diff --git a/service/history/workflow/mutable_state_impl_restart_activity_test.go b/service/history/workflow/mutable_state_impl_restart_activity_test.go index 3cb91b7fbd3..3873a5b9f23 100644 --- 
a/service/history/workflow/mutable_state_impl_restart_activity_test.go +++ b/service/history/workflow/mutable_state_impl_restart_activity_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/google/uuid" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/uber-go/tally/v4" commandpb "go.temporal.io/api/command/v1" @@ -14,8 +15,10 @@ import ( enumspb "go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" + clockspb "go.temporal.io/server/api/clock/v1" enumsspb "go.temporal.io/server/api/enums/v1" persistencespb "go.temporal.io/server/api/persistence/v1" + "go.temporal.io/server/common" "go.temporal.io/server/common/backoff" commonclock "go.temporal.io/server/common/clock" "go.temporal.io/server/common/log" @@ -55,6 +58,32 @@ type ( } ) +func TestClearActivityStartedState(t *testing.T) { + ai := &persistencespb.ActivityInfo{ + StartedEventId: 42, + StartVersion: 10, + RequestId: "req-1", + StartedTime: timestamppb.Now(), + StartedClock: &clockspb.VectorClock{ClusterId: 1, ShardId: 1, Clock: 99}, + // Fields that should NOT be cleared. + ScheduledEventId: 7, + ActivityId: "activity-1", + Attempt: 3, + } + + ClearActivityStartedState(ai) + + require.Equal(t, common.EmptyEventID, ai.StartedEventId) + require.Equal(t, common.EmptyVersion, ai.StartVersion) + require.Empty(t, ai.RequestId) + require.Nil(t, ai.StartedTime) + require.Nil(t, ai.StartedClock) + // Verify non-started fields are untouched. 
+ require.Equal(t, int64(7), ai.ScheduledEventId) + require.Equal(t, "activity-1", ai.ActivityId) + require.Equal(t, int32(3), ai.Attempt) +} + func TestMutableStateRetryActivitySuite(t *testing.T) { s := new(retryActivitySuite) @@ -165,6 +194,24 @@ func (s *retryActivitySuite) TestRetryActivity_should_be_scheduled_when_next_bac s.assertTruncateFailureCalled() } +func (s *retryActivitySuite) TestRetryActivity_should_clear_per_attempt_fields() { + s.mutableState.timeSource = s.timeSource + taskGeneratorMock := NewMockTaskGenerator(s.controller) + taskGeneratorMock.EXPECT().GenerateActivityRetryTasks(s.activity) + s.mutableState.taskGenerator = taskGeneratorMock + + // Set per-attempt fields that should be cleared on retry. + s.activity.StartedClock = &clockspb.VectorClock{ClusterId: 1, ShardId: 1, Clock: 42} + s.activity.StartedTime = timestamppb.Now() + + _, err := s.mutableState.RetryActivity(s.activity, s.failure) + s.Require().NoError(err) + + s.Nil(s.activity.StartedClock, "StartedClock should be cleared on retry") + s.Nil(s.activity.StartedTime, "StartedTime should be cleared on retry") + s.Equal(common.EmptyEventID, s.activity.StartedEventId, "StartedEventId should be reset to EmptyEventID") +} + // TestRetryActivity_should_be_scheduled_when_next_retry_delay_is_set asserts that the activity is retried after NextRetryDelay period specified in the application failure. func (s *retryActivitySuite) TestRetryActivity_should_be_scheduled_when_next_retry_delay_is_set() { s.mutableState.timeSource = s.timeSource From 4dfb596a62537c051dfeddd5c427ad24cd85d010 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 13 May 2026 09:22:35 -0700 Subject: [PATCH 18/73] Use EventuallyWithT in versioning tests (#10160) ## What changed? Replaces (almost) all use of `s.Eventually` with `s.EventuallyWithT`. ## Why? Assertions are often used inside `s.Eventually` here and that's not safe as it aborts the test immediately. 
--- tests/versioning_3_test.go | 320 ++++++++++++++++--------------------- tests/versioning_test.go | 97 ++++++----- 2 files changed, 189 insertions(+), 228 deletions(-) diff --git a/tests/versioning_3_test.go b/tests/versioning_3_test.go index e7793c8f6cb..d17d53877df 100644 --- a/tests/versioning_3_test.go +++ b/tests/versioning_3_test.go @@ -254,7 +254,7 @@ func (s *Versioning3Suite) TestSessionActivityResourceSpecificTaskQueueNotRegist Namespace: s.Namespace().String(), Version: tv.DeploymentVersionString(), }) - s.Require().NoError(err) + s.NoError(err) totalActTQ := 0 for _, tq := range resp.GetVersionTaskQueues() { @@ -862,7 +862,7 @@ func (s *Versioning3Suite) TestUnpinnedWorkflow_SuccessfulUpdate_TransitionsToNe 4 WorkflowTaskCompleted 5 WorkflowTaskScheduled // Was speculative WT... 6 WorkflowTaskStarted -7 WorkflowTaskCompleted // ...and events were written to the history when WT completes. +7 WorkflowTaskCompleted // ...and events were written to the history when WT completes. 8 WorkflowExecutionUpdateAccepted {"AcceptedRequestSequencingEventId": 5} // WTScheduled event which delivered update to the worker. 
9 WorkflowExecutionUpdateCompleted {"AcceptedEventId": 8} `, events) @@ -2378,13 +2378,13 @@ func (s *Versioning3Suite) testPinnedCaNUpgradeOnCaN(normalTask, speculativeTask if i < eventsBeforeDeploymentChange { // Events before deployment change should NOT have the flag s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) s.False(attr.GetTargetWorkerDeploymentVersionChanged(), "Event %d should not have flag (before deployment change)", event.GetEventId()) } else { // Events after deployment change SHOULD have the flag (including failed attempts and transient retries) s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) s.True(attr.GetTargetWorkerDeploymentVersionChanged(), "Event %d should have flag (after deployment change)", event.GetEventId()) } @@ -2393,7 +2393,7 @@ func (s *Versioning3Suite) testPinnedCaNUpgradeOnCaN(normalTask, speculativeTask for _, event := range wfTaskStartedEvents { attr := event.GetWorkflowTaskStartedEventAttributes() s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) s.False(attr.GetTargetWorkerDeploymentVersionChanged()) } } @@ -2540,7 +2540,7 @@ func (s *Versioning3Suite) testPinnedCaNUseRampingVersionOnCaN(pinnedOverride, n if event.GetEventType() == enumspb.EVENT_TYPE_WORKFLOW_TASK_STARTED { attr := event.GetWorkflowTaskStartedEventAttributes() s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) // Setting a ramping version does not change the target for a Pinned workflow: // the target is the pinned version, not current or ramping. 
s.False(attr.GetTargetWorkerDeploymentVersionChanged(), @@ -2949,7 +2949,7 @@ func (s *Versioning3Suite) verifyTransientTask(task *workflowservice.PollWorkflo lastScheduledEvent = event } } - s.Require().NotNil(lastScheduledEvent) + s.NotNil(lastScheduledEvent) s.Equal(int32(2), lastScheduledEvent.GetWorkflowTaskScheduledEventAttributes().GetAttempt()) } @@ -3001,12 +3001,12 @@ func (s *Versioning3Suite) TestAutoUpgradeCaN_UpgradeOnCaN() { wfTaskStartedEvents = append(wfTaskStartedEvents, event) } } - s.Require().Len(wfTaskStartedEvents, 2) // make sure we are actually verifying non-zero # of events + s.Len(wfTaskStartedEvents, 2) // make sure we are actually verifying non-zero # of events for _, event := range wfTaskStartedEvents { attr := event.GetWorkflowTaskStartedEventAttributes() s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) } // For AutoUpgrade, I want to test that once the workflow has transitioned to v2, it doesn't get the CaN suggestion anymore. 
@@ -3037,7 +3037,7 @@ func (s *Versioning3Suite) TestAutoUpgradeCaN_UpgradeOnCaN() { for _, event := range wfTaskStartedEvents { attr := event.GetWorkflowTaskStartedEventAttributes() s.False(attr.GetSuggestContinueAsNew()) - s.Require().Empty(attr.GetSuggestContinueAsNewReasons()) + s.Empty(attr.GetSuggestContinueAsNewReasons()) } return &workflowservice.RespondWorkflowTaskCompletedRequest{ @@ -3507,20 +3507,23 @@ func (s *Versioning3Suite) TestSyncDeploymentUserDataWithRoutingConfig_Update() func (s *Versioning3Suite) setCurrentDeployment(tv *testvars.TestVars) { ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() - s.Eventually(func() bool { + + var unexpectedErr error + s.EventuallyWithT(func(t *assert.CollectT) { req := &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: s.Namespace().String(), DeploymentName: tv.DeploymentSeries(), } req.BuildId = tv.BuildID() _, err := s.FrontendClient().SetWorkerDeploymentCurrentVersion(ctx, req) - var notFound *serviceerror.NotFound - if errors.As(err, &notFound) || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) { - return false + if _, ok := errors.AsType[*serviceerror.NotFound](err); ok || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrCurrentVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) { + require.NoError(t, err) + } + if err != nil { + unexpectedErr = err } - s.NoError(err) - return err == nil }, 60*time.Second, 500*time.Millisecond) + s.NoError(unexpectedErr) // Wait for propagation to complete since we have tests using async entity workflows to set the current version s.waitForDeploymentDataPropagationQueryWorkerDeployment(tv) @@ -3551,51 +3554,59 @@ func (s *Versioning3Suite) pollUntilRegistered(ctx context.Context, tv *testvars } }() } + // Wait
until the version is visible and all requested task queue types are registered. - s.Eventually(func() bool { + var unexpectedErr error + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.FrontendClient().DescribeWorkerDeploymentVersion(ctx, &workflowservice.DescribeWorkerDeploymentVersionRequest{ Namespace: s.Namespace().String(), Version: tv.DeploymentVersionString(), }) - var notFound *serviceerror.NotFound - if errors.As(err, &notFound) { - return false + if _, ok := errors.AsType[*serviceerror.NotFound](err); ok { + require.NoError(t, err) } - s.NoError(err) - tqName := tv.TaskQueue().GetName() - for _, tqType := range tqTypes { - found := false - for _, tq := range resp.GetVersionTaskQueues() { - if tq.GetName() == tqName && tq.GetType() == tqType { - found = true - break + if err != nil { + unexpectedErr = err + } else { + tqName := tv.TaskQueue().GetName() + for _, tqType := range tqTypes { + found := false + for _, tq := range resp.GetVersionTaskQueues() { + if tq.GetName() == tqName && tq.GetType() == tqType { + found = true + break + } + } + if !found { + require.True(t, found) + return } - } - if !found { - return false } } - return true }, 30*time.Second, 100*time.Millisecond) + s.NoError(unexpectedErr) cancel() } func (s *Versioning3Suite) unsetCurrentDeployment(tv *testvars.TestVars) { ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() - s.Eventually(func() bool { + + var unexpectedErr error + s.EventuallyWithT(func(t *assert.CollectT) { req := &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ Namespace: s.Namespace().String(), DeploymentName: tv.DeploymentSeries(), } _, err := s.FrontendClient().SetWorkerDeploymentCurrentVersion(ctx, req) - var notFound *serviceerror.NotFound - if errors.As(err, &notFound) { - return false + if _, ok := errors.AsType[*serviceerror.NotFound](err); ok { + require.NoError(t, err) + } + if err != nil { + unexpectedErr = err } - s.NoError(err) - return err == nil },
60*time.Second, 500*time.Millisecond) + s.NoError(unexpectedErr) // Wait for propagation to complete since we have tests using async entity workflows to set the current version s.waitForDeploymentDataPropagationQueryWorkerDeployment(tv) @@ -3613,7 +3624,8 @@ func (s *Versioning3Suite) setRampingDeployment( bid = "" } - s.Eventually(func() bool { + var unexpectedErr error + s.EventuallyWithT(func(t *assert.CollectT) { req := &workflowservice.SetWorkerDeploymentRampingVersionRequest{ Namespace: s.Namespace().String(), DeploymentName: tv.DeploymentSeries(), @@ -3621,13 +3633,14 @@ func (s *Versioning3Suite) setRampingDeployment( } req.BuildId = bid _, err := s.FrontendClient().SetWorkerDeploymentRampingVersion(ctx, req) - var notFound *serviceerror.NotFound - if errors.As(err, &notFound) || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) { - return false + if _, ok := errors.AsType[*serviceerror.NotFound](err); ok || (err != nil && strings.Contains(err.Error(), serviceerror.NewFailedPreconditionf(workerdeployment.ErrRampingVersionDoesNotHaveAllTaskQueues, tv.DeploymentVersionStringV32()).Error())) { + require.NoError(t, err) + } + if err != nil { + unexpectedErr = err } - s.NoError(err) - return err == nil }, 60*time.Second, 500*time.Millisecond) + s.NoError(unexpectedErr) // Wait for propagation to complete since we have tests using async entity workflows to set the current version s.waitForDeploymentDataPropagationQueryWorkerDeployment(tv) @@ -3635,13 +3648,13 @@ func (s *Versioning3Suite) setRampingDeployment( func (s *Versioning3Suite) waitForDeploymentDataPropagationQueryWorkerDeployment(tv *testvars.TestVars) { if s.deploymentWorkflowVersion == workerdeployment.AsyncSetCurrentAndRamping { - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err :=
s.FrontendClient().DescribeWorkerDeployment(context.Background(), &workflowservice.DescribeWorkerDeploymentRequest{ Namespace: s.Namespace().String(), DeploymentName: tv.DeploymentSeries(), }) - s.NoError(err) - return resp.GetWorkerDeploymentInfo().GetRoutingConfigUpdateState() == enumspb.ROUTING_CONFIG_UPDATE_STATE_COMPLETED + require.NoError(t, err) + require.Equal(t, enumspb.ROUTING_CONFIG_UPDATE_STATE_COMPLETED, resp.GetWorkerDeploymentInfo().GetRoutingConfigUpdateState()) }, 10*time.Second, 100*time.Millisecond) } } @@ -3764,15 +3777,16 @@ func (s *Versioning3Suite) rollbackTaskQueueToVersion( }}, nil, tqTypeWf) // Verify that the rollback propagated to all partitions - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { ms, err := s.GetTestCluster().MatchingClient().GetTaskQueueUserData(context.Background(), &matchingservice.GetTaskQueueUserDataRequest{ NamespaceId: s.NamespaceID().String(), TaskQueue: tv.TaskQueue().GetName(), TaskQueueType: tqTypeWf, }) - s.NoError(err) + require.NoError(t, err) current, currentRevisionNumber, _, _, _, _, _, _ := worker_versioning.CalculateTaskQueueVersioningInfo(ms.GetUserData().GetData().GetPerType()[int32(tqTypeWf)].GetDeploymentData()) - return current.GetBuildId() == tv.DeploymentVersion().GetBuildId() && currentRevisionNumber == 0 + require.Equal(t, tv.DeploymentVersion().GetBuildId(), current.GetBuildId()) + require.Equal(t, int64(0), currentRevisionNumber) }, 10*time.Second, 100*time.Millisecond) } @@ -4868,21 +4882,14 @@ func (s *Versioning3Suite) TestChildStartsWithParentRevision_SameTQ_TQAhead() { s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "startChild", nil)) // Verify that the child started and completed on the v2 worker. 
- s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tvChild.WorkflowID(), "") - if err != nil { - return false - } + require.NoError(t, err) execInfo := desc.GetWorkflowExecutionInfo() // Verify that the child workflow started and completed on the v1 worker - if execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId() != tv2.BuildID() { - return false - } - if execInfo.GetStatus() != enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED { - return false - } - return true + require.Equal(t, tv2.BuildID(), execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId()) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, execInfo.GetStatus()) }, 10*time.Second, 100*time.Millisecond) // Verify that the parent workflow completed successfully. This shall only be possible if the child workflow @@ -4980,12 +4987,10 @@ func (s *Versioning3Suite) TestChildStartsWithParentRevision_SameTQ_TQLags() { s.NoError(err) // Verify that the parent workflow has started on the v1 worker - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tvParent.WorkflowID(), "") - if err != nil { - return false - } - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tvParent.BuildID() + require.NoError(t, err) + require.Equal(t, tvParent.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Roll back the child TQ routing-config revision to simulate Routing Config lag in matching partitions (set v0 as current with older revision) @@ -5001,21 +5006,14 @@ func (s *Versioning3Suite) TestChildStartsWithParentRevision_SameTQ_TQLags() { s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "startChild", nil)) // Verify that the child started and completed on the v1 worker. 
- s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tvChild.WorkflowID(), "") - if err != nil { - return false - } + require.NoError(t, err) execInfo := desc.GetWorkflowExecutionInfo() // Verify that the child workflow started and completed on the v1 worker - if execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId() != tvChild.BuildID() { - return false - } - if execInfo.GetStatus() != enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED { - return false - } - return true + require.Equal(t, tvChild.BuildID(), execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId()) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, execInfo.GetStatus()) }, 10*time.Second, 100*time.Millisecond) // Verify that the parent workflow completed successfully. This shall only be possible if the child workflow @@ -5088,21 +5086,13 @@ func (s *Versioning3Suite) TestChildStartsWithNoInheritedAutoUpgradeInfo_CrossTQ s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "startChild", nil)) // Verify that the child workflow started and completed on it's worker, which shall verify that no AutoUpgradeInheritedInfo was inherited. 
- s.Eventually(func() bool { - s.Eventually(func() bool { - desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tvChild.WorkflowID(), "") - if err != nil { - return false - } - execInfo := desc.GetWorkflowExecutionInfo() + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tvChild.WorkflowID(), "") + require.NoError(t, err) + execInfo := desc.GetWorkflowExecutionInfo() - // Verify that the new run starts on the v1 worker - if execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId() != tvChild.BuildID() { - return false - } - return true - }, 10*time.Second, 100*time.Millisecond) - return true + // Verify that the new run starts on the v1 worker + require.Equal(t, tvChild.BuildID(), execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Verify that the parent workflow completed successfully. @@ -5149,12 +5139,10 @@ func (s *Versioning3Suite) TestContinueAsNewOfAutoUpgradeWorkflow_RevisionNumber s.NoError(err) // Ensure the workflow has started on the v1 worker - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tv1.WorkflowID(), "") - if err != nil { - return false - } - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.NoError(t, err) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Rollback the TaskQueueUserData to simulate task queue partition lag @@ -5170,27 +5158,18 @@ func (s *Versioning3Suite) TestContinueAsNewOfAutoUpgradeWorkflow_RevisionNumber // Wait for the new run to start var newRunID string - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, tv1.WorkflowID(), "") - if err != nil { - 
return false - } + require.NoError(t, err) execInfo := desc.GetWorkflowExecutionInfo() // Verify that the workflow truly CAN'ed and started a new run - if execInfo.GetStatus() == enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED { - newRunID = execInfo.GetExecution().GetRunId() - if newRunID == run.GetRunID() { - return false - } - } + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, execInfo.GetStatus()) + newRunID = execInfo.GetExecution().GetRunId() + require.NotEqual(t, run.GetRunID(), newRunID) // Verify that the new run starts on the v1 worker - if execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId() != tv1.BuildID() { - return false - } - - return true + require.Equal(t, tv1.BuildID(), execInfo.GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Verify that the workflow completed successfully on v1 @@ -5331,13 +5310,11 @@ func (s *Versioning3Suite) testRetryNoBounceBack(testContinueAsNew bool, testChi s.NoError(err) // Wait for child workflow to be created and executing on v1 - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, childWorkflowID, "") - if err != nil { - return false - } + require.NoError(t, err) runIDBeforeRetry = desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) wfID = childWorkflowID @@ -5360,29 +5337,21 @@ func (s *Versioning3Suite) testRetryNoBounceBack(testContinueAsNew bool, testChi if testContinueAsNew { // Wait for ContinueAsNew to happen - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, "") - s.NoError(err) - if err != 
nil { - return false - } + require.NoError(t, err) // After CAN, the run ID changes and we should see execution on v1 - if desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() != run0.GetRunID() { - runIDBeforeRetry = desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() - } - return false + runIDBeforeRetry = desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() + require.NotEqual(t, run0.GetRunID(), runIDBeforeRetry) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) } else { runIDBeforeRetry = run0.GetRunID() // Ensure initial run is executing on v1. - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, runIDBeforeRetry) - s.NoError(err) - if err != nil { - return false - } - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.NoError(t, err) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) } } @@ -5396,44 +5365,37 @@ func (s *Versioning3Suite) testRetryNoBounceBack(testContinueAsNew bool, testChi go s.idlePollWorkflow(idlePollerCtx, tv0, true, 10*time.Second, "v0 poller should not receive a task") // Verify that the rollback propagated to all partitions - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { ms, err := s.GetTestCluster().MatchingClient().GetTaskQueueUserData(context.Background(), &matchingservice.GetTaskQueueUserDataRequest{ NamespaceId: s.NamespaceID().String(), TaskQueue: tv0.TaskQueue().GetName(), TaskQueueType: tqTypeWf, }) - s.NoError(err) + require.NoError(t, err) current, currentRevisionNumber, _, _, _, _, _, _ := 
worker_versioning.CalculateTaskQueueVersioningInfo(ms.GetUserData().GetData().GetPerType()[int32(tqTypeWf)].GetDeploymentData()) - return current.GetBuildId() == tv0.DeploymentVersion().GetBuildId() && currentRevisionNumber == 0 + require.Equal(t, tv0.DeploymentVersion().GetBuildId(), current.GetBuildId()) + require.Equal(t, int64(0), currentRevisionNumber) }, 10*time.Second, 100*time.Millisecond) // Trigger failure of the run to cause retry. s.NoError(s.SdkClient().SignalWorkflow(ctx, wfID, runIDBeforeRetry, "proceed", nil)) // Wait for run to fail. - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflow(ctx, wfID, runIDBeforeRetry) - s.NoError(err) - if err != nil { - return false - } - return desc.Status == enumspb.WORKFLOW_EXECUTION_STATUS_FAILED + require.NoError(t, err) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_FAILED, desc.Status) }, 10*time.Second, 100*time.Millisecond) // Verify that retry run is still on v1 (didn't bounce back to v0) - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, "") - s.NoError(err) - if err != nil { - return false - } + require.NoError(t, err) // After retry, there should be a new run - if desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() != runIDBeforeRetry { - // Verify workflow (parent or child) is still on v1 - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() - } - return false + require.NotEqual(t, runIDBeforeRetry, desc.GetWorkflowExecutionInfo().GetExecution().GetRunId()) + // Verify workflow (parent or child) is still on v1 + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) } @@ -5455,15 +5417,15 @@ func (s *Versioning3Suite) TestCheckTaskQueueVersionMembership() { tv1 := 
testvars.New(s).WithBuildIDNumber(1) // No version exists in the task queue's userData as of now - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.GetTestCluster().MatchingClient().CheckTaskQueueVersionMembership(context.Background(), &matchingservice.CheckTaskQueueVersionMembershipRequest{ NamespaceId: s.NamespaceID().String(), TaskQueue: tv1.TaskQueue().GetName(), TaskQueueType: tqTypeWf, Version: worker_versioning.DeploymentVersionFromDeployment(tv1.Deployment()), }) - s.NoError(err) - return !resp.GetIsMember() // the check should pass if no version is present + require.NoError(t, err) + require.False(t, resp.GetIsMember()) // the check should pass if no version is present }, 10*time.Second, 100*time.Millisecond) // Start v1 worker which shall register the version in the task queue @@ -5477,15 +5439,15 @@ func (s *Versioning3Suite) TestCheckTaskQueueVersionMembership() { defer w1.Stop() // The version should eventually show up in the task queue's user data - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.GetTestCluster().MatchingClient().CheckTaskQueueVersionMembership(context.Background(), &matchingservice.CheckTaskQueueVersionMembershipRequest{ NamespaceId: s.NamespaceID().String(), TaskQueue: tv1.TaskQueue().GetName(), TaskQueueType: tqTypeWf, Version: worker_versioning.DeploymentVersionFromDeployment(tv1.Deployment()), }) - s.NoError(err) - return resp.GetIsMember() + require.NoError(t, err) + require.True(t, resp.GetIsMember()) }, 10*time.Second, 100*time.Millisecond) } @@ -5667,12 +5629,10 @@ func (s *Versioning3Suite) TestActivityRetryAutoUpgradeDuringBackoff() { s.NoError(err) // Wait for the workflow to start on v1 - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, run.GetID(), "") - if err != nil { - return false - } - return 
desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.NoError(t, err) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Wait for first activity attempt to fail (should be on v1) @@ -6686,12 +6646,10 @@ func (s *Versioning3Suite) TestRetryOfDeclinedCaN_SignalsOnNewTarget() { wfID := run0.GetID() // Wait for workflow to be running on v1. - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, run0.GetRunID()) - if err != nil { - return false - } - return desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.NoError(t, err) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Set v2 as current, signal workflow to CaN without AU (decline upgrade). @@ -6702,38 +6660,32 @@ func (s *Versioning3Suite) TestRetryOfDeclinedCaN_SignalsOnNewTarget() { // Wait for CaN to happen — new run on v1. var canRunID string - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, "") - if err != nil { - return false - } + require.NoError(t, err) canRunID = desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() - return canRunID != run0.GetRunID() && - desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId() == tv1.BuildID() + require.NotEqual(t, run0.GetRunID(), canRunID) + require.Equal(t, tv1.BuildID(), desc.GetWorkflowExecutionInfo().GetVersioningInfo().GetDeploymentVersion().GetBuildId()) }, 10*time.Second, 100*time.Millisecond) // Signal CaN run to fail (triggers server retry). Target remains v2. 
s.NoError(s.SdkClient().SignalWorkflow(ctx, wfID, canRunID, "proceed", nil)) // Wait for CaN run to fail. - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflow(ctx, wfID, canRunID) - if err != nil { - return false - } - return desc.Status == enumspb.WORKFLOW_EXECUTION_STATUS_FAILED + require.NoError(t, err) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_FAILED, desc.Status) }, 10*time.Second, 100*time.Millisecond) // Wait for retry run to complete. var retryRunID string - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfID, "") - if err != nil { - return false - } + require.NoError(t, err) retryRunID = desc.GetWorkflowExecutionInfo().GetExecution().GetRunId() - return retryRunID != canRunID && - desc.GetWorkflowExecutionInfo().GetStatus() == enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED + require.NotEqual(t, canRunID, retryRunID) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, desc.GetWorkflowExecutionInfo().GetStatus()) }, 10*time.Second, 100*time.Millisecond) // Verify: retry run's WFT started should have targetDeploymentVersionChanged=false diff --git a/tests/versioning_test.go b/tests/versioning_test.go index 762a0d5cad0..2cf371a45e9 100644 --- a/tests/versioning_test.go +++ b/tests/versioning_test.go @@ -13,6 +13,8 @@ import ( "time" "github.com/dgryski/go-farm" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" commandpb "go.temporal.io/api/command/v1" commonpb "go.temporal.io/api/common/v1" @@ -2383,7 +2385,7 @@ func (s *VersioningIntegSuite) TestDispatchActivityEager() { defer cancel() _, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{TaskQueue: tq}, "wf") - s.Require().NoError(err) + s.NoError(err) pollResponse, err := s.SdkClient().WorkflowService().PollWorkflowTaskQueue(ctx, 
&workflowservice.PollWorkflowTaskQueueRequest{ Namespace: s.Namespace().String(), @@ -2393,7 +2395,7 @@ func (s *VersioningIntegSuite) TestDispatchActivityEager() { BuildId: v1, }, }) - s.Require().NoError(err) + s.NoError(err) startToCloseTimeout := time.Minute completionResponse, err := s.SdkClient().WorkflowService().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ @@ -2440,9 +2442,9 @@ func (s *VersioningIntegSuite) TestDispatchActivityEager() { }, }, }) - s.Require().NoError(err) - s.Require().Len(completionResponse.ActivityTasks, 1) - s.Require().Equal("compatible", completionResponse.ActivityTasks[0].ActivityId) + s.NoError(err) + s.Len(completionResponse.ActivityTasks, 1) + s.Equal("compatible", completionResponse.ActivityTasks[0].ActivityId) } func (s *VersioningIntegSuite) TestDispatchActivityCrossTQFails() { @@ -3953,14 +3955,14 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Versioned_Reachabil s.WaitForChannel(ctx, started) // 2. Wait for visibility to show A as running with BuildId SearchAttribute 'assigned:A' - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { queryARunning := fmt.Sprintf("TaskQueue = '%s' AND BuildIds IN ('assigned:A') AND ExecutionStatus = \"Running\"", tq) resp, err := s.FrontendClient().CountWorkflowExecutions(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: s.Namespace().String(), Query: queryARunning, }) - s.NoError(err) - return resp.GetCount() > 0 + require.NoError(t, err) + require.Positive(t, resp.GetCount()) }, 5*time.Second, 50*time.Millisecond) // 3. 
Commit a different build id --> A should now only be reachable via visibility query @@ -4021,14 +4023,14 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Versioned_BasicReac s.WaitForChannel(ctx, started) // wait for visibility to show A as running with BuildId SearchAttribute 'assigned:A' - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { queryARunning := fmt.Sprintf("TaskQueue = '%s' AND BuildIds IN ('assigned:A') AND ExecutionStatus = \"Running\"", tq) resp, err := s.FrontendClient().CountWorkflowExecutions(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: s.Namespace().String(), Query: queryARunning, }) - s.NoError(err) - return resp.GetCount() > 0 + require.NoError(t, err) + require.Positive(t, resp.GetCount()) }, 3*time.Second, 50*time.Millisecond) // commit a different build ID --> A should now only be reachable via visibility query, B reachable as default @@ -4045,11 +4047,11 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Versioned_BasicReac s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), "", "wait", nil)) // Query reachability(A) --> eventually shows closed_only by visibility db (after TTL passes and A is closed in visibility) - s.Eventually(func() bool { - return s.checkBuildIdReachability(ctx, tq, &taskqueuepb.TaskQueueVersionSelection{BuildIds: []string{"A"}}, map[string]enumspb.BuildIdTaskReachability{ + s.EventuallyWithT(func(t *assert.CollectT) { + require.True(t, s.checkBuildIdReachability(ctx, tq, &taskqueuepb.TaskQueueVersionSelection{BuildIds: []string{"A"}}, map[string]enumspb.BuildIdTaskReachability{ "A": enumspb.BUILD_ID_TASK_REACHABILITY_CLOSED_WORKFLOWS_ONLY, // closed_only by visibility db (after TTL) "B": enumspb.BUILD_ID_TASK_REACHABILITY_REACHABLE, // reachable by default assignment rule - }) + })) }, 5*time.Second, 50*time.Millisecond) } @@ -4073,7 +4075,7 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Unversioned() { workerMap[wId] = w } - 
s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.FrontendClient().DescribeTaskQueue(ctx, &workflowservice.DescribeTaskQueueRequest{ Namespace: s.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, @@ -4084,11 +4086,11 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Unversioned() { ReportTaskReachability: true, ReportStats: false, }) - s.NoError(err) - s.NotNil(resp) - s.Len(resp.GetVersionsInfo(), 1, "should be 1 because only default/unversioned queue") //nolint:staticcheck + require.NoError(t, err) + require.NotNil(t, resp) + require.Len(t, resp.GetVersionsInfo(), 1, "should be 1 because only default/unversioned queue") //nolint:staticcheck versionInfo := resp.GetVersionsInfo()[""] - s.Equal(enumspb.BUILD_ID_TASK_REACHABILITY_REACHABLE, versionInfo.GetTaskReachability()) + require.Equal(t, enumspb.BUILD_ID_TASK_REACHABILITY_REACHABLE, versionInfo.GetTaskReachability()) var pollersInfo []*taskqueuepb.PollerInfo for _, t := range versionInfo.GetTypesInfo() { pollersInfo = append(pollersInfo, t.GetPollers()...) @@ -4096,7 +4098,8 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Unversioned() { foundN := 0 for wId := range workerMap { for _, pi := range pollersInfo { - s.False(pi.GetWorkerVersionCapabilities().GetUseVersioning()) + //nolint:staticcheck // SA1019: this test covers legacy build-ID versioning. 
+ require.False(t, pi.GetWorkerVersionCapabilities().GetUseVersioning()) if pi.GetIdentity() == wId { foundN++ break @@ -4104,7 +4107,7 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_Unversioned() { } } - return foundN == workerN + require.Equal(t, workerN, foundN) }, 3*time.Second, 50*time.Millisecond) } @@ -4124,7 +4127,7 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_ReportFlags() { defer w.Stop() // wait for pollers to show up, verify both ReportPollers and ReportTaskReachability - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.FrontendClient().DescribeTaskQueue(ctx, &workflowservice.DescribeTaskQueueRequest{ Namespace: s.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, @@ -4134,23 +4137,25 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueEnhanced_ReportFlags() { ReportPollers: true, ReportTaskReachability: true, }) - s.NoError(err) - s.NotNil(resp) - s.Len(resp.GetVersionsInfo(), 1, "should be 1 because only default/unversioned queue") //nolint:staticcheck + require.NoError(t, err) + require.NotNil(t, resp) + require.Len(t, resp.GetVersionsInfo(), 1, "should be 1 because only default/unversioned queue") //nolint:staticcheck versionInfo := resp.GetVersionsInfo()[""] - s.Equal(enumspb.BUILD_ID_TASK_REACHABILITY_REACHABLE, versionInfo.GetTaskReachability()) + require.Equal(t, enumspb.BUILD_ID_TASK_REACHABILITY_REACHABLE, versionInfo.GetTaskReachability()) var pollersInfo []*taskqueuepb.PollerInfo for _, t := range versionInfo.GetTypesInfo() { pollersInfo = append(pollersInfo, t.GetPollers()...) } + foundPoller := false for _, pi := range pollersInfo { - s.False(pi.GetWorkerVersionCapabilities().GetUseVersioning()) + //nolint:staticcheck // SA1019: this test covers legacy build-ID versioning. 
+ require.False(t, pi.GetWorkerVersionCapabilities().GetUseVersioning()) if pi.GetIdentity() == wId { - return true + foundPoller = true + break } } - - return false + require.True(t, foundPoller) }, 3*time.Second, 50*time.Millisecond) // ask for reachability only @@ -4269,13 +4274,13 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueLegacy_VersionSets() { s.NoError(w2.Start()) defer w2.Stop() - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.FrontendClient().DescribeTaskQueue(ctx, &workflowservice.DescribeTaskQueueRequest{ Namespace: s.Namespace().String(), TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, TaskQueueType: enumspb.TASK_QUEUE_TYPE_WORKFLOW, }) - s.NoError(err) + require.NoError(t, err) havePoller := func(v string) bool { for _, p := range resp.Pollers { if p.WorkerVersionCapabilities.UseVersioning && v == p.WorkerVersionCapabilities.BuildId { @@ -4285,7 +4290,9 @@ func (s *VersioningIntegSuite) TestDescribeTaskQueueLegacy_VersionSets() { return false } // v1 polls get rejected because v11 is newer - return !havePoller(v1) && havePoller(v11) && havePoller(v2) + require.False(t, havePoller(v1)) + require.True(t, havePoller(v11)) + require.True(t, havePoller(v2)) }, 3*time.Second, 50*time.Millisecond) } @@ -4328,10 +4335,11 @@ func (s *VersioningIntegSuite) TestDescribeWorkflowExecution() { s.WaitForChannel(ctx, started1) // describe and check build ID - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.SdkClient().DescribeWorkflowExecution(ctx, run.GetID(), "") - s.NoError(err) - return v1 == resp.GetWorkflowExecutionInfo().GetMostRecentWorkerVersionStamp().GetBuildId() + require.NoError(t, err) + //nolint:staticcheck // SA1019: this test covers legacy build-ID versioning. 
+ require.Equal(t, v1, resp.GetWorkflowExecutionInfo().GetMostRecentWorkerVersionStamp().GetBuildId()) }, 5*time.Second, 100*time.Millisecond) // now register v11 as newer compatible with v1 @@ -4356,10 +4364,11 @@ func (s *VersioningIntegSuite) TestDescribeWorkflowExecution() { s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), "", "wait", nil)) s.WaitForChannel(ctx, started11) - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { resp, err := s.SdkClient().DescribeWorkflowExecution(ctx, run.GetID(), "") - s.NoError(err) - return v11 == resp.GetWorkflowExecutionInfo().GetMostRecentWorkerVersionStamp().GetBuildId() + require.NoError(t, err) + //nolint:staticcheck // SA1019: this test covers legacy build-ID versioning. + require.Equal(t, v11, resp.GetWorkflowExecutionInfo().GetMostRecentWorkerVersionStamp().GetBuildId()) }, 5*time.Second, 100*time.Millisecond) // unblock. it should complete @@ -4901,10 +4910,10 @@ func (s *VersioningIntegSuite) waitForPropagation( remaining[partAndType{i, enumspb.TASK_QUEUE_TYPE_ACTIVITY}] = struct{}{} remaining[partAndType{i, enumspb.TASK_QUEUE_TYPE_WORKFLOW}] = struct{}{} } - s.Eventually(func() bool { + s.EventuallyWithT(func(t *assert.CollectT) { for pt := range remaining { f, err := tqid.NewTaskQueueFamily(s.NamespaceID().String(), taskQueue) - s.NoError(err) + require.NoError(t, err) partition := f.TaskQueue(pt.tp).NormalPartition(pt.part) // Use lower-level GetTaskQueueUserData instead of GetWorkerBuildIdCompatibility // here so that we can target activity queues. 
@@ -4915,12 +4924,12 @@ func (s *VersioningIntegSuite) waitForPropagation( TaskQueue: partition.RpcName(), TaskQueueType: partition.TaskType(), }) - s.NoError(err) + require.NoError(t, err) if condition(res.GetUserData().GetData().GetVersioningData()) { delete(remaining, pt) } } - return len(remaining) == 0 + require.Empty(t, remaining) }, 10*time.Second, 100*time.Millisecond) } @@ -4932,7 +4941,7 @@ func (s *VersioningIntegSuite) unloadTaskQueue(ctx context.Context, tq string) { TaskQueueType: enumspb.TASK_QUEUE_TYPE_WORKFLOW, }, }) - s.Require().NoError(err) + s.NoError(err) } func (s *VersioningIntegSuite) getStickyQueueName(ctx context.Context, id string) string { From 1a8b8b528b61332600e0118c56772a88d15ee3c3 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 13 May 2026 16:24:31 -0600 Subject: [PATCH 19/73] fix: remove header_callsite from event_blob_size metric (#10253) ## What changed? Use a metrics handler without `header_callsite` tag because Prometheus rejects re-registering the same metric with a different label set and logs `error in prometheus reporter ... event_blob_size ... has different label names`. ## Why? Fix found from regression ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks Fixes a regression introduced in #10223 --- service/frontend/workflow_handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index ae9c761fccf..d98a304e687 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -5332,7 +5332,7 @@ func (wh *WorkflowHandler) UpdateWorkflowExecution( metricsHandler := wh.metricsScope(ctx).WithTags(metrics.HeaderCallsiteTag("UpdateWorkflowExecution")) metrics.HeaderSize.With(metricsHandler).Record(int64(request.GetRequest().GetInput().GetHeader().Size())) - metrics.EventBlobSize.With(metricsHandler).Record(int64(request.GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) + metrics.EventBlobSize.With(wh.metricsScope(ctx)).Record(int64(request.GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) switch request.WaitPolicy.LifecycleStage { // nolint:exhaustive case enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED: From daa9bcc97cd8569f4b44346c3116fb15b6dd8c38 Mon Sep 17 00:00:00 2001 From: Fred Tzeng <41805201+fretz12@users.noreply.github.com> Date: Wed, 13 May 2026 17:36:20 -0700 Subject: [PATCH 20/73] added docs on protorequire.ProtoEqual in testing.md (#10255) ## What changed? Added a short protorequire package subsection to docs/development/testing.md documenting protorequire.ProtoEqual and the new protorequire.IgnoreFields option, with a minimal usage example. ## Why? Follow-up to PR #9937. Without a doc entry, the new IgnoreFields helper is undiscoverable and contributors will keep reaching for the verbose cmp.Diff pattern. ## How did you test it? 
- [X] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- docs/development/testing.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/development/testing.md b/docs/development/testing.md index ca74abbd93c..7e41158db7f 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -160,6 +160,22 @@ It is *not* a substitute for regular error handling, validation, or control flow In functional tests, a failed soft assertion will not stop the test execution immediately, but it will ultimately fail the test. +### protorequire package + +Use `protorequire.ProtoEqual` to compare proto messages with proto semantics. +Prefer a single `ProtoEqual` call over asserting fields one-by-one, since it catches unexpected field changes and keeps the expected value next to the assertion. + +To ignore specific fields on the top-level message (e.g. non-deterministic timestamps), pass `protorequire.IgnoreFields`: + +```go +protorequire.ProtoEqual(t, expected, actual, + protorequire.IgnoreFields( + "execution_duration", + "schedule_time", + ), +) +``` + ### Test Cluster Use `testcore.NewEnv(t)` to create a test environment with access to a Temporal cluster for end-to-end testing. From 0a6b0a9c4eb72aaee2bec256028ce1e964d4a61d Mon Sep 17 00:00:00 2001 From: Kannan Date: Wed, 13 May 2026 20:10:52 -0700 Subject: [PATCH 21/73] Add SyncMatchOutcome to hooks API with rate limiting signal (#10045) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What Add `SyncMatchOutcome` enum to the hooks API (NotMatched, Success, RateLimited) and plumb rate limiting signal from the matcher through to hooks. Keep `IsSyncMatch` as deprecated for backwards compatibility. ## Why Hook consumers (e.g. scaling operators) need to distinguish rate limiting from genuine lack of pollers when deciding whether to scale up workers. 
## How did you test it? Unit tests — rate-limited and non-rate-limited scenarios, multiple hooks invocation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 --- .../matching/hooks/task_lifecycle_hooks.go | 17 +++- service/matching/matcher_data.go | 15 ++-- service/matching/matcher_data_test.go | 17 ++++ .../physical_task_queue_manager_test.go | 4 +- .../matching/task_queue_partition_manager.go | 25 ++++-- .../task_queue_partition_manager_test.go | 83 +++++++++++++++++-- 6 files changed, 140 insertions(+), 21 deletions(-) diff --git a/service/matching/hooks/task_lifecycle_hooks.go b/service/matching/hooks/task_lifecycle_hooks.go index 7f7d3b7a96f..5ce110cd5fd 100644 --- a/service/matching/hooks/task_lifecycle_hooks.go +++ b/service/matching/hooks/task_lifecycle_hooks.go @@ -9,6 +9,20 @@ import ( "go.temporal.io/server/common/tqid" ) +// SyncMatchOutcome describes the outcome of a sync match attempt from the hook's perspective. +type SyncMatchOutcome int + +const ( + // Default zero value; should not be used explicitly. + SyncMatchOutcomeUnspecified SyncMatchOutcome = iota + // The task was not sync-matched. Catch-all for reasons not covered by more specific outcomes. + SyncMatchOutcomeNotMatched + // The task was sync-matched successfully. + SyncMatchOutcomeSuccess + // A poller was available but rate limiting blocked the match. + SyncMatchOutcomeRateLimited +) + type ( // TaskQueuePartition is a simplified version of tqid.Partition that removes details // the hooks should not concern themselves with @@ -25,7 +39,8 @@ type ( } TaskAddHookDetails struct { DeploymentVersion *deploymentpb.WorkerDeploymentVersion - IsSyncMatch bool + IsSyncMatch bool // Deprecated: use SyncMatchOutcome instead. 
+ SyncMatchOutcome SyncMatchOutcome } TaskHookFactory interface { diff --git a/service/matching/matcher_data.go b/service/matching/matcher_data.go index 187c22c6137..5089bc1efcb 100644 --- a/service/matching/matcher_data.go +++ b/service/matching/matcher_data.go @@ -41,6 +41,8 @@ const ( syncMatchBacklogPresent // Sync match was attempted but no poller was available. syncMatchNoPoller + // A poller was available but rate limiting blocked the match. + syncMatchRateLimited ) type taskForwarderType int32 @@ -389,12 +391,15 @@ func (d *matcherData) MatchTaskImmediately(task *internalTask) syncMatchOutcome task.initMatch(d) d.tasks.Add(task) - d.findAndWakeMatches() + rateLimited := d.findAndWakeMatches() // don't wait, check if match() picked this one already if task.matchResult != nil { return syncMatchSuccess } d.tasks.Remove(task) + if rateLimited { + return syncMatchRateLimited + } return syncMatchNoPoller } @@ -504,8 +509,8 @@ func (d *matcherData) allowForwarding() (allowForwarding bool) { return delayToForwardingAllowed <= 0 } -// call with lock held -func (d *matcherData) findAndWakeMatches() { +// call with lock held. Returns true if a match was found but blocked by rate limiting. 
+func (d *matcherData) findAndWakeMatches() (rateLimited bool) { allowForwarding := d.canForward && d.allowForwarding() now := d.timeSource.Now().UnixNano() @@ -517,14 +522,14 @@ func (d *matcherData) findAndWakeMatches() { if task == nil || poller == nil { // no more current matches, stop rate limit timer if was running d.rateLimitTimer.unset() - return + return false } // check ready time delay := d.rateLimitManager.readyTimeForTask(task).delay(now) d.rateLimitTimer.set(d.timeSource, d.rematchAfterTimer, delay) if delay > 0 { - return // not ready yet, timer will call match later + return true // not ready yet, timer will call match later } // ready to signal match diff --git a/service/matching/matcher_data_test.go b/service/matching/matcher_data_test.go index ee376e85cc1..64073ec3d6c 100644 --- a/service/matching/matcher_data_test.go +++ b/service/matching/matcher_data_test.go @@ -210,6 +210,23 @@ func (s *MatcherDataSuite) TestMatchTaskImmediately() { s.Equal(t, pres.task) } +func (s *MatcherDataSuite) TestMatchTaskImmediatelyRateLimited() { + // Set rate limit to zero — blocks all matches. + s.md.rateLimitManager.SetEffectiveRPSAndSourceForTesting(0, enumspb.RATE_LIMIT_SOURCE_API) + s.md.rateLimitManager.UpdateSimpleRateLimitWithBurstForTesting(0) + + // Add a waiting poller. + go func() { + poller := &waitingPoller{startTime: s.now()} + s.md.EnqueuePollerAndWait(nil, poller) + }() + s.waitForPollers(1) + + // Sync match should fail due to rate limiting, not lack of poller. 
+ t := s.newSyncTask(nil) + s.Equal(syncMatchRateLimited, s.md.MatchTaskImmediately(t)) +} + func (s *MatcherDataSuite) TestMatchTaskImmediatelyDisabledBacklog() { // register some backlog with old tasks s.md.EnqueueTaskNoWait(s.newBacklogTask(123, 10*time.Minute, nil)) diff --git a/service/matching/physical_task_queue_manager_test.go b/service/matching/physical_task_queue_manager_test.go index 37e0471b61e..15de4b1badd 100644 --- a/service/matching/physical_task_queue_manager_test.go +++ b/service/matching/physical_task_queue_manager_test.go @@ -134,9 +134,9 @@ func TestReaderSignaling(t *testing.T) { task := newInternalTaskForSyncMatch(&persistencespb.TaskInfo{ CreateTime: timestamp.TimePtr(time.Now().UTC()), }, nil) - sync, err := s.tqMgr.TrySyncMatch(context.TODO(), task) + outcome, err := s.tqMgr.TrySyncMatch(context.TODO(), task) require.NoError(t, err) - require.True(t, sync) + require.Equal(t, syncMatchSuccess, outcome) require.Len(t, readerNotifications, 0, "Sync match should not signal taskReader") } diff --git a/service/matching/task_queue_partition_manager.go b/service/matching/task_queue_partition_manager.go index 1c587adb05b..0b70f9434e3 100644 --- a/service/matching/task_queue_partition_manager.go +++ b/service/matching/task_queue_partition_manager.go @@ -439,15 +439,15 @@ reredirectTask: return "", false, err } + var outcome syncMatchOutcome if isActive { - var outcome syncMatchOutcome outcome, err = syncMatchQueue.TrySyncMatch(ctx, syncMatchTask) syncMatched = outcome == syncMatchSuccess if syncMatched && !pm.shouldBacklogSyncMatchTaskOnError(err) { // Only fire hooks for non-forwarded tasks. Forwarded tasks already had hooks fired // on the child partition that originally received the task. if params.forwardInfo == nil { - pm.processTaskAddHooks(ctx, targetVersion, syncMatched) + pm.processTaskAddHooks(ctx, targetVersion, outcome) } // Build ID is not returned for sync match. 
The returned build ID is used by History to update @@ -476,17 +476,32 @@ reredirectTask: err = spoolQueue.SpoolTask(params.taskInfo) if err == nil { - pm.processTaskAddHooks(ctx, targetVersion, false) + pm.processTaskAddHooks(ctx, targetVersion, outcome) } return assignedBuildId, false, err } -func (pm *taskQueuePartitionManagerImpl) processTaskAddHooks(ctx context.Context, targetVersion *deploymentspb.WorkerDeploymentVersion, syncMatched bool) { +func syncMatchOutcomeToHook(outcome syncMatchOutcome) hooks.SyncMatchOutcome { + switch outcome { + case syncMatchSuccess: + return hooks.SyncMatchOutcomeSuccess + case syncMatchRateLimited: + return hooks.SyncMatchOutcomeRateLimited + case syncMatchUnspecified: + return hooks.SyncMatchOutcomeUnspecified + default: + return hooks.SyncMatchOutcomeNotMatched + } +} + +func (pm *taskQueuePartitionManagerImpl) processTaskAddHooks(ctx context.Context, targetVersion *deploymentspb.WorkerDeploymentVersion, outcome syncMatchOutcome) { for _, l := range pm.taskHooks { + hookOutcome := syncMatchOutcomeToHook(outcome) l.ProcessTaskAdd(ctx, &hooks.TaskAddHookDetails{ DeploymentVersion: worker_versioning.ExternalWorkerDeploymentVersionFromVersion(targetVersion), - IsSyncMatch: syncMatched, + IsSyncMatch: hookOutcome == hooks.SyncMatchOutcomeSuccess, + SyncMatchOutcome: hookOutcome, }) } } diff --git a/service/matching/task_queue_partition_manager_test.go b/service/matching/task_queue_partition_manager_test.go index 9b5b4980395..85bfd0f7d49 100644 --- a/service/matching/task_queue_partition_manager_test.go +++ b/service/matching/task_queue_partition_manager_test.go @@ -1350,7 +1350,7 @@ type capturingTaskMatchHook struct { type capturedTaskMatchDetails struct { TaskQueueName string TaskQueueType enumspb.TaskQueueType - IsSyncMatch bool + SyncMatchOutcome hooks.SyncMatchOutcome DeploymentVersion *deploymentpb.WorkerDeploymentVersion } @@ -1370,9 +1370,9 @@ func (h *capturingTaskMatchHook) ProcessTaskAdd(ctx context.Context, event *hook 
h.mu.Lock() defer h.mu.Unlock() details := capturedTaskMatchDetails{ - TaskQueueName: h.taskQueueName, - TaskQueueType: h.taskQueueType, - IsSyncMatch: event.IsSyncMatch, + TaskQueueName: h.taskQueueName, + TaskQueueType: h.taskQueueType, + SyncMatchOutcome: event.SyncMatchOutcome, } if event.DeploymentVersion != nil { details.DeploymentVersion = &deploymentpb.WorkerDeploymentVersion{ @@ -1610,7 +1610,7 @@ func (s *PartitionManagerTestSuite) TestTaskAddHooks_AddHookSyncMatch() { s.Require().Len(calls, 1) s.Equal(taskQueueName, calls[0].TaskQueueName) s.Equal(enumspb.TASK_QUEUE_TYPE_WORKFLOW, calls[0].TaskQueueType) - s.True(calls[0].IsSyncMatch) + s.Equal(hooks.SyncMatchOutcomeSuccess, calls[0].SyncMatchOutcome) s.Nil(calls[0].DeploymentVersion) } @@ -1634,7 +1634,74 @@ func (s *PartitionManagerTestSuite) TestTaskAddHooks_AddHookNoSyncMatch() { s.Require().Len(calls, 1) s.Equal(taskQueueName, calls[0].TaskQueueName) s.Equal(enumspb.TASK_QUEUE_TYPE_WORKFLOW, calls[0].TaskQueueType) - s.False(calls[0].IsSyncMatch) + s.Equal(hooks.SyncMatchOutcomeNotMatched, calls[0].SyncMatchOutcome) +} + +func (s *PartitionManagerTestSuite) TestTaskAddHooks_RateLimited() { + if !s.newMatcher { + s.T().Skip("rate limiting signal from matcher is only available in new matcher") + } + hook := &capturingTaskMatchHook{} + pm, cleanup := s.setupPartitionManagerWithTaskHookFactories([]hooks.TaskHookFactory{hook}) + defer cleanup() + + // Set rate limit to zero RPS — this blocks all sync matches due to rate limiting. + pm.rateLimitManager.SetEffectiveRPSAndSourceForTesting(0, enumspb.RATE_LIMIT_SOURCE_API) + pm.rateLimitManager.UpdateSimpleRateLimitWithBurstForTesting(0) + + // Set up a waiting poller so sync match would succeed if not rate-limited. 
+ go func() { + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + task, _, _ := pm.PollTask(ctx, &pollMetadata{ + workerVersionCapabilities: &commonpb.WorkerVersionCapabilities{ + BuildId: "", + UseVersioning: false, + }, + }) + if task != nil && task.responseC != nil { + close(task.responseC) + } + }() + pq := pm.defaultQueue().(*physicalTaskQueueManagerImpl) + s.Require().Eventually(pq.matcher.HasWaitingPoller, 2*time.Second, time.Millisecond) + + // AddTask should fall through to spool because rate limiting blocked sync match. + _, syncMatched, err := pm.AddTask(context.Background(), addTaskParams{ + taskInfo: &persistencespb.TaskInfo{ + NamespaceId: namespaceID, + RunId: "run", + WorkflowId: "wf", + }, + }) + s.Require().NoError(err) + s.Require().False(syncMatched) + + calls := hook.getCalls() + s.Require().Len(calls, 1) + s.Equal(hooks.SyncMatchOutcomeRateLimited, calls[0].SyncMatchOutcome) +} + +func (s *PartitionManagerTestSuite) TestTaskAddHooks_NotRateLimited() { + hook := &capturingTaskMatchHook{} + pm, cleanup := s.setupPartitionManagerWithTaskHookFactories([]hooks.TaskHookFactory{hook}) + defer cleanup() + + // No rate limiting configured — task should spool normally without rate limit flag. 
+ _, syncMatched, err := pm.AddTask(context.Background(), addTaskParams{ + taskInfo: &persistencespb.TaskInfo{ + NamespaceId: namespaceID, + RunId: "run", + WorkflowId: "wf", + VersionDirective: worker_versioning.MakeBuildIdDirective("buildXYZ"), + }, + }) + s.Require().NoError(err) + s.Require().False(syncMatched) + + calls := hook.getCalls() + s.Require().Len(calls, 1) + s.Equal(hooks.SyncMatchOutcomeNotMatched, calls[0].SyncMatchOutcome) } func (s *PartitionManagerTestSuite) TestTaskAddHooks_ForwardedSyncMatch_HooksNotInvoked() { @@ -1741,8 +1808,8 @@ func (s *PartitionManagerTestSuite) TestTaskAddHooks_MultipleHooksInvoked() { s.Len(hook1.getCalls(), 1) s.Len(hook2.getCalls(), 1) - s.False(hook1.getCalls()[0].IsSyncMatch) - s.False(hook2.getCalls()[0].IsSyncMatch) + s.Equal(hooks.SyncMatchOutcomeNotMatched, hook1.getCalls()[0].SyncMatchOutcome) + s.Equal(hooks.SyncMatchOutcomeNotMatched, hook2.getCalls()[0].SyncMatchOutcome) } type mockUserDataManager struct { From 71d978c734e0219bd7a71f26ccec72d0bfab91a8 Mon Sep 17 00:00:00 2001 From: samm Date: Thu, 14 May 2026 08:29:41 -0700 Subject: [PATCH 22/73] Tweaks cancellation error log (#10254) ## What changed? Log message for nexus operation cancellation invocation ## Why? Makes the log distinct from errors during operation invocation. ## How did you test it? 
- [x] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- chasm/lib/nexusoperation/cancellation_tasks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chasm/lib/nexusoperation/cancellation_tasks.go b/chasm/lib/nexusoperation/cancellation_tasks.go index 81b28697766..893b3c1793c 100644 --- a/chasm/lib/nexusoperation/cancellation_tasks.go +++ b/chasm/lib/nexusoperation/cancellation_tasks.go @@ -114,7 +114,7 @@ func (h *cancellationInvocationTaskHandler) Execute( endpoint, err := h.lookupEndpoint(ctx, ns.ID(), args.endpointID, args.endpointName) if err != nil { if _, ok := errors.AsType[*serviceerror.NotFound](err); ok { - h.logger.Error("endpoint not found while processing invocation task", tag.Error(err)) + h.logger.Error("endpoint not found while processing cancellation invocation", tag.Error(err)) handlerErr := nexus.NewHandlerErrorf(nexus.HandlerErrorTypeNotFound, "endpoint not registered") return h.saveCancellationResult(ctx, cancelRef, handlerErr) } From 4f5884bae424cd4c236a3b21ba22faf4e05d1942 Mon Sep 17 00:00:00 2001 From: michaely520 Date: Thu, 14 May 2026 08:30:18 -0700 Subject: [PATCH 23/73] Queue fixes (#10247) ## What changed? Reverting dropping tasks when feature flag disabled -> returning error Interface change for active or not based on business id ## Why? regression ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- service/history/outbound_queue_standby_task_executor.go | 3 +-- service/history/queues/active_standby_executor.go | 2 +- service/history/queues/active_standby_executor_test.go | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/service/history/outbound_queue_standby_task_executor.go b/service/history/outbound_queue_standby_task_executor.go index 7f931c0b39a..fdddeeb42b2 100644 --- a/service/history/outbound_queue_standby_task_executor.go +++ b/service/history/outbound_queue_standby_task_executor.go @@ -166,10 +166,9 @@ func (e *outboundQueueStandbyTaskExecutor) executeStateMachineTask( "standby task executor returned retryable error", err, ) - return err } - return nil + return err } func (e *outboundQueueStandbyTaskExecutor) executeChasmSideEffectTask( diff --git a/service/history/queues/active_standby_executor.go b/service/history/queues/active_standby_executor.go index 8fbe7dd9ef4..453ba40ba6e 100644 --- a/service/history/queues/active_standby_executor.go +++ b/service/history/queues/active_standby_executor.go @@ -62,7 +62,7 @@ func (e *activeStandbyExecutor) isActiveTask( return true } - if !entry.ActiveInCluster(e.currentClusterName) { + if entry.ActiveClusterName(namespace.RoutingKey{ID: executable.GetWorkflowID()}) != e.currentClusterName { e.logger.Debug("Process task as standby.", tag.WorkflowNamespaceID(namespaceID), tag.Value(executable.GetTask())) return false } diff --git a/service/history/queues/active_standby_executor_test.go b/service/history/queues/active_standby_executor_test.go index 17b76357720..a4caa2d1416 100644 --- a/service/history/queues/active_standby_executor_test.go +++ b/service/history/queues/active_standby_executor_test.go @@ -54,6 +54,7 @@ func (s *executorSuite) SetupTest() { func (s *executorSuite) TestExecute_Active() { executable := NewMockExecutable(s.ctrl) 
executable.EXPECT().GetNamespaceID().Return("namespace_id") + executable.EXPECT().GetWorkflowID().Return("workflow_id") executable.EXPECT().GetTask().Return(nil) ns := namespace.NewGlobalNamespaceForTest(nil, nil, &persistencespb.NamespaceReplicationConfig{ ActiveClusterName: currentCluster, @@ -73,6 +74,7 @@ func (s *executorSuite) TestExecute_Active() { func (s *executorSuite) TestExecute_Standby() { executable := NewMockExecutable(s.ctrl) executable.EXPECT().GetNamespaceID().Return("namespace_id") + executable.EXPECT().GetWorkflowID().Return("workflow_id") executable.EXPECT().GetTask().Return(nil) ns := namespace.NewGlobalNamespaceForTest(nil, nil, &persistencespb.NamespaceReplicationConfig{ ActiveClusterName: nonCurrentCluster, From 51a63b7be08d9613cfe6ff7ac1431ecae5ba38ea Mon Sep 17 00:00:00 2001 From: Vladyslav Simonenko Date: Thu, 14 May 2026 09:41:45 -0700 Subject: [PATCH 24/73] PGX SimpleProtocol test suite (#10198) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changed? Adds TestPGXSimpleProtocol — a new entry in the persistence integration suite that runs `PostgreSQLSuite` under the `postgres12_pgx` plugin with `default_query_exec_mode=simple_protocol`. ## Why? Regression coverage for issues like [#9804](https://github.com/temporalio/temporal/issues/9804). With pgx ≤ v5.9.1, current_executions.state/status (proto-enum-typed int32 fields with a String() method) were text-encoded via fmt.Stringer and rejected by `Postgres` on simple/exec protocol, the path users land on behind PgBouncer in transaction pooling. pgx v5.9.2 fixed it upstream; this test makes sure we notice if pgx is ever downgraded or if a similar issue sneaks in. ## How did you test it? 
- [X] built - [X] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) Verified locally: passes on pgx v5.9.2; fails reproducibly on v5.9.1 with invalid input syntax for type integer: "Created" matching [#9804](https://github.com/temporalio/temporal/issues/9804). ## Potential risks Adds one additional pass through PostgreSQLSuite to the Integration test job. Job timeout is 15 min, so should be fine, but worth observing. --- common/persistence/persistencetest/queues.go | 4 +- .../persistencetest/queues_test.go | 18 ++-- common/persistence/tests/postgresql_test.go | 97 +++++++++++-------- .../persistence/tests/postgresql_test_util.go | 13 +-- 4 files changed, 75 insertions(+), 57 deletions(-) diff --git a/common/persistence/persistencetest/queues.go b/common/persistence/persistencetest/queues.go index f9001ab8cbe..2c3d21e34dd 100644 --- a/common/persistence/persistencetest/queues.go +++ b/common/persistence/persistencetest/queues.go @@ -43,8 +43,8 @@ func GetQueueKey(t *testing.T, opts ...func(p *getQueueKeyParams)) persistence.Q return persistence.QueueKey{ QueueType: params.QueueType, Category: params.Category, - SourceCluster: "test-source-cluster-" + t.Name(), - TargetCluster: "test-target-cluster-" + t.Name(), + SourceCluster: "src-" + t.Name(), + TargetCluster: "tgt-" + t.Name(), } } diff --git a/common/persistence/persistencetest/queues_test.go b/common/persistence/persistencetest/queues_test.go index 41d01647a0a..43a3d6a08c9 100644 --- a/common/persistence/persistencetest/queues_test.go +++ b/common/persistence/persistencetest/queues_test.go @@ -3,7 +3,7 @@ package persistencetest_test import ( "testing" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.temporal.io/server/common/persistence" "go.temporal.io/server/common/persistence/persistencetest" "go.temporal.io/server/service/history/tasks" @@ -13,10 +13,10 @@ func TestGetQueueKey_Default(t 
*testing.T) { t.Parallel() queueKey := persistencetest.GetQueueKey(t) - assert.Equal(t, persistence.QueueTypeHistoryNormal, queueKey.QueueType) - assert.Equal(t, tasks.CategoryTransfer, queueKey.Category) - assert.Equal(t, "test-source-cluster-TestGetQueueKey_Default", queueKey.SourceCluster) - assert.Equal(t, "test-target-cluster-TestGetQueueKey_Default", queueKey.TargetCluster) + require.Equal(t, persistence.QueueTypeHistoryNormal, queueKey.QueueType) + require.Equal(t, tasks.CategoryTransfer, queueKey.Category) + require.Equal(t, "src-TestGetQueueKey_Default", queueKey.SourceCluster) + require.Equal(t, "tgt-TestGetQueueKey_Default", queueKey.TargetCluster) } func TestGetQueueKey_WithOptions(t *testing.T) { @@ -26,8 +26,8 @@ func TestGetQueueKey_WithOptions(t *testing.T) { persistencetest.WithQueueType(persistence.QueueTypeHistoryDLQ), persistencetest.WithCategory(tasks.CategoryTimer), ) - assert.Equal(t, persistence.QueueTypeHistoryDLQ, queueKey.QueueType) - assert.Equal(t, tasks.CategoryTimer, queueKey.Category) - assert.Equal(t, "test-source-cluster-TestGetQueueKey_WithOptions", queueKey.SourceCluster) - assert.Equal(t, "test-target-cluster-TestGetQueueKey_WithOptions", queueKey.TargetCluster) + require.Equal(t, persistence.QueueTypeHistoryDLQ, queueKey.QueueType) + require.Equal(t, tasks.CategoryTimer, queueKey.Category) + require.Equal(t, "src-TestGetQueueKey_WithOptions", queueKey.SourceCluster) + require.Equal(t, "tgt-TestGetQueueKey_WithOptions", queueKey.TargetCluster) } diff --git a/common/persistence/tests/postgresql_test.go b/common/persistence/tests/postgresql_test.go index 20c4df7246f..a969bedf662 100644 --- a/common/persistence/tests/postgresql_test.go +++ b/common/persistence/tests/postgresql_test.go @@ -19,11 +19,12 @@ import ( type PostgreSQLSuite struct { suite.Suite - pluginName string + pluginName string + connectAttrs map[string]string } func (p *PostgreSQLSuite) TestPostgreSQLShardStoreSuite() { - testData, tearDown := 
setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() shardStore, err := testData.Factory.NewShardStore() @@ -41,7 +42,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLShardStoreSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLExecutionMutableStateStoreSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() shardStore, err := testData.Factory.NewShardStore() @@ -64,7 +65,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLExecutionMutableStateStoreSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLExecutionMutableStateTaskStoreSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() shardStore, err := testData.Factory.NewShardStore() @@ -87,7 +88,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLExecutionMutableStateTaskStoreSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryStoreSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() store, err := testData.Factory.NewExecutionStore() @@ -100,7 +101,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryStoreSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() taskQueueStore, err := testData.Factory.NewTaskStore() @@ -113,7 +114,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLFairTaskQueueSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() 
taskQueueStore, err := testData.Factory.NewFairTaskStore() @@ -126,7 +127,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLFairTaskQueueSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueTaskSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() taskQueueStore, err := testData.Factory.NewTaskStore() @@ -139,7 +140,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueFairTaskSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() taskQueueStore, err := testData.Factory.NewFairTaskStore() @@ -152,7 +153,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueFairTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLTaskQueueUserDataSuite() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() taskQueueStore, err := testData.Factory.NewTaskStore() @@ -204,7 +205,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLQueuePersistence() { // SQL store tests func (p *PostgreSQLSuite) TestPostgreSQLNamespaceSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -221,7 +222,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLNamespaceSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLQueueMessageSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, 
cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -238,7 +239,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLQueueMessageSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLQueueMetadataSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -255,7 +256,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLQueueMetadataSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -272,7 +273,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskV2Suite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -289,7 +290,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskV2Suite() { } func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskQueueSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -306,7 +307,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLMatchingTaskQueueSuite() { } func (p 
*PostgreSQLSuite) TestPostgreSQLMatchingFairTaskQueueSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -323,7 +324,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLMatchingFairTaskQueueSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryShardSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -340,7 +341,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryShardSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryNodeSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -357,7 +358,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryNodeSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryTreeSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -374,7 +375,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryTreeSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryCurrentExecutionSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) 
SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -391,7 +392,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryCurrentExecutionSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryCurrentChasmExecutionSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -408,7 +409,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryCurrentChasmExecutionSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -425,7 +426,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryTransferTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -442,7 +443,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryTransferTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryTimerTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, 
resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -459,7 +460,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryTimerTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryReplicationTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -476,7 +477,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryReplicationTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryVisibilityTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -493,7 +494,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryVisibilityTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryReplicationDLQTaskSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -510,7 +511,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryReplicationDLQTaskSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionBufferSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -527,7 +528,7 @@ func (p *PostgreSQLSuite) 
TestPostgreSQLHistoryExecutionBufferSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionActivitySuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -544,7 +545,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionActivitySuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionChildWorkflowSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -561,7 +562,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionChildWorkflowSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionTimerSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -578,7 +579,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionTimerSuite() { } func (p *PostgreSQLSuite) TestPostgresHistoryExecutionChasmSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) @@ -596,7 +597,7 @@ func (p *PostgreSQLSuite) TestPostgresHistoryExecutionChasmSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionRequestCancelSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) 
SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -613,7 +614,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionRequestCancelSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSignalSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -630,7 +631,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSignalSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSignalRequestSuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindMain, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -647,7 +648,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLHistoryExecutionSignalRequestSuite() { } func (p *PostgreSQLSuite) TestPostgreSQLVisibilitySuite() { - cfg := NewPostgreSQLConfig(p.pluginName) + cfg := NewPostgreSQLConfig(p.pluginName, p.connectAttrs) SetupPostgreSQLDatabase(p.T(), cfg) SetupPostgreSQLSchema(p.T(), cfg) store, err := sql.NewSQLDB(sqlplugin.DbKindVisibility, cfg, resolver.NewNoopResolver(), log.NewTestLogger(), metrics.NoopMetricsHandler) @@ -664,7 +665,7 @@ func (p *PostgreSQLSuite) TestPostgreSQLVisibilitySuite() { } func (p *PostgreSQLSuite) TestPostgreSQLClosedConnectionError() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) defer tearDown() s := newConnectionSuite(p.T(), testData.Factory) @@ -672,13 
+673,13 @@ func (p *PostgreSQLSuite) TestPostgreSQLClosedConnectionError() { } func (p *PostgreSQLSuite) TestPGQueueV2() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) p.T().Cleanup(tearDown) RunQueueV2TestSuiteForSQL(p.T(), testData.Factory) } func (p *PostgreSQLSuite) TestPostgreSQLNexusEndpointPersistence() { - testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName) + testData, tearDown := setUpPostgreSQLTest(p.T(), p.pluginName, p.connectAttrs) p.T().Cleanup(tearDown) RunNexusEndpointTestSuiteForSQL(p.T(), testData.Factory) } @@ -694,3 +695,19 @@ func TestPGX(t *testing.T) { s := &PostgreSQLSuite{pluginName: "postgres12_pgx"} suite.Run(t, s) } + +// TestPGXSP exercises the pgx plugin in simple-protocol mode, the +// configuration users land on when fronting Postgres with PgBouncer in +// transaction pooling. In that mode pgx text-encodes parameters client-side +// without OID hints, which previously caused proto enum fields (e.g. +// CurrentExecutionsRow.State) to be serialized via fmt.Stringer, producing +// values like "Created" instead of integers and breaking inserts into +// integer columns. See temporalio/temporal#9804. 
+func TestPGXSP(t *testing.T) { + t.Parallel() + s := &PostgreSQLSuite{ + pluginName: "postgres12_pgx", + connectAttrs: map[string]string{"default_query_exec_mode": "simple_protocol"}, + } + suite.Run(t, s) +} diff --git a/common/persistence/tests/postgresql_test_util.go b/common/persistence/tests/postgresql_test_util.go index 57d2d34e199..049feed8df1 100644 --- a/common/persistence/tests/postgresql_test_util.go +++ b/common/persistence/tests/postgresql_test_util.go @@ -45,9 +45,9 @@ type ( } ) -func setUpPostgreSQLTest(t *testing.T, pluginName string) (PostgreSQLTestData, func()) { +func setUpPostgreSQLTest(t *testing.T, pluginName string, connectAttrs map[string]string) (PostgreSQLTestData, func()) { var testData PostgreSQLTestData - testData.Cfg = NewPostgreSQLConfig(pluginName) + testData.Cfg = NewPostgreSQLConfig(pluginName, connectAttrs) testData.Logger = log.NewZapLogger(zaptest.NewLogger(t)) mh := metricstest.NewCaptureHandler() testData.Metrics = mh.StartCapture() @@ -73,7 +73,7 @@ func setUpPostgreSQLTest(t *testing.T, pluginName string) (PostgreSQLTestData, f } // NewPostgreSQLConfig returns a new MySQL config for test -func NewPostgreSQLConfig(pluginName string) *config.SQL { +func NewPostgreSQLConfig(pluginName string, connectAttrs map[string]string) *config.SQL { return &config.SQL{ User: testPostgreSQLUser, Password: testPostgreSQLPassword, @@ -81,9 +81,10 @@ func NewPostgreSQLConfig(pluginName string) *config.SQL { environment.GetPostgreSQLAddress(), strconv.Itoa(environment.GetPostgreSQLPort()), ), - ConnectProtocol: testPostgreSQLConnectionProtocol, - PluginName: pluginName, - DatabaseName: testPostgreSQLDatabaseNamePrefix + shuffle.String(testPostgreSQLDatabaseNameSuffix), + ConnectProtocol: testPostgreSQLConnectionProtocol, + PluginName: pluginName, + DatabaseName: testPostgreSQLDatabaseNamePrefix + shuffle.String(testPostgreSQLDatabaseNameSuffix), + ConnectAttributes: connectAttrs, } } From 824f28dc235ccfbaa7ca9b8e58c115b175412b2b Mon Sep 17 
00:00:00 2001 From: Rodrigo Zhou Date: Thu, 14 May 2026 11:46:14 -0700 Subject: [PATCH 25/73] Add flag to skip setting up ES cluster settings (#10152) ## What changed? Add flag to skip setting up ES cluster settings. Eg: `temporal-elasticsearch-tool setup-schema --skip-cluster-settings` ## Why? Allow users to not use our provided cluster settings (eg: avoid overwriting their cluster settings). https://github.com/temporalio/temporal/issues/9857 ## How did you test it? - [x] built - [x] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks --- tools/elasticsearch/handler.go | 11 +++++++---- tools/elasticsearch/main.go | 22 ++++++++++++++-------- tools/elasticsearch/tasks.go | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/tools/elasticsearch/handler.go b/tools/elasticsearch/handler.go index 83fe1b4a65a..352fc6995c9 100644 --- a/tools/elasticsearch/handler.go +++ b/tools/elasticsearch/handler.go @@ -48,10 +48,13 @@ func setupSchema(cli *cli.Context, logger log.Logger) error { return err } - settingsContent, err := schema.ElasticsearchClusterSettings() - if err != nil { - logger.Error("Unable to load embedded cluster settings.", tag.Error(err)) - return err + settingsContent := "" + if !cli.Bool(CLIOptSkipClusterSettings) { + settingsContent, err = schema.ElasticsearchClusterSettings() + if err != nil { + logger.Error("Unable to load embedded cluster settings.", tag.Error(err)) + return err + } } templateContent, err := schema.ElasticsearchIndexTemplate() diff --git a/tools/elasticsearch/main.go b/tools/elasticsearch/main.go index 1408ac470d5..21fb75cfc6f 100644 --- a/tools/elasticsearch/main.go +++ b/tools/elasticsearch/main.go @@ -9,15 +9,17 @@ import ( ) const ( - CLIOptVisibilityIndex = "index" - CLIOptAWSCredentials = "aws-credentials" - CLIOptAWSToken = "aws-session-token" - CLIOptFailSilently = "fail" + CLIOptVisibilityIndex = "index" + 
CLIOptAWSCredentials = "aws-credentials" + CLIOptAWSToken = "aws-session-token" + CLIOptFailSilently = "fail" + CLIOptSkipClusterSettings = "skip-cluster-settings" - CLIFlagVisibilityIndex = CLIOptVisibilityIndex + ", i" - CLIFlagAWSToken = CLIOptAWSToken - CLIFlagAWSCredentials = CLIOptAWSCredentials + ", aws" - CLIFlagFailSilently = CLIOptFailSilently + CLIFlagVisibilityIndex = CLIOptVisibilityIndex + ", i" + CLIFlagAWSToken = CLIOptAWSToken + CLIFlagAWSCredentials = CLIOptAWSCredentials + ", aws" + CLIFlagFailSilently = CLIOptFailSilently + CLIFlagSkipClusterSettings = CLIOptSkipClusterSettings ) // RunTool runs the temporal-elasticsearch-tool command line tool @@ -127,6 +129,10 @@ func BuildCLIOptions() *cli.App { Name: CLIFlagFailSilently, Usage: "fail silently on HTTP errors", }, + cli.BoolFlag{ + Name: CLIFlagSkipClusterSettings, + Usage: "skip setting up cluster settings", + }, }, Action: func(c *cli.Context) error { cliHandler(c, setupSchema, logger) diff --git a/tools/elasticsearch/tasks.go b/tools/elasticsearch/tasks.go index c10fc21162b..4e013416d3e 100644 --- a/tools/elasticsearch/tasks.go +++ b/tools/elasticsearch/tasks.go @@ -31,7 +31,7 @@ type SetupTask struct { func (task *SetupTask) setupClusterSettings() error { config := task.config if len(config.SettingsContent) == 0 { - task.logger.Info("Skipping cluster settings update, no embedded settings content") + task.logger.Info("Skipping cluster settings update") return nil } From cbc9ea327f14a98fe2c7289db12d049463bc9806 Mon Sep 17 00:00:00 2001 From: Prathyush PV Date: Thu, 14 May 2026 13:29:21 -0700 Subject: [PATCH 26/73] Restore operator rate burst to base burst (#10271) ## What changed? Revert `OperatorRateBurstImpl.Burst()` to `baseRateBurstFn.Burst()`. ## Why? We don't have to reduce the burst value for operator priority ## How did you test it? 
- [x] built - [x] covered by existing tests --- common/quotas/rate_burst.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/quotas/rate_burst.go b/common/quotas/rate_burst.go index 3c86b89ed11..ccd8982e8af 100644 --- a/common/quotas/rate_burst.go +++ b/common/quotas/rate_burst.go @@ -202,5 +202,5 @@ func (c *OperatorRateBurstImpl) Rate() float64 { } func (c *OperatorRateBurstImpl) Burst() int { - return int(c.operatorRateRatio() * float64(c.baseRateBurstFn.Burst())) + return c.baseRateBurstFn.Burst() } From 48c15307364160eef53fabd855daa4da25f91148 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 14 May 2026 13:37:46 -0700 Subject: [PATCH 27/73] Validate user metadata of StartNexusOperationExecutionRequest (#10257) ## What changed? Added validation of the user metadata in the `StartNexusOperationExecutionRequest`. ## Why? All other fields are validated. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) --- chasm/lib/nexusoperation/config.go | 4 ++++ chasm/lib/nexusoperation/validator.go | 15 +++++++++++++++ chasm/lib/nexusoperation/validator_test.go | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/chasm/lib/nexusoperation/config.go b/chasm/lib/nexusoperation/config.go index 2159e25b86a..69fc695a5eb 100644 --- a/chasm/lib/nexusoperation/config.go +++ b/chasm/lib/nexusoperation/config.go @@ -235,6 +235,8 @@ type Config struct { CallbackURLTemplate dynamicconfig.TypedPropertyFn[*template.Template] UseSystemCallbackURL dynamicconfig.BoolPropertyFn PayloadSizeLimitWarn dynamicconfig.IntPropertyFnWithNamespaceFilter + MaxUserMetadataSummarySize dynamicconfig.IntPropertyFnWithNamespaceFilter + MaxUserMetadataDetailsSize dynamicconfig.IntPropertyFnWithNamespaceFilter UseNewFailureWireFormat dynamicconfig.BoolPropertyFnWithNamespaceFilter RecordCancelRequestCompletionEvents 
dynamicconfig.BoolPropertyFn VisibilityMaxPageSize dynamicconfig.IntPropertyFnWithNamespaceFilter @@ -262,6 +264,8 @@ func configProvider(dc *dynamicconfig.Collection, cfg *config.Persistence) *Conf MaxOperationScheduleToCloseTimeout: MaxOperationScheduleToCloseTimeout.Get(dc), PayloadSizeLimit: dynamicconfig.BlobSizeLimitError.Get(dc), PayloadSizeLimitWarn: dynamicconfig.BlobSizeLimitWarn.Get(dc), + MaxUserMetadataSummarySize: dynamicconfig.MaxUserMetadataSummarySize.Get(dc), + MaxUserMetadataDetailsSize: dynamicconfig.MaxUserMetadataDetailsSize.Get(dc), CallbackURLTemplate: CallbackURLTemplate.Get(dc), UseSystemCallbackURL: UseSystemCallbackURL.Get(dc), UseNewFailureWireFormat: UseNewFailureWireFormat.Get(dc), diff --git a/chasm/lib/nexusoperation/validator.go b/chasm/lib/nexusoperation/validator.go index 181e06e0b93..de9028f5a7b 100644 --- a/chasm/lib/nexusoperation/validator.go +++ b/chasm/lib/nexusoperation/validator.go @@ -145,6 +145,21 @@ func validateAndNormalizeStartRequest( inputSize, config.PayloadSizeLimit(ns)) } + if summary := req.GetUserMetadata().GetSummary(); summary != nil && summary.Size() > config.MaxUserMetadataSummarySize(ns) { + return serviceerror.NewInvalidArgumentf( + "user_metadata.summary exceeds size limit. Length=%d Limit=%d", + summary.Size(), + config.MaxUserMetadataSummarySize(ns), + ) + } + if details := req.GetUserMetadata().GetDetails(); details != nil && details.Size() > config.MaxUserMetadataDetailsSize(ns) { + return serviceerror.NewInvalidArgumentf( + "user_metadata.details exceeds size limit. 
Length=%d Limit=%d", + details.Size(), + config.MaxUserMetadataDetailsSize(ns), + ) + } + loweredHeaders, err := ValidateAndLowercaseNexusHeaders(req.GetNexusHeader(), config.DisallowedOperationHeaders(), config.MaxOperationHeaderSize(ns)) if err != nil { return serviceerror.NewInvalidArgument(err.Error()) diff --git a/chasm/lib/nexusoperation/validator_test.go b/chasm/lib/nexusoperation/validator_test.go index 63bebd75504..ebb9d7f1d96 100644 --- a/chasm/lib/nexusoperation/validator_test.go +++ b/chasm/lib/nexusoperation/validator_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/require" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" + sdkpb "go.temporal.io/api/sdk/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" @@ -43,6 +44,8 @@ func TestValidateStartNexusOperationExecutionRequest(t *testing.T) { MaxOperationNameLength: func(string) int { return 10 }, PayloadSizeLimit: func(string) int { return 20 }, PayloadSizeLimitWarn: func(string) int { return 10 }, + MaxUserMetadataSummarySize: func(string) int { return 10 }, + MaxUserMetadataDetailsSize: func(string) int { return 20 }, MaxOperationHeaderSize: func(string) int { return 10 }, DisallowedOperationHeaders: func() []string { return []string{"disallowed-header"} }, MaxOperationScheduleToCloseTimeout: func(string) time.Duration { return time.Hour }, @@ -241,6 +244,24 @@ func TestValidateStartNexusOperationExecutionRequest(t *testing.T) { }, errMsg: "input exceeds size limit", }, + { + name: "user_metadata.summary - exceeds size limit", + mutate: func(r *workflowservice.StartNexusOperationExecutionRequest) { + r.UserMetadata = &sdkpb.UserMetadata{ + Summary: &commonpb.Payload{Data: []byte("too-long-summary")}, + } + }, + errMsg: "user_metadata.summary exceeds size limit", + }, + { + name: "user_metadata.details - exceeds size limit", + mutate: func(r 
*workflowservice.StartNexusOperationExecutionRequest) { + r.UserMetadata = &sdkpb.UserMetadata{ + Details: &commonpb.Payload{Data: []byte("this-details-payload-is-too-long")}, + } + }, + errMsg: "user_metadata.details exceeds size limit", + }, { name: "nexus_header - disallowed key", mutate: func(r *workflowservice.StartNexusOperationExecutionRequest) { From 4be69e5f46059b342762140d34363e24297bccb2 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 14 May 2026 13:50:08 -0700 Subject: [PATCH 28/73] long poll timeout constants (#10246) ## What changed? Introduces shared constants for default long poll timeout/buffer and applies them to SAA and SANO. ## Why? After a long internal technical discussion, 60s was determined as the default for the timeout. --- chasm/lib/activity/config.go | 7 +++---- chasm/lib/nexusoperation/config.go | 5 +++-- common/constants.go | 12 +++++++++--- service/history/api/get_workflow_util.go | 5 +---- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/chasm/lib/activity/config.go b/chasm/lib/activity/config.go index 9c838f5d78e..79f86946843 100644 --- a/chasm/lib/activity/config.go +++ b/chasm/lib/activity/config.go @@ -1,9 +1,8 @@ package activity import ( - "time" - "go.temporal.io/server/chasm/lib/callback" + "go.temporal.io/server/common" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/retrypolicy" ) @@ -17,13 +16,13 @@ var ( LongPollTimeout = dynamicconfig.NewNamespaceDurationSetting( "activity.longPollTimeout", - 20*time.Second, + common.DefaultLongPollTimeout, `Timeout for activity long-poll requests.`, ) LongPollBuffer = dynamicconfig.NewNamespaceDurationSetting( "activity.longPollBuffer", - time.Second, + common.DefaultLongPollBuffer, `A buffer used to adjust the activity long-poll timeouts. 
Specifically, activity long-poll requests are timed out at a time which leaves at least the buffer's duration remaining before the caller's deadline, if permitted by the caller's deadline.`, diff --git a/chasm/lib/nexusoperation/config.go b/chasm/lib/nexusoperation/config.go index 69fc695a5eb..990d8dbef03 100644 --- a/chasm/lib/nexusoperation/config.go +++ b/chasm/lib/nexusoperation/config.go @@ -6,6 +6,7 @@ import ( "text/template" "time" + "go.temporal.io/server/common" "go.temporal.io/server/common/backoff" "go.temporal.io/server/common/config" "go.temporal.io/server/common/dynamicconfig" @@ -15,14 +16,14 @@ import ( var LongPollTimeout = dynamicconfig.NewNamespaceDurationSetting( "nexusoperation.longPollTimeout", - 20*time.Second, + common.DefaultLongPollTimeout, `Maximum timeout for nexus operation long-poll requests. Actual wait may be shorter to leave longPollBuffer before the caller deadline.`, ) var LongPollBuffer = dynamicconfig.NewNamespaceDurationSetting( "nexusoperation.longPollBuffer", - time.Second, + common.DefaultLongPollBuffer, `A buffer used to adjust the nexus operation long-poll timeouts. Specifically, nexus operation long-poll requests are timed out at a time which leaves at least the buffer's duration remaining before the caller's deadline, if permitted by the caller's deadline.`, diff --git a/common/constants.go b/common/constants.go index fc506ee594c..d40c79fdcaa 100644 --- a/common/constants.go +++ b/common/constants.go @@ -31,10 +31,16 @@ const ( ) const ( - // MinLongPollTimeout is the minimum context timeout for long poll API, below which - // the request won't be processed + // DefaultLongPollTimeout is the default context timeout for a long poll request. + DefaultLongPollTimeout = time.Second * 60 + // DefaultLongPollBuffer is the buffer used to adjust a long poll request timeout. 
+ // Specifically, long poll requests are timed out at a time which leaves at least the buffer's duration + // remaining before the caller's deadline, if permitted by the caller's deadline. + DefaultLongPollBuffer = time.Second + // MinLongPollTimeout is the minimum context timeout for a long poll request, below which + // the request won't be processed. MinLongPollTimeout = time.Second * 2 - // CriticalLongPollTimeout is a threshold for the context timeout passed into long poll API, + // CriticalLongPollTimeout is a threshold for the context timeout passed into a long poll request, // below which a warning will be logged CriticalLongPollTimeout = time.Second * 10 ) diff --git a/service/history/api/get_workflow_util.go b/service/history/api/get_workflow_util.go index f2a8b380120..900b59d75b9 100644 --- a/service/history/api/get_workflow_util.go +++ b/service/history/api/get_workflow_util.go @@ -3,7 +3,6 @@ package api import ( "context" "fmt" - "time" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" @@ -25,8 +24,6 @@ import ( historyi "go.temporal.io/server/service/history/interfaces" ) -const longPollSoftTimeout = time.Second - //nolint:revive // cognitive complexity 39 (> max enabled 25) func GetOrPollWorkflowMutableState( ctx context.Context, @@ -191,7 +188,7 @@ func GetOrPollWorkflowMutableState( // Send back response just before caller context would time out. longPollInterval := shardContext.GetConfig().LongPollExpirationInterval(namespaceRegistry.Name().String()) - longPollCtx, cancel := contextutil.WithDeadlineBuffer(ctx, longPollInterval, longPollSoftTimeout) + longPollCtx, cancel := contextutil.WithDeadlineBuffer(ctx, longPollInterval, common.DefaultLongPollBuffer) defer cancel() for { From d6877f169ee6ee0961186b6e4239b22736b40cc0 Mon Sep 17 00:00:00 2001 From: Jacob Moody Date: Thu, 14 May 2026 16:08:34 -0500 Subject: [PATCH 29/73] metric for dispatched tasks (#10180) ## What changed? 
New counter for dispatched tasks, tagged with their dispatch result. ## Why? Recent debugging around incidents has made us desire this information. --- common/metrics/metric_defs.go | 10 +++++--- common/metrics/tags.go | 13 +++++++++++ .../matching/physical_task_queue_manager.go | 9 ++++++++ .../physical_task_queue_manager_interface.go | 4 ++++ .../physical_task_queue_manager_mock.go | 13 +++++++++++ .../matching/task_queue_partition_manager.go | 23 ++++++++++++++++++- 6 files changed, 68 insertions(+), 4 deletions(-) diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index cd4d2ea0b31..c4d806c277f 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -1204,9 +1204,13 @@ var ( LoadedPhysicalTaskQueueGauge = NewGaugeDef("loaded_physical_task_queue_count") TaskQueueStartedCounter = NewCounterDef("task_queue_started") TaskQueueStoppedCounter = NewCounterDef("task_queue_stopped") - TaskWriteThrottlePerTaskQueueCounter = NewCounterDef("task_write_throttle_count") - TaskWriteLatencyPerTaskQueue = NewTimerDef("task_write_latency") - TaskRewrites = NewCounterDef( + TasksAddedCounter = NewCounterDef( + "tasks_added", + WithDescription("Number of tasks arriving at a physical task queue, broken down by add result, forwarding, and versioning behavior"), + ) + TaskWriteThrottlePerTaskQueueCounter = NewCounterDef("task_write_throttle_count") + TaskWriteLatencyPerTaskQueue = NewTimerDef("task_write_latency") + TaskRewrites = NewCounterDef( "task_rewrites", WithDescription("Number of times tasks are rewritten to persistence after failing to process"), ) diff --git a/common/metrics/tags.go b/common/metrics/tags.go index 1f298605641..ea4e7350395 100644 --- a/common/metrics/tags.go +++ b/common/metrics/tags.go @@ -46,6 +46,7 @@ const ( replicationTaskType = "replicationTaskType" replicationTaskPriority = "replicationTaskPriority" taskExpireStage = "task_expire_stage" + taskAddResult = "task_add_result" versioningBehavior = 
"versioning_behavior" continueAsNewVersioningBehavior = "continue_as_new_versioning_behavior" suggestContinueAsNewReasonTooManyUpdates = "suggest_continue_as_new_reason_too_many_updates" @@ -315,6 +316,18 @@ func ForwardedTag(forwarded bool) Tag { return Tag{Key: forwardedTag, Value: strconv.FormatBool(forwarded)} } +const ( + TaskAddResultSyncMatch = "sync_match" + TaskAddResultSyncMatchUnavail = "sync_match_unavailable" + TaskAddResultBacklog = "backlog" + TaskAddResultThrottled = "throttled" + TaskAddResultFailure = "failure" +) + +func TaskAddResultTag(result string) Tag { + return Tag{Key: taskAddResult, Value: result} +} + func MatchingTaskPriorityTag(value int32) Tag { priStr := "" if value != 0 { diff --git a/service/matching/physical_task_queue_manager.go b/service/matching/physical_task_queue_manager.go index 1f5a8ab4002..281894d3b0f 100644 --- a/service/matching/physical_task_queue_manager.go +++ b/service/matching/physical_task_queue_manager.go @@ -451,6 +451,15 @@ func (c *physicalTaskQueueManagerImpl) SpoolTask(taskInfo *persistencespb.TaskIn return c.backlogMgr.SpoolTask(taskInfo) } +func (c *physicalTaskQueueManagerImpl) RecordTaskAdd(result string, forwarded bool, behavior enumspb.VersioningBehavior) { + c.metricsHandler.Counter(metrics.TasksAddedCounter.Name()).Record( + 1, + metrics.TaskAddResultTag(result), + metrics.ForwardedTag(forwarded), + metrics.VersioningBehaviorTag(behavior), + ) +} + // PollTask blocks waiting for a task. 
// Returns error when context deadline is exceeded // maxDispatchPerSecond is the max rate at which tasks are allowed diff --git a/service/matching/physical_task_queue_manager_interface.go b/service/matching/physical_task_queue_manager_interface.go index 2fe10565824..6dbeabe4672 100644 --- a/service/matching/physical_task_queue_manager_interface.go +++ b/service/matching/physical_task_queue_manager_interface.go @@ -6,6 +6,7 @@ import ( "context" "time" + enumspb "go.temporal.io/api/enums/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/server/api/matchingservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" @@ -65,5 +66,8 @@ type ( // GetFairnessWeightOverrides returns current fairness weight overrides for this queue. GetFairnessWeightOverrides() fairnessWeightOverrides UpdateRemotePriorityBacklogs(remotePriorityBacklogSet) + // RecordTaskAdd records the outcome of a task add to this physical queue using + // the queue's tagged metrics handler, so all per-physical-queue labels are included. + RecordTaskAdd(result string, forwarded bool, behavior enumspb.VersioningBehavior) } ) diff --git a/service/matching/physical_task_queue_manager_mock.go b/service/matching/physical_task_queue_manager_mock.go index 86d63f8fce4..5a899fda3ab 100644 --- a/service/matching/physical_task_queue_manager_mock.go +++ b/service/matching/physical_task_queue_manager_mock.go @@ -14,6 +14,7 @@ import ( reflect "reflect" time "time" + enums "go.temporal.io/api/enums/v1" taskqueue "go.temporal.io/api/taskqueue/v1" matchingservice "go.temporal.io/server/api/matchingservice/v1" persistence "go.temporal.io/server/api/persistence/v1" @@ -282,6 +283,18 @@ func (mr *MockphysicalTaskQueueManagerMockRecorder) QueueKey() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "QueueKey", reflect.TypeOf((*MockphysicalTaskQueueManager)(nil).QueueKey)) } +// RecordTaskAdd mocks base method. 
+func (m *MockphysicalTaskQueueManager) RecordTaskAdd(result string, forwarded bool, behavior enums.VersioningBehavior) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "RecordTaskAdd", result, forwarded, behavior) +} + +// RecordTaskAdd indicates an expected call of RecordTaskAdd. +func (mr *MockphysicalTaskQueueManagerMockRecorder) RecordTaskAdd(result, forwarded, behavior any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RecordTaskAdd", reflect.TypeOf((*MockphysicalTaskQueueManager)(nil).RecordTaskAdd), result, forwarded, behavior) +} + // RemovePoller mocks base method. func (m *MockphysicalTaskQueueManager) RemovePoller(arg0 pollerIdentity) { m.ctrl.T.Helper() diff --git a/service/matching/task_queue_partition_manager.go b/service/matching/task_queue_partition_manager.go index 0b70f9434e3..c06a3be70c0 100644 --- a/service/matching/task_queue_partition_manager.go +++ b/service/matching/task_queue_partition_manager.go @@ -439,6 +439,9 @@ reredirectTask: return "", false, err } + behavior := directive.GetBehavior() + forwarded := params.forwardInfo != nil + var outcome syncMatchOutcome if isActive { outcome, err = syncMatchQueue.TrySyncMatch(ctx, syncMatchTask) @@ -446,10 +449,16 @@ reredirectTask: if syncMatched && !pm.shouldBacklogSyncMatchTaskOnError(err) { // Only fire hooks for non-forwarded tasks. Forwarded tasks already had hooks fired // on the child partition that originally received the task. - if params.forwardInfo == nil { + if !forwarded { pm.processTaskAddHooks(ctx, targetVersion, outcome) } + syncMatchResult := metrics.TaskAddResultSyncMatch + if err != nil { + syncMatchResult = taskAddErrResult(err) + } + syncMatchQueue.RecordTaskAdd(syncMatchResult, forwarded, behavior) + // Build ID is not returned for sync match. The returned build ID is used by History to update // mutable state (and visibility) when the first workflow task is spooled. 
+ // For sync-match case, History has already received the build ID in the Record*TaskStarted call. @@ -465,6 +474,7 @@ reredirectTask: if spoolQueue == nil { // This means the task is being forwarded. Child partition will persist the task when sync match fails. + syncMatchQueue.RecordTaskAdd(metrics.TaskAddResultSyncMatchUnavail, forwarded, behavior) return "", false, errRemoteSyncMatchFailed } @@ -476,7 +486,10 @@ reredirectTask: err = spoolQueue.SpoolTask(params.taskInfo) if err == nil { + spoolQueue.RecordTaskAdd(metrics.TaskAddResultBacklog, forwarded, behavior) pm.processTaskAddHooks(ctx, targetVersion, outcome) + } else { + spoolQueue.RecordTaskAdd(taskAddErrResult(err), forwarded, behavior) } return assignedBuildId, false, err @@ -506,6 +519,14 @@ func (pm *taskQueuePartitionManagerImpl) processTaskAddHooks(ctx context.Context } } +func taskAddErrResult(err error) string { + var resourceExhausted *serviceerror.ResourceExhausted + if errors.As(err, &resourceExhausted) { + return metrics.TaskAddResultThrottled + } + return metrics.TaskAddResultFailure +} + func (pm *taskQueuePartitionManagerImpl) shouldBacklogSyncMatchTaskOnError(err error) bool { var resourceExhaustedErr *serviceerror.ResourceExhausted if err != nil && errors.As(err, &resourceExhaustedErr) { From 545b896a285627a6c47363eb8be30e1d91bb319d Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Thu, 14 May 2026 15:09:13 -0600 Subject: [PATCH 30/73] fix: event metrics with missing tags (#10268) ## What changed? Add `WithTags(metrics.CommandType(Unspecified))` to the call sites for Workflow Update event_blob_size metric emission, which was missing in the patch #10253 for the problem introduced in #10223 ## Why? Fixes a warning and dropped metric when the metrics handler has different shapes ## How did you test it? - [ ] built - [X] run locally and tested manually: Ran server with `make start`, created a simple workflow with an update handler, started the worker, started a workflow, and sent the update.
Grepped server logs for `error in prometheus reporter` and `event_blob_size` and curled the metrics endpoint `curl -s http://127.0.0.1:8000/metrics > /tmp/snapshot.txt` - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks Minimal, fixing bug --- service/frontend/workflow_handler.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index d98a304e687..47af481aa2a 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -786,7 +786,7 @@ func (wh *WorkflowHandler) ExecuteMultiOperation( return nil, errMultiOpNotStartAndUpdate } - metrics.EventBlobSize.With(wh.metricsScope(ctx)).Record(int64(request.Operations[1].GetUpdateWorkflow().GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) + metrics.EventBlobSize.With(wh.metricsScope(ctx).WithTags(metrics.CommandTypeTag(enumspb.COMMAND_TYPE_UNSPECIFIED.String()))).Record(int64(request.Operations[1].GetUpdateWorkflow().GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) historyReq, err := wh.convertToHistoryMultiOperationRequest(ctx, namespaceID, request) if err != nil { @@ -5332,7 +5332,7 @@ func (wh *WorkflowHandler) UpdateWorkflowExecution( metricsHandler := wh.metricsScope(ctx).WithTags(metrics.HeaderCallsiteTag("UpdateWorkflowExecution")) metrics.HeaderSize.With(metricsHandler).Record(int64(request.GetRequest().GetInput().GetHeader().Size())) - metrics.EventBlobSize.With(wh.metricsScope(ctx)).Record(int64(request.GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) + metrics.EventBlobSize.With(wh.metricsScope(ctx).WithTags(metrics.CommandTypeTag(enumspb.COMMAND_TYPE_UNSPECIFIED.String()))).Record(int64(request.GetRequest().GetInput().GetArgs().Size()), metrics.OperationTag("UpdateWorkflowExecution")) switch 
request.WaitPolicy.LifecycleStage { // nolint:exhaustive case enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED: From be2fab2a6f004c86cdae6dc3987b641c580f54d3 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 14 May 2026 15:02:34 -0700 Subject: [PATCH 31/73] Preserve metadata for json/plain payloads in nexus serializer (#10273) When a payload with json/plain encoding had additional metadata fields, those fields were silently dropped during nexus serialization. Fall back to x-temporal-payload to preserve the full payload, matching the behavior already in place for other encodings. --- common/nexus/payload_serializer.go | 3 +++ common/nexus/payload_serializer_test.go | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/common/nexus/payload_serializer.go b/common/nexus/payload_serializer.go index ff4a86438b8..dce81b0d8c3 100644 --- a/common/nexus/payload_serializer.go +++ b/common/nexus/payload_serializer.go @@ -139,6 +139,9 @@ func (payloadSerializer) Serialize(v any) (*nexus.Content, error) { } content.Header["type"] = fmt.Sprintf("application/x-protobuf; message-type=%q", messageType) case "json/plain": + if len(payload.Metadata) != 1 { + return xTemporalPayload(payload) + } content.Header["type"] = "application/json" case "binary/null": if len(payload.Metadata) != 1 { diff --git a/common/nexus/payload_serializer_test.go b/common/nexus/payload_serializer_test.go index bf83d2ee015..13e78dde25e 100644 --- a/common/nexus/payload_serializer_test.go +++ b/common/nexus/payload_serializer_test.go @@ -123,6 +123,17 @@ func TestNexusPayloadSerializer(t *testing.T) { }, header: nexus.Header{"type": "application/x-temporal-payload"}, }, + { + name: "json/plain with non-standard metadata field", + inputPayload: &commonpb.Payload{ + Data: []byte(`"data"`), + Metadata: map[string][]byte{ + "encoding": []byte("json/plain"), + "non-standard": []byte("value"), + }, + }, + header: nexus.Header{"type": "application/x-temporal-payload"}, + }, { name: "nexus 
content with non-standard header", inputPayload: &commonpb.Payload{ From 96ce01cc4f9a3e6e774ccff59345c0ddc3a1ef38 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 14 May 2026 17:08:25 -0700 Subject: [PATCH 32/73] config: sqlite + cluster address env overrides for embedded template (#10229) ## What Extend `common/config/config_template_embedded.yaml` with (1) SQLite option (2) option to override `rpcAddress` and/or `httpAddress` ## Why https://github.com/temporalio/omes needs to be able to start a Temporal server with sqlite and custom addresses. See https://github.com/temporalio/omes/pull/348 --- common/config/config_template_embedded.yaml | 36 +++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/common/config/config_template_embedded.yaml b/common/config/config_template_embedded.yaml index 68c66ec0a96..f7b44358a60 100644 --- a/common/config/config_template_embedded.yaml +++ b/common/config/config_template_embedded.yaml @@ -144,6 +144,37 @@ persistence: keyFile: {{ default "" (env "SQL_CERT_KEY") }} enableHostVerification: {{ default "false" (env "SQL_HOST_VERIFICATION") }} serverName: {{ default "" (env "SQL_HOST_NAME") }} + {{- else if eq $db "sqlite" }} + default: + sql: + pluginName: "{{ $db }}" + databaseName: "{{ default "temporal" (env "DBNAME") }}" + connectAddr: "localhost" + connectProtocol: "tcp" + connectAttributes: + mode: "{{ default "rwc" (env "SQLITE_MODE") }}" + cache: "{{ default "private" (env "SQLITE_CACHE") }}" + setup: "{{ default "true" (env "SQLITE_SETUP") }}" + journal_mode: "{{ default "wal" (env "SQLITE_JOURNAL_MODE") }}" + synchronous: "{{ default "2" (env "SQLITE_SYNCHRONOUS") }}" + busy_timeout: "{{ default "10000" (env "SQLITE_BUSY_TIMEOUT") }}" + maxConns: {{ default "1" (env "SQL_MAX_CONNS") }} + maxIdleConns: {{ default "1" (env "SQL_MAX_IDLE_CONNS") }} + visibility: + sql: + pluginName: "{{ $db }}" + databaseName: "{{ default "temporal_visibility" (env "VISIBILITY_DBNAME") }}" + connectAddr: 
"localhost" + connectProtocol: "tcp" + connectAttributes: + mode: "{{ default "rwc" (env "SQLITE_MODE") }}" + cache: "{{ default "private" (env "SQLITE_CACHE") }}" + setup: "{{ default "true" (env "SQLITE_SETUP") }}" + journal_mode: "{{ default "wal" (env "SQLITE_JOURNAL_MODE") }}" + synchronous: "{{ default "2" (env "SQLITE_SYNCHRONOUS") }}" + busy_timeout: "{{ default "10000" (env "SQLITE_BUSY_TIMEOUT") }}" + maxConns: {{ default "1" (env "SQL_VIS_MAX_CONNS") }} + maxIdleConns: {{ default "1" (env "SQL_VIS_MAX_IDLE_CONNS") }} {{- end }} {{- if eq $es "true" }} es-visibility: @@ -272,6 +303,7 @@ global: {{- $temporalGrpcPort := default "7233" (env "FRONTEND_GRPC_PORT") }} {{- $temporalHTTPPort := default "7243" (env "FRONTEND_HTTP_PORT") }} {{- $temporalInternalHTTPPort := default "7246" (env "INTERNAL_FRONTEND_HTTP_PORT") }} +{{- $temporalClusterAddress := default (default "127.0.0.1" (env "BIND_ON_IP")) (env "TEMPORAL_BROADCAST_ADDRESS") }} services: frontend: rpc: @@ -317,8 +349,8 @@ clusterMetadata: enabled: true initialFailoverVersion: 1 rpcName: "frontend" - rpcAddress: {{ (print "127.0.0.1:" $temporalGrpcPort) }} - httpAddress: {{ (print "127.0.0.1:" $temporalHTTPPort) }} + rpcAddress: {{ default (print $temporalClusterAddress ":" $temporalGrpcPort) (env "CLUSTER_RPC_ADDRESS") }} + httpAddress: {{ default (print $temporalClusterAddress ":" $temporalHTTPPort) (env "CLUSTER_HTTP_ADDRESS") }} dcRedirectionPolicy: policy: "noop" From d2ed7f156e56f86db53953c3e80a4c298557839e Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Thu, 14 May 2026 21:35:52 -0700 Subject: [PATCH 33/73] DescribeNexusOperationRequest as long poll category (#10256) ## What changed? Mark `DescribeNexusOperationRequest` as long poll API category. ## Why? 
In response to https://github.com/temporalio/temporal/pull/10192#discussion_r3222982198 --- chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go | 2 +- chasm/lib/nexusoperation/proto/v1/service.proto | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go b/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go index 115d4ecd863..44b56e90792 100644 --- a/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go +++ b/chasm/lib/nexusoperation/gen/nexusoperationpb/v1/service.pb.go @@ -30,7 +30,7 @@ const file_temporal_server_chasm_lib_nexusoperation_proto_v1_service_proto_rawDe "?temporal/server/chasm/lib/nexusoperation/proto/v1/service.proto\x121temporal.server.chasm.lib.nexusoperation.proto.v1\x1aHtemporal/server/chasm/lib/nexusoperation/proto/v1/request_response.proto\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal/server/api/routing/v1/extension.proto2\x90\v\n" + "\x15NexusOperationService\x12\xdf\x01\n" + "\x13StartNexusOperation\x12M.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationRequest\x1aN.temporal.server.chasm.lib.nexusoperation.proto.v1.StartNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xe8\x01\n" + - "\x16DescribeNexusOperation\x12P.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationRequest\x1aQ.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xf7\x01\n" + + "\x16DescribeNexusOperation\x12P.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationRequest\x1aQ.temporal.server.chasm.lib.nexusoperation.proto.v1.DescribeNexusOperationResponse\")\x8a\xb5\x18\x02\b\x02\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xf7\x01\n" +
"\x1bRequestCancelNexusOperation\x12U.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationRequest\x1aV.temporal.server.chasm.lib.nexusoperation.proto.v1.RequestCancelNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xeb\x01\n" + "\x17TerminateNexusOperation\x12Q.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationRequest\x1aR.temporal.server.chasm.lib.nexusoperation.proto.v1.TerminateNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xe2\x01\n" + "\x14DeleteNexusOperation\x12N.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationRequest\x1aO.temporal.server.chasm.lib.nexusoperation.proto.v1.DeleteNexusOperationResponse\")\x8a\xb5\x18\x02\b\x01\xd2\xc3\x18\x1f\x1a\x1dfrontend_request.operation_id\x12\xdc\x01\n" + diff --git a/chasm/lib/nexusoperation/proto/v1/service.proto b/chasm/lib/nexusoperation/proto/v1/service.proto index c71599abdfb..f7e767e484c 100644 --- a/chasm/lib/nexusoperation/proto/v1/service.proto +++ b/chasm/lib/nexusoperation/proto/v1/service.proto @@ -16,7 +16,7 @@ service NexusOperationService { rpc DescribeNexusOperation(DescribeNexusOperationRequest) returns (DescribeNexusOperationResponse) { option (temporal.server.api.routing.v1.routing).business_id = "frontend_request.operation_id"; - option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_LONG_POLL; } rpc RequestCancelNexusOperation(RequestCancelNexusOperationRequest) returns (RequestCancelNexusOperationResponse) { From 27115b584d2520ff303ab2c2043eb2bc94e7172b Mon Sep 17 00:00:00 2001 From: Long Tran Date: Fri, 15 May 2026 13:05:55 -0400 Subject: [PATCH 34/73] Propagate backlinks on Signal and Signal-with-Start responses (#9897) ## What changed? 
### **High level** With https://github.com/temporalio/api/pull/761 adding links to the Signal and Signal-with-Start responses, this PR adds server-side logic that: * Adds the `requestID` from Signal and Signal-with-Start requests to the CHASM workflow tree under a new map field `IncomingSignals`, and to the event store, so these request IDs stay in the buffer * Returns a backlink in the response that references the `requestID` * On buffer flush to the DB transaction, attaches each `requestID` to a concrete `eventID`, which allows users to later know which event correlates with this request. We will wire the concrete event ID to the signal request IDs stored in the workflow component CHASM tree (`IncomingSignals` map) > [!NOTE] > The feature is gated behind a new dynamicconfig `EnableCHASMSignalBacklinks`, which is implicitly only checked if `EnableChasm` is enabled. ## Why? This will enable the caller of the signal to have a backlink to the cross-namespace signal invoked, which will become more relevant for Nexus SDK ergonomics. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [x] added new functional test(s) In functional tests, I augmented existing tests for Signal and Signal-with-Start to: * Ensure that a backlink is returned via the responses * Later use `DescribeWorkflow` to ensure that we get a concrete EventID (mapped when the buffer is flushed) * Ensure that multiple signals with the same `requestID` get de-duped ``` $ go test ./tests/ -run TestLinksTestSuite ok go.temporal.io/server/tests 1.486s ``` ``` $ go test ./tests/ -run 'TestNexusWorkflowTestSuite' -count=1 ok go.temporal.io/server/tests 4.714s ``` ## Potential risks Need to test end-to-end to see that the link shows up correctly in the Web UI. The feature is gated behind a dynamicconfig since it requires CHASM-based workflows to be enabled.
--- api/historyservice/v1/request_response.pb.go | 363 +++++++++--------- .../gen/workflowpb/v1/state.go-helpers.pb.go | 37 ++ .../workflow/gen/workflowpb/v1/state.pb.go | 55 ++- chasm/lib/workflow/proto/v1/state.proto | 6 + chasm/lib/workflow/workflow.go | 52 +++ common/dynamicconfig/constants.go | 9 + common/metrics/metric_defs.go | 60 +-- .../historyservice/v1/request_response.proto | 5 +- service/frontend/workflow_handler.go | 11 +- service/history/api/create_workflow_util.go | 1 + service/history/api/describeworkflow/api.go | 61 ++- service/history/api/link_util.go | 50 +++ service/history/api/multioperation/api.go | 16 +- .../api/removesignalmutablestate/api.go | 5 +- .../api/signalwithstartworkflow/api.go | 8 + .../signal_with_start_workflow.go | 1 + .../signal_with_start_workflow_test.go | 3 + service/history/api/signalworkflow/api.go | 16 +- service/history/api/startworkflow/api.go | 43 +-- service/history/configs/config.go | 6 +- service/history/history_engine_test.go | 231 +++++++---- .../history/historybuilder/event_factory.go | 2 + service/history/historybuilder/event_store.go | 6 + .../history/historybuilder/history_builder.go | 2 + .../history_builder_categorization_test.go | 24 ++ .../historybuilder/history_builder_test.go | 3 +- service/history/interfaces/mutable_state.go | 3 + .../history/interfaces/mutable_state_mock.go | 30 +- service/history/ndc/events_reapplier_test.go | 5 + service/history/ndc/workflow_resetter.go | 1 + service/history/ndc/workflow_resetter_test.go | 5 +- .../history/workflow/mutable_state_impl.go | 90 ++++- .../workflow/mutable_state_impl_test.go | 4 + .../workflow_test/mutable_state_impl_test.go | 1 + tests/links_test.go | 320 ++++++++++++++- tests/nexus_workflow_test.go | 36 +- tests/signal_workflow_test.go | 83 ++-- tests/update_workflow_test.go | 14 + 38 files changed, 1270 insertions(+), 398 deletions(-) create mode 100644 service/history/api/link_util.go diff --git a/api/historyservice/v1/request_response.pb.go 
b/api/historyservice/v1/request_response.pb.go index d2b58ab8900..46d476bcba2 100644 --- a/api/historyservice/v1/request_response.pb.go +++ b/api/historyservice/v1/request_response.pb.go @@ -2921,6 +2921,7 @@ func (x *SignalWorkflowExecutionRequest) GetChildWorkflowOnly() bool { type SignalWorkflowExecutionResponse struct { state protoimpl.MessageState `protogen:"open.v1"` + Link *v14.Link `protobuf:"bytes,1,opt,name=link,proto3" json:"link,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -2955,6 +2956,13 @@ func (*SignalWorkflowExecutionResponse) Descriptor() ([]byte, []int) { return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{33} } +func (x *SignalWorkflowExecutionResponse) GetLink() *v14.Link { + if x != nil { + return x.Link + } + return nil +} + type SignalWithStartWorkflowExecutionRequest struct { state protoimpl.MessageState `protogen:"open.v1"` NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` @@ -3014,6 +3022,7 @@ type SignalWithStartWorkflowExecutionResponse struct { state protoimpl.MessageState `protogen:"open.v1"` RunId string `protobuf:"bytes,1,opt,name=run_id,json=runId,proto3" json:"run_id,omitempty"` Started bool `protobuf:"varint,2,opt,name=started,proto3" json:"started,omitempty"` + SignalLink *v14.Link `protobuf:"bytes,3,opt,name=signal_link,json=signalLink,proto3" json:"signal_link,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -3062,6 +3071,13 @@ func (x *SignalWithStartWorkflowExecutionResponse) GetStarted() bool { return false } +func (x *SignalWithStartWorkflowExecutionResponse) GetSignalLink() *v14.Link { + if x != nil { + return x.SignalLink + } + return nil +} + type RemoveSignalMutableStateRequest struct { state protoimpl.MessageState `protogen:"open.v1"` NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" 
json:"namespace_id,omitempty"` @@ -10715,14 +10731,17 @@ const file_temporal_server_api_historyservice_v1_request_response_proto_rawDesc "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12f\n" + "\x0esignal_request\x18\x02 \x01(\v2?.temporal.api.workflowservice.v1.SignalWorkflowExecutionRequestR\rsignalRequest\x12i\n" + "\x1bexternal_workflow_execution\x18\x03 \x01(\v2).temporal.api.common.v1.WorkflowExecutionR\x19externalWorkflowExecution\x12.\n" + - "\x13child_workflow_only\x18\x04 \x01(\bR\x11childWorkflowOnly:3\x92\xc4\x03/*-signal_request.workflow_execution.workflow_id\"!\n" + - "\x1fSignalWorkflowExecutionResponse\"\xff\x01\n" + + "\x13child_workflow_only\x18\x04 \x01(\bR\x11childWorkflowOnly:3\x92\xc4\x03/*-signal_request.workflow_execution.workflow_id\"S\n" + + "\x1fSignalWorkflowExecutionResponse\x120\n" + + "\x04link\x18\x01 \x01(\v2\x1c.temporal.api.common.v1.LinkR\x04link\"\xff\x01\n" + "'SignalWithStartWorkflowExecutionRequest\x12!\n" + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12\x83\x01\n" + - "\x19signal_with_start_request\x18\x02 \x01(\v2H.temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequestR\x16signalWithStartRequest:+\x92\xc4\x03'*%signal_with_start_request.workflow_id\"[\n" + + "\x19signal_with_start_request\x18\x02 \x01(\v2H.temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequestR\x16signalWithStartRequest:+\x92\xc4\x03'*%signal_with_start_request.workflow_id\"\x9a\x01\n" + "(SignalWithStartWorkflowExecutionResponse\x12\x15\n" + "\x06run_id\x18\x01 \x01(\tR\x05runId\x12\x18\n" + - "\astarted\x18\x02 \x01(\bR\astarted\"\xe3\x01\n" + + "\astarted\x18\x02 \x01(\bR\astarted\x12=\n" + + "\vsignal_link\x18\x03 \x01(\v2\x1c.temporal.api.common.v1.LinkR\n" + + "signalLink\"\xe3\x01\n" + "\x1fRemoveSignalMutableStateRequest\x12!\n" + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12X\n" + "\x12workflow_execution\x18\x02 \x01(\v2).temporal.api.common.v1.WorkflowExecutionR\x11workflowExecution\x12\x1d\n" + @@ 
-11607,173 +11626,175 @@ var file_temporal_server_api_historyservice_v1_request_response_proto_depIdxs = 182, // 98: temporal.server.api.historyservice.v1.IsActivityTaskValidRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock 212, // 99: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.signal_request:type_name -> temporal.api.workflowservice.v1.SignalWorkflowExecutionRequest 186, // 100: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 213, // 101: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest.signal_with_start_request:type_name -> temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest - 186, // 102: temporal.server.api.historyservice.v1.RemoveSignalMutableStateRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 214, // 103: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.terminate_request:type_name -> temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest - 186, // 104: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 105: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 215, // 106: temporal.server.api.historyservice.v1.ResetWorkflowExecutionRequest.reset_request:type_name -> temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest - 216, // 107: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.cancel_request:type_name -> temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest - 186, // 108: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 109: 
temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 110: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.child_clock:type_name -> temporal.server.api.clock.v1.VectorClock - 182, // 111: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.parent_clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 112: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 113: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 114: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 115: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 202, // 116: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.completion_event:type_name -> temporal.api.history.v1.HistoryEvent - 182, // 117: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 118: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 119: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 120: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 217, // 121: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionRequest.request:type_name -> 
temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest - 218, // 122: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.execution_config:type_name -> temporal.api.workflow.v1.WorkflowExecutionConfig - 219, // 123: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_execution_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionInfo - 220, // 124: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_activities:type_name -> temporal.api.workflow.v1.PendingActivityInfo - 221, // 125: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_children:type_name -> temporal.api.workflow.v1.PendingChildExecutionInfo - 222, // 126: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_workflow_task:type_name -> temporal.api.workflow.v1.PendingWorkflowTaskInfo - 223, // 127: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.callbacks:type_name -> temporal.api.workflow.v1.CallbackInfo - 224, // 128: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_nexus_operations:type_name -> temporal.api.workflow.v1.PendingNexusOperationInfo - 225, // 129: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_extended_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionExtendedInfo - 186, // 130: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 187, // 131: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.version_history_items:type_name -> temporal.server.api.history.v1.VersionHistoryItem - 226, // 132: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.events:type_name -> temporal.api.common.v1.DataBlob - 226, // 133: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.new_run_events:type_name -> temporal.api.common.v1.DataBlob - 227, // 
134: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo - 228, // 135: temporal.server.api.historyservice.v1.ReplicateWorkflowStateRequest.workflow_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 171, // 136: temporal.server.api.historyservice.v1.SyncShardStatusRequest.status_time:type_name -> google.protobuf.Timestamp - 171, // 137: temporal.server.api.historyservice.v1.SyncActivityRequest.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 138: temporal.server.api.historyservice.v1.SyncActivityRequest.started_time:type_name -> google.protobuf.Timestamp - 171, // 139: temporal.server.api.historyservice.v1.SyncActivityRequest.last_heartbeat_time:type_name -> google.protobuf.Timestamp - 174, // 140: temporal.server.api.historyservice.v1.SyncActivityRequest.details:type_name -> temporal.api.common.v1.Payloads - 173, // 141: temporal.server.api.historyservice.v1.SyncActivityRequest.last_failure:type_name -> temporal.api.failure.v1.Failure - 229, // 142: temporal.server.api.historyservice.v1.SyncActivityRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 227, // 143: temporal.server.api.historyservice.v1.SyncActivityRequest.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo - 171, // 144: temporal.server.api.historyservice.v1.SyncActivityRequest.first_scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 145: temporal.server.api.historyservice.v1.SyncActivityRequest.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 175, // 146: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_initial_interval:type_name -> google.protobuf.Duration - 175, // 147: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_maximum_interval:type_name -> google.protobuf.Duration - 64, // 148: 
temporal.server.api.historyservice.v1.SyncActivitiesRequest.activities_info:type_name -> temporal.server.api.historyservice.v1.ActivitySyncInfo - 171, // 149: temporal.server.api.historyservice.v1.ActivitySyncInfo.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 150: temporal.server.api.historyservice.v1.ActivitySyncInfo.started_time:type_name -> google.protobuf.Timestamp - 171, // 151: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_heartbeat_time:type_name -> google.protobuf.Timestamp - 174, // 152: temporal.server.api.historyservice.v1.ActivitySyncInfo.details:type_name -> temporal.api.common.v1.Payloads - 173, // 153: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_failure:type_name -> temporal.api.failure.v1.Failure - 229, // 154: temporal.server.api.historyservice.v1.ActivitySyncInfo.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 171, // 155: temporal.server.api.historyservice.v1.ActivitySyncInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 156: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 175, // 157: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_initial_interval:type_name -> google.protobuf.Duration - 175, // 158: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_maximum_interval:type_name -> google.protobuf.Duration - 186, // 159: temporal.server.api.historyservice.v1.DescribeMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 228, // 160: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.cache_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 228, // 161: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.database_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 186, // 162: 
temporal.server.api.historyservice.v1.DescribeHistoryHostRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 230, // 163: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse.namespace_cache:type_name -> temporal.server.api.namespace.v1.NamespaceCacheInfo - 231, // 164: temporal.server.api.historyservice.v1.GetShardResponse.shard_info:type_name -> temporal.server.api.persistence.v1.ShardInfo - 171, // 165: temporal.server.api.historyservice.v1.RemoveTaskRequest.visibility_time:type_name -> google.protobuf.Timestamp - 232, // 166: temporal.server.api.historyservice.v1.GetReplicationMessagesRequest.tokens:type_name -> temporal.server.api.replication.v1.ReplicationToken - 164, // 167: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.shard_messages:type_name -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry - 233, // 168: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest.task_infos:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo - 234, // 169: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask - 235, // 170: temporal.server.api.historyservice.v1.QueryWorkflowRequest.request:type_name -> temporal.api.workflowservice.v1.QueryWorkflowRequest - 236, // 171: temporal.server.api.historyservice.v1.QueryWorkflowResponse.response:type_name -> temporal.api.workflowservice.v1.QueryWorkflowResponse - 237, // 172: temporal.server.api.historyservice.v1.ReapplyEventsRequest.request:type_name -> temporal.server.api.adminservice.v1.ReapplyEventsRequest - 238, // 173: temporal.server.api.historyservice.v1.GetDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 238, // 174: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 234, // 
175: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask - 233, // 176: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks_info:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo - 238, // 177: temporal.server.api.historyservice.v1.PurgeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 238, // 178: temporal.server.api.historyservice.v1.MergeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 239, // 179: temporal.server.api.historyservice.v1.RefreshWorkflowTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest - 186, // 180: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 96, // 181: temporal.server.api.historyservice.v1.GetReplicationStatusResponse.shards:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus - 171, // 182: temporal.server.api.historyservice.v1.ShardReplicationStatus.shard_local_time:type_name -> google.protobuf.Timestamp - 165, // 183: temporal.server.api.historyservice.v1.ShardReplicationStatus.remote_clusters:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry - 166, // 184: temporal.server.api.historyservice.v1.ShardReplicationStatus.handover_namespaces:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry - 171, // 185: temporal.server.api.historyservice.v1.ShardReplicationStatus.max_replication_task_visibility_time:type_name -> google.protobuf.Timestamp - 171, // 186: temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster.acked_task_visibility_time:type_name -> google.protobuf.Timestamp - 186, // 187: 
temporal.server.api.historyservice.v1.RebuildMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 188: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 226, // 189: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.history_batches:type_name -> temporal.api.common.v1.DataBlob - 229, // 190: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 186, // 191: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 171, // 192: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_start_time:type_name -> google.protobuf.Timestamp - 171, // 193: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_close_time:type_name -> google.protobuf.Timestamp - 240, // 194: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest - 241, // 195: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse.response:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse - 242, // 196: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesRequest.sync_replication_state:type_name -> temporal.server.api.replication.v1.SyncReplicationState - 243, // 197: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse.messages:type_name -> temporal.server.api.replication.v1.WorkflowReplicationMessages - 244, // 198: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest.request:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest - 245, // 199: 
temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse.response:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse - 246, // 200: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest - 247, // 201: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse - 200, // 202: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.history:type_name -> temporal.api.history.v1.History - 247, // 203: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponseWithRaw.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse - 248, // 204: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest - 249, // 205: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse - 250, // 206: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request - 251, // 207: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response - 252, // 208: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest - 253, // 209: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse - 254, // 
210: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest.request:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest - 255, // 211: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse.response:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse - 256, // 212: temporal.server.api.historyservice.v1.GetDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey - 257, // 213: temporal.server.api.historyservice.v1.GetDLQTasksResponse.dlq_tasks:type_name -> temporal.server.api.common.v1.HistoryDLQTask - 256, // 214: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey - 258, // 215: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.inclusive_max_task_metadata:type_name -> temporal.server.api.common.v1.HistoryDLQTaskMetadata - 167, // 216: temporal.server.api.historyservice.v1.ListQueuesResponse.queues:type_name -> temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo - 168, // 217: temporal.server.api.historyservice.v1.AddTasksRequest.tasks:type_name -> temporal.server.api.historyservice.v1.AddTasksRequest.Task - 259, // 218: temporal.server.api.historyservice.v1.ListTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksRequest - 260, // 219: temporal.server.api.historyservice.v1.ListTasksResponse.response:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksResponse - 261, // 220: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion - 262, // 221: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.success:type_name -> temporal.api.common.v1.Payload - 173, // 222: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.failure:type_name -> temporal.api.failure.v1.Failure - 171, 
// 223: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.close_time:type_name -> google.protobuf.Timestamp - 185, // 224: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.links:type_name -> temporal.api.common.v1.Link - 171, // 225: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.start_time:type_name -> google.protobuf.Timestamp - 261, // 226: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion - 262, // 227: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.success:type_name -> temporal.api.common.v1.Payload - 263, // 228: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.failure:type_name -> temporal.api.nexus.v1.Failure - 171, // 229: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.start_time:type_name -> google.protobuf.Timestamp - 185, // 230: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.links:type_name -> temporal.api.common.v1.Link - 264, // 231: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef - 265, // 232: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.state:type_name -> temporal.server.api.enums.v1.HealthState - 266, // 233: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.checks:type_name -> temporal.server.api.health.v1.HealthCheck - 186, // 234: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 188, // 235: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 192, // 236: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories - 267, 
// 237: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse.versioned_transition_artifact:type_name -> temporal.server.api.replication.v1.VersionedTransitionArtifact - 268, // 238: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateActivityOptionsRequest - 269, // 239: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse.activity_options:type_name -> temporal.api.activity.v1.ActivityOptions - 270, // 240: temporal.server.api.historyservice.v1.PauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PauseActivityRequest - 271, // 241: temporal.server.api.historyservice.v1.UnpauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UnpauseActivityRequest - 272, // 242: temporal.server.api.historyservice.v1.ResetActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.ResetActivityRequest - 273, // 243: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest - 274, // 244: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse.workflow_execution_options:type_name -> temporal.api.workflow.v1.WorkflowExecutionOptions - 275, // 245: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest.pause_request:type_name -> temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest - 276, // 246: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest.unpause_request:type_name -> temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest - 277, // 247: temporal.server.api.historyservice.v1.StartNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.StartOperationRequest - 278, // 248: temporal.server.api.historyservice.v1.StartNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.StartOperationResponse - 279, 
// 249: temporal.server.api.historyservice.v1.CancelNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.CancelOperationRequest - 280, // 250: temporal.server.api.historyservice.v1.CancelNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.CancelOperationResponse - 1, // 251: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest - 105, // 252: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest - 2, // 253: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse - 106, // 254: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse - 281, // 255: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery - 281, // 256: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery - 282, // 257: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry.value:type_name -> temporal.server.api.replication.v1.ReplicationMessages - 98, // 258: temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry.value:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster - 97, // 259: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry.value:type_name -> temporal.server.api.historyservice.v1.HandoverNamespaceInfo - 226, // 260: temporal.server.api.historyservice.v1.AddTasksRequest.Task.blob:type_name -> 
temporal.api.common.v1.DataBlob - 283, // 261: temporal.server.api.historyservice.v1.routing:extendee -> google.protobuf.MessageOptions - 0, // 262: temporal.server.api.historyservice.v1.routing:type_name -> temporal.server.api.historyservice.v1.RoutingOptions - 263, // [263:263] is the sub-list for method output_type - 263, // [263:263] is the sub-list for method input_type - 262, // [262:263] is the sub-list for extension type_name - 261, // [261:262] is the sub-list for extension extendee - 0, // [0:261] is the sub-list for field type_name + 185, // 101: temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse.link:type_name -> temporal.api.common.v1.Link + 213, // 102: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest.signal_with_start_request:type_name -> temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest + 185, // 103: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse.signal_link:type_name -> temporal.api.common.v1.Link + 186, // 104: temporal.server.api.historyservice.v1.RemoveSignalMutableStateRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 214, // 105: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.terminate_request:type_name -> temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest + 186, // 106: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 186, // 107: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 215, // 108: temporal.server.api.historyservice.v1.ResetWorkflowExecutionRequest.reset_request:type_name -> temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest + 216, // 109: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.cancel_request:type_name -> 
temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest + 186, // 110: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 186, // 111: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 182, // 112: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.child_clock:type_name -> temporal.server.api.clock.v1.VectorClock + 182, // 113: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.parent_clock:type_name -> temporal.server.api.clock.v1.VectorClock + 186, // 114: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 182, // 115: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 186, // 116: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 186, // 117: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 202, // 118: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.completion_event:type_name -> temporal.api.history.v1.HistoryEvent + 182, // 119: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 186, // 120: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 186, // 121: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 182, // 122: 
temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 217, // 123: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest + 218, // 124: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.execution_config:type_name -> temporal.api.workflow.v1.WorkflowExecutionConfig + 219, // 125: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_execution_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionInfo + 220, // 126: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_activities:type_name -> temporal.api.workflow.v1.PendingActivityInfo + 221, // 127: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_children:type_name -> temporal.api.workflow.v1.PendingChildExecutionInfo + 222, // 128: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_workflow_task:type_name -> temporal.api.workflow.v1.PendingWorkflowTaskInfo + 223, // 129: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.callbacks:type_name -> temporal.api.workflow.v1.CallbackInfo + 224, // 130: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_nexus_operations:type_name -> temporal.api.workflow.v1.PendingNexusOperationInfo + 225, // 131: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_extended_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionExtendedInfo + 186, // 132: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 187, // 133: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.version_history_items:type_name -> temporal.server.api.history.v1.VersionHistoryItem + 226, // 134: 
temporal.server.api.historyservice.v1.ReplicateEventsV2Request.events:type_name -> temporal.api.common.v1.DataBlob + 226, // 135: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.new_run_events:type_name -> temporal.api.common.v1.DataBlob + 227, // 136: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo + 228, // 137: temporal.server.api.historyservice.v1.ReplicateWorkflowStateRequest.workflow_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState + 171, // 138: temporal.server.api.historyservice.v1.SyncShardStatusRequest.status_time:type_name -> google.protobuf.Timestamp + 171, // 139: temporal.server.api.historyservice.v1.SyncActivityRequest.scheduled_time:type_name -> google.protobuf.Timestamp + 171, // 140: temporal.server.api.historyservice.v1.SyncActivityRequest.started_time:type_name -> google.protobuf.Timestamp + 171, // 141: temporal.server.api.historyservice.v1.SyncActivityRequest.last_heartbeat_time:type_name -> google.protobuf.Timestamp + 174, // 142: temporal.server.api.historyservice.v1.SyncActivityRequest.details:type_name -> temporal.api.common.v1.Payloads + 173, // 143: temporal.server.api.historyservice.v1.SyncActivityRequest.last_failure:type_name -> temporal.api.failure.v1.Failure + 229, // 144: temporal.server.api.historyservice.v1.SyncActivityRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 227, // 145: temporal.server.api.historyservice.v1.SyncActivityRequest.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo + 171, // 146: temporal.server.api.historyservice.v1.SyncActivityRequest.first_scheduled_time:type_name -> google.protobuf.Timestamp + 171, // 147: temporal.server.api.historyservice.v1.SyncActivityRequest.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 175, // 148: 
temporal.server.api.historyservice.v1.SyncActivityRequest.retry_initial_interval:type_name -> google.protobuf.Duration + 175, // 149: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_maximum_interval:type_name -> google.protobuf.Duration + 64, // 150: temporal.server.api.historyservice.v1.SyncActivitiesRequest.activities_info:type_name -> temporal.server.api.historyservice.v1.ActivitySyncInfo + 171, // 151: temporal.server.api.historyservice.v1.ActivitySyncInfo.scheduled_time:type_name -> google.protobuf.Timestamp + 171, // 152: temporal.server.api.historyservice.v1.ActivitySyncInfo.started_time:type_name -> google.protobuf.Timestamp + 171, // 153: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_heartbeat_time:type_name -> google.protobuf.Timestamp + 174, // 154: temporal.server.api.historyservice.v1.ActivitySyncInfo.details:type_name -> temporal.api.common.v1.Payloads + 173, // 155: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_failure:type_name -> temporal.api.failure.v1.Failure + 229, // 156: temporal.server.api.historyservice.v1.ActivitySyncInfo.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 171, // 157: temporal.server.api.historyservice.v1.ActivitySyncInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp + 171, // 158: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 175, // 159: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_initial_interval:type_name -> google.protobuf.Duration + 175, // 160: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_maximum_interval:type_name -> google.protobuf.Duration + 186, // 161: temporal.server.api.historyservice.v1.DescribeMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 228, // 162: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.cache_mutable_state:type_name -> 
temporal.server.api.persistence.v1.WorkflowMutableState + 228, // 163: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.database_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState + 186, // 164: temporal.server.api.historyservice.v1.DescribeHistoryHostRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 230, // 165: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse.namespace_cache:type_name -> temporal.server.api.namespace.v1.NamespaceCacheInfo + 231, // 166: temporal.server.api.historyservice.v1.GetShardResponse.shard_info:type_name -> temporal.server.api.persistence.v1.ShardInfo + 171, // 167: temporal.server.api.historyservice.v1.RemoveTaskRequest.visibility_time:type_name -> google.protobuf.Timestamp + 232, // 168: temporal.server.api.historyservice.v1.GetReplicationMessagesRequest.tokens:type_name -> temporal.server.api.replication.v1.ReplicationToken + 164, // 169: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.shard_messages:type_name -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry + 233, // 170: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest.task_infos:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo + 234, // 171: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask + 235, // 172: temporal.server.api.historyservice.v1.QueryWorkflowRequest.request:type_name -> temporal.api.workflowservice.v1.QueryWorkflowRequest + 236, // 173: temporal.server.api.historyservice.v1.QueryWorkflowResponse.response:type_name -> temporal.api.workflowservice.v1.QueryWorkflowResponse + 237, // 174: temporal.server.api.historyservice.v1.ReapplyEventsRequest.request:type_name -> temporal.server.api.adminservice.v1.ReapplyEventsRequest + 238, // 175: 
temporal.server.api.historyservice.v1.GetDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 238, // 176: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 234, // 177: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask + 233, // 178: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks_info:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo + 238, // 179: temporal.server.api.historyservice.v1.PurgeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 238, // 180: temporal.server.api.historyservice.v1.MergeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 239, // 181: temporal.server.api.historyservice.v1.RefreshWorkflowTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest + 186, // 182: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 96, // 183: temporal.server.api.historyservice.v1.GetReplicationStatusResponse.shards:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus + 171, // 184: temporal.server.api.historyservice.v1.ShardReplicationStatus.shard_local_time:type_name -> google.protobuf.Timestamp + 165, // 185: temporal.server.api.historyservice.v1.ShardReplicationStatus.remote_clusters:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry + 166, // 186: temporal.server.api.historyservice.v1.ShardReplicationStatus.handover_namespaces:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry + 171, // 187: 
temporal.server.api.historyservice.v1.ShardReplicationStatus.max_replication_task_visibility_time:type_name -> google.protobuf.Timestamp + 171, // 188: temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster.acked_task_visibility_time:type_name -> google.protobuf.Timestamp + 186, // 189: temporal.server.api.historyservice.v1.RebuildMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 186, // 190: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 226, // 191: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.history_batches:type_name -> temporal.api.common.v1.DataBlob + 229, // 192: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 186, // 193: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 171, // 194: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_start_time:type_name -> google.protobuf.Timestamp + 171, // 195: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_close_time:type_name -> google.protobuf.Timestamp + 240, // 196: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest + 241, // 197: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse.response:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse + 242, // 198: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesRequest.sync_replication_state:type_name -> temporal.server.api.replication.v1.SyncReplicationState + 243, // 199: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse.messages:type_name -> 
temporal.server.api.replication.v1.WorkflowReplicationMessages + 244, // 200: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest.request:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest + 245, // 201: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse.response:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse + 246, // 202: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest + 247, // 203: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse + 200, // 204: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.history:type_name -> temporal.api.history.v1.History + 247, // 205: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponseWithRaw.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse + 248, // 206: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest + 249, // 207: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse + 250, // 208: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request + 251, // 209: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response + 252, // 210: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest.request:type_name -> 
temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest + 253, // 211: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse + 254, // 212: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest.request:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest + 255, // 213: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse.response:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse + 256, // 214: temporal.server.api.historyservice.v1.GetDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey + 257, // 215: temporal.server.api.historyservice.v1.GetDLQTasksResponse.dlq_tasks:type_name -> temporal.server.api.common.v1.HistoryDLQTask + 256, // 216: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey + 258, // 217: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.inclusive_max_task_metadata:type_name -> temporal.server.api.common.v1.HistoryDLQTaskMetadata + 167, // 218: temporal.server.api.historyservice.v1.ListQueuesResponse.queues:type_name -> temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo + 168, // 219: temporal.server.api.historyservice.v1.AddTasksRequest.tasks:type_name -> temporal.server.api.historyservice.v1.AddTasksRequest.Task + 259, // 220: temporal.server.api.historyservice.v1.ListTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksRequest + 260, // 221: temporal.server.api.historyservice.v1.ListTasksResponse.response:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksResponse + 261, // 222: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion + 262, // 223: 
temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.success:type_name -> temporal.api.common.v1.Payload + 173, // 224: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.failure:type_name -> temporal.api.failure.v1.Failure + 171, // 225: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.close_time:type_name -> google.protobuf.Timestamp + 185, // 226: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.links:type_name -> temporal.api.common.v1.Link + 171, // 227: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.start_time:type_name -> google.protobuf.Timestamp + 261, // 228: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion + 262, // 229: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.success:type_name -> temporal.api.common.v1.Payload + 263, // 230: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.failure:type_name -> temporal.api.nexus.v1.Failure + 171, // 231: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.start_time:type_name -> google.protobuf.Timestamp + 185, // 232: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.links:type_name -> temporal.api.common.v1.Link + 264, // 233: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef + 265, // 234: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.state:type_name -> temporal.server.api.enums.v1.HealthState + 266, // 235: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.checks:type_name -> temporal.server.api.health.v1.HealthCheck + 186, // 236: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 237: 
temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 192, // 238: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories + 267, // 239: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse.versioned_transition_artifact:type_name -> temporal.server.api.replication.v1.VersionedTransitionArtifact + 268, // 240: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateActivityOptionsRequest + 269, // 241: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse.activity_options:type_name -> temporal.api.activity.v1.ActivityOptions + 270, // 242: temporal.server.api.historyservice.v1.PauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PauseActivityRequest + 271, // 243: temporal.server.api.historyservice.v1.UnpauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UnpauseActivityRequest + 272, // 244: temporal.server.api.historyservice.v1.ResetActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.ResetActivityRequest + 273, // 245: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest + 274, // 246: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse.workflow_execution_options:type_name -> temporal.api.workflow.v1.WorkflowExecutionOptions + 275, // 247: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest.pause_request:type_name -> temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest + 276, // 248: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest.unpause_request:type_name -> 
temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest + 277, // 249: temporal.server.api.historyservice.v1.StartNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.StartOperationRequest + 278, // 250: temporal.server.api.historyservice.v1.StartNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.StartOperationResponse + 279, // 251: temporal.server.api.historyservice.v1.CancelNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.CancelOperationRequest + 280, // 252: temporal.server.api.historyservice.v1.CancelNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.CancelOperationResponse + 1, // 253: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest + 105, // 254: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest + 2, // 255: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse + 106, // 256: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse + 281, // 257: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery + 281, // 258: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery + 282, // 259: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry.value:type_name -> temporal.server.api.replication.v1.ReplicationMessages + 98, // 260: 
temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry.value:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster + 97, // 261: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry.value:type_name -> temporal.server.api.historyservice.v1.HandoverNamespaceInfo + 226, // 262: temporal.server.api.historyservice.v1.AddTasksRequest.Task.blob:type_name -> temporal.api.common.v1.DataBlob + 283, // 263: temporal.server.api.historyservice.v1.routing:extendee -> google.protobuf.MessageOptions + 0, // 264: temporal.server.api.historyservice.v1.routing:type_name -> temporal.server.api.historyservice.v1.RoutingOptions + 265, // [265:265] is the sub-list for method output_type + 265, // [265:265] is the sub-list for method input_type + 264, // [264:265] is the sub-list for extension type_name + 263, // [263:264] is the sub-list for extension extendee + 0, // [0:263] is the sub-list for field type_name } func init() { file_temporal_server_api_historyservice_v1_request_response_proto_init() } diff --git a/chasm/lib/workflow/gen/workflowpb/v1/state.go-helpers.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/state.go-helpers.pb.go index 99fa7d3f466..50980db658e 100644 --- a/chasm/lib/workflow/gen/workflowpb/v1/state.go-helpers.pb.go +++ b/chasm/lib/workflow/gen/workflowpb/v1/state.go-helpers.pb.go @@ -78,3 +78,40 @@ func (this *NexusCancellationParentData) Equal(that interface{}) bool { return proto.Equal(this, that1) } + +// Marshal an object of type IncomingSignalData to the protobuf v3 wire format +func (val *IncomingSignalData) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type IncomingSignalData from the protobuf v3 wire format +func (val *IncomingSignalData) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *IncomingSignalData) Size() int { + return 
proto.Size(val) +} + +// Equal returns whether two IncomingSignalData values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *IncomingSignalData) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *IncomingSignalData + switch t := that.(type) { + case *IncomingSignalData: + that1 = t + case IncomingSignalData: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/chasm/lib/workflow/gen/workflowpb/v1/state.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/state.pb.go index 4669064b276..304853e6afd 100644 --- a/chasm/lib/workflow/gen/workflowpb/v1/state.pb.go +++ b/chasm/lib/workflow/gen/workflowpb/v1/state.pb.go @@ -125,6 +125,52 @@ func (x *NexusCancellationParentData) GetRequestedEventId() int64 { return 0 } +// IncomingSignalData records the event associated with a signal's request ID, which allows +// DescribeWorkflow to resolve RequestIDRef signal backlinks. 
+type IncomingSignalData struct { + state protoimpl.MessageState `protogen:"open.v1"` + EventId int64 `protobuf:"varint,1,opt,name=event_id,json=eventId,proto3" json:"event_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *IncomingSignalData) Reset() { + *x = IncomingSignalData{} + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *IncomingSignalData) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*IncomingSignalData) ProtoMessage() {} + +func (x *IncomingSignalData) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use IncomingSignalData.ProtoReflect.Descriptor instead. 
+func (*IncomingSignalData) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDescGZIP(), []int{2} +} + +func (x *IncomingSignalData) GetEventId() int64 { + if x != nil { + return x.EventId + } + return 0 +} + var File_temporal_server_chasm_lib_workflow_proto_v1_state_proto protoreflect.FileDescriptor const file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDesc = "" + @@ -134,7 +180,9 @@ const file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDesc = "" "\x12scheduled_event_id\x18\x01 \x01(\x03R\x10scheduledEventId\x122\n" + "\x15scheduled_event_token\x18\x02 \x01(\fR\x13scheduledEventToken\"K\n" + "\x1bNexusCancellationParentData\x12,\n" + - "\x12requested_event_id\x18\x01 \x01(\x03R\x10requestedEventIdBDZBgo.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpbb\x06proto3" + "\x12requested_event_id\x18\x01 \x01(\x03R\x10requestedEventId\"/\n" + + "\x12IncomingSignalData\x12\x19\n" + + "\bevent_id\x18\x01 \x01(\x03R\aeventIdBDZBgo.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpbb\x06proto3" var ( file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDescOnce sync.Once @@ -148,10 +196,11 @@ func file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDescGZIP() return file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDescData } -var file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_msgTypes = make([]protoimpl.MessageInfo, 3) var file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_goTypes = []any{ (*NexusOperationParentData)(nil), // 0: temporal.server.chasm.lib.workflow.proto.v1.NexusOperationParentData (*NexusCancellationParentData)(nil), // 1: temporal.server.chasm.lib.workflow.proto.v1.NexusCancellationParentData + (*IncomingSignalData)(nil), // 2: 
temporal.server.chasm.lib.workflow.proto.v1.IncomingSignalData } var file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_depIdxs = []int32{ 0, // [0:0] is the sub-list for method output_type @@ -172,7 +221,7 @@ func file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDesc), len(file_temporal_server_chasm_lib_workflow_proto_v1_state_proto_rawDesc)), NumEnums: 0, - NumMessages: 2, + NumMessages: 3, NumExtensions: 0, NumServices: 0, }, diff --git a/chasm/lib/workflow/proto/v1/state.proto b/chasm/lib/workflow/proto/v1/state.proto index 8f82cf1af45..0ff01d116b5 100644 --- a/chasm/lib/workflow/proto/v1/state.proto +++ b/chasm/lib/workflow/proto/v1/state.proto @@ -19,3 +19,9 @@ message NexusCancellationParentData { // Event ID of the NEXUS_OPERATION_CANCEL_REQUESTED event. int64 requested_event_id = 1; } + +// IncomingSignalData records the event associated with a signal's request ID, which allows +// DescribeWorkflow to resolve RequestIDRef signal backlinks. +message IncomingSignalData { + int64 event_id = 1; +} diff --git a/chasm/lib/workflow/workflow.go b/chasm/lib/workflow/workflow.go index df0355886bb..8e3148cc03a 100644 --- a/chasm/lib/workflow/workflow.go +++ b/chasm/lib/workflow/workflow.go @@ -10,6 +10,7 @@ import ( "go.temporal.io/server/chasm/lib/callback" callbackspb "go.temporal.io/server/chasm/lib/callback/gen/callbackpb/v1" "go.temporal.io/server/chasm/lib/nexusoperation" + chasmworkflowpb "go.temporal.io/server/chasm/lib/workflow/gen/workflowpb/v1" "go.temporal.io/server/service/history/historybuilder" "google.golang.org/protobuf/types/known/emptypb" "google.golang.org/protobuf/types/known/timestamppb" @@ -30,6 +31,10 @@ type Workflow struct { // Operations map is used to store the Nexus operations for the workflow, keyed by scheduled event ID. 
Operations chasm.Map[int64, *nexusoperation.Operation] + + // IncomingSignals map is used to track incoming signals, keyed by request ID, + // to allow DescribeWorkflow to resolve RequestIDRef signal backlinks. + IncomingSignals chasm.Map[string, *chasmworkflowpb.IncomingSignalData] } func NewWorkflow( @@ -131,6 +136,53 @@ func addAndApplyHistoryEvent[D EventDefinition]( return event, def.Apply(ctx, w, event) } +// AddIncomingSignalEvent adds an entry for the signal requestID -> eventID mapping to +// track all signals that have been received by the workflow. +// Note that since signals are buffered, the eventID may the common.BufferedEventID, which +// will be updated to a concrete eventID once this signal is flushed to the DB. +// If caller tries to add an already-existing eventID, this function will ignore and silently return +// instead of overwriting -- use UpdateIncomingSignalEvent to update existing entries. +func (w *Workflow) AddIncomingSignalEvent( + ctx chasm.MutableContext, + requestID string, + eventID int64, +) error { + if w.IncomingSignals == nil { + w.IncomingSignals = make(chasm.Map[string, *chasmworkflowpb.IncomingSignalData]) + } + if w.HasIncomingSignalEvent(ctx, requestID) { + return nil + } + w.IncomingSignals[requestID] = chasm.NewDataField(ctx, &chasmworkflowpb.IncomingSignalData{ + // This might be common.BufferedEventID, which will be updated via UpdateIncomingSignalEvent + // once this signal is flushed to DB. + EventId: eventID, + }) + return nil +} + +// UpdateIncomingSignalEvent updates the eventID for an existing signal requestID in the map. +// If the requestID is not in the map, this is a no-op (e.g. when called for non-signal request IDs +// during buffer flush). 
+func (w *Workflow) UpdateIncomingSignalEvent( + ctx chasm.MutableContext, + requestID string, + eventID int64, +) error { + if w.HasIncomingSignalEvent(ctx, requestID) { + w.IncomingSignals[requestID].Get(ctx).EventId = eventID + } + + return nil +} + +// HasIncomingSignalEvent returns true if a signal with this requestID is already persisted +// in this CHASM tree. +func (w *Workflow) HasIncomingSignalEvent(_ chasm.Context, requestID string) bool { + _, exists := w.IncomingSignals[requestID] + return exists +} + // HasAnyBufferedEvent returns true if the workflow has any buffered event matching the given filter. func (w *Workflow) HasAnyBufferedEvent(filter historybuilder.BufferedEventFilter) bool { return w.MSPointer.HasAnyBufferedEvent(filter) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 1f078fff81d..e56097eecfe 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -2943,6 +2943,15 @@ to the CHASM (V2) implementation on active scheduler workflows.`, instead of the previous HSM backed implementation.`, ) + EnableCHASMSignalBacklinks = NewNamespaceBoolSetting( + "history.enableCHASMSignalBacklinks", + false, + `Controls whether incoming signal request IDs are tracked in the CHASM IncomingSignals +map to enable DescribeWorkflow to resolve RequestIDRef signal backlinks. Requires EnableChasm. 
+Only enable once all servers in the fleet have been upgraded to a version that understands +the IncomingSignals CHASM field.`, + ) + VersionMembershipCacheTTL = NewGlobalDurationSetting( "history.versionMembershipCacheTTL", 1*time.Second, diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index c4d806c277f..85e58aaacbb 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -904,6 +904,14 @@ var ( "chasm_pure_task_errors", WithDescription("The number of errors during CHASM pure task execution."), ) + ChasmIncomingSignalWritten = NewCounterDef( + "chasm_incoming_signal_written", + WithDescription("The number of signal backlinks written to the CHASM IncomingSignals map."), + ) + ChasmIncomingSignalDuplicate = NewCounterDef( + "chasm_incoming_signal_duplicate", + WithDescription("The number of duplicate signal request IDs detected when writing to the CHASM IncomingSignals map. Non-zero values indicate unexpected signal redelivery."), + ) TaskScheduleToStartLatency = NewTimerDef("task_schedule_to_start_latency") TaskBatchCompleteCounter = NewCounterDef("task_batch_complete_counter") TaskReschedulerPendingTasks = NewDimensionlessHistogramDef("task_rescheduler_pending_tasks") @@ -1006,30 +1014,34 @@ var ( "persisted_mutable_state_size", WithDescription("Size of the persisted Workflow Execution's state in DB, emitted each time a workflow execution is updated."), ) - ExecutionInfoSize = NewBytesHistogramDef("execution_info_size") - ExecutionStateSize = NewBytesHistogramDef("execution_state_size") - ActivityInfoSize = NewBytesHistogramDef("activity_info_size") - TimerInfoSize = NewBytesHistogramDef("timer_info_size") - ChildInfoSize = NewBytesHistogramDef("child_info_size") - RequestCancelInfoSize = NewBytesHistogramDef("request_cancel_info_size") - SignalInfoSize = NewBytesHistogramDef("signal_info_size") - SignalRequestIDSize = NewBytesHistogramDef("signal_request_id_size") - BufferedEventsSize = 
NewBytesHistogramDef("buffered_events_size") - ChasmTotalSize = NewBytesHistogramDef("chasm_total_size") - ActivityInfoCount = NewDimensionlessHistogramDef("activity_info_count") - TimerInfoCount = NewDimensionlessHistogramDef("timer_info_count") - ChildInfoCount = NewDimensionlessHistogramDef("child_info_count") - SignalInfoCount = NewDimensionlessHistogramDef("signal_info_count") - RequestCancelInfoCount = NewDimensionlessHistogramDef("request_cancel_info_count") - SignalRequestIDCount = NewDimensionlessHistogramDef("signal_request_id_count") - BufferedEventsCount = NewDimensionlessHistogramDef("buffered_events_count") - TaskCount = NewDimensionlessHistogramDef("task_count") - TotalActivityCount = NewDimensionlessHistogramDef("total_activity_count") - TotalUserTimerCount = NewDimensionlessHistogramDef("total_user_timer_count") - TotalChildExecutionCount = NewDimensionlessHistogramDef("total_child_execution_count") - TotalRequestCancelExternalCount = NewDimensionlessHistogramDef("total_request_cancel_external_count") - TotalSignalExternalCount = NewDimensionlessHistogramDef("total_signal_external_count") - TotalSignalCount = NewDimensionlessHistogramDef("total_signal_count") + ExecutionInfoSize = NewBytesHistogramDef("execution_info_size") + ExecutionStateSize = NewBytesHistogramDef("execution_state_size") + ActivityInfoSize = NewBytesHistogramDef("activity_info_size") + TimerInfoSize = NewBytesHistogramDef("timer_info_size") + ChildInfoSize = NewBytesHistogramDef("child_info_size") + RequestCancelInfoSize = NewBytesHistogramDef("request_cancel_info_size") + SignalInfoSize = NewBytesHistogramDef("signal_info_size") + SignalRequestIDSize = NewBytesHistogramDef("signal_request_id_size") + BufferedEventsSize = NewBytesHistogramDef("buffered_events_size") + ChasmTotalSize = NewBytesHistogramDef("chasm_total_size") + ActivityInfoCount = NewDimensionlessHistogramDef("activity_info_count") + TimerInfoCount = NewDimensionlessHistogramDef("timer_info_count") + 
ChildInfoCount = NewDimensionlessHistogramDef("child_info_count") + SignalInfoCount = NewDimensionlessHistogramDef("signal_info_count") + RequestCancelInfoCount = NewDimensionlessHistogramDef("request_cancel_info_count") + SignalRequestIDCount = NewDimensionlessHistogramDef("signal_request_id_count") + BufferedEventsCount = NewDimensionlessHistogramDef("buffered_events_count") + TaskCount = NewDimensionlessHistogramDef("task_count") + TotalActivityCount = NewDimensionlessHistogramDef("total_activity_count") + TotalUserTimerCount = NewDimensionlessHistogramDef("total_user_timer_count") + TotalChildExecutionCount = NewDimensionlessHistogramDef("total_child_execution_count") + TotalRequestCancelExternalCount = NewDimensionlessHistogramDef("total_request_cancel_external_count") + TotalSignalExternalCount = NewDimensionlessHistogramDef("total_signal_external_count") + TotalSignalCount = NewDimensionlessHistogramDef("total_signal_count") + DescribeWorkflowSignalBacklinksCount = NewCounterDef( + "describe_workflow_signal_backlinks", + WithDescription("The number of signal backlinks resolved from the CHASM IncomingSignals map in DescribeWorkflow responses."), + ) WorkflowBackoffCount = NewCounterDef("workflow_backoff_timer") WorkflowRetryBackoffTimerCount = NewCounterDef("workflow_retry_backoff_timer") WorkflowCronBackoffTimerCount = NewCounterDef("workflow_cron_backoff_timer") diff --git a/proto/internal/temporal/server/api/historyservice/v1/request_response.proto b/proto/internal/temporal/server/api/historyservice/v1/request_response.proto index 6ec02b539bc..7269a7ca33b 100644 --- a/proto/internal/temporal/server/api/historyservice/v1/request_response.proto +++ b/proto/internal/temporal/server/api/historyservice/v1/request_response.proto @@ -496,7 +496,9 @@ message SignalWorkflowExecutionRequest { bool child_workflow_only = 4; } -message SignalWorkflowExecutionResponse {} +message SignalWorkflowExecutionResponse { + temporal.api.common.v1.Link link = 1; +} message 
SignalWithStartWorkflowExecutionRequest { option (routing).workflow_id = "signal_with_start_request.workflow_id"; @@ -510,6 +512,7 @@ message SignalWithStartWorkflowExecutionRequest { message SignalWithStartWorkflowExecutionResponse { string run_id = 1; bool started = 2; + temporal.api.common.v1.Link signal_link = 3; } message RemoveSignalMutableStateRequest { diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 47af481aa2a..d6a3d5f7f38 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -2320,7 +2320,7 @@ func (wh *WorkflowHandler) SignalWorkflowExecution(ctx context.Context, request return nil, err } - _, err = wh.historyClient.SignalWorkflowExecution(ctx, &historyservice.SignalWorkflowExecutionRequest{ + resp, err := wh.historyClient.SignalWorkflowExecution(ctx, &historyservice.SignalWorkflowExecutionRequest{ NamespaceId: namespaceID.String(), SignalRequest: request, }) @@ -2328,7 +2328,9 @@ func (wh *WorkflowHandler) SignalWorkflowExecution(ctx context.Context, request return nil, err } - return &workflowservice.SignalWorkflowExecutionResponse{}, nil + return &workflowservice.SignalWorkflowExecutionResponse{ + Link: resp.GetLink(), + }, nil } // SignalWithStartWorkflowExecution is used to ensure sending signal to a workflow. 
@@ -2446,8 +2448,9 @@ func (wh *WorkflowHandler) SignalWithStartWorkflowExecution(ctx context.Context, } return &workflowservice.SignalWithStartWorkflowExecutionResponse{ - RunId: resp.GetRunId(), - Started: resp.Started, + RunId: resp.GetRunId(), + Started: resp.Started, + SignalLink: resp.GetSignalLink(), }, nil } diff --git a/service/history/api/create_workflow_util.go b/service/history/api/create_workflow_util.go index 8f783cab75b..15f0f55258c 100644 --- a/service/history/api/create_workflow_util.go +++ b/service/history/api/create_workflow_util.go @@ -81,6 +81,7 @@ func NewWorkflowWithSignal( signalWithStartRequest.GetSignalInput(), signalWithStartRequest.GetIdentity(), signalWithStartRequest.GetHeader(), + signalWithStartRequest.GetRequestId(), signalWithStartRequest.GetLinks(), ); err != nil { return nil, err diff --git a/service/history/api/describeworkflow/api.go b/service/history/api/describeworkflow/api.go index e3255c2dc09..ca52e2ab638 100644 --- a/service/history/api/describeworkflow/api.go +++ b/service/history/api/describeworkflow/api.go @@ -6,6 +6,7 @@ import ( "fmt" "strconv" + "github.com/google/uuid" "github.com/sony/gobreaker" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" @@ -15,14 +16,17 @@ import ( enumsspb "go.temporal.io/server/api/enums/v1" "go.temporal.io/server/api/historyservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" + "go.temporal.io/server/chasm" chasmcallback "go.temporal.io/server/chasm/lib/callback" callbackspb "go.temporal.io/server/chasm/lib/callback/gen/callbackpb/v1" "go.temporal.io/server/chasm/lib/nexusoperation" + chasmworkflow "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/locks" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" + "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/namespace" 
"go.temporal.io/server/common/persistence/visibility/manager" "go.temporal.io/server/components/callbacks" @@ -258,10 +262,27 @@ func Invoke( // Check for CHASM callbacks (regardless of feature flag setting) // Only process CHASM callbacks if we have an actual chasm.Node (not a noopChasmTree) if mutableState.ChasmEnabled() { + wf, chasmCtx, err := mutableState.ChasmWorkflowComponentReadOnly(ctx) + if err != nil { + // Generate a requestID to tag onto errors for ease of debugging. + requestID := uuid.NewString() + shard.GetLogger().Error( + "failed to get workflow component from CHASM tree", + tag.WorkflowNamespaceID(namespaceID.String()), + tag.WorkflowID(executionInfo.WorkflowId), + tag.WorkflowRunID(executionState.RunId), + tag.Error(err), + tag.RequestID(requestID), + ) + return nil, serviceerror.NewInternal( + fmt.Sprintf("failed to construct describe response for requestID: %s", requestID), + ) + } chasmCallbackInfos, err := buildCallbackInfosFromChasm( ctx, namespaceID, - mutableState, + wf, + chasmCtx, executionInfo, executionState, outboundQueueCBPool, @@ -271,6 +292,26 @@ func Invoke( return nil, err } result.Callbacks = append(result.Callbacks, chasmCallbackInfos...) 
+ + if wf.IncomingSignals != nil { + for requestID, incomingSignalDataField := range wf.IncomingSignals { + incomingSignalData := incomingSignalDataField.Get(chasmCtx) + buffered := incomingSignalData.EventId == common.BufferedEventID + info := &workflowpb.RequestIdInfo{ + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + Buffered: buffered, + } + if !buffered { + info.EventId = incomingSignalData.EventId + } + result.WorkflowExtendedInfo.RequestIdInfos[requestID] = info + } + if n := len(wf.IncomingSignals); n > 0 { + metrics.DescribeWorkflowSignalBacklinksCount.With( + shard.GetMetricsHandler().WithTags(metrics.NamespaceTag(namespaceName)), + ).Record(int64(n)) + } + } } // Check for HSM callbacks @@ -453,28 +494,18 @@ func buildCallbackInfosFromHSM( return result, nil } -// buildCallbackInfosFromChasm reads callbacks from the CHASM tree and converts them to API format. +// buildCallbackInfosFromChasm reads callbacks from the CHASM workflow component and converts them to API format. +// TODO(long-nt-tran): move this to chasm/lib/workflow/workflow.go to be within the CHASM workflow context. 
func buildCallbackInfosFromChasm( ctx context.Context, namespaceID namespace.ID, - mutableState historyi.MutableState, + wf *chasmworkflow.Workflow, + chasmCtx chasm.Context, executionInfo *persistencespb.WorkflowExecutionInfo, executionState *persistencespb.WorkflowExecutionState, outboundQueueCBPool *circuitbreakerpool.OutboundQueueCircuitBreakerPool, logger log.Logger, ) ([]*workflowpb.CallbackInfo, error) { - wf, chasmCtx, err := mutableState.ChasmWorkflowComponentReadOnly(ctx) - if err != nil { - logger.Error( - "failed to get workflow component from CHASM tree", - tag.WorkflowNamespaceID(namespaceID.String()), - tag.WorkflowID(executionInfo.WorkflowId), - tag.WorkflowRunID(executionState.RunId), - tag.Error(err), - ) - return nil, serviceerror.NewInternal("failed to construct describe response") - } - result := make([]*workflowpb.CallbackInfo, 0, len(wf.Callbacks)) for _, field := range wf.Callbacks { callback := field.Get(chasmCtx) diff --git a/service/history/api/link_util.go b/service/history/api/link_util.go new file mode 100644 index 00000000000..1c28ae0039f --- /dev/null +++ b/service/history/api/link_util.go @@ -0,0 +1,50 @@ +package api + +import ( + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + "go.temporal.io/server/common" +) + +// GenerateStartedEventRefLink builds a Link pointing to the WORKFLOW_EXECUTION_STARTED event. +// Use this for backlinks to workflow start: the started event is always EventId=1 (FirstEventID) +// and is never buffered, so a concrete EventReference is appropriate. 
+func GenerateStartedEventRefLink(namespace, workflowID, runID string) *commonpb.Link { + return &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: namespace, + WorkflowId: workflowID, + RunId: runID, + Reference: &commonpb.Link_WorkflowEvent_EventRef{ + EventRef: &commonpb.Link_WorkflowEvent_EventReference{ + EventId: common.FirstEventID, + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, + }, + }, + }, + }, + } +} + +// GenerateRequestIDRefLink builds a Link with a RequestIdReference. +// Use this for events that are buffered at signal time (e.g. SIGNALED), where the +// concrete EventId is not yet known. The server resolves the RequestId to a real +// EventId once the buffer flushes. +func GenerateRequestIDRefLink(namespace, workflowID, runID, requestID string, eventType enumspb.EventType) *commonpb.Link { + return &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: namespace, + WorkflowId: workflowID, + RunId: runID, + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: requestID, + EventType: eventType, + }, + }, + }, + }, + } +} diff --git a/service/history/api/multioperation/api.go b/service/history/api/multioperation/api.go index 332945845a1..91150502b62 100644 --- a/service/history/api/multioperation/api.go +++ b/service/history/api/multioperation/api.go @@ -10,7 +10,6 @@ import ( "go.temporal.io/api/serviceerror" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/api/matchingservice/v1" - "go.temporal.io/server/common" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/locks" "go.temporal.io/server/common/namespace" @@ -336,20 +335,7 @@ func (uws *updateWithStart) updateWorkflow( RunId: currentWorkflowLease.GetContext().GetWorkflowKey().RunID, Started: false, // set explicitly for emphasis Status: 
enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, - Link: &commonpb.Link{ - Variant: &commonpb.Link_WorkflowEvent_{ - WorkflowEvent: &commonpb.Link_WorkflowEvent{ - WorkflowId: wfKey.WorkflowID, - RunId: wfKey.RunID, - Reference: &commonpb.Link_WorkflowEvent_EventRef{ - EventRef: &commonpb.Link_WorkflowEvent_EventReference{ - EventId: common.FirstEventID, - EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, - }, - }, - }, - }, - }, + Link: api.GenerateStartedEventRefLink(uws.startReq.StartRequest.GetNamespace(), wfKey.WorkflowID, wfKey.RunID), } return makeResponse(startResp, updateResp), nil diff --git a/service/history/api/removesignalmutablestate/api.go b/service/history/api/removesignalmutablestate/api.go index 9ff4691549f..d53c43204c8 100644 --- a/service/history/api/removesignalmutablestate/api.go +++ b/service/history/api/removesignalmutablestate/api.go @@ -37,8 +37,11 @@ func Invoke( } mutableState.DeleteSignalRequested(req.GetRequestId()) + // This is a no-op if signals are stored in CHASM, since we'll ignore the deletion request, + // see DeleteSignalRequested implementation. + // TODO(long-nt-tran): Clean up code once ChasmSignalBacklinksEnabled is fully rolled out. 
return &api.UpdateWorkflowAction{ - Noop: false, + Noop: mutableState.ChasmSignalBacklinksEnabled(), CreateWorkflowTask: false, }, nil }, diff --git a/service/history/api/signalwithstartworkflow/api.go b/service/history/api/signalwithstartworkflow/api.go index 8fc6880f797..6ccc4e50aed 100644 --- a/service/history/api/signalwithstartworkflow/api.go +++ b/service/history/api/signalwithstartworkflow/api.go @@ -92,8 +92,16 @@ func Invoke( api.ReactivateVersionWorkflowIfPinned(ctx, namespaceEntry, request.GetVersioningOverride(), reactivationSignaler, shard.GetConfig().EnableVersionReactivationSignals(), shouldSkipReactivation, revisionNumber) } + swr := signalWithStartRequest.SignalWithStartRequest return &historyservice.SignalWithStartWorkflowExecutionResponse{ RunId: runID, Started: started, + SignalLink: api.GenerateRequestIDRefLink( + swr.GetNamespace(), + swr.GetWorkflowId(), + runID, + swr.GetRequestId(), + enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + ), }, nil } diff --git a/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go b/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go index 0510635ae58..c6a27ea92a3 100644 --- a/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go +++ b/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go @@ -307,6 +307,7 @@ func signalWorkflow( request.GetSignalInput(), request.GetIdentity(), request.GetHeader(), + request.GetRequestId(), request.GetLinks(), ); err != nil { return err diff --git a/service/history/api/signalwithstartworkflow/signal_with_start_workflow_test.go b/service/history/api/signalwithstartworkflow/signal_with_start_workflow_test.go index 3beeb2e7d35..91b89a497f6 100644 --- a/service/history/api/signalwithstartworkflow/signal_with_start_workflow_test.go +++ b/service/history/api/signalwithstartworkflow/signal_with_start_workflow_test.go @@ -140,6 +140,7 @@ func (s *signalWithStartWorkflowSuite) 
TestSignalWorkflow_NewWorkflowTask() { request.GetSignalInput(), request.GetIdentity(), request.GetHeader(), + request.GetRequestId(), request.GetLinks(), ).Return(&historypb.HistoryEvent{}, nil) s.currentMutableState.EXPECT().HasPendingWorkflowTask().Return(false) @@ -174,6 +175,7 @@ func (s *signalWithStartWorkflowSuite) TestSignalWorkflow_NoNewWorkflowTask() { request.GetSignalInput(), request.GetIdentity(), request.GetHeader(), + request.GetRequestId(), request.GetLinks(), ).Return(&historypb.HistoryEvent{}, nil) s.currentMutableState.EXPECT().HasPendingWorkflowTask().Return(true) @@ -207,6 +209,7 @@ func (s *signalWithStartWorkflowSuite) TestSignalWorkflow_WhenPaused() { request.GetSignalInput(), request.GetIdentity(), request.GetHeader(), + request.GetRequestId(), request.GetLinks(), ).Return(&historypb.HistoryEvent{}, nil) s.currentMutableState.EXPECT().HasPendingWorkflowTask().Return(false) diff --git a/service/history/api/signalworkflow/api.go b/service/history/api/signalworkflow/api.go index f9e112c076d..a7cd4487b83 100644 --- a/service/history/api/signalworkflow/api.go +++ b/service/history/api/signalworkflow/api.go @@ -3,6 +3,7 @@ package signalworkflow import ( "context" + enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/namespace" @@ -27,6 +28,9 @@ func Invoke( externalWorkflowExecution := req.ExternalWorkflowExecution childWorkflowOnly := req.GetChildWorkflowOnly() + // Capture the currently-running workflow's runID from mutableState via our closure, + // in case the caller didn't pin the signal to a specific runID. 
+ var runID string err = api.GetAndUpdateWorkflowWithNew( ctx, nil, @@ -37,6 +41,7 @@ func Invoke( ), func(workflowLease api.WorkflowLease) (*api.UpdateWorkflowAction, error) { mutableState := workflowLease.GetMutableState() + runID = mutableState.GetExecutionState().GetRunId() if request.GetRequestId() != "" && mutableState.IsSignalRequested(request.GetRequestId()) { return &api.UpdateWorkflowAction{ Noop: true, @@ -88,6 +93,7 @@ func Invoke( request.GetIdentity(), request.GetHeader(), externalWorkflowExecution, + request.GetRequestId(), request.GetLinks(), ) if err != nil { @@ -106,5 +112,13 @@ func Invoke( if err != nil { return nil, err } - return &historyservice.SignalWorkflowExecutionResponse{}, nil + return &historyservice.SignalWorkflowExecutionResponse{ + Link: api.GenerateRequestIDRefLink( + request.GetNamespace(), + request.GetWorkflowExecution().GetWorkflowId(), + runID, + request.GetRequestId(), + enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + ), + }, nil } diff --git a/service/history/api/startworkflow/api.go b/service/history/api/startworkflow/api.go index f5f2d25d004..5127311a50e 100644 --- a/service/history/api/startworkflow/api.go +++ b/service/history/api/startworkflow/api.go @@ -13,7 +13,6 @@ import ( "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/api/matchingservice/v1" "go.temporal.io/server/chasm" - "go.temporal.io/server/common" "go.temporal.io/server/common/definition" "go.temporal.io/server/common/locks" "go.temporal.io/server/common/metrics" @@ -816,39 +815,21 @@ func (s *Starter) generateResponse( } func (s *Starter) generateStartedEventRefLink(runID string) *commonpb.Link { - return &commonpb.Link{ - Variant: &commonpb.Link_WorkflowEvent_{ - WorkflowEvent: &commonpb.Link_WorkflowEvent{ - Namespace: s.namespace.Name().String(), - WorkflowId: s.request.StartRequest.WorkflowId, - RunId: runID, - Reference: &commonpb.Link_WorkflowEvent_EventRef{ - EventRef: &commonpb.Link_WorkflowEvent_EventReference{ - EventId: 
common.FirstEventID, - EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, - }, - }, - }, - }, - } + return api.GenerateStartedEventRefLink( + s.namespace.Name().String(), + s.request.StartRequest.WorkflowId, + runID, + ) } func (s *Starter) generateRequestIdRefLink(runID string) *commonpb.Link { - return &commonpb.Link{ - Variant: &commonpb.Link_WorkflowEvent_{ - WorkflowEvent: &commonpb.Link_WorkflowEvent{ - Namespace: s.namespace.Name().String(), - WorkflowId: s.request.StartRequest.WorkflowId, - RunId: runID, - Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ - RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ - RequestId: s.request.StartRequest.RequestId, - EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, - }, - }, - }, - }, - } + return api.GenerateRequestIDRefLink( + s.namespace.Name().String(), + s.request.StartRequest.WorkflowId, + runID, + s.request.StartRequest.RequestId, + enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, + ) } func (s StartOutcome) String() string { diff --git a/service/history/configs/config.go b/service/history/configs/config.go index d932f4feee5..57cc6ae1e1d 100644 --- a/service/history/configs/config.go +++ b/service/history/configs/config.go @@ -72,6 +72,7 @@ type Config struct { MaxCallbacksPerExecution dynamicconfig.IntPropertyFnWithNamespaceFilter EnableChasm dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter + EnableCHASMSignalBacklinks dynamicconfig.BoolPropertyFnWithNamespaceFilter ChasmMaxInMemoryPureTasks dynamicconfig.IntPropertyFn EnableCHASMSchedulerCreation dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMSchedulerMigration dynamicconfig.BoolPropertyFnWithNamespaceFilter @@ -500,8 +501,9 @@ func NewConfig( EnableCHASMSchedulerCreation: dynamicconfig.EnableCHASMSchedulerCreation.Get(dc), EnableCHASMSchedulerMigration: dynamicconfig.EnableCHASMSchedulerMigration.Get(dc), - EnableCHASMCallbacks: 
dynamicconfig.EnableCHASMCallbacks.Get(dc), - ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), + EnableCHASMCallbacks: dynamicconfig.EnableCHASMCallbacks.Get(dc), + EnableCHASMSignalBacklinks: dynamicconfig.EnableCHASMSignalBacklinks.Get(dc), + ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), EventsShardLevelCacheMaxSizeBytes: dynamicconfig.EventsCacheMaxSizeBytes.Get(dc), // 512KB EventsHostLevelCacheMaxSizeBytes: dynamicconfig.EventsHostLevelCacheMaxSizeBytes.Get(dc), // 256MB diff --git a/service/history/history_engine_test.go b/service/history/history_engine_test.go index 7cf162b2982..a201a6628a5 100644 --- a/service/history/history_engine_test.go +++ b/service/history/history_engine_test.go @@ -34,6 +34,7 @@ import ( tokenspb "go.temporal.io/server/api/token/v1" workflowspb "go.temporal.io/server/api/workflow/v1" "go.temporal.io/server/chasm" + chasmworkflow "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" "go.temporal.io/server/common/clock" "go.temporal.io/server/common/cluster" @@ -5005,97 +5006,173 @@ func (s *engineSuite) TestSignalWorkflowExecution() { // Test signal workflow task by adding request ID func (s *engineSuite) TestSignalWorkflowExecution_DuplicateRequest() { - we := commonpb.WorkflowExecution{ - WorkflowId: "wId2", - RunId: tests.RunID, - } - signalRequest := &historyservice.SignalWorkflowExecutionRequest{ - SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ - WorkflowExecution: &we, - }, - } - _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) + // Verify error when namespace is missing (independent of CHASM flag). 
+ _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), &historyservice.SignalWorkflowExecutionRequest{ + SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: "wId2", + RunId: tests.RunID, + }}, + }) s.EqualError(err, "Missing namespace UUID.") - taskqueue := "testTaskQueue" - identity := "testIdentity" - signalName := "my signal name 2" - input := payloads.EncodeString("test input 2") - requestID := uuid.NewString() - signalRequest = &historyservice.SignalWorkflowExecutionRequest{ - NamespaceId: tests.NamespaceID.String(), - SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ - Namespace: tests.NamespaceID.String(), - WorkflowExecution: &we, - Identity: identity, - SignalName: signalName, - Input: input, - RequestId: requestID, - }, - } + for _, tc := range []struct { + name string + chasmEnabled bool + }{ + {name: "Legacy", chasmEnabled: false}, + {name: "Chasm", chasmEnabled: true}, + } { + tc := tc + s.Run(tc.name, func() { + // Use a unique RunId per sub-test to avoid workflow cache collisions + // between the Legacy and Chasm sub-tests. 
+ we := commonpb.WorkflowExecution{ + WorkflowId: "wId2", + RunId: uuid.NewString(), + } - ms := workflow.TestLocalMutableState(s.historyEngine.shardContext, s.eventsCache, - tests.LocalNamespaceEntry, we.GetWorkflowId(), we.GetRunId(), log.NewTestLogger()) - addWorkflowExecutionStartedEvent(ms, &we, "wType", taskqueue, payloads.EncodeString("input"), 100*time.Second, 50*time.Second, 200*time.Second, identity) - addWorkflowTaskScheduledEvent(ms) - wfMs := workflow.TestCloneToProto(context.Background(), ms) - // assume duplicate request id - wfMs.SignalRequestedIds = []string{requestID} - wfMs.ExecutionInfo.NamespaceId = tests.NamespaceID.String() - gwmsResponse := &persistence.GetWorkflowExecutionResponse{State: wfMs} + if tc.chasmEnabled { + s.config.EnableChasm = dynamicconfig.GetBoolPropertyFnFilteredByNamespace(true) + s.config.EnableCHASMSignalBacklinks = dynamicconfig.GetBoolPropertyFnFilteredByNamespace(true) + reg := s.mockShard.ChasmRegistry() + s.NoError(reg.Register(&chasm.CoreLibrary{})) + s.NoError(reg.Register(chasmworkflow.NewLibrary(chasmworkflow.NewRegistry()))) + } - s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()).Return(gwmsResponse, nil) + requestID := uuid.NewString() + signalRequest := &historyservice.SignalWorkflowExecutionRequest{ + NamespaceId: tests.NamespaceID.String(), + SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: tests.NamespaceID.String(), + WorkflowExecution: &we, + Identity: "testIdentity", + SignalName: "my signal name 2", + Input: payloads.EncodeString("test input 2"), + RequestId: requestID, + }, + } - _, err = s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) - s.Nil(err) + ms := workflow.TestLocalMutableState(s.historyEngine.shardContext, s.eventsCache, + tests.LocalNamespaceEntry, we.GetWorkflowId(), we.GetRunId(), log.NewTestLogger()) + addWorkflowExecutionStartedEvent(ms, &we, "wType", "testTaskQueue", payloads.EncodeString("input"), 
100*time.Second, 50*time.Second, 200*time.Second, "testIdentity") + addWorkflowTaskScheduledEvent(ms) + + if tc.chasmEnabled { + // CHASM path: populate the CHASM IncomingSignals map with the requestID so that + // IsSignalRequested returns true for this ID when the DB record is loaded. + s.NoError(ms.ApplyWorkflowExecutionSignaled(&historypb.HistoryEvent{ + EventId: common.BufferedEventID, + EventTime: timestamppb.New(time.Now()), + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + Attributes: &historypb.HistoryEvent_WorkflowExecutionSignaledEventAttributes{ + WorkflowExecutionSignaledEventAttributes: &historypb.WorkflowExecutionSignaledEventAttributes{ + SignalName: "my signal name 2", + RequestId: requestID, + }, + }, + })) + } + + wfMs := workflow.TestCloneToProto(context.Background(), ms) + if !tc.chasmEnabled { + // Legacy path: dedup via the SignalRequestedIds set field. + wfMs.SignalRequestedIds = []string{requestID} + } + wfMs.ExecutionInfo.NamespaceId = tests.NamespaceID.String() + gwmsResponse := &persistence.GetWorkflowExecutionResponse{State: wfMs} + + s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()).Return(gwmsResponse, nil) + + _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) + s.NoError(err) + }) + } } // Test signal workflow task by dedup request ID & workflow finished func (s *engineSuite) TestSignalWorkflowExecution_DuplicateRequest_Completed() { - we := commonpb.WorkflowExecution{ - WorkflowId: "wId2", - RunId: tests.RunID, - } - signalRequest := &historyservice.SignalWorkflowExecutionRequest{ - SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ - WorkflowExecution: &we, - }, - } - _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) + // Verify error when namespace is missing (independent of CHASM flag). 
+ _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), &historyservice.SignalWorkflowExecutionRequest{ + SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: "wId2", + RunId: tests.RunID, + }}, + }) s.EqualError(err, "Missing namespace UUID.") - taskqueue := "testTaskQueue" - identity := "testIdentity" - signalName := "my signal name 2" - input := payloads.EncodeString("test input 2") - requestID := uuid.NewString() - signalRequest = &historyservice.SignalWorkflowExecutionRequest{ - NamespaceId: tests.NamespaceID.String(), - SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ - Namespace: tests.NamespaceID.String(), - WorkflowExecution: &we, - Identity: identity, - SignalName: signalName, - Input: input, - RequestId: requestID, - }, - } + for _, tc := range []struct { + name string + chasmEnabled bool + }{ + {name: "Legacy", chasmEnabled: false}, + {name: "Chasm", chasmEnabled: true}, + } { + tc := tc + s.Run(tc.name, func() { + // Use a unique RunId per sub-test to avoid workflow cache collisions + // between the Legacy and Chasm sub-tests. 
+ we := commonpb.WorkflowExecution{ + WorkflowId: "wId2", + RunId: uuid.NewString(), + } - ms := workflow.TestLocalMutableState(s.historyEngine.shardContext, s.eventsCache, - tests.LocalNamespaceEntry, we.GetWorkflowId(), we.GetRunId(), log.NewTestLogger()) - addWorkflowExecutionStartedEvent(ms, &we, "wType", taskqueue, payloads.EncodeString("input"), 100*time.Second, 50*time.Second, 200*time.Second, identity) - addWorkflowTaskScheduledEvent(ms) - wfMs := workflow.TestCloneToProto(context.Background(), ms) - // assume duplicate request id - wfMs.SignalRequestedIds = []string{requestID} - wfMs.ExecutionInfo.NamespaceId = tests.NamespaceID.String() - wfMs.ExecutionState.State = enumsspb.WORKFLOW_EXECUTION_STATE_COMPLETED - gwmsResponse := &persistence.GetWorkflowExecutionResponse{State: wfMs} + if tc.chasmEnabled { + s.config.EnableChasm = dynamicconfig.GetBoolPropertyFnFilteredByNamespace(true) + s.config.EnableCHASMSignalBacklinks = dynamicconfig.GetBoolPropertyFnFilteredByNamespace(true) + reg := s.mockShard.ChasmRegistry() + s.NoError(reg.Register(&chasm.CoreLibrary{})) + s.NoError(reg.Register(chasmworkflow.NewLibrary(chasmworkflow.NewRegistry()))) + } - s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()).Return(gwmsResponse, nil) + requestID := uuid.NewString() + signalRequest := &historyservice.SignalWorkflowExecutionRequest{ + NamespaceId: tests.NamespaceID.String(), + SignalRequest: &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: tests.NamespaceID.String(), + WorkflowExecution: &we, + Identity: "testIdentity", + SignalName: "my signal name 2", + Input: payloads.EncodeString("test input 2"), + RequestId: requestID, + }, + } - _, err = s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) - s.Nil(err) + ms := workflow.TestLocalMutableState(s.historyEngine.shardContext, s.eventsCache, + tests.LocalNamespaceEntry, we.GetWorkflowId(), we.GetRunId(), log.NewTestLogger()) + 
addWorkflowExecutionStartedEvent(ms, &we, "wType", "testTaskQueue", payloads.EncodeString("input"), 100*time.Second, 50*time.Second, 200*time.Second, "testIdentity") + addWorkflowTaskScheduledEvent(ms) + + if tc.chasmEnabled { + // CHASM path: populate the CHASM IncomingSignals map with the requestID so that + // IsSignalRequested returns true for this ID when the DB record is loaded. + s.NoError(ms.ApplyWorkflowExecutionSignaled(&historypb.HistoryEvent{ + EventId: common.BufferedEventID, + EventTime: timestamppb.New(time.Now()), + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + Attributes: &historypb.HistoryEvent_WorkflowExecutionSignaledEventAttributes{ + WorkflowExecutionSignaledEventAttributes: &historypb.WorkflowExecutionSignaledEventAttributes{ + SignalName: "my signal name 2", + RequestId: requestID, + }, + }, + })) + } + + wfMs := workflow.TestCloneToProto(context.Background(), ms) + if !tc.chasmEnabled { + // Legacy path: dedup via the SignalRequestedIds set field. 
+ wfMs.SignalRequestedIds = []string{requestID} + } + wfMs.ExecutionInfo.NamespaceId = tests.NamespaceID.String() + wfMs.ExecutionState.State = enumsspb.WORKFLOW_EXECUTION_STATE_COMPLETED + gwmsResponse := &persistence.GetWorkflowExecutionResponse{State: wfMs} + + s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()).Return(gwmsResponse, nil) + + _, err := s.historyEngine.SignalWorkflowExecution(context.Background(), signalRequest) + s.NoError(err) + }) + } } func (s *engineSuite) TestSignalWorkflowExecution_Failed() { diff --git a/service/history/historybuilder/event_factory.go b/service/history/historybuilder/event_factory.go index c6368ad9542..0d2cdb04695 100644 --- a/service/history/historybuilder/event_factory.go +++ b/service/history/historybuilder/event_factory.go @@ -819,6 +819,7 @@ func (b *EventFactory) CreateWorkflowExecutionSignaledEvent( identity string, header *commonpb.Header, externalWorkflowExecution *commonpb.WorkflowExecution, + requestID string, links []*commonpb.Link, ) *historypb.HistoryEvent { event := b.createHistoryEvent(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, b.timeSource.Now()) @@ -829,6 +830,7 @@ func (b *EventFactory) CreateWorkflowExecutionSignaledEvent( Identity: identity, Header: header, ExternalWorkflowExecution: externalWorkflowExecution, + RequestId: requestID, }, } event.Links = links diff --git a/service/history/historybuilder/event_store.go b/service/history/historybuilder/event_store.go index 3bdec9807a2..8e00db4643e 100644 --- a/service/history/historybuilder/event_store.go +++ b/service/history/historybuilder/event_store.go @@ -405,6 +405,12 @@ func (b *EventStore) wireEventIDs( if attributes.GetAttachedRequestId() != "" { b.requestIDToEventID[attributes.AttachedRequestId] = event.GetEventId() } + + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED: + attributes := event.GetWorkflowExecutionSignaledEventAttributes() + if attributes.GetRequestId() != "" { + 
b.requestIDToEventID[attributes.RequestId] = event.GetEventId() + } } } } diff --git a/service/history/historybuilder/history_builder.go b/service/history/historybuilder/history_builder.go index 6da2fc185e7..35416a48396 100644 --- a/service/history/historybuilder/history_builder.go +++ b/service/history/historybuilder/history_builder.go @@ -752,6 +752,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionSignaledEvent( identity string, header *commonpb.Header, externalWorkflowExecution *commonpb.WorkflowExecution, + requestID string, links []*commonpb.Link, ) *historypb.HistoryEvent { event := b.EventFactory.CreateWorkflowExecutionSignaledEvent( @@ -760,6 +761,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionSignaledEvent( identity, header, externalWorkflowExecution, + requestID, links, ) event, _ = b.EventStore.add(event) diff --git a/service/history/historybuilder/history_builder_categorization_test.go b/service/history/historybuilder/history_builder_categorization_test.go index 97e825ed7fe..14a9029a1c0 100644 --- a/service/history/historybuilder/history_builder_categorization_test.go +++ b/service/history/historybuilder/history_builder_categorization_test.go @@ -223,6 +223,29 @@ func TestHistoryBuilder_FlushBufferToCurrentBatch(t *testing.T) { } }) + t.Run("signal requestID should be wired into requestIDToEventID map after flush", func(t *testing.T) { + nextEventID := int64(12) + hb := newHistoryBuilderFromConfig(builderConfig{nextEventId: nextEventID}) + // Signal events are buffered (go to memBufferBatch) + signalEvent := hb.AddWorkflowExecutionSignaledEvent("signal-name", nil, "identity-1", nil, nil, "signal-request-id", nil) + if signalEvent.EventId != common.BufferedEventID { + t.Fatalf("expected signal to be buffered, got event id %d", signalEvent.EventId) + } + + _, requestIDToEventID := hb.FlushBufferToCurrentBatch() + + if signalEvent.EventId != nextEventID { + t.Errorf("expected signal event id %d after flush, got %d", nextEventID, signalEvent.EventId) + 
} + eventID, ok := requestIDToEventID["signal-request-id"] + if !ok { + t.Fatal("signal requestID not found in requestIDToEventID map after flush") + } + if eventID != nextEventID { + t.Errorf("expected requestIDToEventID[signal-request-id] == %d, got %d", nextEventID, eventID) + } + }) + t.Run("when there is ACTIVITY_TASK_COMPLETED event will move it to the end", func(t *testing.T) { hb := newHistoryBuilderFromConfig(builderConfig{nextEventId: 12}) hb.AddActivityTaskCompletedEvent(14, 13, "activity-completed", nil, defaultNamespace) @@ -1473,6 +1496,7 @@ func (s *sutTestingAdapter) AddWorkflowExecutionSignaledEvent(_ ...eventConfig) "identity-1", nil, nil, + "", nil, ) } diff --git a/service/history/historybuilder/history_builder_test.go b/service/history/historybuilder/history_builder_test.go index f87cfdf0555..1e66efb6498 100644 --- a/service/history/historybuilder/history_builder_test.go +++ b/service/history/historybuilder/history_builder_test.go @@ -332,7 +332,7 @@ func (s *historyBuilderSuite) TestWorkflowExecutionCancelRequested() { func (s *historyBuilderSuite) TestWorkflowExecutionSignaled() { signalName := "random signal name" event := s.historyBuilder.AddWorkflowExecutionSignaledEvent( - signalName, testPayloads, testIdentity, testHeader, nil, nil, + signalName, testPayloads, testIdentity, testHeader, nil, "", nil, ) s.Equal(event, s.flush()) s.Equal(&historypb.HistoryEvent{ @@ -2372,6 +2372,7 @@ func (s *historyBuilderSuite) TestBufferSize_Memory() { "identity", &commonpb.Header{}, nil, + "", nil, ) s.Assert().Equal(1, s.historyBuilder.NumBufferedEvents()) diff --git a/service/history/interfaces/mutable_state.go b/service/history/interfaces/mutable_state.go index 277368a26ed..a0b85253cf7 100644 --- a/service/history/interfaces/mutable_state.go +++ b/service/history/interfaces/mutable_state.go @@ -103,6 +103,7 @@ type ( input *commonpb.Payloads, identity string, header *commonpb.Header, + requestID string, links []*commonpb.Link, ) 
(*historypb.HistoryEvent, error) AddWorkflowExecutionSignaledEvent( @@ -111,6 +112,7 @@ type ( identity string, header *commonpb.Header, externalWorkflowExecution *commonpb.WorkflowExecution, + requestID string, links []*commonpb.Link, ) (*historypb.HistoryEvent, error) AddWorkflowExecutionStartedEvent(*commonpb.WorkflowExecution, *historyservice.StartWorkflowExecutionRequest) (*historypb.HistoryEvent, error) @@ -345,6 +347,7 @@ type ( IsWorkflow() bool ChasmTree() ChasmTree ChasmEnabled() bool + ChasmSignalBacklinksEnabled() bool ChasmWorkflowComponent(ctx context.Context) (*chasmworkflow.Workflow, chasm.MutableContext, error) ChasmWorkflowComponentReadOnly(ctx context.Context) (*chasmworkflow.Workflow, chasm.Context, error) // Ensures that the chasm workflow component is installed in the mutable state CHASM tree. diff --git a/service/history/interfaces/mutable_state_mock.go b/service/history/interfaces/mutable_state_mock.go index 5fcba16e810..20378e67ac4 100644 --- a/service/history/interfaces/mutable_state_mock.go +++ b/service/history/interfaces/mutable_state_mock.go @@ -708,33 +708,33 @@ func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionPausedEvent(identity } // AddWorkflowExecutionSignaled mocks base method. 
-func (m *MockMutableState) AddWorkflowExecutionSignaled(signalName string, input *common.Payloads, identity string, header *common.Header, links []*common.Link) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionSignaled(signalName string, input *common.Payloads, identity string, header *common.Header, requestID string, links []*common.Link) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionSignaled", signalName, input, identity, header, links) + ret := m.ctrl.Call(m, "AddWorkflowExecutionSignaled", signalName, input, identity, header, requestID, links) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionSignaled indicates an expected call of AddWorkflowExecutionSignaled. -func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionSignaled(signalName, input, identity, header, links any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionSignaled(signalName, input, identity, header, requestID, links any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionSignaled", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionSignaled), signalName, input, identity, header, links) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionSignaled", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionSignaled), signalName, input, identity, header, requestID, links) } // AddWorkflowExecutionSignaledEvent mocks base method. 
-func (m *MockMutableState) AddWorkflowExecutionSignaledEvent(signalName string, input *common.Payloads, identity string, header *common.Header, externalWorkflowExecution *common.WorkflowExecution, links []*common.Link) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionSignaledEvent(signalName string, input *common.Payloads, identity string, header *common.Header, externalWorkflowExecution *common.WorkflowExecution, requestID string, links []*common.Link) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionSignaledEvent", signalName, input, identity, header, externalWorkflowExecution, links) + ret := m.ctrl.Call(m, "AddWorkflowExecutionSignaledEvent", signalName, input, identity, header, externalWorkflowExecution, requestID, links) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionSignaledEvent indicates an expected call of AddWorkflowExecutionSignaledEvent. -func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionSignaledEvent(signalName, input, identity, header, externalWorkflowExecution, links any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionSignaledEvent(signalName, input, identity, header, externalWorkflowExecution, requestID, links any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionSignaledEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionSignaledEvent), signalName, input, identity, header, externalWorkflowExecution, links) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionSignaledEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionSignaledEvent), signalName, input, identity, header, externalWorkflowExecution, requestID, links) } // AddWorkflowExecutionStartedEvent mocks base method. 
@@ -1722,6 +1722,20 @@ func (mr *MockMutableStateMockRecorder) ChasmEnabled() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ChasmEnabled", reflect.TypeOf((*MockMutableState)(nil).ChasmEnabled)) } +// ChasmSignalBacklinksEnabled mocks base method. +func (m *MockMutableState) ChasmSignalBacklinksEnabled() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ChasmSignalBacklinksEnabled") + ret0, _ := ret[0].(bool) + return ret0 +} + +// ChasmSignalBacklinksEnabled indicates an expected call of ChasmSignalBacklinksEnabled. +func (mr *MockMutableStateMockRecorder) ChasmSignalBacklinksEnabled() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ChasmSignalBacklinksEnabled", reflect.TypeOf((*MockMutableState)(nil).ChasmSignalBacklinksEnabled)) +} + // ChasmTree mocks base method. func (m *MockMutableState) ChasmTree() ChasmTree { m.ctrl.T.Helper() diff --git a/service/history/ndc/events_reapplier_test.go b/service/history/ndc/events_reapplier_test.go index ce3c4b88a43..2f5995dc160 100644 --- a/service/history/ndc/events_reapplier_test.go +++ b/service/history/ndc/events_reapplier_test.go @@ -217,6 +217,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_Signal() { attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + "", event.Links, ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() @@ -361,6 +362,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_PartialAppliedEvent() { attr1.GetInput(), attr1.GetIdentity(), attr1.GetHeader(), + "", event1.Links, ).Return(event1, nil) msCurrent.EXPECT().IsWorkflowPendingOnWorkflowTaskBackoff().Return(true) @@ -408,6 +410,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_Error() { attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + "", event.Links, ).Return(nil, fmt.Errorf("test")) dedupResource := definition.NewEventReappliedID(runID, event.GetEventId(), event.GetVersion()) @@ -507,6 +510,7 @@ 
func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_NoPendingWor attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + "", event.Links, ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() @@ -560,6 +564,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_PausedWorkflow_NoWorkflow attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + "", event.Links, ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() diff --git a/service/history/ndc/workflow_resetter.go b/service/history/ndc/workflow_resetter.go index d0e6bbf610a..26d7fb1c537 100644 --- a/service/history/ndc/workflow_resetter.go +++ b/service/history/ndc/workflow_resetter.go @@ -873,6 +873,7 @@ func reapplyEvents( attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + attr.GetRequestId(), event.Links, ); err != nil { return reappliedEvents, err diff --git a/service/history/ndc/workflow_resetter_test.go b/service/history/ndc/workflow_resetter_test.go index 21118ce1f45..4da5bcfc064 100644 --- a/service/history/ndc/workflow_resetter_test.go +++ b/service/history/ndc/workflow_resetter_test.go @@ -1045,6 +1045,7 @@ func (s *workflowResetterSuite) TestReapplyEvents() { Input: payloads.EncodeString("signal-input-1"), Identity: "signal-identity-1", Header: &commonpb.Header{Fields: map[string]*commonpb.Payload{"myheader": {Data: []byte("myheader")}}}, + RequestId: "signal-request-id-1", }}, } // This event is not reapplied @@ -1063,6 +1064,7 @@ func (s *workflowResetterSuite) TestReapplyEvents() { SignalName: "signal-name-2", Input: payloads.EncodeString("signal-input-2"), Identity: "signal-identity-2", + RequestId: "signal-request-id-2", }, }, } @@ -1213,6 +1215,7 @@ func (s *workflowResetterSuite) TestReapplyEvents() { attr.GetInput(), attr.GetIdentity(), attr.GetHeader(), + attr.GetRequestId(), event.Links, ).Return(&historypb.HistoryEvent{}, nil) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ADMITTED: @@ -1315,7 +1318,7 @@ func (s 
*workflowResetterSuite) TestReapplyEvents_Excludes() { ms := historyi.NewMockMutableState(s.controller) // Assert that none of these following methods are invoked. arg := gomock.Any() - ms.EXPECT().AddWorkflowExecutionSignaled(arg, arg, arg, arg, arg).Times(0) + ms.EXPECT().AddWorkflowExecutionSignaled(arg, arg, arg, arg, arg, arg).Times(0) ms.EXPECT().AddWorkflowExecutionUpdateAdmittedEvent(arg, arg).Times(0) ms.EXPECT().AddHistoryEvent(arg, arg).Times(0) diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index 4339f4d39f3..1e4d679feb1 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -684,6 +684,11 @@ func (ms *MutableStateImpl) chasmCallbacksEnabled() bool { return ms.shard.GetConfig().EnableCHASMCallbacks(ms.GetNamespaceEntry().Name().String()) } +// ChasmSignalBacklinksEnabled returns true if CHASM-based signal requestID backlink tracking is enabled. +func (ms *MutableStateImpl) ChasmSignalBacklinksEnabled() bool { + return ms.ChasmEnabled() && ms.shard.GetConfig().EnableCHASMSignalBacklinks(ms.GetNamespaceEntry().Name().String()) +} + // ChasmWorkflowComponent gets the root workflow component from the CHASM tree. // Returns the workflow component (which is *chasmworkflow.Workflow) and the CHASM mutable context. // This method is for write operations. Callers can type assert to *chasmworkflow.Workflow if needed. @@ -2430,8 +2435,22 @@ func (ms *MutableStateImpl) IsWorkflowCloseAttempted() bool { func (ms *MutableStateImpl) IsSignalRequested( requestID string, ) bool { - _, ok := ms.pendingSignalRequestedIDs[requestID] - return ok + // First check CHASM map, then fallback to existing set fields -- will be cleaned up once we + // fully ramp the writes to CHASM only. 
+ signalExists := false + if ms.ChasmSignalBacklinksEnabled() { + wf, chasmCtx, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + softassert.Fail(ms.logger, fmt.Sprintf("Unexpected error reading CHASM component: %v", err)) + } + signalExists = wf.HasIncomingSignalEvent(chasmCtx, requestID) + } + + // TODO(long-nt-tran): Remove fallback to existing map once we fully roll out writes to CHASM signals map + if !signalExists { + _, signalExists = ms.pendingSignalRequestedIDs[requestID] + } + return signalExists } func (ms *MutableStateImpl) IsWorkflowPendingOnWorkflowTaskBackoff() bool { @@ -2455,6 +2474,12 @@ func (ms *MutableStateImpl) GetApproximatePersistedSize() int { func (ms *MutableStateImpl) AddSignalRequested( requestID string, ) { + if ms.ChasmSignalBacklinksEnabled() { + // Signal deduplication is managed by CHASM IncomingSignals; the CHASM write + // happens in ApplyWorkflowExecutionSignaled. + // TODO(long-nt-tran): Cleanup this path after ChasmSignalBacklinksEnabled is rolled out. + return + } if ms.pendingSignalRequestedIDs == nil { ms.pendingSignalRequestedIDs = make(map[string]struct{}) } @@ -2469,6 +2494,11 @@ func (ms *MutableStateImpl) AddSignalRequested( func (ms *MutableStateImpl) DeleteSignalRequested( requestID string, ) { + if ms.ChasmSignalBacklinksEnabled() { + // Signal IDs are kept in CHASM IncomingSignals for backlink resolution. + // TODO(long-nt-tran): Clean up this path after config is rolled out. 
+ return + } delete(ms.pendingSignalRequestedIDs, requestID) delete(ms.updateSignalRequestedIDs, requestID) ms.deleteSignalRequestedIDs[requestID] = struct{}{} @@ -5770,6 +5800,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionSignaled( input *commonpb.Payloads, identity string, header *commonpb.Header, + requestID string, links []*commonpb.Link, ) (*historypb.HistoryEvent, error) { return ms.AddWorkflowExecutionSignaledEvent( @@ -5778,6 +5809,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionSignaled( identity, header, nil, + requestID, links, ) } @@ -5788,6 +5820,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionSignaledEvent( identity string, header *commonpb.Header, externalWorkflowExecution *commonpb.WorkflowExecution, + requestID string, links []*commonpb.Link, ) (*historypb.HistoryEvent, error) { opTag := tag.WorkflowActionWorkflowSignaled @@ -5801,6 +5834,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionSignaledEvent( identity, header, externalWorkflowExecution, + requestID, links, ) if err := ms.ApplyWorkflowExecutionSignaled(event); err != nil { @@ -5810,10 +5844,40 @@ func (ms *MutableStateImpl) AddWorkflowExecutionSignaledEvent( } func (ms *MutableStateImpl) ApplyWorkflowExecutionSignaled( - _ *historypb.HistoryEvent, + event *historypb.HistoryEvent, ) error { // Increment signal count in mutable state for this workflow execution ms.executionInfo.SignalCount++ + + // Add signal requestID to workflow CHASM tree (if feature is enabled) + signalEventAttrs, ok := event.GetAttributes().(*historypb.HistoryEvent_WorkflowExecutionSignaledEventAttributes) + if !ok { + return softassert.UnexpectedInternalErr( + ms.logger, + fmt.Sprintf( + "Expect ApplyWorkflowExecutionSignaled to be called only on signal events, but called from: %v", + event, + ), + nil, + ) + } + requestID := signalEventAttrs.WorkflowExecutionSignaledEventAttributes.GetRequestId() + if requestID != "" && ms.ChasmSignalBacklinksEnabled() { + ctx := context.Background() + 
ms.EnsureChasmWorkflowComponent(ctx) + wf, chasmCtx, err := ms.ChasmWorkflowComponent(ctx) + if err != nil { + return err + } + // Persist the signal requestID to the current eventID. + // - For buffered events (normal processing path), event.GetEventId() returns the common.BufferedEventID and will be resolved later. + // - For already-persisted events (rebuild/replay path), event.GetEventId() returns the real history event ID. + nsTag := metrics.NamespaceTag(ms.GetNamespaceEntry().Name().String()) + if err := wf.AddIncomingSignalEvent(chasmCtx, requestID, event.GetEventId()); err != nil { + return err + } + metrics.ChasmIncomingSignalWritten.With(ms.metricsHandler.WithTags(nsTag)).Record(1) + } return nil } @@ -8143,7 +8207,7 @@ func (ms *MutableStateImpl) dirtyHSMToReplicationTask( func (ms *MutableStateImpl) updatePendingEventIDs( scheduledIDToStartedID map[int64]int64, requestIDToEventID map[string]int64, -) { +) error { for scheduledEventID, startedEventID := range scheduledIDToStartedID { if activityInfo, ok := ms.GetActivityInfo(scheduledEventID); ok { activityInfo.StartedEventId = startedEventID @@ -8158,12 +8222,29 @@ func (ms *MutableStateImpl) updatePendingEventIDs( } } if len(requestIDToEventID) > 0 { + var wf *chasmworkflow.Workflow + var chasmCtx chasm.MutableContext + var err error + if ms.ChasmSignalBacklinksEnabled() { + wf, chasmCtx, err = ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + } + for requestID, eventID := range requestIDToEventID { if requestIDInfo, ok := ms.executionState.RequestIds[requestID]; ok { requestIDInfo.EventId = eventID } + if wf != nil { + // UpdateIncomingSignalEvent is a no-op for non-signal request IDs as they won't exist in the map. 
+ if err := wf.UpdateIncomingSignalEvent(chasmCtx, requestID, eventID); err != nil { + return err + } + } } } + return nil } func (ms *MutableStateImpl) updateWithLastWriteEvent( @@ -8893,6 +8974,7 @@ func (ms *MutableStateImpl) applyUpdatesToStateMachineNodes( } func (ms *MutableStateImpl) applySignalRequestedIds(signalRequestedIds []string, incomingExecutionInfo *persistencespb.WorkflowExecutionInfo) { + // TODO(long-nt-tran): Deprecate this function once we fully ramp up writing signals to workflow CHASM component if transitionhistory.Compare( incomingExecutionInfo.SignalRequestIdsLastUpdateVersionedTransition, ms.executionInfo.SignalRequestIdsLastUpdateVersionedTransition, diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index 24de0d9ac43..464c90fc4e9 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -2763,6 +2763,7 @@ func (s *mutableStateSuite) TestTotalEntitiesCount() { &commonpb.Payloads{}, "identity", &commonpb.Header{}, + "", nil, ) s.NoError(err) @@ -3321,6 +3322,7 @@ func (s *mutableStateSuite) TestCloseTransactionUpdateTransition() { "identity", &commonpb.Header{}, nil, + "", nil, ) if err != nil { @@ -6507,6 +6509,7 @@ func (s *mutableStateSuite) TestCloseTransaction_PrincipalPreserved() { "alice-identity", &commonpb.Header{}, nil, + "", nil, ) s.NoError(err) @@ -6526,6 +6529,7 @@ func (s *mutableStateSuite) TestCloseTransaction_PrincipalPreserved() { "bob-identity", &commonpb.Header{}, nil, + "", nil, ) s.NoError(err) diff --git a/service/history/workflow/workflow_test/mutable_state_impl_test.go b/service/history/workflow/workflow_test/mutable_state_impl_test.go index 2f8ea11428a..0d47e564b5b 100644 --- a/service/history/workflow/workflow_test/mutable_state_impl_test.go +++ b/service/history/workflow/workflow_test/mutable_state_impl_test.go @@ -186,6 +186,7 @@ func addWorkflowExecutionSignaled(t *testing.T, 
i int, ms *workflow.MutableState payload, identity, header, + "", nil, ) if err != nil { diff --git a/tests/links_test.go b/tests/links_test.go index 210808219b1..987053bfdd8 100644 --- a/tests/links_test.go +++ b/tests/links_test.go @@ -11,6 +11,7 @@ import ( taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/api/workflowservice/v1" "go.temporal.io/sdk/client" + "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/tests/testcore" @@ -36,6 +37,13 @@ var links = []*commonpb.Link{ }, } +func enableSignalBacklinkOpts() []testcore.TestOption { + return []testcore.TestOption{ + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSignalBacklinks, true), + } +} + func (s *LinksSuite) TestTerminateWorkflow_LinksAttachedToEvent() { env := testcore.NewEnv(s.T()) ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) @@ -49,7 +57,6 @@ func (s *LinksSuite) TestTerminateWorkflow_LinksAttachedToEvent() { ) s.NoError(err) - // TODO(bergundy): Use SdkClient if and when it exposes links on TerminateWorkflow. _, err = env.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ Namespace: env.Namespace().String(), WorkflowExecution: &commonpb.WorkflowExecution{ @@ -60,6 +67,7 @@ func (s *LinksSuite) TestTerminateWorkflow_LinksAttachedToEvent() { }) s.NoError(err) + // TODO(bergundy): Use SdkClient if and when it exposes links on TerminateWorkflow. history := env.SdkClient().GetWorkflowHistory(ctx, run.GetID(), "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_CLOSE_EVENT) event, err := history.Next() s.NoError(err) @@ -79,7 +87,6 @@ func (s *LinksSuite) TestRequestCancelWorkflow_LinksAttachedToEvent() { ) s.NoError(err) - // TODO(bergundy): Use SdkClient if and when it exposes links on CancelWorkflow. 
_, err = env.FrontendClient().RequestCancelWorkflowExecution(ctx, &workflowservice.RequestCancelWorkflowExecutionRequest{ Namespace: env.Namespace().String(), WorkflowExecution: &commonpb.WorkflowExecution{ @@ -90,6 +97,7 @@ func (s *LinksSuite) TestRequestCancelWorkflow_LinksAttachedToEvent() { }) s.NoError(err) + // TODO(bergundy): Use SdkClient if and when it exposes links on CancelWorkflow. history := env.SdkClient().GetWorkflowHistory(ctx, run.GetID(), "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) foundEvent := false for history.HasNext() { @@ -105,7 +113,7 @@ func (s *LinksSuite) TestRequestCancelWorkflow_LinksAttachedToEvent() { } func (s *LinksSuite) TestSignalWorkflowExecution_LinksAttachedToEvent() { - env := testcore.NewEnv(s.T()) + env := testcore.NewEnv(s.T(), enableSignalBacklinkOpts()...) ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() run, err := env.SdkClient().ExecuteWorkflow( @@ -117,8 +125,7 @@ func (s *LinksSuite) TestSignalWorkflowExecution_LinksAttachedToEvent() { ) s.NoError(err) - // TODO(bergundy): Use SdkClient if and when it exposes links on SignalWorkflow. 
- _, err = env.FrontendClient().SignalWorkflowExecution(ctx, &workflowservice.SignalWorkflowExecutionRequest{ + req := &workflowservice.SignalWorkflowExecutionRequest{ Namespace: env.Namespace().String(), WorkflowExecution: &commonpb.WorkflowExecution{ WorkflowId: run.GetID(), @@ -127,31 +134,256 @@ func (s *LinksSuite) TestSignalWorkflowExecution_LinksAttachedToEvent() { Identity: "test", RequestId: uuid.NewString(), Links: links, - }) + } + expectedLink := &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: env.Namespace().String(), + WorkflowId: run.GetID(), + RunId: run.GetRunID(), + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: req.RequestId, + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + }, + }, + }, + }, + } + + // TODO(bergundy): Use SdkClient if and when it exposes links on SignalWorkflow. + resp, err := env.FrontendClient().SignalWorkflowExecution(ctx, req) + s.NoError(err) + protorequire.ProtoEqual(s.T(), expectedLink, resp.GetLink()) + + // Second call with same RequestId hits the dedup path but must still return the same link. 
+ resp, err = env.FrontendClient().SignalWorkflowExecution(ctx, req) s.NoError(err) + protorequire.ProtoEqual(s.T(), expectedLink, resp.GetLink()) history := env.SdkClient().GetWorkflowHistory(ctx, run.GetID(), "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) foundEvent := false + foundDuplicatedEvent := false + var signaledEventID int64 for history.HasNext() { event, err := history.Next() s.NoError(err) if event.EventType != enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED { continue } + if foundEvent { + foundDuplicatedEvent = true + } else { + signaledEventID = event.GetEventId() + } foundEvent = true protorequire.ProtoSliceEqual(s.T(), links, event.Links) } s.True(foundEvent) + s.False(foundDuplicatedEvent, "second signal with same RequestId should be deduped and not produce a second event") + + // Verify the requestID is tracked and resolves to the correct event ID. + descResp, err := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: run.GetID(), + }, + }) + s.NoError(err) + requestIDInfos := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.Contains(requestIDInfos, req.RequestId) + info := requestIDInfos[req.RequestId] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, info.GetEventType()) + s.Equal(signaledEventID, info.GetEventId(), "requestID map entry must point to the SIGNALED event in history") +} + +// TestSignalWorkflowExecution_BacklinkSurvivesReset verifies that after a workflow is reset, +// the new run's CHASM IncomingSignals map is rebuilt from history so that DescribeWorkflow +// continues to return a valid requestID -> event-ID backlink for signals that occurred before +// the reset point. +// +// This exercises the rebuild/replay path through ApplyWorkflowExecutionSignaled, which uses +// the event's real event ID (not common.BufferedEventID) when writing to the CHASM tree. 
+func (s *LinksSuite) TestSignalWorkflowExecution_BacklinkSurvivesReset() { + env := testcore.NewEnv(s.T(), enableSignalBacklinkOpts()...) + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + taskQueue := testcore.RandomizeStr(s.T().Name()) + workflowID := testcore.RandomizeStr(s.T().Name()) + + // Start the workflow. + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: workflowID, + TaskQueue: taskQueue, + }, "dont-care") + s.NoError(err) + runID := run.GetRunID() + + signalRequestID := uuid.NewString() + + // Signal the workflow. The signal will be included in the first WFT batch, so it will + // appear in history before the WFT completion event. + _, err = env.FrontendClient().SignalWorkflowExecution(ctx, &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: runID}, + SignalName: "dont-care", + Identity: "test", + RequestId: signalRequestID, + Links: links, + }) + s.NoError(err) + + // Poll and complete the WFT so the signal is flushed to history with a real event ID. + pollResp, pollErr := env.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: env.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(pollErr) + s.NotNil(pollResp.GetTaskToken()) + _, completeErr := env.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Namespace: env.Namespace().String(), + Identity: "test", + TaskToken: pollResp.TaskToken, + }) + s.NoError(completeErr) + + // Find the WFT completed event ID in the original run's history. 
+ var wftCompletedEventID int64 + history := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + for history.HasNext() { + event, histErr := history.Next() + s.NoError(histErr) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_TASK_COMPLETED { + wftCompletedEventID = event.EventId + break + } + } + s.Positive(wftCompletedEventID, "WFT completed event not found in history") + + // Reset the workflow to the first WFT completion. The signal event is before this point, + // so it will be included in the new run's replayed history. + resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + RunId: runID, + }, + Reason: "testing-backlink-survival", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: wftCompletedEventID, + }) + s.NoError(err) + newRunID := resetResp.RunId + s.NotEmpty(newRunID) + + // During reset, ApplyWorkflowExecutionSignaled rebuilds the CHASM IncomingSignals map + // from history, so the backlink should be present once the new run is created. + descResp, descErr := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: newRunID}, + }) + s.NoError(descErr) + _, signalExists := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos()[signalRequestID] + s.True(signalExists) + + // Verify the backlink on the new run points to a real (non-buffered) SIGNALED event. 
+ descResp, err = env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: newRunID}, + }) + s.NoError(err) + requestIDInfos := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.Contains(requestIDInfos, signalRequestID) + info := requestIDInfos[signalRequestID] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, info.GetEventType()) + s.Positive(info.GetEventId(), "backlink event ID must be a real, non-buffered event ID in the new run's history") + s.False(info.GetBuffered()) +} + +// TestSignalWorkflowExecution_BufferedDuringWorkflowTask verifies that when a signal arrives +// while a workflow task is being processed, DescribeWorkflow reports the backlink as buffered. +// Once the workflow task completes and the signal is flushed to history, the backlink must +// reflect a real (non-buffered) event ID. +func (s *LinksSuite) TestSignalWorkflowExecution_BufferedDuringWorkflowTask() { + env := testcore.NewEnv(s.T(), enableSignalBacklinkOpts()...) + + taskQueue := testcore.RandomizeStr(s.T().Name()) + workflowID := testcore.RandomizeStr(s.T().Name()) + + run, err := env.SdkClient().ExecuteWorkflow(env.Context(), client.StartWorkflowOptions{ + ID: workflowID, + TaskQueue: taskQueue, + // Use a really long WFT timeout to avoid flakiness when we're checking that the signal is buffered. + WorkflowTaskTimeout: 60 * time.Second, + }, "dont-care") + s.NoError(err) + runID := run.GetRunID() + + // Poll to move the WFT into "started" state to have the server wait for us to complete it. + // This will force the signal to stay in the buffer until the task is finished. 
+ pollResp, err := env.FrontendClient().PollWorkflowTaskQueue(env.Context(), &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: env.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + s.NotNil(pollResp.GetTaskToken()) + + // This signal will be buffered since there is a WFT in-flight. + signalRequestID := uuid.NewString() + _, err = env.FrontendClient().SignalWorkflowExecution(env.Context(), &workflowservice.SignalWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: runID}, + SignalName: "dont-care", + Identity: "test", + RequestId: signalRequestID, + Links: links, + }) + s.NoError(err) + + // WFT is still running: backlink must be present and marked buffered. + descResp, err := env.FrontendClient().DescribeWorkflowExecution(env.Context(), &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: runID}, + }) + s.NoError(err) + requestIDInfos := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.Contains(requestIDInfos, signalRequestID) + info := requestIDInfos[signalRequestID] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, info.GetEventType()) + s.True(info.GetBuffered(), "backlink must be buffered while WFT is in progress") + + // Complete the WFT, which flushes the signal to DB with a concrete EventID. + _, err = env.FrontendClient().RespondWorkflowTaskCompleted(env.Context(), &workflowservice.RespondWorkflowTaskCompletedRequest{ + Namespace: env.Namespace().String(), + Identity: "test", + TaskToken: pollResp.TaskToken, + }) + s.NoError(err) + + // After WFT completion the backlink must resolve to a real, non-buffered event. 
+ descResp, err = env.FrontendClient().DescribeWorkflowExecution(env.Context(), &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: workflowID, RunId: runID}, + }) + s.NoError(err) + requestIDInfos = descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.Contains(requestIDInfos, signalRequestID) + info = requestIDInfos[signalRequestID] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, info.GetEventType()) + s.False(info.GetBuffered(), "backlink must not be buffered after WFT completion") + s.Positive(info.GetEventId(), "backlink must reference a real event ID after WFT completion") } func (s *LinksSuite) TestSignalWithStartWorkflowExecution_LinksAttachedToRelevantEvents() { - env := testcore.NewEnv(s.T()) + env := testcore.NewEnv(s.T(), enableSignalBacklinkOpts()...) ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() workflowID := testcore.RandomizeStr(s.T().Name()) - // TODO(bergundy): Use SdkClient if and when it exposes links on SignalWithStartWorkflow. request := &workflowservice.SignalWithStartWorkflowExecutionRequest{ Namespace: env.Namespace().String(), WorkflowId: workflowID, @@ -166,26 +398,74 @@ func (s *LinksSuite) TestSignalWithStartWorkflowExecution_LinksAttachedToRelevan RequestId: uuid.NewString(), Links: links, } - _, err := env.FrontendClient().SignalWithStartWorkflowExecution(ctx, request) + + // TODO(bergundy): Use SdkClient if and when it exposes links on SignalWithStartWorkflow. 
+ resp, err := env.FrontendClient().SignalWithStartWorkflowExecution(ctx, request) s.NoError(err) + firstRunID := resp.GetRunId() + protorequire.ProtoEqual( + s.T(), + &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: env.Namespace().String(), + WorkflowId: workflowID, + RunId: firstRunID, + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: request.RequestId, + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + }, + }, + }, + }, + }, + resp.GetSignalLink(), + ) + + firstRequestID := request.RequestId // Send a second request and verify that the new signal has links attached to it too. request.RequestId = uuid.NewString() - _, err = env.FrontendClient().SignalWithStartWorkflowExecution(ctx, request) + resp, err = env.FrontendClient().SignalWithStartWorkflowExecution(ctx, request) s.NoError(err) + // Expect backlinks with the same RunID as before since the workflow execution didn't change, + // but the signal requestID should differ since this is a different request. + protorequire.ProtoEqual( + s.T(), + &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: env.Namespace().String(), + WorkflowId: workflowID, + RunId: resp.GetRunId(), + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: request.RequestId, // This requestID should differ from the first backlink. 
+ EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, + }, + }, + }, + }, + }, + resp.GetSignalLink(), + ) history := env.SdkClient().GetWorkflowHistory(ctx, workflowID, "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) foundStartEvent := false foundFirstSignal := false foundSecondSignal := false + var firstSignalEventID, secondSignalEventID int64 for history.HasNext() { event, err := history.Next() s.NoError(err) if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED { if foundFirstSignal { foundSecondSignal = true + secondSignalEventID = event.GetEventId() } else { foundFirstSignal = true + firstSignalEventID = event.GetEventId() } protorequire.ProtoSliceEqual(s.T(), links, event.Links) } @@ -197,4 +477,24 @@ func (s *LinksSuite) TestSignalWithStartWorkflowExecution_LinksAttachedToRelevan s.True(foundStartEvent) s.True(foundFirstSignal) s.True(foundSecondSignal) + + // Verify both requestIDs are tracked and resolve to the correct signal event IDs. + descResp, err := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + }, + }) + s.NoError(err) + requestIDInfos := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + + s.Contains(requestIDInfos, firstRequestID) + firstInfo := requestIDInfos[firstRequestID] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, firstInfo.GetEventType()) + s.Equal(firstSignalEventID, firstInfo.GetEventId(), "first requestID map entry must point to the first SIGNALED event in history") + + s.Contains(requestIDInfos, request.RequestId) + secondInfo := requestIDInfos[request.RequestId] + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED, secondInfo.GetEventType()) + s.Equal(secondSignalEventID, secondInfo.GetEventId(), "second requestID map entry must point to the second SIGNALED event in history") } diff --git a/tests/nexus_workflow_test.go 
b/tests/nexus_workflow_test.go index a17dacbfb9c..35e3b3a59bc 100644 --- a/tests/nexus_workflow_test.go +++ b/tests/nexus_workflow_test.go @@ -2208,11 +2208,30 @@ func (s *NexusWorkflowTestSuite) TestNexusAsyncOperationWithMultipleCallers(chas s.NoError(err) requestIDInfos := descResp.GetWorkflowExtendedInfo().GetRequestIdInfos() s.NotNil(requestIDInfos) - s.Len(requestIDInfos, 1) + cntStarted := 0 + cntSignaled := 0 for _, info := range requestIDInfos { s.False(info.Buffered) s.GreaterOrEqual(info.EventId, common.FirstEventID) - s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, info.EventType) + switch info.EventType { + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED: + cntStarted++ + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED: + // The Signal event's request ID is attached to the requestIDInfos map for backlinking, + // so it should be present here. + cntSignaled++ + default: + s.Fail("Unexpected event type in request ID info") + } + } + s.Equal(1, cntStarted) + + // DescribeWorkflowExecution only incorporates signal events if CHASM is enabled since these signal requests + // are stored under the Workflow's CHASM component. + if chasmEnabled { + s.Equal(1, cntSignaled) + } else { + s.Equal(0, cntSignaled) } }, }, @@ -2230,6 +2249,7 @@ func (s *NexusWorkflowTestSuite) TestNexusAsyncOperationWithMultipleCallers(chas s.NotNil(requestIDInfos) cntStarted := 0 cntAttached := 0 + cntSignaled := 0 for _, info := range requestIDInfos { s.False(info.Buffered) s.GreaterOrEqual(info.EventId, common.FirstEventID) @@ -2238,11 +2258,23 @@ func (s *NexusWorkflowTestSuite) TestNexusAsyncOperationWithMultipleCallers(chas cntStarted++ case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED: cntAttached++ + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED: + // The Signal event's request ID is attached to the requestIDInfos map for backlinking, + // so it should be present here. 
+ cntSignaled++ default: s.Fail("Unexpected event type in request ID info") } } s.Equal(1, cntStarted) + + // DescribeWorkflowExecution only incorporates signal events if CHASM is enabled since these signal requests + // are stored under the Workflow's CHASM component. + if chasmEnabled { + s.Equal(1, cntSignaled) + } else { + s.Equal(0, cntSignaled) + } s.Equal(numCalls-1, cntAttached) }, }, diff --git a/tests/signal_workflow_test.go b/tests/signal_workflow_test.go index 7e764d0dac0..0c782e91a5a 100644 --- a/tests/signal_workflow_test.go +++ b/tests/signal_workflow_test.go @@ -34,12 +34,23 @@ type SignalWorkflowTestSuite struct { parallelsuite.Suite[*SignalWorkflowTestSuite] } -func TestSignalWorkflowTestSuite(t *testing.T) { - parallelsuite.Run(t, &SignalWorkflowTestSuite{}) +func TestSignalWorkflowTestSuiteLegacy(t *testing.T) { + parallelsuite.Run(t, &SignalWorkflowTestSuite{}, []testcore.TestOption{}) } -func (s *SignalWorkflowTestSuite) TestSignalWorkflow() { - env := testcore.NewEnv(s.T()) +func TestSignalWorkflowTestSuiteChasm(t *testing.T) { + parallelsuite.Run( + t, + &SignalWorkflowTestSuite{}, + []testcore.TestOption{ + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSignalBacklinks, true), + }, + ) +} + +func (s *SignalWorkflowTestSuite) TestSignalWorkflow(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) id := "functional-signal-workflow-test" wt := "functional-signal-workflow-test-type" tl := "functional-signal-workflow-test-taskqueue" @@ -231,8 +242,8 @@ func (s *SignalWorkflowTestSuite) TestSignalWorkflow() { s.IsType(&serviceerror.NotFound{}, err) } -func (s *SignalWorkflowTestSuite) TestSignalWorkflow_DuplicateRequest() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWorkflow_DuplicateRequest(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) 
id := "functional-signal-workflow-test-duplicate" wt := "functional-signal-workflow-test-duplicate-type" tl := "functional-signal-workflow-test-duplicate-taskqueue" @@ -372,9 +383,15 @@ func (s *SignalWorkflowTestSuite) TestSignalWorkflow_DuplicateRequest() { s.Equal(0, numOfSignaledEvent) } -func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand() { - env := testcore.NewEnv(s.T(), testcore.WithDedicatedCluster()) - env.OverrideDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true) // explicitly enable cross namespace commands for this test +func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand(opts []testcore.TestOption) { + // Explicitly enable cross namespace commands for this test, + // need a dedicated cluster to enable cross namespace commands + opts = append( + opts, + testcore.WithDedicatedCluster(), + testcore.WithDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true), + ) + env := testcore.NewEnv(s.T(), opts...) id := "functional-signal-external-workflow-test" wt := "functional-signal-external-workflow-test-type" tl := "functional-signal-external-workflow-test-taskqueue" @@ -590,8 +607,8 @@ CheckHistoryLoopForSignalSent: s.Equal("history-service", signalEvent.GetWorkflowExecutionSignaledEventAttributes().Identity) } -func (s *SignalWorkflowTestSuite) TestSignalWorkflow_Cron_NoWorkflowTaskCreated() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWorkflow_Cron_NoWorkflowTaskCreated(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) 
id := "functional-signal-workflow-test-cron" wt := "functional-signal-workflow-test-cron-type" tl := "functional-signal-workflow-test-cron-taskqueue" @@ -667,8 +684,8 @@ func (s *SignalWorkflowTestSuite) TestSignalWorkflow_Cron_NoWorkflowTaskCreated( s.Greater(workflowTaskDelay, time.Second*2) } -func (s *SignalWorkflowTestSuite) TestSignalWorkflow_WorkflowCloseAttempted() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWorkflow_WorkflowCloseAttempted(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) id := "functional-signal-workflow-workflow-close-attempted-test" wt := "functional-signal-workflow-workflow-close-attempted-test-type" tl := "functional-signal-workflow-workflow-close-attempted-test-taskqueue" @@ -751,9 +768,15 @@ func (s *SignalWorkflowTestSuite) TestSignalWorkflow_WorkflowCloseAttempted() { s.NoError(err) } -func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_WithoutRunID() { - env := testcore.NewEnv(s.T(), testcore.WithDedicatedCluster()) - env.OverrideDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true) // explicitly enable cross namespace commands for this test +func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_WithoutRunID(opts []testcore.TestOption) { + // Explicitly enable cross namespace commands for this test, + // need a dedicated cluster to enable cross namespace commands + opts = append( + opts, + testcore.WithDedicatedCluster(), + testcore.WithDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true), + ) + env := testcore.NewEnv(s.T(), opts...) 
id := "functional-signal-external-workflow-test-without-run-id" wt := "functional-signal-external-workflow-test-without-run-id-type" tl := "functional-signal-external-workflow-test-without-run-id-taskqueue" @@ -965,9 +988,15 @@ CheckHistoryLoopForSignalSent: s.Equal("history-service", signalEvent.GetWorkflowExecutionSignaledEventAttributes().Identity) } -func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_UnKnownTarget() { - env := testcore.NewEnv(s.T(), testcore.WithDedicatedCluster()) - env.OverrideDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true) // explicitly enable cross namespace commands for this test +func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_UnKnownTarget(opts []testcore.TestOption) { + // Explicitly enable cross namespace commands for this test, + // need a dedicated cluster to enable cross namespace commands + opts = append( + opts, + testcore.WithDedicatedCluster(), + testcore.WithDynamicConfig(dynamicconfig.EnableCrossNamespaceCommands, true), + ) + env := testcore.NewEnv(s.T(), opts...) id := "functional-signal-unknown-workflow-command-test" wt := "functional-signal-unknown-workflow-command-test-type" tl := "functional-signal-unknown-workflow-command-test-taskqueue" @@ -1089,8 +1118,8 @@ CheckHistoryLoopForCancelSent: 12 WorkflowTaskScheduled`, we.RunId), historyEvents) } -func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_SignalSelf() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalExternalWorkflowCommand_SignalSelf(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) 
id := "functional-signal-self-workflow-command-test" wt := "functional-signal-self-workflow-command-test-type" tl := "functional-signal-self-workflow-command-test-taskqueue" @@ -1212,8 +1241,8 @@ CheckHistoryLoopForCancelSent: 12 WorkflowTaskScheduled`, we.RunId, id), historyEvents) } -func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) id := "functional-signal-with-start-workflow-test" wt := "functional-signal-with-start-workflow-test-type" tl := "functional-signal-with-start-workflow-test-taskqueue" @@ -1488,8 +1517,8 @@ func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow() { s.Len(listClosedResp.Executions, 1) } -func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow_ResolveIDDeduplication() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow_ResolveIDDeduplication(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) // setting this to 0 to be sure we are terminating the current workflow env.OverrideDynamicConfig(dynamicconfig.WorkflowIdReuseMinimalInterval, 0) @@ -1683,8 +1712,8 @@ func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow_ResolveIDDeduplica s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, descResp.WorkflowExecutionInfo.Status) } -func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow_StartDelay() { - env := testcore.NewEnv(s.T()) +func (s *SignalWorkflowTestSuite) TestSignalWithStartWorkflow_StartDelay(opts []testcore.TestOption) { + env := testcore.NewEnv(s.T(), opts...) 
id := "functional-signal-with-start-workflow-start-delay-test" wt := "functional-signal-with-start-workflow-start-delay-test-type" tl := "functional-signal-with-start-workflow-start-delay-test-taskqueue" diff --git a/tests/update_workflow_test.go b/tests/update_workflow_test.go index 3bcc88742fb..e16367866b7 100644 --- a/tests/update_workflow_test.go +++ b/tests/update_workflow_test.go @@ -5168,6 +5168,13 @@ func (s *UpdateWithStartSuite) TestWorkflowIsRunning() { startResp := uwsRes.response.Responses[0].GetStartWorkflow() updateRep := uwsRes.response.Responses[1].GetUpdateWorkflow() requireNotStartedButRunning(s.T(), startResp) + s.NotNil(startResp.Link) + wfEvent := startResp.Link.GetWorkflowEvent() + s.Equal(env.Namespace().String(), wfEvent.GetNamespace()) + s.Equal(env.Tv().WorkflowID(), wfEvent.GetWorkflowId()) + s.Equal(startResp.RunId, wfEvent.GetRunId()) + s.Equal(int64(common.FirstEventID), wfEvent.GetEventRef().GetEventId()) + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, wfEvent.GetEventRef().GetEventType()) s.Equal("success-result-of-"+env.Tv().UpdateID(), testcore.DecodeString(s.T(), updateRep.GetOutcome().GetSuccess())) // poll update to ensure same outcome is returned @@ -5218,6 +5225,13 @@ func (s *UpdateWithStartSuite) TestWorkflowIsRunning() { startResp := uwsRes.response.Responses[0].GetStartWorkflow() updateRep := uwsRes.response.Responses[1].GetUpdateWorkflow() requireNotStartedButRunning(s.T(), startResp) + s.NotNil(startResp.Link) + wfEvent := startResp.Link.GetWorkflowEvent() + s.Equal(env.Namespace().String(), wfEvent.GetNamespace()) + s.Equal(env.Tv().WorkflowID(), wfEvent.GetWorkflowId()) + s.Equal(startResp.RunId, wfEvent.GetRunId()) + s.Equal(int64(common.FirstEventID), wfEvent.GetEventRef().GetEventId()) + s.Equal(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED, wfEvent.GetEventRef().GetEventType()) s.Equal("rejection-of-"+env.Tv().UpdateID(), updateRep.GetOutcome().GetFailure().GetMessage()) // poll update to ensure same 
outcome is returned From c714f202e93e010ab40c2bb93cfa825eb293b3f7 Mon Sep 17 00:00:00 2001 From: feiyang Date: Fri, 15 May 2026 10:23:37 -0700 Subject: [PATCH 35/73] ts-patch-3: time skipping halts when there are pending external transfer tasks (#10279) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changed? When there are pending external transfer tasks (signals/cancellation), time doesn't skip. ## Why? 1. This follows the foundational principle that time skipping only occurs when there are no in-flight tasks. 2. In scenarios where a workflow selector is waiting on both a timer and a SignalExternalWorkflow, the timer should not fire before the signal is delivered — see the functional test added in this pr for a concrete example. ## How did you test it? - [x] built - [x] run locally and tested manually - [ ] covered by existing tests - [x] added new unit test(s) - [x] added new functional test(s) --- ...ng_timer.go => timeskipping_timer_task.go} | 0 .../history/workflow/mutable_state_impl.go | 19 ++-- .../workflow/mutable_state_impl_test.go | 14 +++ tests/timeskipping_test.go | 94 +++++++++++++++++++ 4 files changed, 116 insertions(+), 11 deletions(-) rename service/history/tasks/{timeskipping_timer.go => timeskipping_timer_task.go} (100%) diff --git a/service/history/tasks/timeskipping_timer.go b/service/history/tasks/timeskipping_timer_task.go similarity index 100% rename from service/history/tasks/timeskipping_timer.go rename to service/history/tasks/timeskipping_timer_task.go diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index 1e4d679feb1..e5d410475cf 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -9752,7 +9752,6 @@ func snapshotTimeSkippingInfo(source *persistencespb.WorkflowExecutionInfo) (*wo return tsc, initialSkipped } -// hasInflightWorkToPreventTimeSkipping checks if there 
is no-inflight work and time can skip. func (ms *MutableStateImpl) hasInflightWorkToPreventTimeSkipping() (bool, string) { if ms.HasPendingWorkflowTask() { return true, "has pending workflow task" @@ -9760,17 +9759,18 @@ func (ms *MutableStateImpl) hasInflightWorkToPreventTimeSkipping() (bool, string if len(ms.GetPendingActivityInfos()) > 0 { return true, "has pending activity" } + if nexusoperations.MachineCollection(ms.HSM()).Size() > 0 { + return true, "has pending nexus operations" + } if len(ms.GetPendingChildExecutionInfos()) > 0 { return true, "has pending child execution" } - if nexusoperations.MachineCollection(ms.HSM()).Size() > 0 { - return true, "has pending nexus operations" + if len(ms.GetPendingSignalExternalInfos()) > 0 { + return true, "has pending signal external" + } + if len(ms.GetPendingRequestCancelExternalInfos()) > 0 { + return true, "has pending request cancel external" } - - // TODO@time-skipping: handle pending external transfer tasks - // (signals and cancel requests), their completion is not guaranteed to trigger - // a mutable state mutation — and without one, time skipping won't be re-triggered. - // We need a separate mechanism to catch these missed trigger opportunities. 
return false, "" } @@ -9797,13 +9797,10 @@ func (ms *MutableStateImpl) shouldExecuteTimeSkipping() (bool, *timeSkippingTran ) } }() - if !ms.IsWorkflowExecutionRunning() { noSkippingReason = "workflow is not running" return false, nil } - - // pending work exists if hasPendingWork, detailedReason := ms.hasInflightWorkToPreventTimeSkipping(); hasPendingWork { noSkippingReason = fmt.Sprintf("pending work: %s", detailedReason) return false, nil diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index 464c90fc4e9..df62e74f7d2 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -6617,6 +6617,20 @@ func (s *mutableStateSuite) TestHasInflightWorkToPreventTimeSkipping() { s.True(hasPendingWork) s.Equal("has pending nexus operations", reason) }) + + s.Run("TrueWhenPendingSignalExternal", func() { + s.mutableState.pendingSignalInfoIDs[1] = &persistencespb.SignalInfo{} + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending signal external", reason) + }) + + s.Run("TrueWhenPendingRequestCancelExternal", func() { + s.mutableState.pendingRequestCancelInfoIDs[1] = &persistencespb.RequestCancelInfo{} + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending request cancel external", reason) + }) } func (s *mutableStateSuite) TestShouldExecuteTimeSkipping() { diff --git a/tests/timeskipping_test.go b/tests/timeskipping_test.go index 191101493d0..7b968f72d61 100644 --- a/tests/timeskipping_test.go +++ b/tests/timeskipping_test.go @@ -13,6 +13,8 @@ import ( updatepb "go.temporal.io/api/update/v1" workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/converter" + "go.temporal.io/sdk/workflow" enumsspb "go.temporal.io/server/api/enums/v1" 
"go.temporal.io/server/chasm" "go.temporal.io/server/common" @@ -496,6 +498,98 @@ func (s *TimeSkippingTestSuite) TestTimeSkipping_TimerAndActivity() { s.True(hasEventType(history, enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_COMPLETED), "workflow must complete") } +func (s *TimeSkippingTestSuite) TestTimeSkipping_PendingSignalExternalBlocksSkip() { + env := testcore.NewEnv(s.T()) + env.OverrideDynamicConfig(dynamicconfig.TimeSkippingEnabled, true) + tv := testvars.New(s.T()) + ctx := env.Context() + + // Target workflow B. No worker polls B; the SignalExternal RPC will land a + // WorkflowExecutionSignaled event in B's history directly. Distinct task + // queue so B's idle first WT can't interfere with the SDK worker. + tvB := tv.WithWorkflowIDNumber(2).WithTaskQueueNumber(2) + _, err := env.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + RequestId: uuid.NewString(), + Namespace: env.Namespace().String(), + WorkflowId: tvB.WorkflowID(), + WorkflowType: tvB.WorkflowType(), + TaskQueue: tvB.TaskQueue(), + WorkflowRunTimeout: durationpb.New(2 * time.Hour), + WorkflowTaskTimeout: durationpb.New(10 * time.Second), + }) + s.NoError(err) + + // CoordinatorWorkflow: emits the 1h timer and the SignalExternal command in + // the same WFT response, then waits for whichever future resolves first. + coordinatorWorkflow := func(wfCtx workflow.Context, targetWorkflowID string) error { + timerFuture := workflow.NewTimer(wfCtx, time.Hour) + signalFuture := workflow.SignalExternalWorkflow( + wfCtx, targetWorkflowID, "", "test-pending-signal", nil) + + workflow.NewSelector(wfCtx). + AddFuture(timerFuture, func(_ workflow.Future) {}). + AddFuture(signalFuture, func(_ workflow.Future) {}). 
+ Select(wfCtx) + return nil + } + const coordinatorTypeName = "CoordinatorWorkflow" + env.SdkWorker().RegisterWorkflowWithOptions(coordinatorWorkflow, workflow.RegisterOptions{ + Name: coordinatorTypeName, + }) + + // SDK's StartWorkflowOptions doesn't expose TimeSkippingConfig (as of SDK + // v1.41), so start workflow A directly through the frontend. Use the SDK + // worker's task queue so the registered coordinator picks up the WT. + input, err := converter.GetDefaultDataConverter().ToPayloads(tvB.WorkflowID()) + s.NoError(err) + + tvA := tv.WithWorkflowIDNumber(1) + wallStart := time.Now() + aResp, err := env.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + RequestId: uuid.NewString(), + Namespace: env.Namespace().String(), + WorkflowId: tvA.WorkflowID(), + WorkflowType: &commonpb.WorkflowType{Name: coordinatorTypeName}, + TaskQueue: &taskqueuepb.TaskQueue{Name: env.WorkerTaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Input: input, + WorkflowRunTimeout: durationpb.New(2 * time.Hour), + WorkflowTaskTimeout: durationpb.New(10 * time.Second), + TimeSkippingConfig: &workflowpb.TimeSkippingConfig{Enabled: true}, + }) + s.NoError(err) + + // Wait for A to finish through the SDK. + err = env.SdkClient().GetWorkflow(ctx, tvA.WorkflowID(), aResp.RunId).Get(ctx, nil) + s.NoError(err) + wallElapsed := time.Since(wallStart) + + history := env.GetHistory(env.Namespace().String(), &commonpb.WorkflowExecution{ + WorkflowId: tvA.WorkflowID(), + RunId: aResp.RunId, + }) + + // The signal future resolved (its completion event landed in A's history). + s.True(hasEventType(history, enumspb.EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_SIGNALED), + "ExternalWorkflowExecutionSignaled event must appear in A's history") + + // The timer never fired — the signal won the Selector. 
If the new branch in + // hasInflightWorkToPreventTimeSkipping were missing, skip could fire at WT1 + // close, shift the timer to near-now, and race the signal — making this + // assertion flaky. + s.False(hasEventType(history, enumspb.EVENT_TYPE_TIMER_FIRED), + "TimerFired must NOT appear — the signal future must resolve before the 1h timer fires") + + // Workflow A closed via the signal branch. + s.True(hasEventType(history, enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_COMPLETED), + "workflow A must complete") + + // Wall elapsed must be well under the 1h timer — the workflow should + // finish as soon as the signal completes (sub-second on a healthy cluster). + s.Less(wallElapsed, 5*time.Minute, + "test wall elapsed = %v; the workflow should complete promptly after the signal succeeds, well before the 1h timer would fire", + wallElapsed) +} + // TestTimeSkipping_StartWithDelay_NoBound verifies that time-skipping with no // bound shifts a WorkflowStartDelay backoff into the near-now wall-clock window: // the first WT becomes available immediately instead of waiting wallStart + 1h. From d8536562da403e5a9f767a666c9209e4984e798b Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 15 May 2026 10:34:54 -0700 Subject: [PATCH 36/73] Add standalone Nexus operation namespace capability (#10206) ## What changed? 
Add standalone Nexus operation namespace capability --- go.mod | 2 +- go.sum | 4 ++-- service/frontend/namespace_handler.go | 1 + service/frontend/namespace_handler_test.go | 4 ++++ service/frontend/service.go | 2 ++ 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dbe232a6248..6612146055b 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 - go.temporal.io/api v1.62.12-0.20260430203359-15c391664683 + go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 go.temporal.io/sdk v1.41.1 go.uber.org/fx v1.24.0 diff --git a/go.sum b/go.sum index 957bed1529f..6a6dba82142 100644 --- a/go.sum +++ b/go.sum @@ -469,8 +469,8 @@ go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:R go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= -go.temporal.io/api v1.62.12-0.20260430203359-15c391664683 h1:GtwQjX9hN0pRjuneBpl/xvcu9Xl9llAt4GjKrlpP0sg= -go.temporal.io/api v1.62.12-0.20260430203359-15c391664683/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c h1:ADDxNS26VTfDWmW55zYgAFkG6WEU83RHv0HwrarHXtk= +go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 h1:1hKeH3GyR6YD6LKMHGCZ76t6h1Sgha0hXVQBxWi3dlQ= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2/go.mod h1:T8dnzVPeO+gaUTj9eDgm/lT2lZH4+JXNvrGaQGyVi50= 
go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA= diff --git a/service/frontend/namespace_handler.go b/service/frontend/namespace_handler.go index 6ea50afa5b2..087bc5f7de9 100644 --- a/service/frontend/namespace_handler.go +++ b/service/frontend/namespace_handler.go @@ -903,6 +903,7 @@ func (d *namespaceHandler) createResponse( WorkerHeartbeats: d.config.WorkerHeartbeatsEnabled(info.Name), WorkflowPause: d.config.WorkflowPauseEnabled(info.Name), StandaloneActivities: d.config.Activity.Enabled(info.Name), + StandaloneNexusOperation: d.config.EnableChasm(info.Name) && d.config.StandaloneNexusOperationsEnabled(info.Name), WorkerPollCompleteOnShutdown: d.config.EnableCancelWorkerPollsOnShutdown(info.Name), WorkerCommands: d.config.WorkerCommandsEnabled(info.Name), PollerAutoscaling: true, diff --git a/service/frontend/namespace_handler_test.go b/service/frontend/namespace_handler_test.go index 8d504de04f8..c08e458f58b 100644 --- a/service/frontend/namespace_handler_test.go +++ b/service/frontend/namespace_handler_test.go @@ -388,6 +388,7 @@ func (s *namespaceHandlerCommonSuite) TestCapabilitiesAndLimits() { s.True(resp.NamespaceInfo.Capabilities.WorkerHeartbeats) s.False(resp.NamespaceInfo.Capabilities.WorkflowPause) s.False(resp.NamespaceInfo.Capabilities.StandaloneActivities) + s.False(resp.NamespaceInfo.Capabilities.StandaloneNexusOperation) s.False(resp.NamespaceInfo.Capabilities.WorkerPollCompleteOnShutdown) s.False(resp.NamespaceInfo.Capabilities.WorkerCommands) s.True(resp.NamespaceInfo.Capabilities.PollerAutoscaling) @@ -402,6 +403,8 @@ func (s *namespaceHandlerCommonSuite) TestCapabilitiesAndLimits() { s.config.WorkerHeartbeatsEnabled = dc.GetBoolPropertyFnFilteredByNamespace(false) s.config.WorkflowPauseEnabled = dc.GetBoolPropertyFnFilteredByNamespace(true) s.config.Activity.Enabled = dc.GetBoolPropertyFnFilteredByNamespace(true) + s.config.EnableChasm = dc.GetBoolPropertyFnFilteredByNamespace(true) + 
s.config.StandaloneNexusOperationsEnabled = dc.GetBoolPropertyFnFilteredByNamespace(true) s.config.BlobSizeLimitError = dc.GetIntPropertyFnFilteredByNamespace(1024) s.config.MemoSizeLimitError = dc.GetIntPropertyFnFilteredByNamespace(512) s.config.EnableCancelWorkerPollsOnShutdown = dc.GetBoolPropertyFnFilteredByNamespace(true) @@ -418,6 +421,7 @@ func (s *namespaceHandlerCommonSuite) TestCapabilitiesAndLimits() { s.False(resp.NamespaceInfo.Capabilities.WorkerHeartbeats) s.True(resp.NamespaceInfo.Capabilities.WorkflowPause) s.True(resp.NamespaceInfo.Capabilities.StandaloneActivities) + s.True(resp.NamespaceInfo.Capabilities.StandaloneNexusOperation) s.True(resp.NamespaceInfo.Capabilities.WorkerPollCompleteOnShutdown) s.True(resp.NamespaceInfo.Capabilities.WorkerCommands) s.Equal(int64(1024), resp.NamespaceInfo.Limits.BlobSizeLimitError) diff --git a/service/frontend/service.go b/service/frontend/service.go index ac561bec1a1..07635c7ce17 100644 --- a/service/frontend/service.go +++ b/service/frontend/service.go @@ -231,6 +231,7 @@ type Config struct { WorkerCommandsEnabled dynamicconfig.BoolPropertyFnWithNamespaceFilter WorkflowPauseEnabled dynamicconfig.BoolPropertyFnWithNamespaceFilter TimeSkippingEnabled dynamicconfig.BoolPropertyFnWithNamespaceFilter + StandaloneNexusOperationsEnabled dynamicconfig.BoolPropertyFnWithNamespaceFilter HTTPAllowedHosts dynamicconfig.TypedPropertyFn[*regexp.Regexp] AllowedExperiments dynamicconfig.TypedPropertyFnWithNamespaceFilter[[]string] @@ -400,6 +401,7 @@ func NewConfig( WorkerCommandsEnabled: dynamicconfig.WorkerCommandsEnabled.Get(dc), WorkflowPauseEnabled: dynamicconfig.WorkflowPauseEnabled.Get(dc), TimeSkippingEnabled: dynamicconfig.TimeSkippingEnabled.Get(dc), + StandaloneNexusOperationsEnabled: chasmnexus.Enabled.Get(dc), HTTPAllowedHosts: dynamicconfig.FrontendHTTPAllowedHosts.Get(dc), AllowedExperiments: dynamicconfig.FrontendAllowedExperiments.Get(dc), From c87a5dc11be139cf6779728dd32e2b3b2642cfb7 Mon Sep 17 00:00:00 
2001 From: Sean Kane Date: Fri, 15 May 2026 12:25:50 -0600 Subject: [PATCH 37/73] tests: fix flakes in WorkflowUpdateSuite (#10277) ## What changed? Increasing the "short context" in workflow update tests where short contexts were consistently erroring out with `context deadline exceeded` in CI, causing flakes. For `TestSpeculativeWorkflowTask_QueryFailureClearsWFContext`: the two `QueryWorkflow` callers share a context that will cancel the shared context when one of the queries returns failed and clears the workflow context. The query buffer error will clear the workflow context and clear the update registry. Then poll the workflow again to validate the update is sent on a new speculative workflow task, this last portion is unchanged. ## Why? Reduce flakiness. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [X] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks NA, tests only. --- tests/update_workflow_test.go | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/tests/update_workflow_test.go b/tests/update_workflow_test.go index e16367866b7..295fa408607 100644 --- a/tests/update_workflow_test.go +++ b/tests/update_workflow_test.go @@ -1286,9 +1286,9 @@ func (s *WorkflowUpdateSuite) TestValidateWorkerMessages() { T: s.T(), } - halfSecondTimeoutCtx, cancel := context.WithTimeout(env.Context(), 500*time.Millisecond) + fiveSecondTimeoutCtx, cancel := context.WithTimeout(env.Context(), 5*time.Second) defer cancel() - updateResultCh := sendUpdate(halfSecondTimeoutCtx, env, env.Tv()) + updateResultCh := sendUpdate(fiveSecondTimeoutCtx, env, env.Tv()) // Process update in workflow. 
_, err := poller.PollAndProcessWorkflowTask() @@ -4575,7 +4575,7 @@ func (s *WorkflowUpdateSuite) TestLastWorkflowTask_HasUpdateMessage() { `, env.GetHistory(env.Namespace().String(), env.Tv().WorkflowExecution())) } -func (s *WorkflowUpdateSuite) TestSpeculativeWorkflowTask_QueryFailureClearsWFContext() { +func (s *WorkflowUpdateSuite) TestSpeculativeWorkflowTask_QueryBufferFullDoesNotBreakPendingUpdate() { env := testcore.NewEnv(s.T()) mustStartWorkflow(env, env.Tv()) @@ -4644,12 +4644,16 @@ func (s *WorkflowUpdateSuite) TestSpeculativeWorkflowTask_QueryFailureClearsWFCo Resp *workflowservice.QueryWorkflowResponse Err error } + + queryCtx, cancelQueries := context.WithCancel(env.Context()) + defer cancelQueries() + queryFn := func(resCh chan<- QueryResult) { // There is no query handler, and query timeout is ok for this test. // But first query must not time out before 2nd query reached server, // because 2 queries overflow the query buffer (default size 1), // which leads to clearing of WF context. - shortCtx, cancel := context.WithTimeout(env.Context(), 100*time.Millisecond) + shortCtx, cancel := context.WithTimeout(queryCtx, 5*time.Second) defer cancel() queryResp, err := env.FrontendClient().QueryWorkflow(shortCtx, &workflowservice.QueryWorkflowRequest{ Namespace: env.Namespace().String(), @@ -4661,26 +4665,37 @@ func (s *WorkflowUpdateSuite) TestSpeculativeWorkflowTask_QueryFailureClearsWFCo resCh <- QueryResult{Resp: queryResp, Err: err} } - query1ResultCh := make(chan QueryResult) - query2ResultCh := make(chan QueryResult) + query1ResultCh := make(chan QueryResult, 1) + query2ResultCh := make(chan QueryResult, 1) go queryFn(query1ResultCh) go queryFn(query2ResultCh) - query1Res := <-query1ResultCh - query2Res := <-query2ResultCh + + var query1Res, query2Res QueryResult + select { + case query1Res = <-query1ResultCh: + cancelQueries() // Cancel 2nd query to avoid waiting for it after 1st query already failed and cleared WF context. 
+ query2Res = <-query2ResultCh + case query2Res = <-query2ResultCh: + cancelQueries() // Cancel 1st query to avoid waiting for it after 2nd query already failed and cleared WF context. + query1Res = <-query1ResultCh + } + s.Error(query1Res.Err) s.Error(query2Res.Err) s.Nil(query1Res.Resp) s.Nil(query2Res.Resp) + isBufferedErr := func(err error) bool { + return common.IsContextCanceledErr(err) || common.IsContextDeadlineExceededErr(err) + } + var queryBufferFullErr *serviceerror.ResourceExhausted - if common.IsContextDeadlineExceededErr(query1Res.Err) { - s.True(common.IsContextDeadlineExceededErr(query1Res.Err), "one of query errors must be CDE") + if isBufferedErr(query1Res.Err) { s.ErrorAs(query2Res.Err, &queryBufferFullErr, "one of query errors must `query buffer is full`") s.Contains(query2Res.Err.Error(), "query buffer is full", "one of query errors must `query buffer is full`") } else { s.ErrorAs(query1Res.Err, &queryBufferFullErr, "one of query errors must `query buffer is full`") s.Contains(query1Res.Err.Error(), "query buffer is full", "one of query errors must `query buffer is full`") - s.True(common.IsContextDeadlineExceededErr(query2Res.Err), "one of query errors must be CDE") } // "query buffer is full" error clears WF context. If update registry is not cleared together with context (old behaviour), From 85e8db7268708f8f3dcd9e95b5ab08b50c1a28c5 Mon Sep 17 00:00:00 2001 From: Lina Jodoin Date: Fri, 15 May 2026 11:29:48 -0700 Subject: [PATCH 38/73] [Scheduler] Rework DeleteSchedule dual-mode delete to not share context metadata (#10278) - **Isolate V1 cleanup context in DeleteSchedule** - **refactor: DeleteSchedule uses CHASM-first routing with V1 fallback** - **fix: isolate context metadata in dual-stack DeleteSchedule** - **Bit of test cleanup** ## What changed? This is the cleanup of https://github.com/chaptersix/temporal/pull/23 . ## Why? - Fixes a bug in the metadata trailer. 
--------- Co-authored-by: Fred Tzeng Co-authored-by: alex.stanfield <13949480+chaptersix@users.noreply.github.com> --- service/frontend/workflow_handler.go | 23 +- tests/schedule_migration_test.go | 465 ++++++++++++++++++++------- tests/testcore/onebox.go | 2 +- tests/testcore/test_cluster_pool.go | 23 +- tests/testcore/test_env.go | 20 +- 5 files changed, 394 insertions(+), 139 deletions(-) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index d6a3d5f7f38..b888eaae80e 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -49,6 +49,7 @@ import ( "go.temporal.io/server/common/clock" "go.temporal.io/server/common/cluster" "go.temporal.io/server/common/collection" + "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/enums" "go.temporal.io/server/common/failure" @@ -4985,16 +4986,32 @@ func (wh *WorkflowHandler) DeleteSchedule(ctx context.Context, request *workflow // Always attempt deletion in both stacks. A schedule may exist in either or // both during dual-stack migration (and a V1 sentinel may linger after a // CHASM-only create). Surface an error only when neither stack succeeded. + // + // Each path gets its own metadata context so that downstream gRPC trailers + // (propagated by TrailerToContextMetadataInterceptor) don't clobber each + // other. After both paths complete, the winning side's metadata is copied + // into the original context for upstream consumers (e.g. metering). 
chasmEnabled := wh.chasmSchedulerEnabled(ctx, request.Namespace) + chasmCtx := contextutil.WithMetadataContext(ctx) var chasmErr error if chasmEnabled { - _, chasmErr = wh.deleteScheduleCHASM(ctx, request) + _, chasmErr = wh.deleteScheduleCHASM(chasmCtx, request) } - _, v1Err := wh.deleteScheduleWorkflow(ctx, request) + v1Ctx := contextutil.WithMetadataContext(ctx) + _, v1Err := wh.deleteScheduleWorkflow(v1Ctx, request) - // At least one side actually deleted → success. + // At least one side actually deleted -> success. if (chasmEnabled && chasmErr == nil) || v1Err == nil { + // CHASM owns the schedule unless it returned a routable error + // (NotFound/sentinel/closed), in which case V1 is the owner. + winnerCtx := v1Ctx + if chasmEnabled && (chasmErr == nil || !isSchedulerErrorLegacyRoutable(chasmErr)) { + winnerCtx = chasmCtx + } + for k, v := range contextutil.ContextMetadataGetAll(winnerCtx) { + contextutil.ContextMetadataSet(ctx, k, v) + } return &workflowservice.DeleteScheduleResponse{}, nil } diff --git a/tests/schedule_migration_test.go b/tests/schedule_migration_test.go index fd4e9de4550..966d00b937c 100644 --- a/tests/schedule_migration_test.go +++ b/tests/schedule_migration_test.go @@ -23,11 +23,17 @@ import ( schedulerpb "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/log" "go.temporal.io/server/common/primitives" + "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/testing/parallelsuite" + "go.temporal.io/server/service/worker/dummy" "go.temporal.io/server/service/worker/scheduler" "go.temporal.io/server/tests/testcore" + "go.uber.org/fx" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -1307,157 +1313,370 @@ func 
TestScheduleMigrationV1ToV2NoDuplicateRecentActions(t *testing.T) { require.NoError(t, err) } -// TestDeleteScheduleClearsBothStacks verifies that when a schedule exists in -// both the CHASM (V2) and workflow-backed (V1) stacks for the same scheduleId -// — as can happen during dual-stack migration — a single frontend -// DeleteSchedule call removes it from both stacks. -func (s *ScheduleMigrationTestSuite) TestDeleteScheduleClearsBothStacks() { +// TestDeleteScheduleContextMetadata verifies that DeleteSchedule propagates the +// correct context metadata (workflow-type, workflow-task-queue) for every +// combination of CHASM and V1 state. This metadata is read by saas-temporal's +// metering interceptor for action attribution. +// +// We assert by reading gRPC response trailers: the frontend's +// ContextMetadataInterceptor is decorated to setTrailer=true for this test, +// so any context metadata set during the handler is emitted as trailers that +// the client can read directly. +func (s *ScheduleMigrationTestSuite) TestDeleteScheduleContextMetadata() { env := testcore.NewEnv( s.T(), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, true), + testcore.WithFxOptions(primitives.FrontendService, + fx.Decorate(func(logger log.Logger) *interceptor.ContextMetadataInterceptor { + return interceptor.NewContextMetadataInterceptor(true, logger) + }), + ), ) - ctx := testcore.NewContext() - sid := testcore.RandomizeStr("sched-delete-both-stacks") - wid := testcore.RandomizeStr("sched-delete-both-stacks-wf") - wt := testcore.RandomizeStr("sched-delete-both-stacks-wt") - tq := testcore.RandomizeStr("tq") + newSched := func() (sid, wt, tq string, sched *schedulepb.Schedule) { + sid = testcore.RandomizeStr("sid") + wt = testcore.RandomizeStr("wt") + tq = testcore.RandomizeStr("tq") + sched = &schedulepb.Schedule{ + 
Spec: &schedulepb.ScheduleSpec{ + Interval: []*schedulepb.IntervalSpec{ + {Interval: durationpb.New(1 * time.Hour)}, + }, + }, + Action: &schedulepb.ScheduleAction{ + Action: &schedulepb.ScheduleAction_StartWorkflow{ + StartWorkflow: &workflowpb.NewWorkflowExecutionInfo{ + WorkflowId: testcore.RandomizeStr("wid"), + WorkflowType: &commonpb.WorkflowType{Name: wt}, + TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }, + }, + }, + } + return + } - nsName := env.Namespace().String() - nsID := env.NamespaceID().String() - sched := &schedulepb.Schedule{ - Spec: &schedulepb.ScheduleSpec{ - Interval: []*schedulepb.IntervalSpec{ - {Interval: durationpb.New(1 * time.Hour)}, + createCHASMSchedule := func(t *testing.T, sid string, sched *schedulepb.Schedule) { + _, err := env.GetTestCluster().SchedulerClient().CreateSchedule( + testcore.NewContext(), + &schedulerpb.CreateScheduleRequest{ + NamespaceId: env.NamespaceID().String(), + FrontendRequest: &workflowservice.CreateScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Schedule: sched, + Identity: "test", + RequestId: testcore.RandomizeStr("req"), + }, }, - }, - Action: &schedulepb.ScheduleAction{ - Action: &schedulepb.ScheduleAction_StartWorkflow{ - StartWorkflow: &workflowpb.NewWorkflowExecutionInfo{ - WorkflowId: wid, - WorkflowType: &commonpb.WorkflowType{Name: wt}, - TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + ) + require.NoError(t, err) + } + + createCHASMSentinel := func(t *testing.T, sid string) { + _, err := env.GetTestCluster().SchedulerClient().CreateSentinel( + testcore.NewContext(), + &schedulerpb.CreateSentinelRequest{ + NamespaceId: env.NamespaceID().String(), + Namespace: env.Namespace().String(), + ScheduleId: sid, + }, + ) + require.NoError(t, err) + } + + createV1Scheduler := func(t *testing.T, sid string, sched *schedulepb.Schedule) { + startArgs := &schedulespb.StartScheduleArgs{ + Schedule: sched, + 
State: &schedulespb.InternalState{ + Namespace: env.Namespace().String(), + NamespaceId: env.NamespaceID().String(), + ScheduleId: sid, + ConflictToken: scheduler.InitialConflictToken, + }, + } + inputPayloads, err := sdk.PreferProtoDataConverter.ToPayloads(startArgs) + require.NoError(t, err) + _, err = env.GetTestCluster().HistoryClient().StartWorkflowExecution( + testcore.NewContext(), + common.CreateHistoryStartWorkflowRequest( + env.NamespaceID().String(), + &workflowservice.StartWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowId: scheduler.WorkflowIDPrefix + sid, + WorkflowType: &commonpb.WorkflowType{Name: scheduler.WorkflowType}, + TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, + Input: inputPayloads, + Identity: "test", + RequestId: testcore.RandomizeStr("req"), + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, }, + nil, nil, time.Now().UTC(), + ), + ) + require.NoError(t, err) + } + + createV1DummySentinel := func(t *testing.T, sid string) { + _, err := env.GetTestCluster().HistoryClient().StartWorkflowExecution( + testcore.NewContext(), + common.CreateHistoryStartWorkflowRequest( + env.NamespaceID().String(), + &workflowservice.StartWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowId: scheduler.WorkflowIDPrefix + sid, + WorkflowType: &commonpb.WorkflowType{Name: dummy.DummyWFTypeName}, + TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, + Identity: "test", + RequestId: testcore.RandomizeStr("req"), + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, + }, + nil, nil, time.Now().UTC(), + ), + ) + require.NoError(t, err) + } + + deleteAndAssertMetadata := func(t *testing.T, sid, expectedWfType, expectedTQ string) { + var trailer metadata.MD + _, err := 
env.FrontendClient().DeleteSchedule( + testcore.NewContext(), + &workflowservice.DeleteScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Identity: "test", }, - }, + grpc.Trailer(&trailer), + ) + require.NoError(t, err) + require.Equal(t, []string{expectedWfType}, trailer.Get("workflow-type"), + "workflow-type should match the owning stack's metadata") + require.Equal(t, []string{expectedTQ}, trailer.Get("workflow-task-queue"), + "workflow-task-queue should match the owning stack's metadata") } - // Create the CHASM schedule directly. - _, err := env.GetTestCluster().SchedulerClient().CreateSchedule( - ctx, - &schedulerpb.CreateScheduleRequest{ - NamespaceId: nsID, - FrontendRequest: &workflowservice.CreateScheduleRequest{ - Namespace: nsName, + // Subtest: Both stacks have real entries. CHASM metadata wins. + s.Run("BothStacks", func(s *ScheduleMigrationTestSuite) { + sid, wt, tq, sched := newSched() + createCHASMSchedule(s.T(), sid, sched) + createV1Scheduler(s.T(), sid, sched) + deleteAndAssertMetadata(s.T(), sid, wt, tq) + }) + + // Subtest: CHASM has real schedule, V1 has dummy sentinel. CHASM metadata wins. + s.Run("CHASMOnly_V1Sentinel", func(s *ScheduleMigrationTestSuite) { + sid, wt, tq, sched := newSched() + createCHASMSchedule(s.T(), sid, sched) + createV1DummySentinel(s.T(), sid) + deleteAndAssertMetadata(s.T(), sid, wt, tq) + }) + + // Subtest: CHASM has sentinel, V1 has real scheduler. V1 metadata wins. + s.Run("CHASMSentinel_V1Real", func(s *ScheduleMigrationTestSuite) { + sid, _, _, sched := newSched() + createCHASMSentinel(s.T(), sid) + createV1Scheduler(s.T(), sid, sched) + deleteAndAssertMetadata(s.T(), sid, scheduler.WorkflowType, primitives.PerNSWorkerTaskQueue) + }) + + // Subtest: No CHASM entry, V1 has real scheduler. V1 metadata wins. 
+ s.Run("V1Only_NoCHASM", func(s *ScheduleMigrationTestSuite) { + sid, _, _, sched := newSched() + createV1Scheduler(s.T(), sid, sched) + deleteAndAssertMetadata(s.T(), sid, scheduler.WorkflowType, primitives.PerNSWorkerTaskQueue) + }) + + // Subtest: CHASM has sentinel, V1 has nothing. Delete returns error. + // Metering skips error responses so metadata content is irrelevant. + s.Run("CHASMSentinel_V1Gone", func(s *ScheduleMigrationTestSuite) { + sid := testcore.RandomizeStr("sid") + createCHASMSentinel(s.T(), sid) + _, err := env.FrontendClient().DeleteSchedule( + testcore.NewContext(), + &workflowservice.DeleteScheduleRequest{ + Namespace: env.Namespace().String(), ScheduleId: sid, - Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), }, - }, + ) + var notFoundErr *serviceerror.NotFound + s.ErrorAs(err, &notFoundErr) + s.NotContains(notFoundErr.Message, "sentinel", + "sentinel error should not leak to the client") + }) + + // Subtest: Neither stack has the schedule. Delete returns error. + s.Run("NeitherStack", func(s *ScheduleMigrationTestSuite) { + sid := testcore.RandomizeStr("nonexistent") + _, err := env.FrontendClient().DeleteSchedule( + testcore.NewContext(), + &workflowservice.DeleteScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Identity: "test", + }, + ) + var notFoundErr *serviceerror.NotFound + s.ErrorAs(err, &notFoundErr) + s.NotContains(notFoundErr.Message, "sentinel", + "sentinel error should not leak to the client") + }) +} + +// TestPatchScheduleContextMetadata verifies that PatchSchedule propagates the +// correct context metadata for CHASM and V1 schedules.
+func (s *ScheduleMigrationTestSuite) TestPatchScheduleContextMetadata() { + env := testcore.NewEnv( + s.T(), + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, true), + testcore.WithFxOptions(primitives.FrontendService, + fx.Decorate(func(logger log.Logger) *interceptor.ContextMetadataInterceptor { + return interceptor.NewContextMetadataInterceptor(true, logger) + }), + ), ) - s.NoError(err) - // Create the V1 (workflow-backed) scheduler directly with the same ID. - startArgs := &schedulespb.StartScheduleArgs{ - Schedule: sched, - State: &schedulespb.InternalState{ - Namespace: nsName, - NamespaceId: nsID, - ScheduleId: sid, - ConflictToken: scheduler.InitialConflictToken, - }, + newSched := func() (sid, wt, tq string, sched *schedulepb.Schedule) { + sid = testcore.RandomizeStr("sid") + wt = testcore.RandomizeStr("wt") + tq = testcore.RandomizeStr("tq") + sched = &schedulepb.Schedule{ + Spec: &schedulepb.ScheduleSpec{ + Interval: []*schedulepb.IntervalSpec{ + {Interval: durationpb.New(1 * time.Hour)}, + }, + }, + Action: &schedulepb.ScheduleAction{ + Action: &schedulepb.ScheduleAction_StartWorkflow{ + StartWorkflow: &workflowpb.NewWorkflowExecutionInfo{ + WorkflowId: testcore.RandomizeStr("wid"), + WorkflowType: &commonpb.WorkflowType{Name: wt}, + TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }, + }, + }, + } + return } - inputPayloads, err := sdk.PreferProtoDataConverter.ToPayloads(startArgs) - s.NoError(err) - v1WorkflowID := scheduler.WorkflowIDPrefix + sid - startReq := &workflowservice.StartWorkflowExecutionRequest{ - Namespace: nsName, - WorkflowId: v1WorkflowID, - WorkflowType: &commonpb.WorkflowType{Name: scheduler.WorkflowType}, - TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, - Input: inputPayloads, - Identity: "test", - 
RequestId: testcore.RandomizeStr("request-id"), - WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, - WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, + + createCHASMSchedule := func(t *testing.T, sid string, sched *schedulepb.Schedule) { + _, err := env.GetTestCluster().SchedulerClient().CreateSchedule( + testcore.NewContext(), + &schedulerpb.CreateScheduleRequest{ + NamespaceId: env.NamespaceID().String(), + FrontendRequest: &workflowservice.CreateScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Schedule: sched, + Identity: "test", + RequestId: testcore.RandomizeStr("req"), + }, + }, + ) + require.NoError(t, err) } - _, err = env.GetTestCluster().HistoryClient().StartWorkflowExecution( - ctx, - common.CreateHistoryStartWorkflowRequest(nsID, startReq, nil, nil, time.Now().UTC()), - ) - s.NoError(err) - // Sanity-check: both stacks have an entry for this scheduleId. - _, err = env.GetTestCluster().SchedulerClient().DescribeSchedule( - ctx, - &schedulerpb.DescribeScheduleRequest{ - NamespaceId: nsID, - FrontendRequest: &workflowservice.DescribeScheduleRequest{Namespace: nsName, ScheduleId: sid}, - }, - ) - s.NoError(err) - v1Desc, err := env.GetTestCluster().HistoryClient().DescribeWorkflowExecution( - ctx, - &historyservice.DescribeWorkflowExecutionRequest{ - NamespaceId: nsID, - Request: &workflowservice.DescribeWorkflowExecutionRequest{ - Namespace: nsName, - Execution: &commonpb.WorkflowExecution{WorkflowId: v1WorkflowID}, + createV1Scheduler := func(t *testing.T, sid string, sched *schedulepb.Schedule) { + startArgs := &schedulespb.StartScheduleArgs{ + Schedule: sched, + State: &schedulespb.InternalState{ + Namespace: env.Namespace().String(), + NamespaceId: env.NamespaceID().String(), + ScheduleId: sid, + ConflictToken: scheduler.InitialConflictToken, }, - }, - ) - s.NoError(err) - s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, v1Desc.GetWorkflowExecutionInfo().GetStatus()) + } + 
inputPayloads, err := sdk.PreferProtoDataConverter.ToPayloads(startArgs) + require.NoError(t, err) + _, err = env.GetTestCluster().HistoryClient().StartWorkflowExecution( + testcore.NewContext(), + common.CreateHistoryStartWorkflowRequest( + env.NamespaceID().String(), + &workflowservice.StartWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowId: scheduler.WorkflowIDPrefix + sid, + WorkflowType: &commonpb.WorkflowType{Name: scheduler.WorkflowType}, + TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, + Input: inputPayloads, + Identity: "test", + RequestId: testcore.RandomizeStr("req"), + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, + }, + nil, nil, time.Now().UTC(), + ), + ) + require.NoError(t, err) + } - // Single frontend DeleteSchedule call should clear both stacks. - _, err = env.FrontendClient().DeleteSchedule(ctx, &workflowservice.DeleteScheduleRequest{ - Namespace: nsName, - ScheduleId: sid, - Identity: "test", + patchAndAssertMetadata := func(t *testing.T, sid, expectedWfType, expectedTQ string) { + var trailer metadata.MD + _, err := env.FrontendClient().PatchSchedule( + testcore.NewContext(), + &workflowservice.PatchScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Patch: &schedulepb.SchedulePatch{Pause: "test pause"}, + Identity: "test", + RequestId: uuid.NewString(), + }, + grpc.Trailer(&trailer), + ) + require.NoError(t, err) + require.Equal(t, []string{expectedWfType}, trailer.Get("workflow-type"), + "workflow-type should match the owning stack's metadata") + require.Equal(t, []string{expectedTQ}, trailer.Get("workflow-task-queue"), + "workflow-task-queue should match the owning stack's metadata") + } + + // CHASM schedule: metadata should reflect the schedule's action target. 
+ s.Run("CHASMSchedule", func(s *ScheduleMigrationTestSuite) { + sid, wt, tq, sched := newSched() + createCHASMSchedule(s.T(), sid, sched) + patchAndAssertMetadata(s.T(), sid, wt, tq) }) - s.NoError(err) - // CHASM side: the scheduler is marked closed; direct describe rejects with - // FailedPrecondition (ErrClosed). - _, err = env.GetTestCluster().SchedulerClient().DescribeSchedule( - ctx, - &schedulerpb.DescribeScheduleRequest{ - NamespaceId: nsID, - FrontendRequest: &workflowservice.DescribeScheduleRequest{Namespace: nsName, ScheduleId: sid}, - }, - ) - var failedPreconditionErr *serviceerror.FailedPrecondition - s.ErrorAs(err, &failedPreconditionErr) + // V1 schedule: metadata should reflect the V1 scheduler workflow. + s.Run("V1Schedule", func(s *ScheduleMigrationTestSuite) { + sid, _, _, sched := newSched() + createV1Scheduler(s.T(), sid, sched) + patchAndAssertMetadata(s.T(), sid, scheduler.WorkflowType, primitives.PerNSWorkerTaskQueue) + }) - // V1 side: the workflow is terminated. - s.Eventually(func() bool { - desc, descErr := env.GetTestCluster().HistoryClient().DescribeWorkflowExecution( - ctx, - &historyservice.DescribeWorkflowExecutionRequest{ - NamespaceId: nsID, - Request: &workflowservice.DescribeWorkflowExecutionRequest{ - Namespace: nsName, - Execution: &commonpb.WorkflowExecution{WorkflowId: v1WorkflowID}, - }, + // CHASM sentinel with no V1 workflow: patch should fail. 
+ s.Run("CHASMSentinel_V1Gone", func(s *ScheduleMigrationTestSuite) { + sid := testcore.RandomizeStr("sid") + _, err := env.GetTestCluster().SchedulerClient().CreateSentinel( + testcore.NewContext(), + &schedulerpb.CreateSentinelRequest{ + NamespaceId: env.NamespaceID().String(), + Namespace: env.Namespace().String(), + ScheduleId: sid, }, ) - if descErr != nil { - return false - } - return desc.GetWorkflowExecutionInfo().GetStatus() == enumspb.WORKFLOW_EXECUTION_STATUS_TERMINATED - }, 10*time.Second, 200*time.Millisecond, "V1 schedule workflow should be terminated") + s.NoError(err) - // Frontend describe should also report the schedule as gone. - var notFoundErr *serviceerror.NotFound - s.Eventually(func() bool { - _, descErr := env.FrontendClient().DescribeSchedule(ctx, &workflowservice.DescribeScheduleRequest{ - Namespace: nsName, - ScheduleId: sid, - }) - return errors.As(descErr, &notFoundErr) - }, 10*time.Second, 200*time.Millisecond, "frontend DescribeSchedule should return NotFound") + _, err = env.FrontendClient().PatchSchedule( + testcore.NewContext(), + &workflowservice.PatchScheduleRequest{ + Namespace: env.Namespace().String(), + ScheduleId: sid, + Patch: &schedulepb.SchedulePatch{Pause: "test"}, + Identity: "test", + RequestId: uuid.NewString(), + }, + ) + var notFoundErr *serviceerror.NotFound + s.ErrorAs(err, &notFoundErr) + s.NotContains(notFoundErr.Message, "sentinel", + "sentinel error should not leak to the client") + }) } // TestScheduleMigration_StaleRunningDoesNotSkipPending guards the race fix in diff --git a/tests/testcore/onebox.go b/tests/testcore/onebox.go index 7118dd2aef6..68f23f03eec 100644 --- a/tests/testcore/onebox.go +++ b/tests/testcore/onebox.go @@ -829,7 +829,7 @@ func (c *TemporalImpl) newRPCFactory( int(httpPort), frontendTLSConfig, options, - map[primitives.ServiceName][]grpc.DialOption{}, + resource.PerServiceDialOptionsProvider(logger), monitor, ), nil } diff --git a/tests/testcore/test_cluster_pool.go
b/tests/testcore/test_cluster_pool.go index 28fa0dd8460..89dc6ae0f37 100644 --- a/tests/testcore/test_cluster_pool.go +++ b/tests/testcore/test_cluster_pool.go @@ -136,24 +136,24 @@ type clusterPool struct { dedicated *pool } -func (p *clusterPool) get(t *testing.T, dedicated bool, dynamicConfig map[dynamicconfig.Key]any) *FunctionalTestBase { - if dedicated || len(dynamicConfig) > 0 { - return p.getDedicated(t, dynamicConfig) +func (p *clusterPool) get(t *testing.T, dedicated bool, dynamicConfig map[dynamicconfig.Key]any, clusterOpts []TestClusterOption) *FunctionalTestBase { + if dedicated || len(dynamicConfig) > 0 || len(clusterOpts) > 0 { + return p.getDedicated(t, dynamicConfig, clusterOpts) } return p.getShared(t) } func (p *clusterPool) getShared(t *testing.T) *FunctionalTestBase { return p.shared.get(t, func() *FunctionalTestBase { - return p.createCluster(t, nil, true) + return p.createCluster(t, nil, true, nil) }) } -func (p *clusterPool) getDedicated(t *testing.T, dynamicConfig map[dynamicconfig.Key]any) *FunctionalTestBase { - if len(dynamicConfig) > 0 { - // Custom dynamic config requires a fresh cluster (can't reuse). +func (p *clusterPool) getDedicated(t *testing.T, dynamicConfig map[dynamicconfig.Key]any, clusterOpts []TestClusterOption) *FunctionalTestBase { + if len(dynamicConfig) > 0 || len(clusterOpts) > 0 { + // Custom config or fx options require a fresh cluster (can't reuse). p.dedicated.acquireSlot(t) - cluster := p.createCluster(t, dynamicConfig, false) + cluster := p.createCluster(t, dynamicConfig, false, clusterOpts) // Register cleanup to tear down the cluster when the test completes. t.Cleanup(func() { @@ -165,13 +165,13 @@ func (p *clusterPool) getDedicated(t *testing.T, dynamicConfig map[dynamicconfig return cluster } - // If no custom dynamic config is provided, reuse an existing cluster. + // If no custom config is provided, reuse an existing cluster. 
return p.dedicated.get(t, func() *FunctionalTestBase { - return p.createCluster(t, nil, false) + return p.createCluster(t, nil, false, nil) }) } -func (p *clusterPool) createCluster(t *testing.T, dynamicConfig map[dynamicconfig.Key]any, shared bool) *FunctionalTestBase { +func (p *clusterPool) createCluster(t *testing.T, dynamicConfig map[dynamicconfig.Key]any, shared bool, clusterOpts []TestClusterOption) *FunctionalTestBase { tbase := &FunctionalTestBase{} tbase.SetT(t) @@ -182,6 +182,7 @@ func (p *clusterPool) createCluster(t *testing.T, dynamicConfig map[dynamicconfi if len(dynamicConfig) > 0 { opts = append(opts, WithDynamicConfigOverrides(dynamicConfig)) } + opts = append(opts, clusterOpts...) tbase.setupCluster(opts...) diff --git a/tests/testcore/test_env.go b/tests/testcore/test_env.go index 405624993a6..90104fe0b5a 100644 --- a/tests/testcore/test_env.go +++ b/tests/testcore/test_env.go @@ -25,9 +25,11 @@ import ( "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" "go.temporal.io/server/common/namespace" + "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/testing/taskpoller" "go.temporal.io/server/common/testing/testhooks" "go.temporal.io/server/common/testing/testvars" + "go.uber.org/fx" "google.golang.org/grpc" ) @@ -87,7 +89,9 @@ type TestOption func(*testOptions) type testOptions struct { dedicatedCluster bool + dedicatedReason string dynamicConfigSettings []dynamicConfigOverride + clusterOptions []TestClusterOption } type dynamicConfigOverride struct { @@ -109,6 +113,17 @@ func WithSdkWorker() TestOption { } } +// WithFxOptions appends fx options to a specific service's fx graph. This +// implies a dedicated cluster because custom fx options cannot be shared +// across tests. 
+func WithFxOptions(serviceName primitives.ServiceName, opts ...fx.Option) TestOption { + return func(o *testOptions) { + o.dedicatedCluster = true + o.clusterOptions = append(o.clusterOptions, WithFxOptionsForService(serviceName, opts...)) + o.dedicatedReason = "custom fx options used" + } +} + // WithDynamicConfig overrides a dynamic config setting for the test. // For settings that can be namespace-scoped, a namespace constraint is applied. // For all others that require a dedicated cluster, this implies `WithDedicatedCluster`. @@ -136,6 +151,9 @@ func NewEnv(t *testing.T, opts ...TestOption) *TestEnv { opt(&options) } dedicatedGuard := newDedicatedClusterGuard(options.dedicatedCluster) + if options.dedicatedReason != "" { + dedicatedGuard.record(options.dedicatedReason) + } // For dedicated clusters, pass all dynamic config settings at cluster creation. var startupConfig map[dynamicconfig.Key]any @@ -150,7 +168,7 @@ func NewEnv(t *testing.T, opts ...TestOption) *TestEnv { } // Obtain the test cluster from the pool. - base := testClusterPool.get(t, options.dedicatedCluster, startupConfig) + base := testClusterPool.get(t, options.dedicatedCluster, startupConfig, options.clusterOptions) cluster := base.GetTestCluster() // Create a dedicated namespace for the test to help with test isolation. From 44014249779fb7b0a14256498aae032239e51789 Mon Sep 17 00:00:00 2001 From: Shivam <57200924+Shivs11@users.noreply.github.com> Date: Fri, 15 May 2026 14:56:45 -0400 Subject: [PATCH 39/73] Worker-Versioning: Bug fix (and flaky test fix) for fetching the right current/ramping version (#10252) ## What changed? - This is a continuation from one of my earlier efforts: https://github.com/temporalio/temporal/pull/8643 - Solves a real legitimate bug in the code. - Also added a regression styled unit test which iterates the map 100 times to spot the bug. The total test time of this new test, in the worst case, is 3 seconds so not a lot of overhead imo. ## Why? 
- because I recently learned that Go uses a non-deterministic iteration order when traversing maps, and we can't rely on timestamps for the right order of iteration - the property of versioning we preserve with this change: "if you place a task queue in a new worker deployment which does not have a current version, and the task queue was present in a different worker deployment with a current version, the current version of the task queue is the previously set current version" ## **Why this isn't a hotfix candidate** The bug only surfaces under a specific combination of conditions: 1. A task queue is migrated from one worker deployment to another, **and** 2. No `setXXX` operation is called on the new deployment Looking at current usage patterns, nearly all users in this flow are doing one of the following instead: - Bumping to a new **version** (not a raw deployment swap) - Pairing the deployment move with a `setXXX` call - Explicitly unsetting the current version on the new deployment before making a switch Because none of those paths hit the problematic code, the blast radius is small enough that this doesn't warrant an expedited release imo ## How did you test it? - [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks - I would love a thorough review on this one, since it does tackle the core of routing for worker-versioning --- > [!NOTE] > **Medium Risk** > Touches task-queue routing selection logic for worker versioning; incorrect ordering or nil handling could misroute traffic, though the change is localized and covered by a regression test. > > **Overview** > Fixes a bug in `CalculateTaskQueueVersioningInfo` where iterating `DeploymentsData` could non-deterministically pick an *unversioned-but-newer* `RoutingConfig` over a *versioned-and-member* one due to Go map iteration order.
> > The selection now tracks **versioned-member** and **unversioned** candidates separately for both *current* and *ramping*, and only falls back to unversioned when no valid versioned candidate exists. Adds `TestCalculateTaskQueueVersioningInfo_MapIterationOrderRegression` to repeatedly exercise the prior probabilistic failure and prevent regressions. > > Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit 20fea44c430c028fc1592aeb85d8d53438607408. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot). --- common/worker_versioning/worker_versioning.go | 75 +++++++++++-------- .../worker_versioning_test.go | 49 ++++++++++++ 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/common/worker_versioning/worker_versioning.go b/common/worker_versioning/worker_versioning.go index a0d31082171..9be76a58421 100644 --- a/common/worker_versioning/worker_versioning.go +++ b/common/worker_versioning/worker_versioning.go @@ -928,44 +928,57 @@ func CalculateTaskQueueVersioningInfo(deployments *persistencespb.DeploymentData var routingConfigLatestCurrentVersion *deploymentpb.RoutingConfig var routingConfigLatestRampingVersion *deploymentpb.RoutingConfig - isPartOfSomeCurrentVersion := false - isPartOfSomeRampingVersion := false - - if deployments.GetDeploymentsData() != nil { - - for _, deploymentInfo := range deployments.GetDeploymentsData() { - routingConfig := deploymentInfo.GetRoutingConfig() - if routingConfig == nil { - continue + // Track the latest "versioned and TQ is a member" and "unversioned" routing configs + // separately so a versioned current/ramping always wins over an unversioned-but-newer + // entry in another deployment bucket, independent of map iteration order. 
+ // + // Only chose those RoutingConfigs which pass the HasDeploymentVersion check due to the following example case: + // t0: TQ "foo" is in current version A with other TQ's + // t1: All other TQ's are moved to new version B except for "foo". + // t2: New version B is set as the current version. + // + // When this happens, we sync to "foo" that A is no longer the current version by passing in the new routing config. However, + // version B should not be considered as the current version for "foo" because the task-queue is not part of version B. + var latestVersionedCurrent, latestUnversionedCurrent *deploymentpb.RoutingConfig + var latestVersionedRamping, latestUnversionedRamping *deploymentpb.RoutingConfig + + for _, deploymentInfo := range deployments.GetDeploymentsData() { + rc := deploymentInfo.GetRoutingConfig() + + tCurrent := rc.GetCurrentVersionChangedTime().AsTime() + if HasDeploymentVersion(deployments, DeploymentVersionFromDeployment(DeploymentFromExternalDeploymentVersion(rc.GetCurrentDeploymentVersion()))) { + if tCurrent.After(latestVersionedCurrent.GetCurrentVersionChangedTime().AsTime()) { + latestVersionedCurrent = rc } - - // Only chose those RoutingConfigs which pass the HasDeploymentVersion check due to the following example case: - // t0: TQ "foo" is in current version A with other TQ's - // t1: All other TQ's are moved to new version B except for "foo". - // t2: New version B is set as the current version. - // - // When this happens, we sync to "foo" that A is no longer the current version by passing in the new routing config. However, - // version B should not be considered as the current version for "foo" because the task-queue is not part of version B. 
- if t := routingConfig.GetCurrentVersionChangedTime().AsTime(); t.After(routingConfigLatestCurrentVersion.GetCurrentVersionChangedTime().AsTime()) { - if HasDeploymentVersion(deployments, DeploymentVersionFromDeployment(DeploymentFromExternalDeploymentVersion(routingConfig.GetCurrentDeploymentVersion()))) { - routingConfigLatestCurrentVersion = routingConfig - isPartOfSomeCurrentVersion = true - } else if !isPartOfSomeCurrentVersion && routingConfig.GetCurrentDeploymentVersion() == nil { - routingConfigLatestCurrentVersion = routingConfig - } + } else if rc.GetCurrentDeploymentVersion() == nil { + if tCurrent.After(latestUnversionedCurrent.GetCurrentVersionChangedTime().AsTime()) { + latestUnversionedCurrent = rc } + } - if t := routingConfig.GetRampingVersionPercentageChangedTime().AsTime(); t.After(routingConfigLatestRampingVersion.GetRampingVersionPercentageChangedTime().AsTime()) { - if HasDeploymentVersion(deployments, DeploymentVersionFromDeployment(DeploymentFromExternalDeploymentVersion(routingConfig.GetRampingDeploymentVersion()))) { - routingConfigLatestRampingVersion = routingConfig - isPartOfSomeRampingVersion = true - } else if !isPartOfSomeRampingVersion && routingConfig.GetRampingDeploymentVersion() == nil { - routingConfigLatestRampingVersion = routingConfig - } + tRamping := rc.GetRampingVersionPercentageChangedTime().AsTime() + if HasDeploymentVersion(deployments, DeploymentVersionFromDeployment(DeploymentFromExternalDeploymentVersion(rc.GetRampingDeploymentVersion()))) { + if tRamping.After(latestVersionedRamping.GetRampingVersionPercentageChangedTime().AsTime()) { + latestVersionedRamping = rc + } + } else if rc.GetRampingDeploymentVersion() == nil { + if tRamping.After(latestUnversionedRamping.GetRampingVersionPercentageChangedTime().AsTime()) { + latestUnversionedRamping = rc } } } + if latestVersionedCurrent != nil { + routingConfigLatestCurrentVersion = latestVersionedCurrent + } else { + routingConfigLatestCurrentVersion = 
latestUnversionedCurrent + } + if latestVersionedRamping != nil { + routingConfigLatestRampingVersion = latestVersionedRamping + } else { + routingConfigLatestRampingVersion = latestUnversionedRamping + } + if routingConfigLatestCurrentVersion.GetCurrentDeploymentVersion() == nil && current.GetVersion() != nil { // The new current version is not unversioned but belongs to a versioned deployment which synced to the task-queue using the old deployment data format. routingConfigLatestCurrentVersion = nil diff --git a/common/worker_versioning/worker_versioning_test.go b/common/worker_versioning/worker_versioning_test.go index 2a42176a724..ff1d4eb8407 100644 --- a/common/worker_versioning/worker_versioning_test.go +++ b/common/worker_versioning/worker_versioning_test.go @@ -755,6 +755,55 @@ func TestCalculateTaskQueueVersioningInfo(t *testing.T) { } } +// TestCalculateTaskQueueVersioningInfo_MapIterationOrderRegression guards against +// reintroducing a bug where the per-deployment loop in CalculateTaskQueueVersioningInfo +// would pick an unversioned-but-newer RoutingConfig over a versioned-and-member +// RoutingConfig depending on Go map iteration order. Go re-randomizes map iteration +// per range, so calling the function many times on the same input makes a +// ~50%-per-call probabilistic bug practically deterministic. 
+func TestCalculateTaskQueueVersioningInfo_MapIterationOrderRegression(t *testing.T) { + t1 := timestamp.TimePtr(time.Now().Add(-time.Hour)) + t2 := timestamp.TimePtr(time.Now()) + + data := &persistencespb.DeploymentData{ + DeploymentsData: map[string]*persistencespb.WorkerDeploymentData{ + "foo": { + RoutingConfig: &deploymentpb.RoutingConfig{ + CurrentDeploymentVersion: &deploymentpb.WorkerDeploymentVersion{DeploymentName: "foo", BuildId: v1.GetBuildId()}, + CurrentVersionChangedTime: t1, + RampingDeploymentVersion: &deploymentpb.WorkerDeploymentVersion{DeploymentName: "foo", BuildId: v1.GetBuildId()}, + RampingVersionPercentage: 30, + RampingVersionPercentageChangedTime: t1, + }, + Versions: map[string]*deploymentspb.WorkerDeploymentVersionData{ + v1.GetBuildId(): {}, + }, + }, + "bar": { + RoutingConfig: &deploymentpb.RoutingConfig{ + CurrentDeploymentVersion: nil, + CurrentVersionChangedTime: t2, + RampingDeploymentVersion: nil, + RampingVersionPercentage: 20, + RampingVersionPercentageChangedTime: t2, + }, + Versions: map[string]*deploymentspb.WorkerDeploymentVersionData{}, + }, + }, + } + + const N = 100 + for i := range N { + current, _, _, ramping, _, _, _, _ := CalculateTaskQueueVersioningInfo(data) + if !current.Equal(v1) { + t.Fatalf("iteration %d: got current = %v, want %v (map iteration order regression)", i, current, v1) + } + if !ramping.Equal(v1) { + t.Fatalf("iteration %d: got ramping = %v, want %v (map iteration order regression)", i, ramping, v1) + } + } +} + func TestFindDeploymentVersionForWorkflowID(t *testing.T) { tests := []struct { name string From f902be3413495ca9e944e93395da2b4ac01c3d07 Mon Sep 17 00:00:00 2001 From: feiyang Date: Fri, 15 May 2026 12:38:25 -0700 Subject: [PATCH 40/73] ts/patch2: bound sanity check (#10287) ## What changed? 1. rm hardcoded min bound 2. 
updated maxSkip bound should be larger current accumulated duration ## Why The current user timer allows sub-1s settings with best efforts promise, and time skipping stays in line with current design. ## How did you test it? - [x] built - [ ] run locally and tested manually - [x] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) --- common/namespace/const.go | 3 - service/frontend/workflow_handler.go | 20 ----- service/frontend/workflow_handler_test.go | 28 ------- .../history/api/updateworkflowoptions/api.go | 26 +++++++ .../api/updateworkflowoptions/api_test.go | 78 +++++++++++++++++++ 5 files changed, 104 insertions(+), 51 deletions(-) diff --git a/common/namespace/const.go b/common/namespace/const.go index 0b6b8e696ae..9d3f48b22c9 100644 --- a/common/namespace/const.go +++ b/common/namespace/const.go @@ -11,7 +11,4 @@ const ( // namespaces. Allow short values but disallow zero to avoid confusion with // interpreting zero as infinite. MinRetentionLocal = 1 * time.Hour - - // MinTimeSkippingDuration is the minimum duration for time skipping. 
- MinTimeSkippingDuration = 1 * time.Minute ) diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index b888eaae80e..9736b4979a3 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -722,26 +722,6 @@ func (wh *WorkflowHandler) validateTimeSkippingConfig( ) } - if timeSkippingConfig.GetBound() != nil { - switch bound := timeSkippingConfig.GetBound().(type) { - case *workflowpb.TimeSkippingConfig_MaxSkippedDuration: - if bound.MaxSkippedDuration.AsDuration() < namespace.MinTimeSkippingDuration { - return serviceerror.NewInvalidArgumentf( - "Max skipped duration must be at least %s", - namespace.MinTimeSkippingDuration, - ) - } - case *workflowpb.TimeSkippingConfig_MaxElapsedDuration: - if bound.MaxElapsedDuration.AsDuration() < namespace.MinTimeSkippingDuration { - return serviceerror.NewInvalidArgumentf( - "Max elapsed duration must be at least %s", - namespace.MinTimeSkippingDuration, - ) - } - default: - return serviceerror.NewInvalidArgumentf("unsupported time skipping bound type: %T", bound) - } - } return nil } diff --git a/service/frontend/workflow_handler_test.go b/service/frontend/workflow_handler_test.go index 5ed7bcba34b..5d66a229221 100644 --- a/service/frontend/workflow_handler_test.go +++ b/service/frontend/workflow_handler_test.go @@ -3336,7 +3336,6 @@ func (s *WorkflowHandlerSuite) TestGetWorkflowExecutionHistory_InternalRawHistor func (s *WorkflowHandlerSuite) TestValidateTimeSkippingConfig() { config := s.newConfig() wh := s.getWorkflowHandler(config) - var invalidArgumentErr *serviceerror.InvalidArgument var unimplementedErr *serviceerror.Unimplemented // nil config is valid @@ -3355,33 +3354,6 @@ func (s *WorkflowHandlerSuite) TestValidateTimeSkippingConfig() { // config with enabled=true and dynamic config enabled is valid s.Require().NoError(wh.validateTimeSkippingConfig(&workflowpb.TimeSkippingConfig{Enabled: true}, s.testNamespace)) - - // MaxSkippedDuration below 
1 minute is rejected - // error type is InvalidArgument - halfMinDuration := time.Duration(0.5 * float64(namespace.MinTimeSkippingDuration)) - s.Require().ErrorAs(wh.validateTimeSkippingConfig(&workflowpb.TimeSkippingConfig{ - Enabled: true, - Bound: &workflowpb.TimeSkippingConfig_MaxSkippedDuration{MaxSkippedDuration: durationpb.New(halfMinDuration)}, - }, s.testNamespace), &invalidArgumentErr) - - // MaxSkippedDuration exactly 1 minute is valid - s.Require().NoError(wh.validateTimeSkippingConfig(&workflowpb.TimeSkippingConfig{ - Enabled: true, - Bound: &workflowpb.TimeSkippingConfig_MaxSkippedDuration{MaxSkippedDuration: durationpb.New(namespace.MinTimeSkippingDuration)}, - }, s.testNamespace)) - - // MaxElapsedDuration below 1 minute is rejected - s.Require().ErrorAs(wh.validateTimeSkippingConfig(&workflowpb.TimeSkippingConfig{ - Enabled: true, - Bound: &workflowpb.TimeSkippingConfig_MaxElapsedDuration{MaxElapsedDuration: durationpb.New(halfMinDuration)}, - }, s.testNamespace), &invalidArgumentErr) - - // MaxElapsedDuration exactly 1 minute is valid - s.Require().NoError(wh.validateTimeSkippingConfig(&workflowpb.TimeSkippingConfig{ - Enabled: true, - Bound: &workflowpb.TimeSkippingConfig_MaxElapsedDuration{MaxElapsedDuration: durationpb.New(namespace.MinTimeSkippingDuration)}, - }, s.testNamespace)) - } // TestExecuteMultiOperation_TimeSkipping_DCDisabled verifies that when the DC gate is off, diff --git a/service/history/api/updateworkflowoptions/api.go b/service/history/api/updateworkflowoptions/api.go index b3407f9fdf0..a2defe8f766 100644 --- a/service/history/api/updateworkflowoptions/api.go +++ b/service/history/api/updateworkflowoptions/api.go @@ -84,6 +84,10 @@ func Invoke( if err != nil { return nil, err } + err = validateTimeSkippingConfig(requestedOptions.GetTimeSkippingConfig(), mutableState) + if err != nil { + return nil, err + } mergedOpts, hasChanges, err := MergeAndApply(mutableState, requestedOptions, req.GetUpdateMask(), req.GetIdentity()) if 
err != nil { @@ -124,6 +128,28 @@ func Invoke( return ret, nil } +// validateTimeSkippingConfig rejects an update whose MaxSkippedDuration is +// below what the workflow has already skipped — the new bound would be +// retroactively violated. Validated against current MS state, before merge. +func validateTimeSkippingConfig(cfg *workflowpb.TimeSkippingConfig, ms historyi.MutableState) error { + if !cfg.GetEnabled() { + return nil + } + bound, ok := cfg.GetBound().(*workflowpb.TimeSkippingConfig_MaxSkippedDuration) + if !ok { + return nil + } + maxSkipped := bound.MaxSkippedDuration.AsDuration() + accumulated := ms.GetExecutionInfo().GetTimeSkippingInfo().GetAccumulatedSkippedDuration().AsDuration() + if maxSkipped <= accumulated { + return serviceerror.NewInvalidArgumentf( + "max skipped duration must be greater than skipped duration: %v <= %v", + maxSkipped, accumulated, + ) + } + return nil +} + // MergeAndApply merges the requested options mentioned in the field mask with the current options in the mutable state // and applies the changes to the mutable state. Returns the merged options and a boolean indicating if there were any changes. 
func MergeAndApply( diff --git a/service/history/api/updateworkflowoptions/api_test.go b/service/history/api/updateworkflowoptions/api_test.go index 9132c164de2..07d31ec7289 100644 --- a/service/history/api/updateworkflowoptions/api_test.go +++ b/service/history/api/updateworkflowoptions/api_test.go @@ -11,6 +11,7 @@ import ( deploymentpb "go.temporal.io/api/deployment/v1" enumspb "go.temporal.io/api/enums/v1" historypb "go.temporal.io/api/history/v1" + "go.temporal.io/api/serviceerror" workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" "go.temporal.io/server/api/historyservice/v1" @@ -333,6 +334,83 @@ func (s *updateWorkflowOptionsSuite) TestInvoke_Success() { proto.Equal(expectedOverrideOptions, resp.GetWorkflowExecutionOptions()) } +func TestValidateTimeSkippingConfig(t *testing.T) { + tenMin := durationpb.New(10 * time.Minute) + twentyMin := durationpb.New(20 * time.Minute) + maxSkippedTen := &workflowpb.TimeSkippingConfig_MaxSkippedDuration{MaxSkippedDuration: tenMin} + maxSkippedTwenty := &workflowpb.TimeSkippingConfig_MaxSkippedDuration{MaxSkippedDuration: twentyMin} + maxElapsedTen := &workflowpb.TimeSkippingConfig_MaxElapsedDuration{MaxElapsedDuration: tenMin} + + tcs := []struct { + name string + config *workflowpb.TimeSkippingConfig + accumulated *durationpb.Duration + wantErr bool + }{ + { + name: "nil config", + config: nil, + }, + { + name: "disabled short-circuits even when bound would be violated", + config: &workflowpb.TimeSkippingConfig{Enabled: false, Bound: maxSkippedTen}, + accumulated: twentyMin, + }, + { + name: "enabled, no bound", + config: &workflowpb.TimeSkippingConfig{Enabled: true}, + }, + { + name: "MaxElapsedDuration bound is not validated here", + config: &workflowpb.TimeSkippingConfig{Enabled: true, Bound: maxElapsedTen}, + accumulated: twentyMin, + }, + { + name: "MaxSkipped set, nil accumulated treated as zero", + config: &workflowpb.TimeSkippingConfig{Enabled: true, Bound: maxSkippedTen}, + }, + { 
+ name: "MaxSkipped > accumulated", + config: &workflowpb.TimeSkippingConfig{Enabled: true, Bound: maxSkippedTwenty}, + accumulated: tenMin, + }, + { + name: "MaxSkipped == accumulated is rejected (must be strictly greater)", + config: &workflowpb.TimeSkippingConfig{Enabled: true, Bound: maxSkippedTen}, + accumulated: tenMin, + wantErr: true, + }, + { + name: "MaxSkipped < accumulated is rejected", + config: &workflowpb.TimeSkippingConfig{Enabled: true, Bound: maxSkippedTen}, + accumulated: twentyMin, + wantErr: true, + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + ms := historyi.NewMockMutableState(ctrl) + info := &persistencespb.WorkflowExecutionInfo{} + if tc.accumulated != nil { + info.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: tc.accumulated, + } + } + ms.EXPECT().GetExecutionInfo().Return(info).AnyTimes() + + err := validateTimeSkippingConfig(tc.config, ms) + if tc.wantErr { + var invalidArg *serviceerror.InvalidArgument + require.ErrorAs(t, err, &invalidArg) + } else { + require.NoError(t, err) + } + }) + } +} + func TestMergeAndApply_TimeSkippingConfig(t *testing.T) { oneHour := durationpb.New(time.Hour) twoHours := durationpb.New(2 * time.Hour) From 8a904bd8005a5ab41f91e3953a1c8ad5af608ea8 Mon Sep 17 00:00:00 2001 From: Yichao Yang Date: Fri, 15 May 2026 14:21:42 -0700 Subject: [PATCH 41/73] Add per namespace rate limiter for matching service (#10069) ## What changed? - Add per namespace rate limiter for matching service - Same as https://github.com/temporalio/temporal/pull/9884 but for matching. ## Why? - Noisy neighbor protection. Provide knob for preventing one namespace from consuming all available matching host rps. ## How did you test it? 
- [x] built - [ ] run locally and tested manually - [x] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- client/admin/metric_client.go | 3 +- client/frontend/metric_client.go | 3 +- client/matching/metric_client.go | 3 +- common/dynamicconfig/constants.go | 25 +++++++---- service/frontend/fx.go | 9 +++- service/frontend/service.go | 2 +- service/matching/config.go | 4 ++ service/matching/configs/quotas.go | 67 ++++++++++++++++++------------ service/matching/fx.go | 28 +++++++++++++ 9 files changed, 103 insertions(+), 41 deletions(-) diff --git a/client/admin/metric_client.go b/client/admin/metric_client.go index 73c7b5e6975..a53eb707667 100644 --- a/client/admin/metric_client.go +++ b/client/admin/metric_client.go @@ -57,7 +57,8 @@ func (c *metricClient) finishMetricsRecording( *serviceerror.QueryFailed, *serviceerror.NamespaceNotFound, *serviceerror.WorkflowNotReady, - *serviceerror.WorkflowExecutionAlreadyStarted: + *serviceerror.WorkflowExecutionAlreadyStarted, + *serviceerror.ResourceExhausted: // noop - not interest and too many logs default: c.throttledLogger.Info("admin client encountered error", tag.Error(err), tag.ServiceErrorType(err)) diff --git a/client/frontend/metric_client.go b/client/frontend/metric_client.go index fc6f282dfb7..f9c204a83c5 100644 --- a/client/frontend/metric_client.go +++ b/client/frontend/metric_client.go @@ -56,7 +56,8 @@ func (c *metricClient) finishMetricsRecording( *serviceerror.QueryFailed, *serviceerror.NamespaceNotFound, *serviceerror.WorkflowNotReady, - *serviceerror.WorkflowExecutionAlreadyStarted: + *serviceerror.WorkflowExecutionAlreadyStarted, + *serviceerror.ResourceExhausted: // noop - not interest and too many logs default: c.throttledLogger.Info("frontend client encountered error", tag.Error(err), tag.ServiceErrorType(err)) diff --git a/client/matching/metric_client.go b/client/matching/metric_client.go index f2a274ff451..53edd2b34b7 100644 --- 
a/client/matching/metric_client.go +++ b/client/matching/metric_client.go @@ -232,7 +232,8 @@ func (c *metricClient) finishMetricsRecording( *serviceerror.QueryFailed, *serviceerror.NamespaceNotFound, *serviceerror.NewerBuildExists, - *serviceerror.WorkflowExecutionAlreadyStarted: + *serviceerror.WorkflowExecutionAlreadyStarted, + *serviceerror.ResourceExhausted: // noop - not interest and too many logs default: c.throttledLogger.Info("matching client encountered error", tag.Error(err), tag.ServiceErrorType(err)) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index e56097eecfe..2b02cc27114 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -321,6 +321,16 @@ operator API calls (highest priority). Should be >0.0 and <= 1.0 (defaults to 20 Setting this to 0 prevents the search attribute from being set when a problem is detected, and unset when the problem is resolved.`, ) + PollWaitForNamespaceRateLimitToken = NewNamespaceBoolSetting( + "system.pollWaitForNamespaceRateLimitToken", + false, + `PollWaitForNamespaceRateLimitToken controls whether poll requests wait for +a namespace RPS rate limit token to become available instead of immediately rejecting +with ResourceExhausted. When enabled, poll requests block until a token is available +or the request context deadline is reached. The concurrent request rate limiter fires +before this limiter and will still reject requests that exceed the concurrent limit.`, + ) + // keys for size limit BlobSizeLimitError = NewNamespaceIntSetting( @@ -697,15 +707,6 @@ exceeded, not when it is only reached.`, instances in the cluster, for a given namespace, per-API method. If this is set to 0 (the default), then it is ignored. 
The name 'frontend.globalNamespaceCount' is kept for consistency with the per-instance limit name, 'frontend.namespaceCount'.`, - ) - FrontendPollWaitForNamespaceRateLimitToken = NewNamespaceBoolSetting( - "frontend.pollWaitForNamespaceRateLimitToken", - false, - `FrontendPollWaitForNamespaceRateLimitToken controls whether poll requests wait for -a namespace RPS rate limit token to become available instead of immediately rejecting -with ResourceExhausted. When enabled, poll requests block until a token is available -or the request context deadline is reached. The concurrent request rate limiter fires -before this limiter and will still reject requests that exceed the concurrent limit.`, ) FrontendMaxNamespaceVisibilityRPSPerInstance = NewNamespaceIntSetting( "frontend.namespaceRPS.visibility", @@ -1116,6 +1117,12 @@ Default is 0, means, namespace will be deleted immediately.`, 1200, `MatchingRPS is request rate per second for each matching host`, ) + MatchingNamespaceRPS = NewNamespaceIntSetting( + "matching.namespaceRPS", + 0, + `MatchingNamespaceRPS is namespace rate limit per second for each matching host. 
+If value less or equal to 0, will fall back to MatchingRPS`, + ) MatchingPersistenceMaxQPS = NewGlobalIntSetting( "matching.persistenceMaxQPS", 3000, diff --git a/service/frontend/fx.go b/service/frontend/fx.go index a40077a8aa8..e3a46524103 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -577,7 +577,14 @@ func NamespaceRateLimitInterceptorProvider( ) }, ) - return interceptor.NewNamespaceRateLimitInterceptor(namespaceRegistry, namespaceRateLimiter, map[string]int{}, configs.PollTaskAPISet, serviceConfig.PollWaitForNamespaceRateLimitToken, metricsHandler) + return interceptor.NewNamespaceRateLimitInterceptor( + namespaceRegistry, + namespaceRateLimiter, + map[string]int{}, // no token overrides + configs.PollTaskAPISet, + serviceConfig.PollWaitForNamespaceRateLimitToken, + metricsHandler, + ) } func NamespaceCountLimitInterceptorProvider( diff --git a/service/frontend/service.go b/service/frontend/service.go index 07635c7ce17..04b5a1f3ded 100644 --- a/service/frontend/service.go +++ b/service/frontend/service.go @@ -290,7 +290,7 @@ func NewConfig( MaxNamespaceBurstRatioPerInstance: dynamicconfig.FrontendMaxNamespaceBurstRatioPerInstance.Get(dc), MaxConcurrentLongRunningRequestsPerInstance: dynamicconfig.FrontendMaxConcurrentLongRunningRequestsPerInstance.Get(dc), MaxGlobalConcurrentLongRunningRequests: dynamicconfig.FrontendGlobalMaxConcurrentLongRunningRequests.Get(dc), - PollWaitForNamespaceRateLimitToken: dynamicconfig.FrontendPollWaitForNamespaceRateLimitToken.Get(dc), + PollWaitForNamespaceRateLimitToken: dynamicconfig.PollWaitForNamespaceRateLimitToken.Get(dc), MaxNamespaceVisibilityRPSPerInstance: dynamicconfig.FrontendMaxNamespaceVisibilityRPSPerInstance.Get(dc), MaxNamespaceVisibilityBurstRatioPerInstance: dynamicconfig.FrontendMaxNamespaceVisibilityBurstRatioPerInstance.Get(dc), MaxNamespaceNamespaceReplicationInducingAPIsRPSPerInstance: dynamicconfig.FrontendMaxNamespaceNamespaceReplicationInducingAPIsRPSPerInstance.Get(dc), diff 
--git a/service/matching/config.go b/service/matching/config.go index e5ef6fba1d1..54fcae8e757 100644 --- a/service/matching/config.go +++ b/service/matching/config.go @@ -26,7 +26,9 @@ type ( PersistenceQPSBurstRatio dynamicconfig.FloatPropertyFn SyncMatchWaitDuration dynamicconfig.DurationPropertyFnWithTaskQueueFilter RPS dynamicconfig.IntPropertyFn + NamespaceRPS dynamicconfig.IntPropertyFnWithNamespaceFilter OperatorRPSRatio dynamicconfig.FloatPropertyFn + PollWaitForNamespaceRateLimitToken dynamicconfig.BoolPropertyFnWithNamespaceFilter AlignMembershipChange dynamicconfig.DurationPropertyFn ShutdownDrainDuration dynamicconfig.DurationPropertyFn HistoryMaxPageSize dynamicconfig.IntPropertyFnWithNamespaceFilter @@ -274,7 +276,9 @@ func NewConfig( MaxTaskQueuesInDeployment: dynamicconfig.MatchingMaxTaskQueuesInDeployment.Get(dc), MaxVersionsInTaskQueue: dynamicconfig.MatchingMaxVersionsInTaskQueue.Get(dc), RPS: dynamicconfig.MatchingRPS.Get(dc), + NamespaceRPS: dynamicconfig.MatchingNamespaceRPS.Get(dc), OperatorRPSRatio: dynamicconfig.OperatorRPSRatio.Get(dc), + PollWaitForNamespaceRateLimitToken: dynamicconfig.PollWaitForNamespaceRateLimitToken.Get(dc), RangeSize: 100000, NewMatcherSub: dynamicconfig.MatchingUseNewMatcher.Subscribe(dc), EnableFairnessSub: dynamicconfig.MatchingEnableFairness.Subscribe(dc), diff --git a/service/matching/configs/quotas.go b/service/matching/configs/quotas.go index bb6c12f9fa7..82e0e082890 100644 --- a/service/matching/configs/quotas.go +++ b/service/matching/configs/quotas.go @@ -6,11 +6,6 @@ import ( "go.temporal.io/server/common/quotas" ) -const ( - // OperatorPriority is used to give precedence to calls coming from web UI or tctl - OperatorPriority = 0 -) - var ( APIToPriority = map[string]int{ "/temporal.server.api.matchingservice.v1.MatchingService/AddActivityTask": 1, @@ -56,36 +51,54 @@ var ( } APIPrioritiesOrdered = []int{0, 1, 2} + + PollTaskAPISet = map[string]struct{}{ + 
"/temporal.server.api.matchingservice.v1.MatchingService/PollActivityTaskQueue": {}, + "/temporal.server.api.matchingservice.v1.MatchingService/PollWorkflowTaskQueue": {}, + "/temporal.server.api.matchingservice.v1.MatchingService/PollNexusTaskQueue": {}, + } ) func NewPriorityRateLimiter( rateFn quotas.RateFn, operatorRPSRatio dynamicconfig.FloatPropertyFn, ) quotas.RequestRateLimiter { - rateLimiters := make(map[int]quotas.RequestRateLimiter) - for priority := range APIPrioritiesOrdered { - if priority == OperatorPriority { - rateLimiters[priority] = quotas.NewRequestRateLimiterAdapter(quotas.NewDefaultIncomingRateLimiter(operatorRateFn(rateFn, operatorRPSRatio))) - } else { - rateLimiters[priority] = quotas.NewRequestRateLimiterAdapter(quotas.NewDefaultIncomingRateLimiter(rateFn)) - } - } - return quotas.NewPriorityRateLimiter(func(req quotas.Request) int { - if req.CallerType == headers.CallerTypeOperator { - return OperatorPriority - } - if priority, ok := APIToPriority[req.API]; ok { - return priority - } - return APIPrioritiesOrdered[len(APIPrioritiesOrdered)-1] - }, rateLimiters) + return quotas.NewPriorityRateLimiterHelper( + quotas.NewDefaultIncomingRateBurst(rateFn), + operatorRPSRatio, + RequestToPriority, + APIPrioritiesOrdered, + ) } -func operatorRateFn( - rateFn quotas.RateFn, +func NewNamespaceRateLimiter( + namespaceRateFn quotas.NamespaceRateFn, operatorRPSRatio dynamicconfig.FloatPropertyFn, -) quotas.RateFn { - return func() float64 { - return operatorRPSRatio() * rateFn() +) quotas.RequestRateLimiter { + return quotas.NewNamespaceRequestRateLimiter( + func(req quotas.Request) quotas.RequestRateLimiter { + return quotas.NewPriorityRateLimiterHelper( + quotas.NewNamespaceRateBurst( + req.Caller, + namespaceRateFn, + // TODO: We can consider adding a separate burst ratio dynamic config + // on namespace level rate limiter if needed. 
+ quotas.DefaultIncomingNamespaceBurstRatioFn, + ), + operatorRPSRatio, + RequestToPriority, + APIPrioritiesOrdered, + ) + }, + ) +} + +func RequestToPriority(req quotas.Request) int { + if req.CallerType == headers.CallerTypeOperator { + return quotas.OperatorPriority + } + if priority, ok := APIToPriority[req.API]; ok { + return priority } + return APIPrioritiesOrdered[len(APIPrioritiesOrdered)-1] } diff --git a/service/matching/fx.go b/service/matching/fx.go index 2332c832e4e..516748cfcd4 100644 --- a/service/matching/fx.go +++ b/service/matching/fx.go @@ -40,6 +40,7 @@ var Module = fx.Options( fx.Provide(RetryableInterceptorProvider), fx.Provide(ErrorHandlerProvider), fx.Provide(TelemetryInterceptorProvider), + fx.Provide(NamespaceRateLimitInterceptorProvider), fx.Provide(RateLimitInterceptorProvider), fx.Provide(VisibilityManagerProvider), fx.Provide(WorkersRegistryProvider), @@ -108,6 +109,33 @@ func ThrottledLoggerRpsFnProvider(serviceConfig *Config) resource.ThrottledLogge return func() float64 { return float64(serviceConfig.ThrottledLogRPS()) } } +func NamespaceRateLimitInterceptorProvider( + serviceConfig *Config, + namespaceRegistry namespace.Registry, + metricsHandler metrics.Handler, +) interceptor.NamespaceRateLimitInterceptor { + + namespaceRateFn := func(namespaceName string) float64 { + if namespaceRPS := serviceConfig.NamespaceRPS(namespaceName); namespaceRPS > 0 { + return float64(namespaceRPS) + } + // This fallback to host level rps limit when NamespaceRPS is not configured (i.e. 
0) + return float64(serviceConfig.RPS()) + } + + return interceptor.NewNamespaceRateLimitInterceptor( + namespaceRegistry, + configs.NewNamespaceRateLimiter( + namespaceRateFn, + serviceConfig.OperatorRPSRatio, + ), + map[string]int{}, // no token overrides + configs.PollTaskAPISet, // set of APIs that will wait for token instead of immediate rejection + serviceConfig.PollWaitForNamespaceRateLimitToken, + metricsHandler, + ) +} + func RateLimitInterceptorProvider( serviceConfig *Config, ) *interceptor.RateLimitInterceptor { From debbe326af1c267710aa463940da75be4b49cd44 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Fri, 15 May 2026 15:25:48 -0600 Subject: [PATCH 42/73] fix: PauseWorkflowExecution bypassed when constantly completing activities (#10292) ## What changed? In the `RespondWorkflowTaskCompleted` api added a `!ms.IsWorkflowExecutionPaused` to the gate creating a new workflow task, without this check the workflow will schedule new work while keeping `PauseInfo` on the workflow making it show up as `Paused` in the UI but unable to be unpaused. ## Why? Fix #10239 ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [X] added new functional test(s) ## Potential risks Minimal, pause is behind a dynamic config that is defaulted to false --- .../api/respondworkflowtaskcompleted/api.go | 7 +- tests/pause_workflow_execution_test.go | 144 ++++++++++++++++++ 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/service/history/api/respondworkflowtaskcompleted/api.go b/service/history/api/respondworkflowtaskcompleted/api.go index e0cf1db4f26..eed36a54c80 100644 --- a/service/history/api/respondworkflowtaskcompleted/api.go +++ b/service/history/api/respondworkflowtaskcompleted/api.go @@ -516,7 +516,12 @@ func (handler *WorkflowTaskCompletedHandler) Invoke( } newWorkflowTaskType := enumsspb.WORKFLOW_TASK_TYPE_UNSPECIFIED - if ms.IsWorkflowExecutionRunning() { + // Do not schedule a new workflow task if the workflow is paused. Accepting the in-flight + // WT completion is intentional (see HistoryBuilder buffering of WORKFLOW_EXECUTION_PAUSED), + // but scheduling a follow-up WT would call ApplyWorkflowTaskScheduledEvent, which resets + // Status to RUNNING while leaving executionInfo.PauseInfo set — desyncing pause state. + // Mirrors the gate in closeTransactionHandleWorkflowTaskScheduling. + if ms.IsWorkflowExecutionRunning() && !ms.IsWorkflowExecutionStatusPaused() { if request.GetForceCreateNewWorkflowTask() || // Heartbeat WT is always of Normal type. 
wtFailedShouldCreateNewTask || hasBufferedEventsOrMessages || diff --git a/tests/pause_workflow_execution_test.go b/tests/pause_workflow_execution_test.go index 58d081d5247..c47d6ed5c4d 100644 --- a/tests/pause_workflow_execution_test.go +++ b/tests/pause_workflow_execution_test.go @@ -19,6 +19,7 @@ import ( querypb "go.temporal.io/api/query/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/activity" sdkclient "go.temporal.io/sdk/client" "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" @@ -831,6 +832,149 @@ func (s *PauseWorkflowExecutionSuite) TestPauseWorkflowExecutionAlreadyPaused() }, 5*time.Second, 200*time.Millisecond) } +// TestPauseDuringInFlightWorkflowTask reproduces the race described in +// https://github.com/temporalio/temporal/issues/10239: if +// PauseWorkflowExecution arrives while a worker has a workflow task in flight, +// the worker's RespondWorkflowTaskCompleted can still be accepted after the +// WORKFLOW_EXECUTION_PAUSED event is appended. A follow-up WORKFLOW_TASK_SCHEDULED +// is then written and the next workflow task completion resets Status to RUNNING +// without clearing executionInfo.PauseInfo. The workflow ends up stuck: +// Status=RUNNING with pauseInfo set, and UnpauseWorkflowExecution rejects with +// FailedPrecondition because it only inspects Status. +// +// The bug is timing-sensitive. It often does not reproduce on a single run. +// Run with -count=N (e.g. -count=50) to reliably observe failures. +func (s *PauseWorkflowExecutionSuite) TestPauseDuringInFlightWorkflowTask() { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + const ( + tickActivityName = "pause-race-tick-activity" + busyWorkflowName = "pause-race-busy-workflow" + iterations = 200 + ) + + // tickActivity completes immediately so the workflow keeps cycling + // through workflow tasks with minimal wall time between them. 
+ tickActivity := func(ctx context.Context) error { + return nil + } + + // busyWorkflow runs a long tight loop of short activities so that workflow + // tasks are being scheduled/started/completed continuously. This widens + // the chance of a Pause RPC arriving while a WT is in flight on the worker. + busyWorkflow := func(ctx workflow.Context) error { + ao := workflow.ActivityOptions{ + StartToCloseTimeout: 5 * time.Second, + ScheduleToCloseTimeout: 30 * time.Second, + } + ctx = workflow.WithActivityOptions(ctx, ao) + for range iterations { + if err := workflow.ExecuteActivity(ctx, tickActivityName).Get(ctx, nil); err != nil { + return err + } + } + return nil + } + + s.SdkWorker().RegisterWorkflowWithOptions(busyWorkflow, workflow.RegisterOptions{Name: busyWorkflowName}) + s.SdkWorker().RegisterActivityWithOptions(tickActivity, activity.RegisterOptions{Name: tickActivityName}) + + workflowOptions := sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("pause-race-" + s.T().Name()), + TaskQueue: s.TaskQueue(), + } + + workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, busyWorkflowName) + s.NoError(err) + workflowID := workflowRun.GetID() + runID := workflowRun.GetRunID() + + // Wait until the workflow has progressed a few iterations so workflow + // tasks are actively flowing through the worker. + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowID, runID) + require.NoError(t, err) + info := desc.GetWorkflowExecutionInfo() + require.NotNil(t, info) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, info.GetStatus()) + require.GreaterOrEqual(t, info.GetHistoryLength(), int64(15), + "workflow has not started cycling through tasks yet") + }, 10*time.Second, 50*time.Millisecond) + + // Issue the Pause while the worker is still busy. Repeat until either we + // observe Status=PAUSED or we hit the desync state (Status=RUNNING with + // pauseInfo set). 
The race window is small, so we don't always hit it on + // the first pause/unpause cycle. + pauseResp, err := s.FrontendClient().PauseWorkflowExecution(ctx, &workflowservice.PauseWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: workflowID, + RunId: runID, + Identity: s.pauseIdentity, + Reason: s.pauseReason, + RequestId: uuid.NewString(), + }) + s.NoError(err) + s.NotNil(pauseResp) + + // Eventually the workflow should reach a stable PAUSED state. The bug + // manifests as Status=RUNNING with pauseInfo still populated; this assertion + // is what fails when the race fires. + s.EventuallyWithT(func(t *assert.CollectT) { + desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowID, runID) + require.NoError(t, err) + info := desc.GetWorkflowExecutionInfo() + require.NotNil(t, info) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_PAUSED, info.GetStatus(), + "workflow ended up desynced: Status=%s, pauseInfo=%v (issue #10239 race)", + info.GetStatus(), desc.GetWorkflowExtendedInfo().GetPauseInfo()) + }, 10*time.Second, 100*time.Millisecond) + + // Verify history contains no WORKFLOW_TASK_SCHEDULED event after + // WORKFLOW_EXECUTION_PAUSED — that event (eventId #1963 in the issue + // reproduction) is the smoking gun for the race. + hist := s.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + inPaused := false + for hist.HasNext() { + event, herr := hist.Next() + s.NoError(herr) + switch event.EventType { + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_PAUSED: + inPaused = true + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UNPAUSED: + inPaused = false + case enumspb.EVENT_TYPE_WORKFLOW_TASK_SCHEDULED: + s.False(inPaused, + "WORKFLOW_TASK_SCHEDULED at eventId=%d appended after WORKFLOW_EXECUTION_PAUSED (issue #10239 race)", + event.EventId) + default: + } + } + + // Unpause should succeed. 
When the race fires, Status is RUNNING and the + // unpause API rejects with FailedPrecondition: "workflow is not paused". + unpauseResp, err := s.FrontendClient().UnpauseWorkflowExecution(ctx, &workflowservice.UnpauseWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: workflowID, + RunId: runID, + Identity: s.pauseIdentity, + Reason: s.pauseReason, + RequestId: uuid.NewString(), + }) + s.NoError(err, "UnpauseWorkflowExecution failed; workflow is stuck with Status=RUNNING and pauseInfo set (issue #10239 race)") + s.NotNil(unpauseResp) + + // Cleanup: terminate so the busy loop doesn't run to completion. + _, _ = s.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + RunId: runID, + }, + Reason: "cleanup after pause race test", + }) +} + // hasActivityPauseEntries checks if the TemporalPauseInfo search attribute contains any activity pause entries. func (s *PauseWorkflowExecutionSuite) hasActivityPauseEntries(desc *workflowservice.DescribeWorkflowExecutionResponse) bool { searchAttrs := desc.GetWorkflowExecutionInfo().GetSearchAttributes() From c2afbd480e647dede80cdfc7d622d70e72f5572e Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 15 May 2026 14:26:20 -0700 Subject: [PATCH 43/73] fix LongPollTimeoutReturnsEmptyResponse test (#10276) ## What changed? Fix race and incomplete tests in `LongPollTimeoutReturnsEmptyResponse`. 
--- tests/nexus_standalone_test.go | 114 ++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/tests/nexus_standalone_test.go b/tests/nexus_standalone_test.go index 4376ce5e753..e5fbe2957fa 100644 --- a/tests/nexus_standalone_test.go +++ b/tests/nexus_standalone_test.go @@ -21,6 +21,7 @@ import ( "go.temporal.io/api/workflowservice/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/chasm/lib/nexusoperation" + "go.temporal.io/server/common" "go.temporal.io/server/common/dynamicconfig" commonnexus "go.temporal.io/server/common/nexus" "go.temporal.io/server/common/nexus/nexusrpc" @@ -29,6 +30,7 @@ import ( "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/tests/testcore" + "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/durationpb" ) @@ -311,64 +313,86 @@ func (s *NexusStandaloneTestSuite) TestDescribeStandaloneNexusOperation() { }) s.Run("LongPollTimeoutReturnsEmptyResponse", func(s *NexusStandaloneTestSuite) { - env := s.newTestEnv() - endpointName := env.createRandomExternalNexusServer(env.Context(), s.T(), nexustest.Handler{ - OnStartOperation: func(ctx context.Context, service, operation string, input *nexus.LazyValue, options nexus.StartOperationOptions) (nexus.HandlerStartOperationResult[any], error) { - return &nexus.HandlerStartOperationResultAsync{OperationToken: "test-operation-token"}, nil + // The timeout imposed by the server is essentially + // Min(CallerTimeout - LongPollBuffer, LongPollTimeout). 
+ for _, tc := range []struct { + name string + setup func(*NexusTestEnv) + callerCtx func(*NexusTestEnv) (context.Context, context.CancelFunc) + }{ + { + name: "LongPollTimeoutExpiresBeforeCallerDeadline", + setup: func(env *NexusTestEnv) { + env.OverrideDynamicConfig(nexusoperation.LongPollTimeout, 10*time.Millisecond) + }, + callerCtx: func(env *NexusTestEnv) (context.Context, context.CancelFunc) { + return context.WithTimeout(env.Context(), 9999*time.Millisecond) + }, }, - }) - - startResp, err := s.startNexusOperation(env, &workflowservice.StartNexusOperationExecutionRequest{ - OperationId: "test-op", - Endpoint: endpointName, - }) - s.NoError(err) - - // Ensure the operation is in a stable STARTED state. - _, err = env.FrontendClient().PollNexusOperationExecution(env.Context(), &workflowservice.PollNexusOperationExecutionRequest{ - Namespace: env.Namespace().String(), - OperationId: "test-op", - RunId: startResp.RunId, - WaitStage: enumspb.NEXUS_OPERATION_WAIT_STAGE_STARTED, - }) - s.NoError(err) + { + name: "LongPollTimeoutExpiresWithoutCallerDeadline", + setup: func(env *NexusTestEnv) { + env.OverrideDynamicConfig(nexusoperation.LongPollTimeout, 10*time.Millisecond) + }, + callerCtx: func(env *NexusTestEnv) (context.Context, context.CancelFunc) { + return env.Context(), func() {} + }, + }, + { + name: "LongPollBufferForcesResponse", + setup: func(env *NexusTestEnv) { + env.OverrideDynamicConfig(nexusoperation.LongPollBuffer, common.DefaultLongPollTimeout-time.Second) + }, + callerCtx: func(env *NexusTestEnv) (context.Context, context.CancelFunc) { + return env.Context(), func() {} + }, + }, + } { + env := s.newTestEnv() + endpointName := env.createRandomExternalNexusServer(env.Context(), s.T(), nexustest.Handler{ + OnStartOperation: func(ctx context.Context, service, operation string, input *nexus.LazyValue, options nexus.StartOperationOptions) (nexus.HandlerStartOperationResult[any], error) { + return &nexus.HandlerStartOperationResultAsync{OperationToken: 
"test-operation-token"}, nil + }, + }) - firstResp, err := env.FrontendClient().DescribeNexusOperationExecution(env.Context(), &workflowservice.DescribeNexusOperationExecutionRequest{ - Namespace: env.Namespace().String(), - OperationId: "test-op", - RunId: startResp.RunId, - }) - s.NoError(err) - s.NotEmpty(firstResp.GetLongPollToken()) + startResp, err := s.startNexusOperation(env, &workflowservice.StartNexusOperationExecutionRequest{ + OperationId: "test-op", + Endpoint: endpointName, + }) + s.NoError(err) - s.Run("CallerDeadlineNotExceeded", func(s *NexusStandaloneTestSuite) { - env.OverrideDynamicConfig(nexusoperation.LongPollBuffer, time.Second) - env.OverrideDynamicConfig(nexusoperation.LongPollTimeout, 10*time.Millisecond) + // Ensure the operation is in a stable STARTED state. + _, err = env.FrontendClient().PollNexusOperationExecution(env.Context(), &workflowservice.PollNexusOperationExecutionRequest{ + Namespace: env.Namespace().String(), + OperationId: "test-op", + RunId: startResp.RunId, + WaitStage: enumspb.NEXUS_OPERATION_WAIT_STAGE_STARTED, + }) + s.NoError(err) - longPollResp, err := env.FrontendClient().DescribeNexusOperationExecution(env.Context(), &workflowservice.DescribeNexusOperationExecutionRequest{ - Namespace: env.Namespace().String(), - OperationId: "test-op", - RunId: startResp.RunId, - LongPollToken: firstResp.GetLongPollToken(), + firstResp, err := env.FrontendClient().DescribeNexusOperationExecution(env.Context(), &workflowservice.DescribeNexusOperationExecutionRequest{ + Namespace: env.Namespace().String(), + OperationId: "test-op", + RunId: startResp.RunId, }) s.NoError(err) - protorequire.ProtoEqual(s.T(), &workflowservice.DescribeNexusOperationExecutionResponse{}, longPollResp) - }) + s.NotEmpty(firstResp.GetLongPollToken()) - s.Run("NoCallerDeadline", func(s *NexusStandaloneTestSuite) { - // Frontend still imposes its own deadline upstream, so the buffer must fit within that. 
- env.OverrideDynamicConfig(nexusoperation.LongPollBuffer, 29*time.Second) - env.OverrideDynamicConfig(nexusoperation.LongPollTimeout, 10*time.Millisecond) + tc.setup(env) + ctx, cancel := tc.callerCtx(env) - longPollResp, err := env.FrontendClient().DescribeNexusOperationExecution(context.Background(), &workflowservice.DescribeNexusOperationExecutionRequest{ + startTime := time.Now() + longPollResp, err := env.FrontendClient().DescribeNexusOperationExecution(ctx, &workflowservice.DescribeNexusOperationExecutionRequest{ Namespace: env.Namespace().String(), OperationId: "test-op", RunId: startResp.RunId, LongPollToken: firstResp.GetLongPollToken(), }) - s.NoError(err) - protorequire.ProtoEqual(s.T(), &workflowservice.DescribeNexusOperationExecutionResponse{}, longPollResp) - }) + cancel() + s.NoError(err, tc.name) + s.Less(time.Since(startTime), 5*time.Second, "%s took too long to timeout", tc.name) + s.True(proto.Equal(&workflowservice.DescribeNexusOperationExecutionResponse{}, longPollResp), tc.name) + } }) s.Run("IncludeOutcome_Success", func(s *NexusStandaloneTestSuite) { From 7c022fbdb22eea77fd1c7342e01b0a873b17e899 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 15 May 2026 14:55:32 -0700 Subject: [PATCH 44/73] add `await.Require` and `await.RequireTrue` (#9490) ## What changed? Introduces `common/testing/await`, a polling-based test helper that replaces testify's `Eventually` / `EventuallyWithT`. ## Why? From the [package doc](https://github.com/temporalio/temporal/blob/stephanos/all-require/common/testing/await/await.go): ``` Improvements over testify's eventually functions: - Misuse detection: accidentally using the real *testing.T (e.g. s.T() or suite assertion methods) instead of the callback's collect T is a common mistake. This package detects it and fails with a clear message. - Safer bool predicates: unlike testify's Eventually, [RequireTrue] only accepts func() bool, so returning false is the sole retry signal. 
If the predicate accidentally marks the real test failed, it reports that immediately instead of polling until timeout. - Timeout-aware callbacks: callbacks receive a context derived from the parent context and canceled when the await timeout or test deadline is reached, so RPCs and blocking waits can exit instead of continuing after the retry window has expired. - Panic propagation: if the condition panics (e.g. nil dereference), the panic is propagated immediately rather than being silently swallowed or retried until timeout. See https://github.com/stretchr/testify/issues/1810 - Bounded goroutine lifetime: each attempt completes before the next starts, avoiding the overlapping-attempt data races and "panic: Fail in goroutine after Test has completed" crashes seen with testify's Eventually. See https://github.com/stretchr/testify/issues/1611 - Deadlock detection: a condition that ignores t.Context() is abandoned after a grace period, producing a clear "does it honor t.Context()?" failure instead of hanging until go test -timeout. - Condition always runs: testify's Eventually can fail without ever running the condition due to a timer/ticker race with short timeouts. This package runs the condition immediately on the first iteration. See https://github.com/stretchr/testify/issues/1652 ``` The upstream fix ([testify#1657](https://github.com/stretchr/testify/pull/1657)) aims to address several of these but has been open since Oct 2024 without merging. ## How did you test it? - [ ] built - [ ] run locally and tested manually - [x] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) ## Potential risks The biggest downside is that it adds more code that we own. But apart from any unknown bugs, this package should rarely/never change. 
--- .github/.golangci.yml | 15 +- common/testing/await/doc.go | 39 ++ common/testing/await/report.go | 65 +++ common/testing/await/require_ctx.go | 282 +++++++++++ common/testing/await/require_ctx_test.go | 536 +++++++++++++++++++++ common/testing/await/require_true.go | 36 ++ common/testing/await/require_true_test.go | 133 +++++ common/testing/await/t.go | 77 +++ common/testing/await/t_test.go | 80 +++ common/testing/parallelsuite/suite.go | 91 +++- common/testing/parallelsuite/suite_test.go | 58 +++ common/testing/testcontext/context.go | 40 +- docs/development/testing.md | 39 ++ tests/premature_eos_test.go | 8 +- tests/query_workflow_test.go | 2 +- 15 files changed, 1455 insertions(+), 46 deletions(-) create mode 100644 common/testing/await/doc.go create mode 100644 common/testing/await/report.go create mode 100644 common/testing/await/require_ctx.go create mode 100644 common/testing/await/require_ctx_test.go create mode 100644 common/testing/await/require_true.go create mode 100644 common/testing/await/require_true_test.go create mode 100644 common/testing/await/t.go create mode 100644 common/testing/await/t_test.go diff --git a/.github/.golangci.yml b/.github/.golangci.yml index dec1fe6911c..46e9021bddf 100644 --- a/.github/.golangci.yml +++ b/.github/.golangci.yml @@ -33,7 +33,7 @@ linters: forbidigo: forbid: - pattern: time.Sleep - msg: "Please use require.Eventually or assert.Eventually instead unless you've no other option" + msg: "Please use await.Require / s.Await unless there's no better option" - pattern: "^panic$" msg: "Please avoid using panic in application code" - pattern: time\.Now @@ -48,8 +48,10 @@ linters: msg: "FunctionalTestBase is deprecated. Use testcore.NewEnv(t) instead. See docs/development/testing.md for details." 
- pattern: context\.Background\(\) msg: "Avoid context.Background() in tests; use t.Context() to respect test timeouts and cancellation" + - pattern: '(^|\.)(Eventually|Eventuallyf|EventuallyWithT|EventuallyWithTf)(\(|$)' + msg: "Use await.Require / s.Await for assertion conditions, or await.RequireTrue / s.AwaitTrue for bool predicates, instead of testify Eventually helpers" - pattern: 'assert\.\w+' - msg: "Use require.X / protorequire.X instead of assert.X / protoassert.X — assert doesn't stop the test on failure. assert.CollectT is still allowed for EventuallyWithT callbacks." + msg: "Use require.X / protorequire.X instead of assert.X / protoassert.X — assert doesn't stop the test on failure." depguard: rules: main: @@ -202,7 +204,14 @@ linters: text: "context.Background" linters: - forbidigo - - text: "use of `assert\\.CollectT`" # allowed for EventuallyWithT callbacks + # Existing legacy call sites are tracked separately; keep this PR scoped + # to preventing new usage while migrating touched tests. + - path: tests/(nexus_standalone|nexus_workflow|schedule|schedule_migration)_test\.go$ + text: "Eventually" + linters: + - forbidigo + - path: tests/(nexus_standalone|nexus_workflow)_test\.go$ + text: "assert\\.CollectT" linters: - forbidigo - text: "use of `softassert\\.\\w+`" diff --git a/common/testing/await/doc.go b/common/testing/await/doc.go new file mode 100644 index 00000000000..fa86bad7d5f --- /dev/null +++ b/common/testing/await/doc.go @@ -0,0 +1,39 @@ +// Package await provides polling-based test assertions as a replacement +// for testify's Eventually, EventuallyWithT, and their formatted variants. +// +// Improvements over testify's eventually functions: +// +// - Misuse detection: accidentally using the real *testing.T (e.g. s.T() or +// suite assertion methods) instead of the callback's collect T is a +// common mistake. This package detects it and fails with a clear message. 
+// +// - Safer bool predicates: unlike testify's Eventually, [RequireTrue] only +// accepts func() bool, so returning false is the sole retry signal. If the +// predicate accidentally marks the real test failed, it reports that +// immediately instead of polling until timeout. +// +// - Timeout-aware callbacks: callbacks receive a context derived from the +// parent context and canceled when the await timeout or test deadline is +// reached, so RPCs and blocking waits can exit instead of continuing after +// the retry window has expired. +// +// - Panic propagation: if the condition panics (e.g. nil dereference), the +// panic is propagated immediately rather than being silently swallowed +// or retried until timeout. +// See https://github.com/stretchr/testify/issues/1810 +// +// - Bounded goroutine lifetime: each attempt completes before the next +// starts, avoiding the overlapping-attempt data races and "panic: Fail +// in goroutine after Test has completed" crashes seen with testify's +// Eventually. +// See https://github.com/stretchr/testify/issues/1611 +// +// - Deadlock detection: a condition that ignores t.Context() is abandoned +// after a grace period, producing a clear "does it honor t.Context()?" +// failure instead of hanging until go test -timeout. +// +// - Condition always runs: testify's Eventually can fail without ever +// running the condition due to a timer/ticker race with short timeouts. +// This package runs the condition immediately on the first iteration. +// See https://github.com/stretchr/testify/issues/1652 +package await diff --git a/common/testing/await/report.go b/common/testing/await/report.go new file mode 100644 index 00000000000..094ebe73fd7 --- /dev/null +++ b/common/testing/await/report.go @@ -0,0 +1,65 @@ +package await + +import ( + "fmt" + "strings" + "testing" + "time" +) + +// reportAttemptErrors emits the collected attempt failures. 
When there are +// many, only the first and the last few are shown — long polls would +// otherwise produce hundreds of duplicate lines. +const ( + reportHeadAttempts = 1 + reportTailAttempts = 3 +) + +type attemptFailure struct { + attempt int + errors []string +} + +// reportTimeout reports the timeout failure plus collected attempt errors. +func reportTimeout(tb testing.TB, failures []attemptFailure, funcName, timeoutMsg string, effectiveTimeout time.Duration, polls int) { + reportAttemptErrors(tb, failures) + if timeoutMsg != "" { + tb.Fatalf("%s: %s (not satisfied after %v, %d polls)", funcName, timeoutMsg, effectiveTimeout, polls) + } else { + tb.Fatalf("%s: condition not satisfied after %v (%d polls)", funcName, effectiveTimeout, polls) + } +} + +func reportAttemptErrors(tb testing.TB, failures []attemptFailure) { + if len(failures) == 0 { + return + } + + var b strings.Builder + b.WriteString("attempt errors:") + if len(failures) <= reportHeadAttempts+reportTailAttempts { + for _, f := range failures { + writeAttemptFailure(&b, f) + } + } else { + for _, f := range failures[:reportHeadAttempts] { + writeAttemptFailure(&b, f) + } + omitted := len(failures) - reportHeadAttempts - reportTailAttempts + fmt.Fprintf(&b, "\n ... 
%d attempts omitted ...", omitted) + for _, f := range failures[len(failures)-reportTailAttempts:] { + writeAttemptFailure(&b, f) + } + } + tb.Errorf("%s", b.String()) +} + +func writeAttemptFailure(b *strings.Builder, f attemptFailure) { + fmt.Fprintf(b, "\n attempt %d:", f.attempt) + for _, e := range f.errors { + for line := range strings.SplitSeq(e, "\n") { + b.WriteString("\n ") + b.WriteString(line) + } + } +} diff --git a/common/testing/await/require_ctx.go b/common/testing/await/require_ctx.go new file mode 100644 index 00000000000..a830d551d27 --- /dev/null +++ b/common/testing/await/require_ctx.go @@ -0,0 +1,282 @@ +package await + +import ( + "context" + "fmt" + "os" + "testing" + "time" +) + +const requireMisuseHint = "use the *await.T passed to the callback, not s.T() or suite assertion methods" + +// softDeadlockTimeoutEnvVar overrides the default soft-deadlock timeout. +// Parsed as a Go duration, e.g. "10s". +const softDeadlockTimeoutEnvVar = "TEMPORAL_AWAIT_SOFT_DEADLOCK_TIMEOUT" + +// defaultSoftDeadlockTimeout caps how long a single attempt can run before its +// context is cancelled (soft deadlock). Capped further by the overall await +// deadline. Each new attempt gets a fresh context with this same cap. +const defaultSoftDeadlockTimeout = 30 * time.Second + +func softDeadlockTimeout() time.Duration { + if s := os.Getenv(softDeadlockTimeoutEnvVar); s != "" { + if d, err := time.ParseDuration(s); err == nil { + return d + } + } + return defaultSoftDeadlockTimeout +} + +// hardDeadlockTimeoutEnvVar overrides the default hard-deadlock timeout. +// Parsed as a Go duration, e.g. "100ms". +const hardDeadlockTimeoutEnvVar = "TEMPORAL_AWAIT_HARD_DEADLOCK_TIMEOUT" + +// defaultHardDeadlockTimeout is how long runAttempt waits AFTER cancelling the +// attempt context (soft deadlock) for the condition goroutine to honor the +// cancellation. If it doesn't terminate by then, the goroutine is declared +// hard-deadlocked and abandoned. 
Without it, a condition that ignores +// t.Context() would hang the test until go test -timeout fires. +const defaultHardDeadlockTimeout = 10 * time.Second + +func hardDeadlockTimeout() time.Duration { + if s := os.Getenv(hardDeadlockTimeoutEnvVar); s != "" { + if d, err := time.ParseDuration(s); err == nil { + return d + } + } + return defaultHardDeadlockTimeout +} + +// Require polls condition until it returns without assertion failures, or +// until ctx is canceled or timeout expires (whichever is earliest). +// +// Pass the *await.T to require.*/assert.* — failures cause a retry, not a +// test failure. Use t.Context() inside the callback to honor the timeout. +func Require(ctx context.Context, tb testing.TB, condition func(*T), timeout, pollInterval time.Duration) { + tb.Helper() + run(ctx, tb, condition, timeout, pollInterval, "", "Require", requireMisuseHint, true) +} + +// Requiref is like [Require] but adds a formatted message to the timeout +// failure. +func Requiref(ctx context.Context, tb testing.TB, condition func(*T), timeout, pollInterval time.Duration, msg string, args ...any) { + tb.Helper() + run(ctx, tb, condition, timeout, pollInterval, fmt.Sprintf(msg, args...), "Requiref", requireMisuseHint, true) +} + +func run( + parentCtx context.Context, + tb testing.TB, + condition func(*T), + timeout, + pollInterval time.Duration, + timeoutMsg string, + funcName string, + misuseHint string, + cancellable bool, +) { + tb.Helper() + + // Skip if the test already failed — no point polling. + if tb.Failed() { + tb.Logf("%s: skipping (test already failed)", funcName) + return + } + // Guard: context.WithDeadline panics on a nil parent. + if parentCtx == nil { + tb.Fatalf("%s: nil context", funcName) + return + } + + deadline := time.Now().Add(timeout) + + // Cap at the parent context's deadline if it's earlier than our timeout. 
+ if parentDeadline, hasDeadline := parentCtx.Deadline(); hasDeadline && parentDeadline.Before(deadline) { + deadline = parentDeadline + } + + // Cap at the test's deadline if it's earlier than our deadline. + // Ideally, the parent context already accounts for the test's deadline - but we are being defensive. + if d, ok := tb.(interface{ Deadline() (time.Time, bool) }); ok { + if testDeadline, hasDeadline := d.Deadline(); hasDeadline && testDeadline.Before(deadline) { + deadline = testDeadline + } + } + + effectiveTimeout := max(0, time.Until(deadline)) + awaitCtx, awaitCancel := context.WithDeadline(parentCtx, deadline) + defer awaitCancel() + + var failures []attemptFailure + polls := 0 + + for { + // Parent context was canceled while we were sleeping (not our deadline). + if err := awaitCtx.Err(); err != nil && !deadlineReached(deadline) { + reportAttemptErrors(tb, failures) + tb.Fatalf("%s: context canceled before condition was satisfied: %v", funcName, err) + return + } + + polls++ + + // Fresh context per attempt, scoped to the run-level ctx. runAttempt + // owns the soft timeout and the corresponding cancel. + attemptCtx, attemptCancel := context.WithCancel(awaitCtx) + t := &T{tb: tb, ctx: attemptCtx} + + // Run attempt. + res := runAttempt(t, condition, attemptCancel, funcName, cancellable) + attemptCancel() + if res.panicVal != nil { + panic(res.panicVal) // propagate to caller + } + if res.deadlocked { + reportAttemptErrors(tb, failures) + if cancellable { + tb.Fatalf("%s: condition still running %v past context cancellation — does it honor t.Context()? (%d polls)", + funcName, hardDeadlockTimeout(), polls) + } else { + tb.Fatalf("%s: condition still running %v past deadline (%d polls)", + funcName, hardDeadlockTimeout(), polls) + } + return + } + if len(t.errors) > 0 { + failures = append(failures, attemptFailure{attempt: polls, errors: t.errors}) + } + + // Check misuse where the real test failed instead of just the attempt. 
+ if tb.Failed() { + tb.Fatalf("%s: the test was marked failed directly — %s", funcName, misuseHint) + return + } + + // Parent context was canceled during the attempt (not our deadline). + if err := awaitCtx.Err(); err != nil && !deadlineReached(deadline) { + reportAttemptErrors(tb, failures) + tb.Fatalf("%s: context canceled before condition was satisfied: %v", funcName, err) + return + } + + // Our deadline expired. + if deadlineReached(deadline) { + reportTimeout(tb, failures, funcName, timeoutMsg, effectiveTimeout, polls) + return + } + + // Success: attempt completed without failures. + if !res.stopped && !t.Failed() { + return + } + + // Wait for pollInterval, or context is canceled or deadline is reached. + sleep(awaitCtx, deadline, pollInterval) + } +} + +// attemptResult describes how an attempt terminated. Exactly one of the +// following fields is set: +// - panicVal != nil: condition panicked with a non-attemptFailed value; +// caller should re-panic with panicVal. +// - deadlocked: condition did not honor context cancellation within +// [hardDeadlockTimeout]; the goroutine is abandoned and leaks until the +// process exits. +// - stopped: condition stopped via attemptFailed (FailNow on *T) or +// runtime.Goexit (real-test FailNow misuse). +// - none: condition returned normally. +type attemptResult struct { + panicVal any + stopped bool + deadlocked bool +} + +// runAttempt runs condition in a goroutine so that an accidental call to the +// real test's FailNow (runtime.Goexit) terminates only this goroutine. +// +// Termination is detected in two phases: +// - Soft (cancellable only): if condition hasn't returned within +// [softDeadlockTimeout], log a warning and cancel ctx. Skipped if the +// parent ctx was already cancelled. +// - Hard: if condition still hasn't returned within [hardDeadlockTimeout], +// declare it deadlocked and abandon the goroutine. 
+func runAttempt( + t *T, + condition func(*T), + cancel context.CancelFunc, + funcName string, + cancellable bool, +) attemptResult { + done := make(chan attemptResult, 1) + + go func() { + completed := false + defer func() { + if r := recover(); r != nil { + if _, ok := r.(attemptFailed); ok { + done <- attemptResult{stopped: true} + return + } + done <- attemptResult{panicVal: r} + return + } + // recover returned nil: either normal return (completed=true) or + // runtime.Goexit (completed=false; Goexit is not a panic). + done <- attemptResult{stopped: !completed} + }() + condition(t) + completed = true + }() + + if cancellable { + // Soft phase: wait for the condition, our soft timer, or parent cancel. + softTimer := time.NewTimer(softDeadlockTimeout()) + defer softTimer.Stop() + + select { + case r := <-done: + return r + case <-softTimer.C: + // Soft deadlock: log a warning. + t.tb.Logf("%s: soft deadlock — condition still running after %v; waiting %v before declaring hard deadlock", + funcName, softDeadlockTimeout(), hardDeadlockTimeout()) + + // Cancel so the condition can observe ctx.Done(). + cancel() + case <-t.ctx.Done(): + // Parent cancelled (await deadline reached or upstream cancel). + // Proceed to hard phase quietly. + } + } + + // Hard phase: wait for the condition or the hard timer. 
+ hardTimer := time.NewTimer(hardDeadlockTimeout()) + defer hardTimer.Stop() + + select { + case r := <-done: + return r + case <-hardTimer.C: + return attemptResult{deadlocked: true} + } +} + +func sleep(ctx context.Context, deadline time.Time, pollInterval time.Duration) { + remaining := time.Until(deadline) + if remaining < pollInterval { + pollInterval = remaining + } + + timer := time.NewTimer(pollInterval) + defer timer.Stop() + + select { + case <-ctx.Done(): + case <-timer.C: + } +} + +func deadlineReached(deadline time.Time) bool { + return !time.Now().Before(deadline) +} diff --git a/common/testing/await/require_ctx_test.go b/common/testing/await/require_ctx_test.go new file mode 100644 index 00000000000..7b6a62a7e6f --- /dev/null +++ b/common/testing/await/require_ctx_test.go @@ -0,0 +1,536 @@ +package await_test + +import ( + "context" + "fmt" + "runtime" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.temporal.io/server/common/testing/await" + "go.temporal.io/server/common/testing/testcontext" +) + +func TestRequire_ImmediateSuccess(t *testing.T) { + t.Parallel() + + attempts := 0 + + await.Require(t.Context(), t, func(t *await.T) { + attempts++ + }, time.Second, 100*time.Millisecond) + + require.Equal(t, 1, attempts, "condition should be called exactly once") +} + +func TestRequire_RetriesUntilAttemptPasses(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + fail func(*await.T, int32) + stops bool + }{ + { + name: "Errorf", + fail: func(t *await.T, attempt int32) { + t.Errorf("not ready: %d", attempt) + }, + }, + { + name: "FailNow", + stops: true, + fail: func(t *await.T, _ int32) { + t.FailNow() + }, + }, + { + name: "Fatal", + stops: true, + fail: func(t *await.T, _ int32) { + t.Fatal("not ready") + }, + }, + { + name: "Fatalf", + stops: true, + fail: func(t *await.T, attempt int32) { + t.Fatalf("not ready: %d", attempt) + }, + }, + } { + t.Run(tc.name, func(t 
*testing.T) { + t.Parallel() + + var attempts atomic.Int32 + var continuedAfterFailure atomic.Bool + await.Require(t.Context(), t, func(t *await.T) { + attempt := attempts.Add(1) + if attempt < 3 { + tc.fail(t, attempt) + continuedAfterFailure.Store(true) + } + }, time.Second, 100*time.Millisecond) + + require.Equal(t, int32(3), attempts.Load()) + require.Equal(t, !tc.stops, continuedAfterFailure.Load()) + }) + } +} + +func TestRequire_PropagatesParentContextValues(t *testing.T) { + t.Parallel() + + type contextKey struct{} + ctx := context.WithValue(t.Context(), contextKey{}, "value") + + var got any + await.Require(ctx, t, func(t *await.T) { + got = t.Context().Value(contextKey{}) + }, time.Second, 100*time.Millisecond) + + require.Equal(t, "value", got) +} + +func TestRequire_SetsTimeoutContextDeadline(t *testing.T) { + t.Parallel() + + longCtx, cancel := context.WithTimeout(testcontext.New(t), time.Minute) + defer cancel() + longDeadline, ok := longCtx.Deadline() + require.True(t, ok) + + shortTimeout := 1 * time.Second + + var shortCtx context.Context + await.Require(longCtx, t, func(t *await.T) { + shortCtx = t.Context() + }, shortTimeout, 100*time.Millisecond) + + require.NotNil(t, shortCtx) + require.NotSame(t, longCtx, shortCtx) + + shortDeadline, ok := shortCtx.Deadline() + require.True(t, ok) + require.True(t, shortDeadline.Before(longDeadline)) + require.LessOrEqual(t, time.Until(shortDeadline), shortTimeout) + require.Greater(t, time.Until(shortDeadline), shortTimeout-200*time.Millisecond) +} + +func TestRequire_PollIntervalStartsAfterAttemptFinishes(t *testing.T) { + t.Parallel() + + var attempts atomic.Int32 + var attemptStarts []time.Time + var attemptEnds []time.Time + attemptDuration := 60 * time.Millisecond + pollInterval := 100 * time.Millisecond + + await.Require(t.Context(), t, func(t *await.T) { + attemptStarts = append(attemptStarts, time.Now()) + defer func() { attemptEnds = append(attemptEnds, time.Now()) }() + + 
time.Sleep(attemptDuration) //nolint:forbidigo // simulate attempt work to distinguish poll-after-start vs poll-after-end + + if attempts.Add(1) < 3 { + t.Error("not ready") + } + }, time.Second, pollInterval) + + require.Equal(t, int32(3), attempts.Load()) + require.Len(t, attemptStarts, 3) + require.Len(t, attemptEnds, 3) + for i := 1; i < len(attemptStarts); i++ { + gap := attemptStarts[i].Sub(attemptEnds[i-1]) + require.GreaterOrEqual(t, gap, pollInterval, + "poll interval should run after attempt finishes (gap=%v < %v)", gap, pollInterval) + } +} + +func TestRequire_FailureScenarios(t *testing.T) { + t.Parallel() + + t.Run("reports timeout", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + t.Error("not ready") + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + }) + + t.Run("cancels attempt context on timeout", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + <-t.Context().Done() + if t.Context().Err() != context.DeadlineExceeded { + t.Errorf("context error = %v", t.Context().Err()) + } + }, 2*time.Second, time.Second) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + }) + + t.Run("does not poll again after attempt consumes timeout", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + var attempts atomic.Int32 + + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + attempts.Add(1) + <-t.Context().Done() // block until timeout + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + require.Equal(t, int32(1), attempts.Load()) + }) + + t.Run("caps attempt context with parent deadline", func(t 
*testing.T) { + t.Parallel() + + parentCtx, cancel := context.WithTimeout(testcontext.New(t), time.Second) + defer cancel() + + tb := newRecordingTB() + tb.run(func() { + await.Require(parentCtx, tb, func(t *await.T) { + deadline, ok := t.Context().Deadline() + if !ok { + t.Error("missing deadline") + } + if time.Until(deadline) > time.Second { + t.Errorf("deadline = %v", deadline) + } + <-t.Context().Done() + if t.Context().Err() != context.DeadlineExceeded { + t.Errorf("context error = %v", t.Context().Err()) + } + }, 2*time.Second, time.Second) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + }) + + t.Run("parent context cancellation stops polling", func(t *testing.T) { + t.Parallel() + + parentCtx, cancel := context.WithCancel(testcontext.New(t)) + defer cancel() + var attempts atomic.Int32 + + tb := newRecordingTB() + tb.run(func() { + await.Require(parentCtx, tb, func(t *await.T) { + attempts.Add(1) + t.Error("not ready") + cancel() + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "context canceled before condition was satisfied") + + require.Equal(t, int32(1), attempts.Load(), "expected cancellation to stop polling") + }) + + t.Run("reports all attempt errors on timeout", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + var attempts atomic.Int32 + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + if attempts.Add(1) == 1 { + t.Error("first attempt error") + return + } + <-t.Context().Done() + t.Error("last attempt error") + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + require.Equal(t, "attempt errors:\n attempt 1:\n first attempt error\n attempt 2:\n last attempt error", tb.errors()) + require.Equal(t, int32(2), attempts.Load()) + }) + + t.Run("truncates middle attempts when many fail", func(t *testing.T) { + 
t.Parallel() + + ctx := testcontext.New(t) + var attempts atomic.Int32 + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + n := attempts.Add(1) + t.Errorf("attempt %d failed", n) + }, 400*time.Millisecond, 50*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + + n := attempts.Load() + require.Greater(t, n, int32(4), "need >4 attempts to exercise truncation") + + errs := tb.errors() + require.Contains(t, errs, "attempt errors:\n attempt 1:\n attempt 1 failed\n") + require.Contains(t, errs, fmt.Sprintf("... %d attempts omitted ...", n-4)) + // Last three attempts present in order. + for i := n - 2; i <= n; i++ { + require.Contains(t, errs, fmt.Sprintf("attempt %d:\n attempt %d failed", i, i)) + } + }) + + t.Run("Requiref includes message on timeout", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + tb := newRecordingTB() + tb.run(func() { + await.Requiref(ctx, tb, func(t *await.T) { + t.Error("not ready") + }, time.Second, 100*time.Millisecond, "workflow %s not ready", "wf-123") + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "workflow wf-123 not ready") + }) + + t.Run("panic propagates", func(t *testing.T) { + t.Parallel() + + require.PanicsWithValue(t, "unexpected nil pointer", func() { + await.Require(t.Context(), t, func(_ *await.T) { + panic("unexpected nil pointer") + }, time.Second, 100*time.Millisecond) + }) + }) + + t.Run("reports real TB misuse", func(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + misuse func(*recordingTB) + }{ + {"Fatal stops real TB", func(tb *recordingTB) { tb.Fatal("wrong t used") }}, + {"Errorf marks real TB failed", func(tb *recordingTB) { tb.Errorf("assert-style misuse") }}, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(_ *await.T) { + 
tc.misuse(tb) + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "use the *await.T") + }) + } + }) + + t.Run("does not poll after prior failure", func(t *testing.T) { + t.Parallel() + + ctx := testcontext.New(t) + conditionCalled := false + tb := newRecordingTB() + tb.run(func() { + tb.Errorf("previous failure") + await.Require(ctx, tb, func(_ *await.T) { + conditionCalled = true + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Empty(t, tb.fatals()) + require.False(t, conditionCalled, "condition should not run when test already failed") + }) +} + +func TestRequire_SoftDeadlockLogsAndCancels(t *testing.T) { + // not using T.Parallel() so it can use t.Setenv to override the deadlock timeouts + t.Setenv("TEMPORAL_AWAIT_SOFT_DEADLOCK_TIMEOUT", "50ms") + t.Setenv("TEMPORAL_AWAIT_HARD_DEADLOCK_TIMEOUT", "5s") + + const awaitTimeout = 10 * time.Second + + ctx := testcontext.New(t) + tb := newRecordingTB() + start := time.Now() + tb.run(func() { + // Await timeout is long so parent-cancel doesn't beat the soft timer. + await.Require(ctx, tb, func(t *await.T) { + <-t.Context().Done() // exits as soon as soft cancel fires + }, awaitTimeout, 100*time.Millisecond) + }) + elapsed := time.Since(start) + require.False(t, tb.Failed(), "soft deadlock + clean exit should succeed") + require.Contains(t, tb.logs(), "soft deadlock") + require.NotContains(t, tb.fatals(), "still running") + require.Less(t, elapsed, awaitTimeout, + "should return shortly after soft cancel, not wait the full await timeout (elapsed=%v)", elapsed) +} + +func TestRequire_DeadlockDetected(t *testing.T) { + // not using T.Parallel() so it can use t.Setenv to override the deadlock timeouts. + // Await timeout is long enough that the soft timer fires before parent cancellation, + // so the path is: soft fires → log + cancel → condition still running → hard fires. 
+ t.Setenv("TEMPORAL_AWAIT_SOFT_DEADLOCK_TIMEOUT", "50ms") + t.Setenv("TEMPORAL_AWAIT_HARD_DEADLOCK_TIMEOUT", "100ms") + + const awaitTimeout = 10 * time.Second + + ctx := testcontext.New(t) + tb := newRecordingTB() + start := time.Now() + tb.run(func() { + await.Require(ctx, tb, func(*await.T) { + select {} // ignores t.Context() + }, awaitTimeout, 50*time.Millisecond) + }) + elapsed := time.Since(start) + require.True(t, tb.Failed()) + require.Contains(t, tb.logs(), "soft deadlock") + require.Contains(t, tb.fatals(), "still running") + require.Contains(t, tb.fatals(), "does it honor t.Context()") + require.Less(t, elapsed, awaitTimeout, + "should fail at hard deadlock, not wait the full await timeout (elapsed=%v)", elapsed) +} + +func TestRequire_WaitsForInFlightAttemptOnTimeout(t *testing.T) { + t.Parallel() + + var finished atomic.Bool + ctx := testcontext.New(t) + tb := newRecordingTB() + tb.run(func() { + await.Require(ctx, tb, func(t *await.T) { + <-t.Context().Done() + finished.Store(true) + }, time.Second, time.Second) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + require.True(t, finished.Load(), "Require returned before the running attempt exited") +} + +// recordingTB is a minimal testing.TB implementation for testing failure scenarios. 
+type recordingTB struct { + testing.TB // embed for interface satisfaction + mu sync.Mutex + failed atomic.Bool + errorMessages []string + fatalMessages []string + logMessages []string + cleanups []func() +} + +func newRecordingTB() *recordingTB { + return &recordingTB{} +} + +func (r *recordingTB) Helper() {} +func (r *recordingTB) Failed() bool { return r.failed.Load() } +func (r *recordingTB) Logf(format string, args ...any) { + r.mu.Lock() + defer r.mu.Unlock() + r.logMessages = append(r.logMessages, fmt.Sprintf(format, args...)) +} +func (r *recordingTB) Context() context.Context { + return context.Background() +} + +func (r *recordingTB) Cleanup(fn func()) { + r.mu.Lock() + defer r.mu.Unlock() + r.cleanups = append(r.cleanups, fn) +} + +func (r *recordingTB) Errorf(format string, args ...any) { + r.mu.Lock() + defer r.mu.Unlock() + r.failed.Store(true) + r.errorMessages = append(r.errorMessages, fmt.Sprintf(format, args...)) +} + +func (r *recordingTB) Fatalf(format string, args ...any) { + r.mu.Lock() + r.failed.Store(true) + r.fatalMessages = append(r.fatalMessages, fmt.Sprintf(format, args...)) + r.mu.Unlock() + runtime.Goexit() +} + +func (r *recordingTB) Fatal(args ...any) { + r.mu.Lock() + r.failed.Store(true) + r.fatalMessages = append(r.fatalMessages, strings.TrimSuffix(fmt.Sprintln(args...), "\n")) + r.mu.Unlock() + runtime.Goexit() +} + +func (r *recordingTB) FailNow() { + r.failed.Store(true) + runtime.Goexit() +} + +func (r *recordingTB) run(fn func()) { + done := make(chan struct{}) + go func() { + defer func() { + r.runCleanups() + close(done) + }() + fn() + }() + <-done +} + +func (r *recordingTB) runCleanups() { + r.mu.Lock() + cleanups := r.cleanups + r.cleanups = nil + r.mu.Unlock() + + for i := len(cleanups) - 1; i >= 0; i-- { + cleanups[i]() + } +} + +func (r *recordingTB) fatals() string { + r.mu.Lock() + defer r.mu.Unlock() + return strings.Join(r.fatalMessages, "\n") +} + +func (r *recordingTB) errors() string { + r.mu.Lock() + defer 
r.mu.Unlock() + return strings.Join(r.errorMessages, "\n") +} + +func (r *recordingTB) logs() string { + r.mu.Lock() + defer r.mu.Unlock() + return strings.Join(r.logMessages, "\n") +} diff --git a/common/testing/await/require_true.go b/common/testing/await/require_true.go new file mode 100644 index 00000000000..48be79674f9 --- /dev/null +++ b/common/testing/await/require_true.go @@ -0,0 +1,36 @@ +package await + +import ( + "fmt" + "testing" + "time" + + "go.temporal.io/server/common/testing/testcontext" +) + +const requireTrueMisuseHint = "do not use test assertions inside the predicate - return false to retry or use await.Require for assertions" + +// RequireTrue runs `condition` repeatedly until it returns true, or until the +// timeout expires. The timeout is capped at the test's deadline, if one is set. +// +// Use [RequireTrue] for simple local predicates only. Do not use assertions or +// side effects in the predicate - use [Require] for these. +func RequireTrue(tb testing.TB, condition func() bool, timeout, pollInterval time.Duration) { + tb.Helper() + run(testcontext.New(tb), tb, func(t *T) { + if !condition() { + t.Fail() + } + }, timeout, pollInterval, "", "RequireTrue", requireTrueMisuseHint, false) +} + +// RequireTruef is like [RequireTrue] but accepts a format string that is included +// in the failure message when the condition is not satisfied before the timeout. 
+func RequireTruef(tb testing.TB, condition func() bool, timeout, pollInterval time.Duration, msg string, args ...any) { + tb.Helper() + run(testcontext.New(tb), tb, func(t *T) { + if !condition() { + t.Fail() + } + }, timeout, pollInterval, fmt.Sprintf(msg, args...), "RequireTruef", requireTrueMisuseHint, false) +} diff --git a/common/testing/await/require_true_test.go b/common/testing/await/require_true_test.go new file mode 100644 index 00000000000..0a749d549b6 --- /dev/null +++ b/common/testing/await/require_true_test.go @@ -0,0 +1,133 @@ +package await_test + +import ( + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.temporal.io/server/common/testing/await" +) + +// RequireTrue is a thin bool-predicate adapter over the same polling runner +// covered by require_ctx_test.go, so these tests focus on adapter behavior. + +func TestRequireTrue_ImmediateSuccess(t *testing.T) { + t.Parallel() + + attempts := 0 + + await.RequireTrue(t, func() bool { + attempts++ + return true + }, time.Second, 100*time.Millisecond) + + require.Equal(t, 1, attempts, "condition should be called exactly once") +} + +func TestRequireTrue_RetriesFalseUntilTrue(t *testing.T) { + t.Parallel() + + var attempts atomic.Int32 + + await.RequireTrue(t, func() bool { + return attempts.Add(1) >= 3 + }, time.Second, 100*time.Millisecond) + + require.Equal(t, int32(3), attempts.Load()) +} + +func TestRequireTrue_FailureScenarios(t *testing.T) { + t.Parallel() + + t.Run("reports timeout", func(t *testing.T) { + t.Parallel() + + tb := newRecordingTB() + tb.run(func() { + await.RequireTrue(tb, func() bool { + return false + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "not satisfied after") + }) + + t.Run("RequireTruef includes message on timeout", func(t *testing.T) { + t.Parallel() + + tb := newRecordingTB() + tb.run(func() { + await.RequireTruef(tb, func() bool { + return false + }, time.Second, 
100*time.Millisecond, "workflow %s not ready", "wf-123") + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "workflow wf-123 not ready") + }) + + t.Run("reports real TB misuse", func(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + misuse func(*recordingTB) + }{ + {"Fatal stops real TB", func(tb *recordingTB) { tb.Fatal("wrong t used") }}, + {"Errorf marks real TB failed", func(tb *recordingTB) { tb.Errorf("assert-style misuse") }}, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + tb := newRecordingTB() + tb.run(func() { + await.RequireTrue(tb, func() bool { + tc.misuse(tb) + return true + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "do not use test assertions") + }) + } + }) + + t.Run("does not poll after prior failure", func(t *testing.T) { + t.Parallel() + + conditionCalled := false + + tb := newRecordingTB() + tb.run(func() { + tb.Errorf("previous failure") + await.RequireTrue(tb, func() bool { + conditionCalled = true + return true + }, time.Second, 100*time.Millisecond) + }) + require.True(t, tb.Failed()) + require.Empty(t, tb.fatals()) + require.False(t, conditionCalled, "condition should not run when test already failed") + }) +} + +func TestRequireTrue_DeadlockDetected(t *testing.T) { + // not using T.Parallel() so it can use t.Setenv to override the deadlock timeouts + t.Setenv("TEMPORAL_AWAIT_HARD_DEADLOCK_TIMEOUT", "100ms") + + const awaitTimeout = 10 * time.Second + + tb := newRecordingTB() + start := time.Now() + tb.run(func() { + await.RequireTrue(tb, func() bool { + select {} // never returns; predicate has no way to honor cancellation + }, awaitTimeout, 50*time.Millisecond) + }) + elapsed := time.Since(start) + require.True(t, tb.Failed()) + require.Contains(t, tb.fatals(), "still running") + require.Contains(t, tb.fatals(), "past deadline") + require.Less(t, elapsed, awaitTimeout, + "should fail at hard deadlock, not 
wait the full await timeout (elapsed=%v)", elapsed) +} diff --git a/common/testing/await/t.go b/common/testing/await/t.go new file mode 100644 index 00000000000..c8f66196e7a --- /dev/null +++ b/common/testing/await/t.go @@ -0,0 +1,77 @@ +package await + +import ( + "context" + "fmt" + "strings" + "testing" +) + +type attemptFailed struct{} + +// T is passed to the condition callback. It intercepts assertion failures +// so the polling loop can retry. +// +// Only use T for assertions (require.*, assert.*, t.Errorf, t.Fatal, t.FailNow). +type T struct { + tb testing.TB + ctx context.Context + errors []string + failed bool +} + +// Context returns the await-scoped context for the current attempt. +func (t *T) Context() context.Context { + if t.ctx != nil { + return t.ctx + } + return t.tb.Context() +} + +// Fail marks the current attempt as failed without stopping it. +func (t *T) Fail() { + t.failed = true +} + +// Error records an error message for reporting on timeout. +func (t *T) Error(args ...any) { + t.Fail() + t.errors = append(t.errors, strings.TrimSuffix(fmt.Sprintln(args...), "\n")) +} + +// Errorf records an error message for reporting on timeout. +func (t *T) Errorf(format string, args ...any) { + t.Fail() + t.errors = append(t.errors, fmt.Sprintf(format, args...)) +} + +// FailNow is called by require.* on failure. It stops the current attempt. +// Unlike testing.TB.FailNow(), this does NOT mark the test as failed. +func (t *T) FailNow() { + t.Fail() + panic(attemptFailed{}) +} + +// Fatal records an error message and stops this attempt. +func (t *T) Fatal(args ...any) { + t.errors = append(t.errors, strings.TrimSuffix(fmt.Sprintln(args...), "\n")) + t.FailNow() +} + +// Fatalf records an error message and stops this attempt. +func (t *T) Fatalf(format string, args ...any) { + t.Errorf(format, args...) + t.FailNow() +} + +// Failed reports whether this attempt has failed. 
+func (t *T) Failed() bool { + return t.failed +} + +// Helper marks the calling function as a test helper. +func (t *T) Helper() { + if t.tb != nil { + t.tb.Helper() + } +} diff --git a/common/testing/await/t_test.go b/common/testing/await/t_test.go new file mode 100644 index 00000000000..31b51ae5767 --- /dev/null +++ b/common/testing/await/t_test.go @@ -0,0 +1,80 @@ +package await_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.temporal.io/server/common/testing/await" +) + +func TestT_CollectsAssertionFailures(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + fail func(*await.T) + stops bool + }{ + { + name: "assert", + fail: func(t *await.T) { + assert.Equal(t, "expected", "actual") //nolint:forbidigo // intentionally testing that assert.* works with *await.T + }, + }, + { + name: "Errorf", + fail: func(t *await.T) { + t.Error("not ready") + }, + }, + { + name: "FailNow", + fail: func(t *await.T) { + t.FailNow() + }, + stops: true, + }, + { + name: "Fatal", + fail: func(t *await.T) { + t.Fatal("not ready") + }, + stops: true, + }, + { + name: "Fatalf", + fail: func(t *await.T) { + t.Fatalf("not ready: %d", 1) + }, + stops: true, + }, + { + name: "require", + fail: func(t *await.T) { + require.Equal(t, "expected", "actual") + }, + stops: true, + }, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + at := &await.T{} + continuedAfterFailure := false + run := func() { + tc.fail(at) + continuedAfterFailure = true + } + + if tc.stops { + require.Panics(t, run) + require.False(t, continuedAfterFailure) + } else { + require.NotPanics(t, run) + require.True(t, continuedAfterFailure) + } + require.True(t, at.Failed()) + }) + } +} diff --git a/common/testing/parallelsuite/suite.go b/common/testing/parallelsuite/suite.go index 91937617dab..cf84dcb2141 100644 --- a/common/testing/parallelsuite/suite.go +++ b/common/testing/parallelsuite/suite.go @@ -1,24 +1,31 @@ 
package parallelsuite import ( + "context" "flag" "fmt" "reflect" "regexp" "strings" + "sync" "testing" + "time" "github.com/stretchr/testify/require" testifysuite "github.com/stretchr/testify/suite" + "go.temporal.io/server/common/testing/await" "go.temporal.io/server/common/testing/historyrequire" "go.temporal.io/server/common/testing/protorequire" + "go.temporal.io/server/common/testing/testcontext" ) // testingSuite is the constraint for suite types. type testingSuite interface { testifysuite.TestingSuite - copySuite(t *testing.T) testingSuite - initSuite(t *testing.T) + //nolint:revive // ctx is last so callers can pass nil to mean "no override"; SA1012 forbids passing nil as the first ctx arg. + copySuite(t *testing.T, assertT require.TestingT, ctx context.Context) testingSuite + //nolint:revive // see copySuite above. + initSuite(t *testing.T, assertT require.TestingT, ctx context.Context) } // Suite provides parallel test execution with require-style (fail-fast) assertions. @@ -31,24 +38,34 @@ type Suite[T testingSuite] struct { protorequire.ProtoAssertions historyrequire.HistoryRequire - guardT guardT + guardT guardT + ctx context.Context // override set in initSuite; lazy-filled by Context() under ctxOnce when nil + ctxOnce sync.Once } // copySuite creates a fresh suite instance initialized for the given *testing.T. -func (s *Suite[T]) copySuite(t *testing.T) testingSuite { +// assertT overrides which TestingT assertions are bound to; nil means use the copy's own guardT. +// ctx overrides the suite's context; nil means use the default (lazy testcontext.New). +// +//nolint:revive // ctx is last so callers can pass nil to mean "no override"; SA1012 forbids passing nil as the first ctx arg. 
+func (s *Suite[T]) copySuite(t *testing.T, assertT require.TestingT, ctx context.Context) testingSuite { cp := reflect.New(reflect.TypeFor[T]().Elem()).Interface().(T) - cp.initSuite(t) + cp.initSuite(t, assertT, ctx) return cp } -func (s *Suite[T]) initSuite(t *testing.T) { +//nolint:revive // see copySuite above. +func (s *Suite[T]) initSuite(t *testing.T, assertT require.TestingT, ctx context.Context) { g := &s.guardT g.name = t.Name() g.T = t - g.hasSubtests.Store(false) - s.Assertions = require.New(g) - s.ProtoAssertions = protorequire.New(g) - s.HistoryRequire = historyrequire.New(g) + s.ctx = ctx + if assertT == nil { + assertT = g + } + s.Assertions = require.New(assertT) + s.ProtoAssertions = protorequire.New(assertT) + s.HistoryRequire = historyrequire.New(assertT) } // T returns the *testing.T, panicking if the guard has been sealed. @@ -59,6 +76,17 @@ func (s *Suite[T]) T() *testing.T { return s.guardT.T } +// Context returns the test-scoped context (created from [testcontext]). +// Inside an [Await] callback, it returns the await-scoped context. +func (s *Suite[T]) Context() context.Context { + s.ctxOnce.Do(func() { + if s.ctx == nil { + s.ctx = testcontext.New(s.T()) + } + }) + return s.ctx +} + // Run creates a parallel subtest. The callback receives a fresh copy of the // concrete suite type, initialized for the subtest's *testing.T. func (s *Suite[T]) Run(name string, fn func(T)) bool { @@ -66,10 +94,35 @@ func (s *Suite[T]) Run(name string, fn func(T)) bool { s.guardT.markHasSubtests() return pt.Run(name, func(t *testing.T) { t.Parallel() //nolint:testifylint // parallelsuite intentionally supports parallel subtests - fn(s.copySuite(t).(T)) + fn(s.copySuite(t, nil, nil).(T)) }) } +// Await calls fn repeatedly until all assertions pass or timeout is reached. 
+func (s *Suite[T]) Await(fn func(T), timeout, interval time.Duration) { + s.Awaitf(fn, timeout, interval, "") +} + +// Awaitf is like [Await] but includes a format string appended to the failure message. +func (s *Suite[T]) Awaitf(fn func(T), timeout, interval time.Duration, msg string, args ...any) { + t := s.T() + await.Requiref(s.Context(), t, func(at *await.T) { + fn(s.copySuite(t, at, at.Context()).(T)) + }, timeout, interval, msg, args...) +} + +// AwaitTrue calls fn repeatedly until it returns true or timeout is reached. +// +// Use it for simple local predicates only. Do not use assertions or side effects; use [Await] instead. +func (s *Suite[T]) AwaitTrue(fn func() bool, timeout, interval time.Duration) { + s.AwaitTruef(fn, timeout, interval, "") +} + +// AwaitTruef is like [AwaitTrue] but includes a format string appended to the failure message. +func (s *Suite[T]) AwaitTruef(fn func() bool, timeout, interval time.Duration, msg string, args ...any) { + await.RequireTruef(s.T(), fn, timeout, interval, msg, args...) +} + // Run discovers and runs all exported Test* methods on the given suite in parallel. // // Each method gets its own fresh suite instance initialized for the subtest's @@ -81,7 +134,7 @@ func Run[T testingSuite](t *testing.T, s T, args ...any) { t.Helper() typ := reflect.TypeFor[T]() - if typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Struct { + if typ.Kind() != reflect.Pointer || typ.Elem().Kind() != reflect.Struct { panic(fmt.Sprintf("parallelsuite.Run: suite must be a pointer to a struct, got %v", typ)) } structType := typ.Elem() @@ -109,7 +162,7 @@ func Run[T testingSuite](t *testing.T, s T, args ...any) { t.Run(method.Name, func(t *testing.T) { t.Parallel() - cpS := s.copySuite(t) + cpS := s.copySuite(t, nil, nil) callArgs := append([]reflect.Value{reflect.ValueOf(cpS)}, argVals...) 
method.Func.Call(callArgs) }) @@ -122,8 +175,8 @@ func init() { type ds struct{ Suite[*ds] } ptrType := reflect.TypeFor[*ds]() inheritedMethods = make(map[string]bool, ptrType.NumMethod()) - for i := range ptrType.NumMethod() { - inheritedMethods[ptrType.Method(i).Name] = true + for method := range ptrType.Methods() { + inheritedMethods[method.Name] = true } } @@ -178,8 +231,8 @@ func applyTestifyMFilter(methods []reflect.Method) []reflect.Method { func discoverTestMethods(ptrType, structType reflect.Type, args []any) []reflect.Method { expectedNumIn := 1 + len(args) - for i := range ptrType.NumMethod() { - name := ptrType.Method(i).Name + for method := range ptrType.Methods() { + name := method.Name if !strings.HasPrefix(name, "Test") && !inheritedMethods[name] { panic(fmt.Sprintf( "parallelsuite.Run: suite %s has exported method %s that does not start with Test; "+ @@ -190,8 +243,8 @@ func discoverTestMethods(ptrType, structType reflect.Type, args []any) []reflect } var methods []reflect.Method - for i := range ptrType.NumMethod() { - method := ptrType.Method(i) + for method := range ptrType.Methods() { + method := method if !strings.HasPrefix(method.Name, "Test") { continue } diff --git a/common/testing/parallelsuite/suite_test.go b/common/testing/parallelsuite/suite_test.go index f13a6ce8b8a..ebf49e66a52 100644 --- a/common/testing/parallelsuite/suite_test.go +++ b/common/testing/parallelsuite/suite_test.go @@ -1,11 +1,15 @@ package parallelsuite import ( + "context" "flag" "reflect" + "sync/atomic" "testing" + "time" "github.com/stretchr/testify/require" + "go.temporal.io/server/common/testing/testcontext" ) type validSuite struct{ Suite[*validSuite] } @@ -48,6 +52,54 @@ type setupTestSuite struct{ Suite[*setupTestSuite] } func (s *setupTestSuite) TestA() {} func (s *setupTestSuite) SetupTest() {} //nolint:unused +type awaitTrueSuite struct{ Suite[*awaitTrueSuite] } + +func (s *awaitTrueSuite) TestAwaitTrue() { + var attempts atomic.Int32 + s.AwaitTrue(func() 
bool { + attempts.Add(1) + return true + }, time.Second, time.Millisecond) + s.Equal(int32(1), attempts.Load()) +} + +func (s *awaitTrueSuite) TestAwaitTrueFalseRetry() { + var attempts atomic.Int32 + s.AwaitTrue(func() bool { + return attempts.Add(1) == 2 + }, time.Second, time.Millisecond) + s.Equal(int32(2), attempts.Load()) +} + +func (s *awaitTrueSuite) TestAwaitTruef() { + s.AwaitTruef(func() bool { + return true + }, time.Second, time.Millisecond, "condition should pass") +} + +type contextSuite struct{ Suite[*contextSuite] } + +func (s *contextSuite) TestContextHasDeadline() { + deadline, ok := s.Context().Deadline() + s.True(ok) + s.Positive(time.Until(deadline)) +} + +func (s *contextSuite) TestAwaitUsesSuiteContext() { + type key struct{} + + testcontext.New(s.T(), testcontext.WithContextDecorator(key{}, func(ctx context.Context) context.Context { + return context.WithValue(ctx, key{}, "decorated") + })) + + s.Await(func(s *contextSuite) { + s.Equal("decorated", s.Context().Value(key{})) + deadline, ok := s.Context().Deadline() + s.True(ok) + s.Less(time.Until(deadline), 200*time.Millisecond) + }, 100*time.Millisecond, time.Millisecond) +} + type sealAfterRunSuite struct{ Suite[*sealAfterRunSuite] } func (s *sealAfterRunSuite) TestAssertionAfterRun() { @@ -72,6 +124,12 @@ func TestRun_AcceptsSuite(t *testing.T) { t.Run("with args", func(t *testing.T) { require.NotPanics(t, func() { Run(t, &validWithArgsSuite{}, "hello", 42) }) }) + t.Run("await true", func(t *testing.T) { + require.NotPanics(t, func() { Run(t, &awaitTrueSuite{}) }) + }) + t.Run("context", func(t *testing.T) { + require.NotPanics(t, func() { Run(t, &contextSuite{}) }) + }) } func TestRun_RejectsSuite(t *testing.T) { diff --git a/common/testing/testcontext/context.go b/common/testing/testcontext/context.go index 4b881fd7b22..96d3d30e176 100644 --- a/common/testing/testcontext/context.go +++ b/common/testing/testcontext/context.go @@ -14,13 +14,13 @@ const defaultTimeout = 90 * time.Second 
type contextStore struct { sync.Mutex - byTest map[*testing.T]*contextState + byTest map[testing.TB]*contextState } // testContexts is process-global so repeated helpers in the same test share // one context and one cleanup. var testContexts = contextStore{ - byTest: make(map[*testing.T]*contextState), + byTest: make(map[testing.TB]*contextState), } type config struct { @@ -34,22 +34,22 @@ type contextDecorator struct { decorate func(context.Context) context.Context } -// New returns the test-scoped context for t. The context is canceled when the +// New returns the test-scoped context for tb. The context is canceled when the // test ends or when the configured test timeout expires. // // The first call creates the per-test context and fixes its timeout. Later calls // may add decorators, but an explicit different timeout fails instead of being // silently ignored. -func New(t *testing.T, opts ...Option) context.Context { - t.Helper() +func New(tb testing.TB, opts ...Option) context.Context { + tb.Helper() cfg := config{timeout: effectiveTimeout(0)} for _, opt := range opts { opt(&cfg) } - st := getContextState(t, cfg.timeout) - st.configure(t, cfg) + st := getContextState(tb, cfg.timeout) + st.configure(tb, cfg) return st.context() } @@ -86,55 +86,55 @@ type contextState struct { decorators map[any]struct{} } -func getContextState(t *testing.T, timeout time.Duration) *contextState { - t.Helper() +func getContextState(tb testing.TB, timeout time.Duration) *contextState { + tb.Helper() testContexts.Lock() defer testContexts.Unlock() - if st, ok := testContexts.byTest[t]; ok { + if st, ok := testContexts.byTest[tb]; ok { return st } - ctx, cancel := context.WithTimeout(t.Context(), timeout) + ctx, cancel := context.WithTimeout(tb.Context(), timeout) st := &contextState{ ctx: ctx, cancel: cancel, timeout: timeout, decorators: make(map[any]struct{}), } - testContexts.byTest[t] = st + testContexts.byTest[tb] = st - t.Cleanup(func() { + tb.Cleanup(func() { st.cancel() 
testContexts.Lock() - delete(testContexts.byTest, t) + delete(testContexts.byTest, tb) testContexts.Unlock() if st.err() == context.DeadlineExceeded { - t.Errorf("Test exceeded timeout of %v", st.timeout) + tb.Errorf("Test exceeded timeout of %v", st.timeout) } }) return st } -func (s *contextState) configure(t *testing.T, cfg config) { - t.Helper() +func (s *contextState) configure(tb testing.TB, cfg config) { + tb.Helper() s.mu.Lock() defer s.mu.Unlock() if cfg.timeoutSet && cfg.timeout != s.timeout { - t.Fatalf("testcontext: test context already exists with timeout %v; cannot change it to %v", s.timeout, cfg.timeout) + tb.Fatalf("testcontext: test context already exists with timeout %v; cannot change it to %v", s.timeout, cfg.timeout) } // Decorators may be registered by independent helpers, so apply each keyed // decorator at most once while preserving call order. for _, decorator := range cfg.decorators { if decorator.key == nil { - t.Fatal("testcontext: context decorator key must not be nil") + tb.Fatal("testcontext: context decorator key must not be nil") } if decorator.decorate == nil { - t.Fatal("testcontext: context decorator must not be nil") + tb.Fatal("testcontext: context decorator must not be nil") } if _, ok := s.decorators[decorator.key]; ok { continue diff --git a/docs/development/testing.md b/docs/development/testing.md index 7e41158db7f..1c7734343d7 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -41,6 +41,33 @@ Always use `require.X` (and `protorequire.X`) instead of `assert.X` (and `protoa `assert` records a failure but lets the test continue, which often leads to confusing cascading errors. +### Polling with await.Require + +For polling/retry loops in tests, use `await.Require` (or `await.Requiref`) +from `common/testing/await` instead of testify's `EventuallyWithT`. 
+ +Use `t.Context()` inside the callback for a context derived from the parent +context and canceled when the parent context is canceled or the await timeout +expires. + +```go +await.Require(ctx, t, func(t *await.T) { + resp, err := client.GetStatus(t.Context()) + require.NoError(t, err) + require.Equal(t, "ready", resp.Status) +}, 5*time.Second, 200*time.Millisecond) +``` + +Use `RequireTrue` instead of testify's `Eventually` for simple local bool-returning predicates. + +```go +await.RequireTrue(t, func() bool { + return cache.Ready() +}, 5*time.Second, 200*time.Millisecond) +``` + +`RequireTrue` is the wrong tool when dealing with errors or assertions; use `Require` instead. + ### Parallelization All tests (and subtests!) should use `t.Parallel()` to be run concurrently; @@ -60,6 +87,18 @@ and provides assertion helpers and safety mechanisms. It replaces all use of `testify`'s `Suite`. +#### Await shorthand + +```go +s.Await(func(s *MySuite) { + resp, err := client.GetStatus(s.Context()) + s.NoError(err) + s.Equal("ready", resp.Status) +}, 5*time.Second, 200*time.Millisecond) +``` + +Inside an `s.Await` callback, `s.Context()` is capped to that await's timeout. + ### testvars package Instead of creating identifiers like task queue name, namespace or worker identity by hand, diff --git a/tests/premature_eos_test.go b/tests/premature_eos_test.go index ee8258c9294..b58f290273d 100644 --- a/tests/premature_eos_test.go +++ b/tests/premature_eos_test.go @@ -92,13 +92,15 @@ func (s *PrematureEosTestSuite) Test_SpeculativeWFTEventsLostAfterSignalMidHisto // Without this wait there is a race: if the update hasn't been processed yet, the signal // would only add event 8 (SignalReceived) with freshNextEventId=9, producing 8 events // instead of the expected 9 and causing a false test failure. 
- s.Eventually(func() bool { - desc, descErr := env.FrontendClient().DescribeWorkflowExecution(testcore.NewContext(), + s.Awaitf(func(s *PrematureEosTestSuite) { + desc, descErr := env.FrontendClient().DescribeWorkflowExecution(s.Context(), &workflowservice.DescribeWorkflowExecutionRequest{ Namespace: env.Namespace().String(), Execution: wfExecution, }) - return descErr == nil && desc.GetPendingWorkflowTask() != nil + s.NoError(descErr) + s.NotNil(desc) + s.NotNil(desc.GetPendingWorkflowTask()) }, 5*time.Second, 250*time.Millisecond, "speculative WFT should be scheduled after sending update") // Fetch page 1 via GetWorkflowExecutionHistory — mimicking what the SDK does when a diff --git a/tests/query_workflow_test.go b/tests/query_workflow_test.go index 76373b52b94..35f28662ee8 100644 --- a/tests/query_workflow_test.go +++ b/tests/query_workflow_test.go @@ -305,7 +305,7 @@ func (s *QueryWorkflowSuite) TestQueryWorkflow_QueryFailedWorkflowTask() { s.NotNil(workflowRun) s.NotEmpty(workflowRun.GetRunID()) - s.Eventually(func() bool { + s.AwaitTrue(func() bool { // wait for workflow task to fail 3 times return atomic.LoadInt32(&failures) >= 3 }, 10*time.Second, 50*time.Millisecond) From 659013e6cf617314a06730fdbd5bfb99190b09f6 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Fri, 15 May 2026 15:20:41 -0700 Subject: [PATCH 45/73] Strongly consistent Nexus endpoints lookup via dynamic config (#10208) ## What changed? Instead of using the cache for Nexus endpoint name lookups, always look it up via RPC when `ForceNexusEndpointRefreshOnRead` is set. See https://github.com/temporalio/temporal/pull/10204 for an alternative approach. ## Why? During testing - such as functional server tests or SDK tests against the CLI - the Nexus endpoint is not always immediately available after creation. This creates friction as it requires extra retries and causes flakiness if not guarded against. This change eliminates that need. ## How did you test it?
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) --- common/dynamicconfig/constants.go | 8 ++++ common/nexus/endpoint_registry.go | 20 +++++++++ common/nexus/endpoint_registry_test.go | 47 ++++++++++++++++++++++ tests/nexus_test_base.go | 23 ----------- tests/testcore/dynamic_config_overrides.go | 1 + 5 files changed, 76 insertions(+), 23 deletions(-) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 2b02cc27114..30bc93c04a2 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -960,6 +960,14 @@ and deployment interaction in matching and history.`, 1*time.Second, `RefreshNexusEndpointsMinWait is the minimum wait time between background long poll requests to update Nexus endpoints.`, ) + ForceNexusEndpointRefreshOnRead = NewGlobalBoolSetting( + "system.forceNexusEndpointRefreshOnRead", + false, + `ForceNexusEndpointRefreshOnRead forces the Nexus endpoint registry to refresh from matching service on read. +This effectively bypasses the cache so that endpoint writes are visible to readers immediately, instead of after the +next background long-poll refresh. 
This should not be turned on in production, as it would introduce scalability +and reliability problems.`, + ) NexusReadThroughCacheSize = NewGlobalIntSetting( "system.nexusReadThroughCacheSize", 100, diff --git a/common/nexus/endpoint_registry.go b/common/nexus/endpoint_registry.go index c16cc9590cc..d15aed35099 100644 --- a/common/nexus/endpoint_registry.go +++ b/common/nexus/endpoint_registry.go @@ -3,6 +3,7 @@ package nexus import ( "context" "errors" + "fmt" "sync" "sync/atomic" "time" @@ -30,6 +31,7 @@ type ( refreshPageSize dynamicconfig.IntPropertyFn refreshMinWait dynamicconfig.DurationPropertyFn refreshRetryPolicy backoff.RetryPolicy + refreshOnRead dynamicconfig.BoolPropertyFn readThroughCacheSize dynamicconfig.IntPropertyFn readThroughCacheTTL dynamicconfig.DurationPropertyFn } @@ -78,6 +80,7 @@ func NewEndpointRegistryConfig(dc *dynamicconfig.Collection) *EndpointRegistryCo refreshMinWait: dynamicconfig.RefreshNexusEndpointsMinWait.Get(dc), readThroughCacheSize: dynamicconfig.NexusReadThroughCacheSize.Get(dc), readThroughCacheTTL: dynamicconfig.NexusReadThroughCacheTTL.Get(dc), + refreshOnRead: dynamicconfig.ForceNexusEndpointRefreshOnRead.Get(dc), } config.refreshRetryPolicy = backoff.NewExponentialRetryPolicy(config.refreshMinWait()).WithMaximumInterval(config.refreshLongPollTimeout()) return config @@ -148,6 +151,15 @@ func (r *EndpointRegistryImpl) GetByName(ctx context.Context, _ namespace.ID, en if err := r.waitUntilInitialized(ctx); err != nil { return nil, err } + + if r.config.refreshOnRead() { + // This is useful for test and single-node deployments that need endpoint writes + // to be visible to GetByName immediately, without waiting for background long poll. 
+ if err := r.loadEndpoints(ctx); err != nil { + return nil, fmt.Errorf("refreshing endpoints: %w", err) + } + } + r.dataLock.RLock() endpoint, ok := r.endpointsByName[endpointName] r.dataLock.RUnlock() @@ -163,6 +175,14 @@ func (r *EndpointRegistryImpl) GetByID(ctx context.Context, id string) (*persist return nil, err } + if r.config.refreshOnRead() { + // This is useful for test and single-node deployments that need endpoint writes + // to be visible to GetByID immediately, without waiting for background long poll. + if err := r.loadEndpoints(ctx); err != nil { + return nil, fmt.Errorf("refreshing endpoints: %w", err) + } + } + r.dataLock.RLock() endpoint, ok := r.endpointsByID[id] r.dataLock.RUnlock() diff --git a/common/nexus/endpoint_registry_test.go b/common/nexus/endpoint_registry_test.go index c670ac2a52f..f2fa488c817 100644 --- a/common/nexus/endpoint_registry_test.go +++ b/common/nexus/endpoint_registry_test.go @@ -21,6 +21,7 @@ import ( "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/persistence" "go.temporal.io/server/common/testing/protoassert" + "go.temporal.io/server/common/testing/protorequire" "go.uber.org/mock/gomock" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -129,6 +130,52 @@ func TestGetNotFound(t *testing.T) { assert.Equal(t, int64(1), reg.tableVersion) } +func TestRefreshOnRead(t *testing.T) { + t.Parallel() + + for name, read := range map[string]func(*EndpointRegistryImpl, *persistencespb.NexusEndpointEntry) (*persistencespb.NexusEndpointEntry, error){ + "GetByName": func(reg *EndpointRegistryImpl, entry *persistencespb.NexusEndpointEntry) (*persistencespb.NexusEndpointEntry, error) { + return reg.GetByName(context.Background(), "ignored", entry.Endpoint.Spec.Name) + }, + "GetByID": func(reg *EndpointRegistryImpl, entry *persistencespb.NexusEndpointEntry) (*persistencespb.NexusEndpointEntry, error) { + return reg.GetByID(context.Background(), entry.Id) + }, + } { + t.Run(name, func(t *testing.T) { + 
t.Parallel() + + testEntry := newEndpointEntry(t.Name()) + mocks := newTestMocks(t) + mocks.config.refreshOnRead = dynamicconfig.GetBoolPropertyFn(true) + + mocks.matchingClient.EXPECT().ListNexusEndpoints(gomock.Any(), &matchingservice.ListNexusEndpointsRequest{ + PageSize: int32(100), + LastKnownTableVersion: int64(0), + Wait: false, + }).Return(&matchingservice.ListNexusEndpointsResponse{ + Entries: []*persistencespb.NexusEndpointEntry{testEntry}, + TableVersion: int64(2), + }, nil) + + reg := NewEndpointRegistry(mocks.config, mocks.matchingClient, mocks.persistence, log.NewNoopLogger(), metrics.NoopMetricsHandler) + + // Skip StartLifecycle so the background loop does not race with the single mock + // expectation above. Pre-closing ready lets waitUntilInitialized fall through. + ready := make(chan struct{}) + close(ready) + reg.dataReady.Store(&dataReady{ready: ready}) + + endpoint, err := read(reg, testEntry) + require.NoError(t, err) + protorequire.ProtoEqual(t, testEntry, endpoint) + + reg.dataLock.RLock() + defer reg.dataLock.RUnlock() + require.Equal(t, int64(2), reg.tableVersion) + }) + } +} + func TestInitializationFallback(t *testing.T) { t.Parallel() diff --git a/tests/nexus_test_base.go b/tests/nexus_test_base.go index 3b386e63ce2..72eab3c2200 100644 --- a/tests/nexus_test_base.go +++ b/tests/nexus_test_base.go @@ -3,9 +3,7 @@ package tests import ( "context" "errors" - "strings" "testing" - "time" "github.com/google/uuid" "github.com/nexus-rpc/sdk-go/nexus" @@ -59,7 +57,6 @@ func (env *NexusTestEnv) createNexusEndpoint(ctx context.Context, t *testing.T, }) }) - env.ensureNexusEndpoint(ctx, t, name) return resp.Endpoint } @@ -94,29 +91,9 @@ func (env *NexusTestEnv) createRandomExternalNexusServer(ctx context.Context, t }) }) - env.ensureNexusEndpoint(ctx, t, endpointName) return endpointName } -// ensureNexusEndpoint probes the specified endpoint until it's visible to StartNexusOperationExecution to ensure tests -// can use it. 
-func (env *NexusTestEnv) ensureNexusEndpoint(ctx context.Context, t *testing.T, endpointName string) { - require.Eventually(t, func() bool { - _, err := env.FrontendClient().StartNexusOperationExecution(ctx, &workflowservice.StartNexusOperationExecutionRequest{ - Namespace: env.Namespace().String(), - Endpoint: endpointName, - Service: "probe", - Operation: "probe", - RequestId: "probe", - }) - if notFound, ok := errors.AsType[*serviceerror.NotFound](err); ok { - msg := notFound.Error() - return msg != "endpoint not registered" && !strings.HasPrefix(msg, "could not find Nexus endpoint by name:") - } - return true - }, 10*time.Second, 100*time.Millisecond, "endpoint should become visible") -} - // nexusTaskResponse represents a successful response from a nexus task handler. // A nil response indicates no response should be sent (e.g., handler timed out). type nexusTaskResponse struct { diff --git a/tests/testcore/dynamic_config_overrides.go b/tests/testcore/dynamic_config_overrides.go index f08db10a7ac..4fdbb7cf772 100644 --- a/tests/testcore/dynamic_config_overrides.go +++ b/tests/testcore/dynamic_config_overrides.go @@ -62,6 +62,7 @@ var ( dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace.Key(): ClientSuiteLimit, dynamicconfig.FrontendEnableWorkerVersioningDataAPIs.Key(): true, dynamicconfig.FrontendEnableWorkerVersioningWorkflowAPIs.Key(): true, + dynamicconfig.ForceNexusEndpointRefreshOnRead.Key(): true, dynamicconfig.RefreshNexusEndpointsMinWait.Key(): 1 * time.Millisecond, nexusoperations.RecordCancelRequestCompletionEvents.Key(): true, nexusoperations.UseSystemCallbackURL.Key(): true, From 18ad8786df0efa62a5f17c06d079a515d0330ccc Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 15 May 2026 18:35:55 -0400 Subject: [PATCH 46/73] Add OR statement to match V1 and V2 Scheduler BusinessIds (#10248) ## What changed? 
Add a Scheduler-specific query converter to handle `ScheduleId` search attribute alias to underlying `WorkflowId` system search attribute field. If chasm is enabled, the query converter will match against both V1 and V2 Scheduler workflowId formats. V1 prefixes the workflowId with `temporal-sys-scheduler`, while V2 doesn't, so to handle either case, we need to add an OR/AND statement to match both V1 and V2 Scheduler BusinessId. OR will be used if the operator's boolean is positive, AND if the operator's boolean is negative. ## Why? Unable to retrieve V2 Schedules that query using `ScheduleId` as a search attribute. ## How did you test it? - [X] built - [X] run locally and tested manually - [X] covered by existing tests - [X] added new unit test(s) - [X] added new functional test(s) --- .../store/elasticsearch/query_interceptors.go | 5 - .../elasticsearch/query_interceptors_test.go | 27 --- .../visibility/store/query/converter.go | 4 - .../visibility/store/query/converter_test.go | 22 -- .../store/sql/query_converter_legacy.go | 5 - .../store/sql/query_converter_legacy_test.go | 12 - common/searchattribute/mapper.go | 20 ++ service/frontend/workflow_handler.go | 9 + .../scheduler/schedule_id_query_rewriter.go | 181 +++++++++++++++ .../schedule_id_query_rewriter_test.go | 214 ++++++++++++++++++ tests/schedule_test.go | 132 +++++++++++ 11 files changed, 556 insertions(+), 75 deletions(-) create mode 100644 service/worker/scheduler/schedule_id_query_rewriter.go create mode 100644 service/worker/scheduler/schedule_id_query_rewriter_test.go diff --git a/common/persistence/visibility/store/elasticsearch/query_interceptors.go b/common/persistence/visibility/store/elasticsearch/query_interceptors.go index c7276b23333..380033638bb 100644 --- a/common/persistence/visibility/store/elasticsearch/query_interceptors.go +++ b/common/persistence/visibility/store/elasticsearch/query_interceptors.go @@ -13,7 +13,6 @@ import ( "go.temporal.io/server/common/namespace"
"go.temporal.io/server/common/persistence/visibility/store" "go.temporal.io/server/common/persistence/visibility/store/query" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" ) @@ -131,10 +130,6 @@ func (vi *valuesInterceptor) Values(name string, fieldName string, values ...any return nil, err } - if name == sadefs.ScheduleID && fieldName == sadefs.WorkflowID { - value = primitives.ScheduleWorkflowIDPrefix + fmt.Sprintf("%v", value) - } - value, err = vi.validateValueType(name, value, fieldType) if err != nil { return nil, err diff --git a/common/persistence/visibility/store/elasticsearch/query_interceptors_test.go b/common/persistence/visibility/store/elasticsearch/query_interceptors_test.go index 7d9be9c355f..a38b89d7ad1 100644 --- a/common/persistence/visibility/store/elasticsearch/query_interceptors_test.go +++ b/common/persistence/visibility/store/elasticsearch/query_interceptors_test.go @@ -9,7 +9,6 @@ import ( "go.temporal.io/server/common/log" "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/persistence/visibility/store/query" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" "go.uber.org/mock/gomock" @@ -179,32 +178,6 @@ func (s *QueryInterceptorSuite) TestNameInterceptor_ScheduleIDToWorkflowID() { s.Equal(sadefs.WorkflowID, fieldName) } -// Ensures the valuesInterceptor applies the ScheduleID to WorkflowID transformation, -// including prepending the WorkflowIDPrefix. 
-func (s *QueryInterceptorSuite) TestValuesInterceptor_ScheduleIDToWorkflowID() { - vi := NewValuesInterceptor( - "test-namespace", - searchattribute.TestEsNameTypeMap(), - nil, - metrics.NoopMetricsHandler, - log.NewNoopLogger(), - ) - - values, err := vi.Values(sadefs.ScheduleID, sadefs.WorkflowID, "test-schedule-id") - s.NoError(err) - s.Len(values, 1) - s.Equal(primitives.ScheduleWorkflowIDPrefix+"test-schedule-id", values[0]) - - values, err = vi.Values(sadefs.ScheduleID, - sadefs.WorkflowID, - "test-schedule-id-1", - "test-schedule-id-2") - s.NoError(err) - s.Len(values, 2) - s.Equal(primitives.ScheduleWorkflowIDPrefix+"test-schedule-id-1", values[0]) - s.Equal(primitives.ScheduleWorkflowIDPrefix+"test-schedule-id-2", values[1]) -} - // Ensures the valuesInterceptor doesn't modify values when no transformation is needed. func (s *QueryInterceptorSuite) TestValuesInterceptor_NoTransformation() { vi := NewValuesInterceptor( diff --git a/common/persistence/visibility/store/query/converter.go b/common/persistence/visibility/store/query/converter.go index 004d1cad2a3..394150ba81a 100644 --- a/common/persistence/visibility/store/query/converter.go +++ b/common/persistence/visibility/store/query/converter.go @@ -14,7 +14,6 @@ import ( "go.temporal.io/api/serviceerror" "go.temporal.io/server/chasm" "go.temporal.io/server/common/namespace" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" "go.temporal.io/server/common/sqlquery" @@ -578,9 +577,6 @@ func (c *QueryConverter[ExprT]) parseValueExpr( if err != nil { return nil, err } - if saName == sadefs.ScheduleID && saFieldName == sadefs.WorkflowID { - value = primitives.ScheduleWorkflowIDPrefix + fmt.Sprintf("%v", value) - } return value, nil case sqlparser.BoolVal: // no-op: no validation needed diff --git a/common/persistence/visibility/store/query/converter_test.go 
b/common/persistence/visibility/store/query/converter_test.go index 8746fd58a64..a37be73fa26 100644 --- a/common/persistence/visibility/store/query/converter_test.go +++ b/common/persistence/visibility/store/query/converter_test.go @@ -12,7 +12,6 @@ import ( "go.temporal.io/api/serviceerror" "go.temporal.io/server/chasm" "go.temporal.io/server/common/namespace" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" "go.uber.org/mock/gomock" @@ -1697,18 +1696,6 @@ func TestQueryConverter_ConvertColName(t *testing.T) { out: keywordCol, }, - { - name: "success special ScheduleID", - in: &sqlparser.ColName{ - Name: sqlparser.NewColIdent(sadefs.ScheduleID), - }, - out: NewSAColumn( - sadefs.ScheduleID, - sadefs.WorkflowID, - enumspb.INDEXED_VALUE_TYPE_KEYWORD, - ), - }, - { name: "success backticks", in: &sqlparser.ColName{ @@ -1956,15 +1943,6 @@ func TestQueryConverter_ParseValueExpr(t *testing.T) { out: "foo", }, - { - name: "success special ScheduleID", - expr: sqlparser.NewStrVal([]byte("foo")), - alias: sadefs.ScheduleID, - field: sadefs.WorkflowID, - saType: enumspb.INDEXED_VALUE_TYPE_KEYWORD, - out: primitives.ScheduleWorkflowIDPrefix + "foo", - }, - { name: "success bool", expr: sqlparser.BoolVal(true), diff --git a/common/persistence/visibility/store/sql/query_converter_legacy.go b/common/persistence/visibility/store/sql/query_converter_legacy.go index e28894b8ff7..46d07b7a976 100644 --- a/common/persistence/visibility/store/sql/query_converter_legacy.go +++ b/common/persistence/visibility/store/sql/query_converter_legacy.go @@ -13,7 +13,6 @@ import ( "go.temporal.io/server/common/namespace" "go.temporal.io/server/common/persistence/sql/sqlplugin" "go.temporal.io/server/common/persistence/visibility/store/query" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" 
"go.temporal.io/server/common/sqlquery" @@ -489,10 +488,6 @@ func (c *QueryConverterLegacy) convertValueExpr( return err } - if name == sadefs.ScheduleID && saFieldName == sadefs.WorkflowID { - value = primitives.ScheduleWorkflowIDPrefix + fmt.Sprintf("%v", value) - } - switch v := value.(type) { case string: // escape strings for safety diff --git a/common/persistence/visibility/store/sql/query_converter_legacy_test.go b/common/persistence/visibility/store/sql/query_converter_legacy_test.go index c53b57f8cfc..1309e05af8c 100644 --- a/common/persistence/visibility/store/sql/query_converter_legacy_test.go +++ b/common/persistence/visibility/store/sql/query_converter_legacy_test.go @@ -14,7 +14,6 @@ import ( "go.temporal.io/server/chasm" "go.temporal.io/server/common/namespace" "go.temporal.io/server/common/persistence/visibility/store/query" - "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/searchattribute/sadefs" ) @@ -773,17 +772,6 @@ func (s *queryConverterSuite) TestConvertValueExpr() { output: "('foo', 'bar')", err: nil, }, - { - name: "ScheduleId transformation", - input: "'test-schedule'", - args: map[string]any{ - "saName": sadefs.ScheduleID, - "saFieldName": sadefs.WorkflowID, - "saType": enumspb.INDEXED_VALUE_TYPE_KEYWORD, - }, - output: fmt.Sprintf("'%stest-schedule'", primitives.ScheduleWorkflowIDPrefix), - err: nil, - }, } for _, tc := range tests { diff --git a/common/searchattribute/mapper.go b/common/searchattribute/mapper.go index 115682ae149..7d77b3ad00e 100644 --- a/common/searchattribute/mapper.go +++ b/common/searchattribute/mapper.go @@ -221,3 +221,23 @@ func UnaliasFields( return &commonpb.SearchAttributes{IndexedFields: newIndexedFields}, nil } + +// IsUserDefinedSearchAttribute returns true if alias refers to a user-defined custom search +// attribute rather than a synthetic one (e.g. the synthetic ScheduleId that maps to WorkflowId). 
+// +// Two independent checks are required because custom SAs can be registered in two ways: +// 1. Via UpdateNamespace with an explicit alias: stored in the Mapper as alias → field-name. +// GetFieldName returns the underlying field name (different from the alias), so the SA is +// identifiable even when it is absent from the NameTypeMap under the alias. +// 2. Via AddSearchAttributes without an alias: stored directly in NameTypeMap's custom map +// under the alias itself. GetFieldName returns an error (no mapping exists), so the type +// map is the only way to detect these. +func IsUserDefinedSearchAttribute(alias string, saMapper Mapper, saNameType NameTypeMap, ns string) bool { + // Check 1: explicit alias mapping resolves to a different underlying field name. + if mapped, err := saMapper.GetFieldName(alias, ns); err == nil && mapped != alias { + return true + } + // Check 2: alias is registered as a custom SA in the type map (no alias mapping). + _, ok := saNameType.Custom()[alias] + return ok +} diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index 9736b4979a3..e85aad1a354 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -5120,6 +5120,15 @@ func (wh *WorkflowHandler) prepareSchedulerQuery( return "", err } + saMapper, err := wh.saMapperProvider.GetMapper(namespaceName) + if err != nil { + return "", serviceerror.NewUnavailablef(errUnableToGetSearchAttributesMessage, err) + } + query, err = scheduler.RewriteScheduleIDQuery(query, chasmEnabled, saMapper, saNameType, namespaceName) + if err != nil { + return "", err + } + result = fmt.Sprintf("%s AND (%s)", baseQuery, query) } diff --git a/service/worker/scheduler/schedule_id_query_rewriter.go b/service/worker/scheduler/schedule_id_query_rewriter.go new file mode 100644 index 00000000000..a50c60041af --- /dev/null +++ b/service/worker/scheduler/schedule_id_query_rewriter.go @@ -0,0 +1,181 @@ +package scheduler + +import ( + 
"strings" + + "github.com/temporalio/sqlparser" + "go.temporal.io/server/common/namespace" + "go.temporal.io/server/common/primitives" + "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/searchattribute/sadefs" +) + +var workflowIDCol = &sqlparser.ColName{Name: sqlparser.NewColIdent(sadefs.WorkflowID)} + +// RewriteScheduleIDQuery rewrites ScheduleId comparisons in the query string to WorkflowId +// comparisons before the query reaches the visibility store converters. +// +// V1 schedules store WorkflowId with a "temporal-sys-scheduler:" prefix; V2/CHASM schedules +// store it without any prefix. When chasmEnabled (migration period), each ScheduleId comparison +// becomes an OR of the prefixed (V1) and unprefixed (V2) WorkflowId conditions for positive +// operators, and AND for negative operators (!=, NOT IN, NOT STARTS_WITH), so both stores are +// correctly included or excluded. +// +// If the user has defined a custom search attribute named ScheduleId, this function leaves the +// expression unchanged; the converter handles it as a regular keyword SA. +// +// TODO: once V1 schedules are fully migrated to CHASM, drop the OR/AND and emit only the +// unprefixed V2 WorkflowId condition. +func RewriteScheduleIDQuery( + queryStr string, + chasmEnabled bool, + saMapper searchattribute.Mapper, + saNameType searchattribute.NameTypeMap, + ns namespace.Name, +) (string, error) { + if strings.TrimSpace(queryStr) == "" { + return queryStr, nil + } + + stmt, err := sqlparser.Parse("select * from table1 where " + queryStr) + if err != nil { + // Malformed SQL is passed through; the normal validation path will return a proper error. 
+ return queryStr, nil + } + sel, ok := stmt.(*sqlparser.Select) + if !ok { + return queryStr, nil + } + if sel.Where == nil { + return queryStr, nil + } + + changed := rewriteExpr(&sel.Where.Expr, chasmEnabled, saMapper, saNameType, ns.String()) + if !changed { + return queryStr, nil + } + + // Reconstruct the query from the rewritten WHERE expression. + // If the original query also had a GROUP BY clause, append it so it is preserved for + // when GROUP BY support is added to prepareSchedulerQuery — omitting it would silently + // drop the clause and cause the ScheduleId issue to resurface once GROUP BY is supported. + result := sqlparser.String(sel.Where.Expr) + if len(sel.GroupBy) > 0 { + groupByCols := make([]string, len(sel.GroupBy)) + for i, expr := range sel.GroupBy { + groupByCols[i] = sqlparser.String(expr) + } + result += " group by " + strings.Join(groupByCols, ", ") + } + return result, nil +} + +// rewriteExpr recursively walks expr and rewrites ScheduleId comparison nodes in-place. +// Returns true if any rewriting occurred. 
+func rewriteExpr(exprRef *sqlparser.Expr, chasmEnabled bool, saMapper searchattribute.Mapper, saNameType searchattribute.NameTypeMap, ns string) bool { + switch e := (*exprRef).(type) { + case *sqlparser.AndExpr: + l := rewriteExpr(&e.Left, chasmEnabled, saMapper, saNameType, ns) + r := rewriteExpr(&e.Right, chasmEnabled, saMapper, saNameType, ns) + return l || r + case *sqlparser.OrExpr: + l := rewriteExpr(&e.Left, chasmEnabled, saMapper, saNameType, ns) + r := rewriteExpr(&e.Right, chasmEnabled, saMapper, saNameType, ns) + return l || r + case *sqlparser.ParenExpr: + return rewriteExpr(&e.Expr, chasmEnabled, saMapper, saNameType, ns) + case *sqlparser.NotExpr: + return rewriteExpr(&e.Expr, chasmEnabled, saMapper, saNameType, ns) + case *sqlparser.ComparisonExpr: + return rewriteComparison(exprRef, e, chasmEnabled, saMapper, saNameType, ns) + case *sqlparser.IsExpr: + return rewriteIsExpr(e, saMapper, saNameType, ns) + } + return false +} + +// rewriteComparison rewrites a single ComparisonExpr if its LHS is the synthetic ScheduleId SA. +func rewriteComparison(exprRef *sqlparser.Expr, expr *sqlparser.ComparisonExpr, chasmEnabled bool, saMapper searchattribute.Mapper, saNameType searchattribute.NameTypeMap, ns string) bool { + col, ok := expr.Left.(*sqlparser.ColName) + if !ok || !isScheduleIDToWorkflowIDColumn(col, saMapper, saNameType, ns) { + return false + } + + if !chasmEnabled { + // V1-only: prefix the value and use WorkflowId as column. + expr.Left = workflowIDCol + expr.Right = prefixScheduleIDSQLValues(expr.Right) + return true + } + + // CHASM migration path: OR of prefixed (V1) and unprefixed (V2) for positive operators; + // AND for negative operators so both forms are excluded. 
+ v1Expr := &sqlparser.ComparisonExpr{ + Operator: expr.Operator, + Left: workflowIDCol, + Right: prefixScheduleIDSQLValues(expr.Right), + } + v2Expr := &sqlparser.ComparisonExpr{ + Operator: expr.Operator, + Left: workflowIDCol, + Right: expr.Right, + } + + if IsNegativeScheduleIDOperator(expr.Operator) { + *exprRef = &sqlparser.ParenExpr{Expr: &sqlparser.AndExpr{Left: v1Expr, Right: v2Expr}} + } else { + *exprRef = &sqlparser.ParenExpr{Expr: &sqlparser.OrExpr{Left: v1Expr, Right: v2Expr}} + } + return true +} + +// rewriteIsExpr rewrites a ScheduleId IS [NOT] NULL expression to use WorkflowId. +// No prefix rewriting is needed for IS NULL / IS NOT NULL. +func rewriteIsExpr(expr *sqlparser.IsExpr, saMapper searchattribute.Mapper, saNameType searchattribute.NameTypeMap, ns string) bool { + col, ok := expr.Expr.(*sqlparser.ColName) + if !ok || !isScheduleIDToWorkflowIDColumn(col, saMapper, saNameType, ns) { + return false + } + expr.Expr = &sqlparser.ColName{Name: sqlparser.NewColIdent(sadefs.WorkflowID)} + return true +} + +// IsNegativeScheduleIDOperator returns true for operators that express exclusion. +// Negative operators require AND when combining V1 and V2 WorkflowId conditions so that +// both prefixed and unprefixed forms are excluded; positive operators use OR. +func IsNegativeScheduleIDOperator(operator string) bool { + return operator == sqlparser.NotEqualStr || + operator == sqlparser.NotInStr || + operator == sqlparser.NotStartsWithStr +} + +// isScheduleIDToWorkflowIDColumn returns true if col refers to the ScheduleId search attribute +// that maps to WorkflowId (the built-in virtual SA), as opposed to a user-defined custom SA +// named ScheduleId which should be queried as-is. 
+func isScheduleIDToWorkflowIDColumn(col *sqlparser.ColName, saMapper searchattribute.Mapper, saNameType searchattribute.NameTypeMap, ns string) bool { + alias := col.Name.String() + if searchattribute.IsUserDefinedSearchAttribute(alias, saMapper, saNameType, ns) { + return false + } + return strings.TrimPrefix(alias, sadefs.ReservedPrefix) == sadefs.ScheduleID +} + +// prefixScheduleIDSQLValues returns a copy of the SQL value expression with the V1 schedule +// WorkflowId prefix prepended to each string literal. Handles single SQLVal and ValTuple (IN). +func prefixScheduleIDSQLValues(expr sqlparser.Expr) sqlparser.Expr { + switch e := expr.(type) { + case *sqlparser.SQLVal: + if e.Type == sqlparser.StrVal { + return sqlparser.NewStrVal([]byte(primitives.ScheduleWorkflowIDPrefix + string(e.Val))) + } + return e + case sqlparser.ValTuple: + result := make(sqlparser.ValTuple, len(e)) + for i, item := range e { + result[i] = prefixScheduleIDSQLValues(item) + } + return result + default: + return expr + } +} diff --git a/service/worker/scheduler/schedule_id_query_rewriter_test.go b/service/worker/scheduler/schedule_id_query_rewriter_test.go new file mode 100644 index 00000000000..88dfac7aa6b --- /dev/null +++ b/service/worker/scheduler/schedule_id_query_rewriter_test.go @@ -0,0 +1,214 @@ +package scheduler + +import ( + "testing" + + "github.com/stretchr/testify/require" + enumspb "go.temporal.io/api/enums/v1" + "go.temporal.io/server/common/namespace" + "go.temporal.io/server/common/primitives" + "go.temporal.io/server/common/searchattribute" +) + +// TestMapper only processes "test-namespace"; use it so custom SA lookups resolve correctly. +var testNS = namespace.Name("test-namespace") + +// emptyNameTypeMap has no custom SAs — ScheduleId is synthetic. +var emptyNameTypeMap = searchattribute.NewNameTypeMap(nil) + +// customScheduleIDNameTypeMap simulates a namespace that registered ScheduleId as a custom SA. 
+var customScheduleIDNameTypeMap = searchattribute.NewNameTypeMap(map[string]enumspb.IndexedValueType{ + "ScheduleId": enumspb.INDEXED_VALUE_TYPE_KEYWORD, +}) + +func TestRewriteScheduleIDQuery(t *testing.T) { + t.Parallel() + + prefix := primitives.ScheduleWorkflowIDPrefix + + tests := []struct { + name string + query string + chasmEnabled bool + mapper searchattribute.Mapper + saNameType *searchattribute.NameTypeMap // nil means emptyNameTypeMap + want string + }{ + // Empty / no-op cases. + { + name: "empty query", + query: "", + want: "", + }, + { + // ScheduleId is rewritten and GROUP BY is preserved so that when GROUP BY support + // is added to prepareSchedulerQuery the rewrite is already in place. + name: "CHASM ScheduleId with GROUP BY rewrites WHERE preserves GROUP BY", + query: "ScheduleId = 'my-sched' Group By TemporalSchedulePaused", + chasmEnabled: true, + want: "(WorkflowId = '" + prefix + "my-sched' or WorkflowId = 'my-sched') group by TemporalSchedulePaused", + }, + { + name: "whitespace query", + query: " ", + want: " ", + }, + { + name: "no ScheduleId no rewrite", + query: "ExecutionStatus = 'Running'", + want: "ExecutionStatus = 'Running'", + }, + + // V1 (chasmEnabled=false) — single-value operators. 
+ { + name: "V1 equal", + query: "ScheduleId = 'my-sched'", + chasmEnabled: false, + want: "WorkflowId = '" + prefix + "my-sched'", + }, + { + name: "V1 not equal", + query: "ScheduleId != 'my-sched'", + chasmEnabled: false, + want: "WorkflowId != '" + prefix + "my-sched'", + }, + { + name: "V1 starts with", + query: "ScheduleId STARTS_WITH 'my-'", + chasmEnabled: false, + want: "WorkflowId starts_with '" + prefix + "my-'", + }, + { + name: "V1 not starts with", + query: "ScheduleId NOT STARTS_WITH 'my-'", + chasmEnabled: false, + want: "WorkflowId not starts_with '" + prefix + "my-'", + }, + { + name: "V1 IN", + query: "ScheduleId IN ('foo', 'bar')", + chasmEnabled: false, + want: "WorkflowId in ('" + prefix + "foo', '" + prefix + "bar')", + }, + { + name: "V1 NOT IN", + query: "ScheduleId NOT IN ('foo', 'bar')", + chasmEnabled: false, + want: "WorkflowId not in ('" + prefix + "foo', '" + prefix + "bar')", + }, + + // V1 — reserved TemporalScheduleId alias. + { + name: "V1 TemporalScheduleId alias", + query: "TemporalScheduleId = 'my-sched'", + chasmEnabled: false, + want: "WorkflowId = '" + prefix + "my-sched'", + }, + + // V1 — IS NOT NULL (no prefix, just column rename). + { + name: "V1 IS NOT NULL", + query: "ScheduleId IS NOT NULL", + chasmEnabled: false, + want: "WorkflowId is not null", + }, + + // CHASM (chasmEnabled=true) — positive operators produce OR. 
+ { + name: "CHASM equal OR", + query: "ScheduleId = 'my-sched'", + chasmEnabled: true, + want: "(WorkflowId = '" + prefix + "my-sched' or WorkflowId = 'my-sched')", + }, + { + name: "CHASM TemporalScheduleId alias OR", + query: "TemporalScheduleId = 'my-sched'", + chasmEnabled: true, + want: "(WorkflowId = '" + prefix + "my-sched' or WorkflowId = 'my-sched')", + }, + { + name: "CHASM starts with OR", + query: "ScheduleId STARTS_WITH 'my-'", + chasmEnabled: true, + want: "(WorkflowId starts_with '" + prefix + "my-' or WorkflowId starts_with 'my-')", + }, + { + name: "CHASM IN OR", + query: "ScheduleId IN ('foo', 'bar')", + chasmEnabled: true, + want: "(WorkflowId in ('" + prefix + "foo', '" + prefix + "bar') or WorkflowId in ('foo', 'bar'))", + }, + + // CHASM — negative operators produce AND. + { + name: "CHASM not equal AND", + query: "ScheduleId != 'my-sched'", + chasmEnabled: true, + want: "(WorkflowId != '" + prefix + "my-sched' and WorkflowId != 'my-sched')", + }, + { + name: "CHASM not starts with AND", + query: "ScheduleId NOT STARTS_WITH 'my-'", + chasmEnabled: true, + want: "(WorkflowId not starts_with '" + prefix + "my-' and WorkflowId not starts_with 'my-')", + }, + { + name: "CHASM NOT IN AND", + query: "ScheduleId NOT IN ('foo', 'bar')", + chasmEnabled: true, + want: "(WorkflowId not in ('" + prefix + "foo', '" + prefix + "bar') and WorkflowId not in ('foo', 'bar'))", + }, + + // CHASM — IS NOT NULL (just column rename, no OR/AND needed). + { + name: "CHASM IS NOT NULL", + query: "ScheduleId IS NOT NULL", + chasmEnabled: true, + want: "WorkflowId is not null", + }, + + // ScheduleId combined with another filter. + { + name: "CHASM combined AND with other filter", + query: "ScheduleId = 'my-sched' AND TemporalSchedulePaused = true", + chasmEnabled: true, + want: "(WorkflowId = '" + prefix + "my-sched' or WorkflowId = 'my-sched') and TemporalSchedulePaused = true", + }, + + // Custom SA named ScheduleId with explicit alias mapping (check 1: mapper). 
+ { + name: "custom SA with alias mapping not rewritten", + query: "ScheduleId = 'my-sched'", + chasmEnabled: true, + mapper: &searchattribute.TestMapper{WithCustomScheduleID: true}, + want: "ScheduleId = 'my-sched'", + }, + + // Custom SA named ScheduleId registered in the type map without alias (check 2: type map). + // This is the common case when a user adds ScheduleId via AddSearchAttributes. + { + name: "custom SA in type map not rewritten", + query: "ScheduleId = 'my-sched'", + chasmEnabled: true, + saNameType: &customScheduleIDNameTypeMap, + want: "ScheduleId = 'my-sched'", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + mapper := tc.mapper + if mapper == nil { + mapper = &searchattribute.NoopMapper{} + } + saNameType := emptyNameTypeMap + if tc.saNameType != nil { + saNameType = *tc.saNameType + } + got, err := RewriteScheduleIDQuery(tc.query, tc.chasmEnabled, mapper, saNameType, testNS) + require.NoError(t, err) + require.Equal(t, tc.want, got) + }) + } +} diff --git a/tests/schedule_test.go b/tests/schedule_test.go index 3ec9769ed23..0e99df0c2d0 100644 --- a/tests/schedule_test.go +++ b/tests/schedule_test.go @@ -118,6 +118,7 @@ func runSharedScheduleTests(t *testing.T, newContext contextFactory) { t.Run("TestUpdateScheduleBlobSizeLimit", func(t *testing.T) { testUpdateScheduleBlobSizeLimit(t, newContext) }) t.Run("TestListSchedulesPagination", func(t *testing.T) { testListSchedulesPagination(t, newContext) }) t.Run("TestListSchedulesFilterAndEntryFields", func(t *testing.T) { testListSchedulesFilterAndEntryFields(t, newContext) }) + t.Run("TestListSchedulesFilterByScheduleId", func(t *testing.T) { testListSchedulesFilterByScheduleID(t, newContext) }) t.Run("TestBufferSizeReportedWhenBuffered", func(t *testing.T) { testBufferSizeReportedWhenBuffered(t, newContext) }) } @@ -1326,6 +1327,137 @@ func testListSchedulesFilterAndEntryFields(t *testing.T, newContext contextFacto }, 15*time.Second, 1*time.Second) } 
+func testListSchedulesFilterByScheduleID(t *testing.T, newContext contextFactory) { + s := testcore.NewEnv(t, scheduleCommonOpts()...) + + sid1 := "sched-filter-by-id-alpha" + sid2 := "sched-filter-by-id-beta" + + schedule := func(sid string) *schedulepb.Schedule { + return &schedulepb.Schedule{ + Spec: &schedulepb.ScheduleSpec{ + Interval: []*schedulepb.IntervalSpec{ + {Interval: durationpb.New(1 * time.Hour)}, + }, + }, + Action: &schedulepb.ScheduleAction{ + Action: &schedulepb.ScheduleAction_StartWorkflow{ + StartWorkflow: &workflowpb.NewWorkflowExecutionInfo{ + WorkflowId: "wf-" + sid, + WorkflowType: &commonpb.WorkflowType{Name: "action"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: s.WorkerTaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + }, + }, + }, + State: &schedulepb.ScheduleState{Paused: true}, + } + } + + ctx := newContext(s.Context()) + + // Create two schedules. + for _, sid := range []string{sid1, sid2} { + _, err := s.FrontendClient().CreateSchedule(ctx, &workflowservice.CreateScheduleRequest{ + Namespace: s.Namespace().String(), + ScheduleId: sid, + Schedule: schedule(sid), + Identity: "test", + RequestId: uuid.NewString(), + }) + s.NoError(err) + } + + // Wait for both schedules to appear in visibility. + getScheduleEntryFromVisibility(s, sid1, newContext, nil) + getScheduleEntryFromVisibility(s, sid2, newContext, nil) + + listScheduleIDs := func(query string) []string { + t.Helper() + listResp, err := s.FrontendClient().ListSchedules(ctx, &workflowservice.ListSchedulesRequest{ + Namespace: s.Namespace().String(), + MaximumPageSize: 10, + Query: query, + }) + require.NoError(t, err) + var ids []string + for _, e := range listResp.Schedules { + ids = append(ids, e.ScheduleId) + } + return ids + } + + // wantIDs is the exact set of schedule IDs expected in the result. 
+ // IsNegativeScheduleIDOperator drives whether an operator excludes or includes: + // negative operators (!=, NOT IN, NOT STARTS_WITH) produce AND in the rewriter so both + // V1 and V2 forms are excluded; positive operators produce OR so both forms are included. + tests := []struct { + name string + query string + wantIDs []string + }{ + { + name: "Equal", + query: fmt.Sprintf("ScheduleId = '%s'", sid1), + wantIDs: []string{sid1}, + }, + { + // scheduler.IsNegativeScheduleIDOperator("!=") == true + name: "NotEqual", + query: fmt.Sprintf("ScheduleId != '%s'", sid1), + wantIDs: []string{sid2}, + }, + { + name: "StartsWith", + query: "ScheduleId STARTS_WITH 'sched-filter-by-id-'", + wantIDs: []string{sid1, sid2}, + }, + { + name: "StartsWithSpecific", + query: "ScheduleId STARTS_WITH 'sched-filter-by-id-a'", + wantIDs: []string{sid1}, + }, + { + // scheduler.IsNegativeScheduleIDOperator("not starts_with") == true + name: "NotStartsWith", + query: "ScheduleId NOT STARTS_WITH 'sched-filter-by-id-a'", + wantIDs: []string{sid2}, + }, + { + name: "In", + query: fmt.Sprintf("ScheduleId IN ('%s', '%s')", sid1, sid2), + wantIDs: []string{sid1, sid2}, + }, + { + name: "InSingle", + query: fmt.Sprintf("ScheduleId IN ('%s')", sid2), + wantIDs: []string{sid2}, + }, + { + // scheduler.IsNegativeScheduleIDOperator("not in") == true + name: "NotIn", + query: fmt.Sprintf("ScheduleId NOT IN ('%s')", sid1), + wantIDs: []string{sid2}, + }, + { + name: "IsNotNull", + query: "ScheduleId IS NOT NULL", + wantIDs: []string{sid1, sid2}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + s.EventuallyWithT(func(c *assert.CollectT) { + ids := listScheduleIDs(tc.query) + require.Len(c, ids, len(tc.wantIDs)) + for _, want := range tc.wantIDs { + require.Contains(c, ids, want) + } + }, 15*time.Second, 1*time.Second) + }) + } +} + func testScheduleInternalTaskQueue(t *testing.T, newContext contextFactory) { s := testcore.NewEnv(t, scheduleCommonOpts()...) 
errorMessageKeyword := "internal per-namespace task queue" From 16d78187f248e2977bfe30bb150ef78d867b59e1 Mon Sep 17 00:00:00 2001 From: Pasha Fateev Date: Fri, 15 May 2026 16:14:12 -0700 Subject: [PATCH 47/73] Bump go.temporal.io/api to v1.62.12 (#10293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replace the pseudo-version pin of `go.temporal.io/api` with the freshly-tagged `v1.62.12`. This is a prerequisite for cutting cloud release 3.156, which requires all OSS dependencies to be on tagged versions (runbook step 2). ## Details - `v1.62.12` tags api-go commit `0a978d4fd72ccadc7666d7f19aa6df9b335b3133` — the same commit the previous pseudo-version pinned (`v1.62.12-0.20260511225354-0a978d4fd72c`). - No source code changes — the module content is identical (the `/go.mod` h1 hash didn't change in go.sum, only the version-identifier hash). - Tag created via `temporalio/api`'s `create-release.yml` workflow. ## Test plan - [x] `go mod tidy` clean - [ ] CI passes --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6612146055b..7e264e7ba5d 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 - go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c + go.temporal.io/api v1.62.12 go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 go.temporal.io/sdk v1.41.1 go.uber.org/fx v1.24.0 diff --git a/go.sum b/go.sum index 6a6dba82142..d93212758de 100644 --- a/go.sum +++ b/go.sum @@ -469,8 +469,8 @@ go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:R go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 
h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= -go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c h1:ADDxNS26VTfDWmW55zYgAFkG6WEU83RHv0HwrarHXtk= -go.temporal.io/api v1.62.12-0.20260511225354-0a978d4fd72c/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.62.12 h1:627rVnItegQmrszg1bH4vfyc/1uNo5qCereCNkvZefw= +go.temporal.io/api v1.62.12/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 h1:1hKeH3GyR6YD6LKMHGCZ76t6h1Sgha0hXVQBxWi3dlQ= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2/go.mod h1:T8dnzVPeO+gaUTj9eDgm/lT2lZH4+JXNvrGaQGyVi50= go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA= From cbb464fa98dd153b116e26d24e475522d05920fd Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sat, 16 May 2026 00:41:53 -0700 Subject: [PATCH 48/73] Document historyrequire package (#10297) WISOTT --- docs/development/testing.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/development/testing.md b/docs/development/testing.md index 1c7734343d7..0167ebb5956 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -215,6 +215,42 @@ protorequire.ProtoEqual(t, expected, actual, ) ``` +### historyrequire package + +`historyrequire` has assertions to verify workflow event histories. + +Use `EqualHistoryEvents` to assert the full event sequence: + +```go +events := env.GetHistory(env.Namespace().String(), workflowExecution) +s.EqualHistoryEvents(` + 1 WorkflowExecutionStarted + 2 WorkflowTaskScheduled {"Attempt": 1} + 3 WorkflowTaskStarted + 4 WorkflowTaskCompleted + 5 WorkflowExecutionCompleted`, events) +``` + +Optional inline JSON (e.g. `{"Attempt": 1}`) can be used to assert on specific attributes. 
+ +Use `ContainsHistoryEvents` when you only care about a particular segment: + +```go +s.ContainsHistoryEvents(` + 4 WorkflowTaskFailed {"Identity": "worker-1"} + 5 WorkflowTaskScheduled + 6 WorkflowTaskStarted`, events) +``` + +Use `RequireHistoryEvent` when you only care about a single event type: + +```go +completed := s.RequireHistoryEvent(events, enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_COMPLETED) +require.Equal(t, "expected-result", completed.GetWorkflowExecutionCompletedEventAttributes().Result) +``` + +Or use `RequireNoHistoryEvent` when you expect no event of a given type to be present. + ### Test Cluster Use `testcore.NewEnv(t)` to create a test environment with access to a Temporal cluster for end-to-end testing. From 3c32027e55496c06bd2727a121c150cece544699 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Sat, 16 May 2026 00:56:22 -0700 Subject: [PATCH 49/73] Document testcontext package (#10296) WISOTT --- docs/development/testing.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/development/testing.md b/docs/development/testing.md index 0167ebb5956..92a412e681e 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -87,6 +87,14 @@ and provides assertion helpers and safety mechanisms. It replaces all use of `testify`'s `Suite`. +#### Context shorthand + +```go +ctx := s.Context() +``` + +`s.Context()` returns the subtest-scoped context - equivalent to `testcontext.New(s.T())`. + #### Await shorthand ```go @@ -165,6 +173,10 @@ func TestFoo(t *testing.T) { If you don't care about specific value, you can use `Any()` method to generate a random value. It indicates that value doesn't matter for this test and will never be asserted on (but required for API, for example). +### testcontext package + +There's no need to create your own `context.Context` via `context.WithTimeout`; use `testcontext.New(t)` instead. 
It returns a test-scoped `context.Context`, memoized per `*testing.T` and canceled on test end or timeout. + ### taskpoller package For end-to-end testing, consider using `taskpoller.TaskPoller` to handle workflow tasks. This is From 593fdba51696291421e6411e514546a47117f5eb Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Sat, 16 May 2026 19:32:02 -0700 Subject: [PATCH 50/73] Rename the dynamic config `component.callbacks.allowedAddresses` to `callback.allowedAddresses`. (#10234) ## What changed? Use the new dynamic config instead of leaving it as a placeholder and update its documentation. Also bumps the server version constant to `1.32.0`. This was supposed to be done after the `1.31.0` release and was missed. ## Why? Remove duplication as we migrate the code to the CHASM-backed implementation. ## Potential risks This config is only relevant for external endpoints (which are experimental) or older server versions, but the rename needs to be called out in the release notes. --- chasm/lib/callback/config.go | 12 +++++++----- common/headers/version_checker.go | 2 +- components/callbacks/config.go | 12 ------------ config/dynamicconfig/development-cass.yaml | 2 +- config/dynamicconfig/development-sql.yaml | 2 +- config/dynamicconfig/development-xdc.yaml | 2 +- docs/architecture/nexus.md | 5 ++++- service/frontend/fx.go | 14 ++------------ service/frontend/service.go | 6 +++--- service/frontend/workflow_handler_test.go | 5 ++--- tests/callbacks_migration_test.go | 8 ++++---- tests/callbacks_test.go | 9 ++++----- tests/schedule_test.go | 4 ++-- tests/standalone_activity_test.go | 5 ++--- tests/workflow_test.go | 4 ++-- tests/xdc/nexus_state_replication_test.go | 3 ++- 16 files changed, 38 insertions(+), 57 deletions(-) diff --git a/chasm/lib/callback/config.go b/chasm/lib/callback/config.go index 844add8d671..755f233b126 100644 --- a/chasm/lib/callback/config.go +++ b/chasm/lib/callback/config.go @@ -59,15 +59,17 @@ func configProvider(dc *dynamicconfig.Collection) *Config { } var
AllowedAddresses = dynamicconfig.NewNamespaceTypedSettingWithConverter( - "chasm.callback.allowedAddresses", + "callback.allowedAddresses", allowedAddressConverter, AddressMatchRules{}, `The per-namespace list of addresses that are allowed for callbacks and whether secure connections (https) are required. -URL: "temporal://system" is always allowed for worker callbacks. The default is no address rules. -URLs are checked against each in order when starting a workflow with attached callbacks and only need to match one to pass validation. -This configuration is required for external endpoint targets; any invalid entries are ignored. Each entry is a map with possible values: +URLs: "temporal://system" and "temporal://internal" are always allowed. The default is no address rules. +URLs are checked against each in order when starting a workflow or activity with attached callbacks or a standalone +callback and only need to match one to pass validation. This configuration is required for external endpoint targets; +any invalid entries are ignored. Each entry is a map with possible values: - "Pattern":string (required) the host:port pattern to which this config applies. - Wildcards, '*', are supported and can match any number of characters (e.g. '*' matches everything, 'prefix.*.domain' matches 'prefix.a.domain' as well as 'prefix.a.b.domain'). + Wildcards, '*', are supported and can match any number of characters (e.g. '*' matches everything, + 'prefix.*.domain' matches 'prefix.a.domain' as well as 'prefix.a.b.domain'). - "AllowInsecure":bool (optional, default=false) indicates whether https is required`) type AddressMatchRules struct { diff --git a/common/headers/version_checker.go b/common/headers/version_checker.go index baf77890671..ea439c02e5f 100644 --- a/common/headers/version_checker.go +++ b/common/headers/version_checker.go @@ -23,7 +23,7 @@ const ( // ServerVersion value can be changed by the create-tag Github workflow.
// If you change the var name or move it, be sure to update the workflow. - ServerVersion = "1.31.0" + ServerVersion = "1.32.0" // SupportedServerVersions is used by CLI and inter role communication. SupportedServerVersions = ">=1.0.0 <2.0.0" diff --git a/components/callbacks/config.go b/components/callbacks/config.go index 8b6a52e5239..824bf76c0cf 100644 --- a/components/callbacks/config.go +++ b/components/callbacks/config.go @@ -52,18 +52,6 @@ func ConfigProvider(dc *dynamicconfig.Collection) *Config { } } -var AllowedAddresses = dynamicconfig.NewNamespaceTypedSettingWithConverter( - "component.callbacks.allowedAddresses", - allowedAddressConverter, - AddressMatchRules{}, - `The per-namespace list of addresses that are allowed for callbacks and whether secure connections (https) are required. -URL: "temporal://system" is always allowed for worker callbacks. The default is no address rules. -URLs are checked against each in order when starting a workflow with attached callbacks and only need to match one to pass validation. -This configuration is required for external endpoint targets; any invalid entries are ignored. Each entry is a map with possible values: - - "Pattern":string (required) the host:port pattern to which this config applies. - Wildcards, '*', are supported and can match any number of characters (e.g. '*' matches everything, 'prefix.*.domain' matches 'prefix.a.domain' as well as 'prefix.a.b.domain'). 
- - "AllowInsecure":bool (optional, default=false) indicates whether https is required`) - type AddressMatchRules struct { Rules []AddressMatchRule } diff --git a/config/dynamicconfig/development-cass.yaml b/config/dynamicconfig/development-cass.yaml index ab9998f7af5..d21293713cb 100644 --- a/config/dynamicconfig/development-cass.yaml +++ b/config/dynamicconfig/development-cass.yaml @@ -39,7 +39,7 @@ frontend.workerVersioningRuleAPIs: - value: true component.nexusoperations.callback.endpoint.template: - value: http://localhost:7243/namespaces/{{.NamespaceName}}/nexus/callback -component.callbacks.allowedAddresses: +callback.allowedAddresses: - value: - Pattern: "*" AllowInsecure: true diff --git a/config/dynamicconfig/development-sql.yaml b/config/dynamicconfig/development-sql.yaml index 85a3c355591..c7ab8a86991 100644 --- a/config/dynamicconfig/development-sql.yaml +++ b/config/dynamicconfig/development-sql.yaml @@ -57,7 +57,7 @@ system.enableDeployments: - value: true component.nexusoperations.callback.endpoint.template: - value: http://localhost:7243/namespaces/{{.NamespaceName}}/nexus/callback -component.callbacks.allowedAddresses: +callback.allowedAddresses: - value: - Pattern: "*" AllowInsecure: true diff --git a/config/dynamicconfig/development-xdc.yaml b/config/dynamicconfig/development-xdc.yaml index fb5d13c7f94..926981865fa 100644 --- a/config/dynamicconfig/development-xdc.yaml +++ b/config/dynamicconfig/development-xdc.yaml @@ -33,7 +33,7 @@ frontend.workerVersioningWorkflowAPIs: - value: true component.nexusoperations.callback.endpoint.template: - value: http://localhost:7243/namespaces/{{.NamespaceName}}/nexus/callback -component.callbacks.allowedAddresses: +callback.allowedAddresses: - value: - Pattern: "*" AllowInsecure: true diff --git a/docs/architecture/nexus.md b/docs/architecture/nexus.md index 4e1025b5332..ddbcd1a852c 100644 --- a/docs/architecture/nexus.md +++ b/docs/architecture/nexus.md @@ -82,7 +82,10 @@ the 1.31.0 release and will be made 
the default. # When using Nexus for cross namespace calls, the URL's host is irrelevant as the address is resolved using # membership. The URL is a Go template that interpolates the `NamepaceName` and `NamespaceID` variables. - value: https://$PUBLIC_URL:7243/namespaces/{{.NamespaceName}}/nexus/callback - component.callbacks.allowedAddresses: + # From version 1.32.x + callback.allowedAddresses: + # Uncomment for versions older than 1.32.x + # component.callbacks.allowedAddresses: # Limits which callback URLs are accepted by the server. # Wildcard patterns (*) and insecure (HTTP) callbacks are intended for development only. # For production, restrict allowed hosts and set AllowInsecure to false diff --git a/service/frontend/fx.go b/service/frontend/fx.go index e3a46524103..53320dbaddd 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -43,7 +43,6 @@ import ( "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/common/telemetry" - hsmcallbacks "go.temporal.io/server/components/callbacks" "go.temporal.io/server/service" "go.temporal.io/server/service/frontend/configs" "go.temporal.io/server/service/history/tasks" @@ -843,22 +842,13 @@ func OperatorHandlerProvider( } // callbackValidatorProvider creates a callback Validator using the production dynamic config keys -// so that existing operator configurations (component.callbacks.allowedAddresses) are honored. -// TODO: Once HSM callbacks (components/callbacks) are removed, move this provider into -// chasm/lib/callback/fx.go and read directly from callback.AllowedAddresses. +// so that existing operator configurations (callback.allowedAddresses) are honored.
func callbackValidatorProvider(dc *dynamicconfig.Collection) callback.Validator { return callback.NewValidator( callback.MaxPerExecution.Get(dc), dynamicconfig.FrontendCallbackURLMaxLength.Get(dc), dynamicconfig.FrontendCallbackHeaderMaxSize.Get(dc), - func(ns string) callback.AddressMatchRules { - hsmRules := hsmcallbacks.AllowedAddresses.Get(dc)(ns) - chasmRules := make([]callback.AddressMatchRule, len(hsmRules.Rules)) - for i, r := range hsmRules.Rules { - chasmRules[i] = callback.AddressMatchRule{Regexp: r.Regexp, AllowInsecure: r.AllowInsecure} - } - return callback.AddressMatchRules{Rules: chasmRules} - }, + callback.AllowedAddresses.Get(dc), ) } diff --git a/service/frontend/service.go b/service/frontend/service.go index 04b5a1f3ded..0a743002912 100644 --- a/service/frontend/service.go +++ b/service/frontend/service.go @@ -10,6 +10,7 @@ import ( "go.temporal.io/api/workflowservice/v1" "go.temporal.io/server/api/adminservice/v1" "go.temporal.io/server/chasm/lib/activity" + "go.temporal.io/server/chasm/lib/callback" chasmnexus "go.temporal.io/server/chasm/lib/nexusoperation" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" @@ -18,7 +19,6 @@ import ( "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/persistence/visibility/manager" "go.temporal.io/server/common/retrypolicy" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/components/nexusoperations" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -200,7 +200,7 @@ type Config struct { CallbackURLMaxLength dynamicconfig.IntPropertyFnWithNamespaceFilter CallbackHeaderMaxSize dynamicconfig.IntPropertyFnWithNamespaceFilter MaxCallbacksPerWorkflow dynamicconfig.IntPropertyFnWithNamespaceFilter - CallbackEndpointConfigs dynamicconfig.TypedPropertyFnWithNamespaceFilter[callbacks.AddressMatchRules] + CallbackEndpointConfigs dynamicconfig.TypedPropertyFnWithNamespaceFilter[callback.AddressMatchRules] MaxNexusOperationTokenLength 
dynamicconfig.IntPropertyFnWithNamespaceFilter NexusRequestHeadersBlacklist dynamicconfig.TypedPropertyFn[*regexp.Regexp] @@ -384,7 +384,7 @@ func NewConfig( LinkMaxSize: dynamicconfig.FrontendLinkMaxSize.Get(dc), MaxLinksPerRequest: dynamicconfig.FrontendMaxLinksPerRequest.Get(dc), - CallbackEndpointConfigs: callbacks.AllowedAddresses.Get(dc), + CallbackEndpointConfigs: callback.AllowedAddresses.Get(dc), AdminEnableListHistoryTasks: dynamicconfig.AdminEnableListHistoryTasks.Get(dc), MaskInternalErrorDetails: dynamicconfig.FrontendMaskInternalErrorDetails.Get(dc), diff --git a/service/frontend/workflow_handler_test.go b/service/frontend/workflow_handler_test.go index 5d66a229221..4fd23f7de92 100644 --- a/service/frontend/workflow_handler_test.go +++ b/service/frontend/workflow_handler_test.go @@ -64,7 +64,6 @@ import ( "go.temporal.io/server/common/testing/protoassert" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/common/tqid" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/service/history/api" "go.temporal.io/server/service/history/tests" "go.temporal.io/server/service/worker/batcher" @@ -876,8 +875,8 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidAggregat s.mockSearchAttributesMapperProvider.EXPECT().GetMapper(gomock.Any()).AnyTimes().Return(nil, nil) config := s.newConfig() config.MaxLinksPerRequest = dc.GetIntPropertyFnFilteredByNamespace(10) - config.CallbackEndpointConfigs = dc.GetTypedPropertyFnFilteredByNamespace(callbacks.AddressMatchRules{ - Rules: []callbacks.AddressMatchRule{ + config.CallbackEndpointConfigs = dc.GetTypedPropertyFnFilteredByNamespace(callback.AddressMatchRules{ + Rules: []callback.AddressMatchRule{ { Regexp: regexp.MustCompile(`.*`), AllowInsecure: true, diff --git a/tests/callbacks_migration_test.go b/tests/callbacks_migration_test.go index 02498b151bb..04d9a42dc72 100644 --- a/tests/callbacks_migration_test.go +++ b/tests/callbacks_migration_test.go @@ 
-18,10 +18,10 @@ import ( "go.temporal.io/sdk/client" "go.temporal.io/sdk/worker" "go.temporal.io/sdk/workflow" + "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/nexus/nexusrpc" "go.temporal.io/server/common/testing/testvars" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/tests/testcore" "google.golang.org/protobuf/types/known/durationpb" ) @@ -52,7 +52,7 @@ func (s *CallbacksMigrationSuite) TestWorkflowCallbacks_CHASM_Enabled_Mid_WF() { // 5. Verify callback is invoked successfully s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) @@ -185,7 +185,7 @@ func (s *CallbacksMigrationSuite) TestWorkflowCallbacks_CHASM_Disabled_Mid_WF() // 6. Verify callback is invoked successfully despite EnableCHASMCallbacks being disabled s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) @@ -319,7 +319,7 @@ func (s *CallbacksMigrationSuite) TestWorkflowCallbacks_MixedCallbacks() { // 6. 
Verify both callbacks (HSM and CHASM) are invoked successfully s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) diff --git a/tests/callbacks_test.go b/tests/callbacks_test.go index 789ee2321e4..89b13bb5797 100644 --- a/tests/callbacks_test.go +++ b/tests/callbacks_test.go @@ -27,7 +27,6 @@ import ( "go.temporal.io/server/common/testing/protoassert" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/common/testing/testvars" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/tests/testcore" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/durationpb" @@ -126,7 +125,7 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { s.OverrideDynamicConfig(dynamicconfig.MaxCallbacksPerWorkflow, 2) s.OverrideDynamicConfig(callback.MaxPerExecution, 2) s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "some-ignored-address", "AllowInsecure": true}, map[string]any{"Pattern": "some-secure-address", "AllowInsecure": false}}, ) @@ -165,7 +164,7 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) @@ -421,7 +420,7 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) @@ -611,7 +610,7 @@ func blockingWorkflow(ctx workflow.Context) error { func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() { s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + 
callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) diff --git a/tests/schedule_test.go b/tests/schedule_test.go index 0e99df0c2d0..b124e72810d 100644 --- a/tests/schedule_test.go +++ b/tests/schedule_test.go @@ -25,6 +25,7 @@ import ( "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" schedulespb "go.temporal.io/server/api/schedule/v1" + "go.temporal.io/server/chasm/lib/callback" schedulerpb "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/headers" @@ -34,7 +35,6 @@ import ( "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/searchattribute/sadefs" "go.temporal.io/server/common/testing/protorequire" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/service/worker/dummy" "go.temporal.io/server/service/worker/scheduler" "go.temporal.io/server/tests/testcore" @@ -1711,7 +1711,7 @@ func testResetWithAdditionalCallback(t *testing.T, newContext contextFactory, en s := testcore.NewEnv(t, scheduleCommonOpts()...) 
s.OverrideDynamicConfig(dynamicconfig.EnableCHASMCallbacks, enableCHASMCallbacks) s.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index 49ce3fe4dc4..bb30839382b 100644 --- a/tests/standalone_activity_test.go +++ b/tests/standalone_activity_test.go @@ -32,7 +32,6 @@ import ( "go.temporal.io/server/common/tasktoken" "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/testing/protorequire" - "go.temporal.io/server/components/callbacks" "go.temporal.io/server/tests/testcore" "google.golang.org/grpc/codes" "google.golang.org/protobuf/types/known/durationpb" @@ -272,7 +271,7 @@ func (s *standaloneActivityTestSuite) TestIDConflictPolicy() { t.Run("OnConflictOptions", func(t *testing.T) { env.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) @@ -5840,7 +5839,7 @@ func (s *standaloneActivityTestSuite) TestCallbacks() { defer cancel() env.OverrideDynamicConfig( - callbacks.AllowedAddresses, + callback.AllowedAddresses, []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, ) diff --git a/tests/workflow_test.go b/tests/workflow_test.go index 9d5f44046fc..bca1d369e59 100644 --- a/tests/workflow_test.go +++ b/tests/workflow_test.go @@ -21,6 +21,7 @@ import ( updatepb "go.temporal.io/api/update/v1" workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/failure" "go.temporal.io/server/common/headers" @@ -31,7 +32,6 @@ import ( "go.temporal.io/server/common/primitives/timestamp" "go.temporal.io/server/common/searchattribute/sadefs" "go.temporal.io/server/common/testing/testvars" - 
"go.temporal.io/server/components/callbacks" "go.temporal.io/server/tests/testcore" "google.golang.org/protobuf/types/known/durationpb" ) @@ -202,7 +202,7 @@ func (s *WorkflowTestSuite) TestStartWorkflowExecution_UseExisting() { } func (s *WorkflowTestSuite) TestStartWorkflowExecution_UseExisting_OnConflictOptions() { - s.OverrideDynamicConfig(callbacks.AllowedAddresses, []any{ + s.OverrideDynamicConfig(callback.AllowedAddresses, []any{ map[string]any{"Pattern": "some-secure-address", "AllowInsecure": false}, map[string]any{"Pattern": "some-random-address", "AllowInsecure": false}, }) diff --git a/tests/xdc/nexus_state_replication_test.go b/tests/xdc/nexus_state_replication_test.go index 3763e1fd203..f4c98ef1bf9 100644 --- a/tests/xdc/nexus_state_replication_test.go +++ b/tests/xdc/nexus_state_replication_test.go @@ -26,6 +26,7 @@ import ( workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" sdkclient "go.temporal.io/sdk/client" + "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" commonnexus "go.temporal.io/server/common/nexus" "go.temporal.io/server/common/nexus/nexusrpc" @@ -70,7 +71,7 @@ func (s *NexusStateReplicationSuite) SetupSuite() { dynamicconfig.FrontendGlobalNamespaceNamespaceReplicationInducingAPIsRPS.Key(): 1000, dynamicconfig.RefreshNexusEndpointsMinWait.Key(): 1 * time.Millisecond, // tests use external endpoints so we need to allow them - callbacks.AllowedAddresses.Key(): []any{map[string]any{ + callback.AllowedAddresses.Key(): []any{map[string]any{ "Pattern": "*", "AllowInsecure": true, }}, // Cap callback retry backoff to avoid long waits after failover. 
From 573c2f787b9a60b3cdc4249216e1c78247b6f11e Mon Sep 17 00:00:00 2001 From: nikki-dag Date: Mon, 18 May 2026 10:12:25 -0500 Subject: [PATCH 51/73] Use proto-serialized context metadata in gRPC trailers (#10269) ## What changed - Replaced per-key trailer format with a single protobuf `ContextMetadata` message serialized into `contextmetadata-bin` trailer key - gRPC automatically base64-encodes the `-bin` value, making arbitrary bytes (including HTTP/2-unsafe control chars) transport-safe - Writer emits both proto format and legacy per-key format for backward compatibility during rolling deploys - Reader prefers proto key, falls back to legacy per-key format for old writers - Wired `TrailerToContextMetadataInterceptor` in test server to match production behavior ## Why Workflow type names containing control characters (newlines, NUL, etc.) cause the gRPC HTTP/2 framer to reject trailer values. A single proto message in a `-bin` key is simpler than per-key `-bin` suffixes: one trailer key, one serialization, no key naming constraints, cleaner backward compat removal. ## How tested - Unit tests for proto round-trip, dual-format emission, reader preference, legacy fallback, HTTP/2 safety - Integration test suite (TestWorkflowTypeEncodingSuite) with control chars, UTF-8, long names, -bin suffix workflow types - All existing tests pass ## Risks - During rolling deploy, old writers emit only legacy keys. New readers handle this via fallback path. No data loss. - After full rollout, legacy key emission can be removed in a follow-up. --- > [!NOTE] > **Medium Risk** > Changes how context metadata is encoded/decoded in gRPC trailers, which can affect cross-version compatibility and observability of propagated metadata. Backward-compatible legacy fallback and extensive unit/integration tests reduce the rollout risk. 
> > **Overview** > **Switches context-metadata propagation in gRPC trailers to a single proto-encoded payload.** Server-side `ContextMetadataInterceptor` now serializes all context metadata into a new `ContextMetadata` protobuf and emits it under `contextmetadata-bin`, avoiding HTTP/2-unsafe control characters in values. > > **Maintains rolling-deploy compatibility.** Writers still emit legacy per-key trailers (skipping unsafe values), and the client-side `TrailerToContextMetadataInterceptor` now *prefers* the proto trailer and falls back to legacy keys (including unprefixed well-known keys) when needed. > > Adds the new `contextpropagation/v1` proto + generated Go types, plus unit tests around proto/legacy behavior and an integration suite (`WorkflowTypeEncodingSuite`) covering control characters, UTF-8, long names, and `-bin` suffix workflow types. > > Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit e1d772fc3fe8e136fded479415f8f049ac896054. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot). 
--------- Co-authored-by: Claude Opus 4.6 (1M context) --- .../v1/message.go-helpers.pb.go | 43 ++++++ api/contextpropagation/v1/message.pb.go | 133 ++++++++++++++++ common/contextutil/metadata.go | 6 +- .../context_metadata_interceptor.go | 85 +++++++++-- .../context_metadata_interceptor_test.go | 143 ++++++++++++++++++ ...trailer_to_context_metadata_interceptor.go | 79 +++++++--- ...er_to_context_metadata_interceptor_test.go | 131 ++++++++++++++++ .../api/contextpropagation/v1/message.proto | 14 ++ tests/workflow_type_encoding_test.go | 121 +++++++++++++++ 9 files changed, 724 insertions(+), 31 deletions(-) create mode 100644 api/contextpropagation/v1/message.go-helpers.pb.go create mode 100644 api/contextpropagation/v1/message.pb.go create mode 100644 proto/internal/temporal/server/api/contextpropagation/v1/message.proto create mode 100644 tests/workflow_type_encoding_test.go diff --git a/api/contextpropagation/v1/message.go-helpers.pb.go b/api/contextpropagation/v1/message.go-helpers.pb.go new file mode 100644 index 00000000000..46897596d57 --- /dev/null +++ b/api/contextpropagation/v1/message.go-helpers.pb.go @@ -0,0 +1,43 @@ +// Code generated by protoc-gen-go-helpers. DO NOT EDIT. +package contextpropagation + +import ( + "google.golang.org/protobuf/proto" +) + +// Marshal an object of type ContextMetadata to the protobuf v3 wire format +func (val *ContextMetadata) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type ContextMetadata from the protobuf v3 wire format +func (val *ContextMetadata) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *ContextMetadata) Size() int { + return proto.Size(val) +} + +// Equal returns whether two ContextMetadata values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *ContextMetadata) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *ContextMetadata + switch t := that.(type) { + case *ContextMetadata: + that1 = t + case ContextMetadata: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/api/contextpropagation/v1/message.pb.go b/api/contextpropagation/v1/message.pb.go new file mode 100644 index 00000000000..db5011403a3 --- /dev/null +++ b/api/contextpropagation/v1/message.pb.go @@ -0,0 +1,133 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// plugins: +// protoc-gen-go +// protoc +// source: temporal/server/api/contextpropagation/v1/message.proto + +package contextpropagation + +import ( + reflect "reflect" + sync "sync" + unsafe "unsafe" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// ContextMetadata carries all context metadata key-value pairs in a single +// protobuf message. It is serialized into a gRPC trailer under the +// "contextmetadata-bin" key. The "-bin" suffix causes gRPC to base64-encode +// the value on the wire, making it safe for arbitrary byte sequences +// (including HTTP/2-unsafe control characters in workflow type names). 
+type ContextMetadata struct { + state protoimpl.MessageState `protogen:"open.v1"` + Entries map[string]string `protobuf:"bytes,1,rep,name=entries,proto3" json:"entries,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ContextMetadata) Reset() { + *x = ContextMetadata{} + mi := &file_temporal_server_api_contextpropagation_v1_message_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ContextMetadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ContextMetadata) ProtoMessage() {} + +func (x *ContextMetadata) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_api_contextpropagation_v1_message_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ContextMetadata.ProtoReflect.Descriptor instead. 
+func (*ContextMetadata) Descriptor() ([]byte, []int) { + return file_temporal_server_api_contextpropagation_v1_message_proto_rawDescGZIP(), []int{0} +} + +func (x *ContextMetadata) GetEntries() map[string]string { + if x != nil { + return x.Entries + } + return nil +} + +var File_temporal_server_api_contextpropagation_v1_message_proto protoreflect.FileDescriptor + +const file_temporal_server_api_contextpropagation_v1_message_proto_rawDesc = "" + + "\n" + + "7temporal/server/api/contextpropagation/v1/message.proto\x12)temporal.server.api.contextpropagation.v1\"\xb0\x01\n" + + "\x0fContextMetadata\x12a\n" + + "\aentries\x18\x01 \x03(\v2G.temporal.server.api.contextpropagation.v1.ContextMetadata.EntriesEntryR\aentries\x1a:\n" + + "\fEntriesEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01BDZBgo.temporal.io/server/api/contextpropagation/v1;contextpropagationb\x06proto3" + +var ( + file_temporal_server_api_contextpropagation_v1_message_proto_rawDescOnce sync.Once + file_temporal_server_api_contextpropagation_v1_message_proto_rawDescData []byte +) + +func file_temporal_server_api_contextpropagation_v1_message_proto_rawDescGZIP() []byte { + file_temporal_server_api_contextpropagation_v1_message_proto_rawDescOnce.Do(func() { + file_temporal_server_api_contextpropagation_v1_message_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_temporal_server_api_contextpropagation_v1_message_proto_rawDesc), len(file_temporal_server_api_contextpropagation_v1_message_proto_rawDesc))) + }) + return file_temporal_server_api_contextpropagation_v1_message_proto_rawDescData +} + +var file_temporal_server_api_contextpropagation_v1_message_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_temporal_server_api_contextpropagation_v1_message_proto_goTypes = []any{ + (*ContextMetadata)(nil), // 0: temporal.server.api.contextpropagation.v1.ContextMetadata + nil, // 1: 
temporal.server.api.contextpropagation.v1.ContextMetadata.EntriesEntry +} +var file_temporal_server_api_contextpropagation_v1_message_proto_depIdxs = []int32{ + 1, // 0: temporal.server.api.contextpropagation.v1.ContextMetadata.entries:type_name -> temporal.server.api.contextpropagation.v1.ContextMetadata.EntriesEntry + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_temporal_server_api_contextpropagation_v1_message_proto_init() } +func file_temporal_server_api_contextpropagation_v1_message_proto_init() { + if File_temporal_server_api_contextpropagation_v1_message_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_api_contextpropagation_v1_message_proto_rawDesc), len(file_temporal_server_api_contextpropagation_v1_message_proto_rawDesc)), + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_temporal_server_api_contextpropagation_v1_message_proto_goTypes, + DependencyIndexes: file_temporal_server_api_contextpropagation_v1_message_proto_depIdxs, + MessageInfos: file_temporal_server_api_contextpropagation_v1_message_proto_msgTypes, + }.Build() + File_temporal_server_api_contextpropagation_v1_message_proto = out.File + file_temporal_server_api_contextpropagation_v1_message_proto_goTypes = nil + file_temporal_server_api_contextpropagation_v1_message_proto_depIdxs = nil +} diff --git a/common/contextutil/metadata.go b/common/contextutil/metadata.go index 6717f22d36c..c6c2f319509 100644 --- a/common/contextutil/metadata.go +++ b/common/contextutil/metadata.go @@ -21,7 +21,11 @@ type ( var metadataCtxKey = metadataContextKey{} 
const ( - // MetadataKeyWorkflowType is the context metadata key for workflow type + // MetadataKeyWorkflowType is the context metadata key for workflow type. + // These keys are serialized into a protobuf message (ContextMetadata) and sent + // in the "contextmetadata-bin" gRPC trailer. The "-bin" suffix causes gRPC to + // base64-encode the value on the wire, so metadata values may contain arbitrary + // bytes including HTTP/2-unsafe control characters. MetadataKeyWorkflowType = "workflow-type" // MetadataKeyWorkflowTaskQueue is the context metadata key for workflow task queue MetadataKeyWorkflowTaskQueue = "workflow-task-queue" diff --git a/common/rpc/interceptor/context_metadata_interceptor.go b/common/rpc/interceptor/context_metadata_interceptor.go index 9d6fb51d0ca..1368d417d42 100644 --- a/common/rpc/interceptor/context_metadata_interceptor.go +++ b/common/rpc/interceptor/context_metadata_interceptor.go @@ -4,17 +4,25 @@ import ( "context" "fmt" + contextpropagationspb "go.temporal.io/server/api/contextpropagation/v1" "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "google.golang.org/grpc" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" ) // trailerKeyPrefix namespaces context metadata keys in gRPC trailers so the client-side // interceptor can distinguish them from gRPC-internal and other interceptor trailer keys. const trailerKeyPrefix = "contextmetadata-" +// protoTrailerKey is the gRPC trailer key that carries all context metadata as a single +// serialized protobuf. The "-bin" suffix tells gRPC to base64-encode the value on the +// wire, making it safe for arbitrary byte sequences (including HTTP/2-unsafe control +// characters in workflow type names). 
+const protoTrailerKey = "contextmetadata-bin" + type ContextMetadataInterceptor struct { setTrailer bool logger log.Logger @@ -63,24 +71,16 @@ func (c *ContextMetadataInterceptor) appendContextMetadataToTrailer(ctx context. default: } - var trailerPairs []string - - for key, value := range contextutil.ContextMetadataGetAll(ctx) { - valStr := fmt.Sprint(value) - trailerPairs = append(trailerPairs, trailerKeyPrefix+key, valStr) - // Backward compatibility: also emit unprefixed keys for older readers. - if key == contextutil.MetadataKeyWorkflowType || key == contextutil.MetadataKeyWorkflowTaskQueue { - trailerPairs = append(trailerPairs, key, valStr) - } - } - - if len(trailerPairs) == 0 { + allMetadata := contextutil.ContextMetadataGetAll(ctx) + if len(allMetadata) == 0 { c.throttledLogger.Info("ContextMetadataInterceptor: No metadata in context, not setting trailer", tag.NewStringTag("fullMethod", info.FullMethod), ) return } + trailerPairs := c.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(trailerPairs...) c.throttledLogger.Info("ContextMetadataInterceptor: Setting trailer", tag.NewAnyTag("trailer", trailer), @@ -93,3 +93,64 @@ func (c *ContextMetadataInterceptor) appendContextMetadataToTrailer(ctx context. tag.NewStringTag("fullMethod", info.FullMethod)) } } + +// buildTrailerPairs constructs the gRPC trailer key-value pairs from context metadata. +// +// It emits metadata in two formats: +// 1. Proto format: all entries serialized into a single ContextMetadata protobuf under +// the "contextmetadata-bin" key. The "-bin" suffix causes gRPC to base64-encode the +// value, making it safe for arbitrary bytes (including HTTP/2-unsafe control chars). +// 2. Legacy format (backward compatibility during rolling deploys): individual +// "contextmetadata-" entries plus unprefixed well-known keys. Old readers that +// don't understand the proto key will fall back to these. 
+func (c *ContextMetadataInterceptor) buildTrailerPairs(allMetadata map[string]any) []string { + var trailerPairs []string + + // Proto format: serialize all metadata into a single protobuf message. + protoMsg := &contextpropagationspb.ContextMetadata{ + Entries: make(map[string]string, len(allMetadata)), + } + for key, value := range allMetadata { + protoMsg.Entries[key] = fmt.Sprint(value) + } + if protoBytes, err := proto.Marshal(protoMsg); err != nil { + c.throttledLogger.Warn("ContextMetadataInterceptor: Failed to marshal proto metadata, falling back to legacy-only", + tag.Error(err), + ) + } else { + trailerPairs = append(trailerPairs, protoTrailerKey, string(protoBytes)) + } + + // Legacy format: emit individual keys for backward compatibility with older readers. + // Skip entries with HTTP/2-unsafe values (the proto key handles those). + for key, value := range allMetadata { + valStr := fmt.Sprint(value) + if !isHTTP2SafeValue(valStr) { + continue + } + trailerPairs = append(trailerPairs, trailerKeyPrefix+key, valStr) + // Backward compatibility: also emit unprefixed keys for older readers. + if key == contextutil.MetadataKeyWorkflowType || key == contextutil.MetadataKeyWorkflowTaskQueue { + trailerPairs = append(trailerPairs, key, valStr) + } + } + + return trailerPairs +} + +// isHTTP2SafeValue returns true if the string can be used as an HTTP/2 header value. +// Per RFC 9113 section 8.2.1, header field values must not contain NUL (0x00), +// CR (0x0D), or LF (0x0A). Additionally, Go's HTTP/2 framer rejects C0 control +// characters (0x00-0x1F except HTAB 0x09) and DEL (0x7F). 
+func isHTTP2SafeValue(s string) bool { + for i := 0; i < len(s); i++ { + b := s[i] + if b == 0x09 { // HTAB is allowed + continue + } + if b < 0x20 || b == 0x7f { // C0 controls and DEL + return false + } + } + return true +} diff --git a/common/rpc/interceptor/context_metadata_interceptor_test.go b/common/rpc/interceptor/context_metadata_interceptor_test.go index 2aa1305d1eb..a359a5b414e 100644 --- a/common/rpc/interceptor/context_metadata_interceptor_test.go +++ b/common/rpc/interceptor/context_metadata_interceptor_test.go @@ -6,10 +6,12 @@ import ( "testing" "github.com/stretchr/testify/require" + contextpropagationspb "go.temporal.io/server/api/contextpropagation/v1" "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/testing/testlogger" "google.golang.org/grpc" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" ) func TestContextMetadataInterceptor_Intercept(t *testing.T) { @@ -261,3 +263,144 @@ func TestNewContextMetadataInterceptor(t *testing.T) { }) } } + +func TestBuildTrailerPairs_ProtoRoundTrip(t *testing.T) { + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: "test-workflow", + contextutil.MetadataKeyWorkflowTaskQueue: "test-queue", + "other-key": "other-value", + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) + + // Proto key must be present. + protoValues := trailer[protoTrailerKey] + require.Len(t, protoValues, 1, "expected exactly one proto trailer value") + + // Deserialize and verify round-trip. 
+ protoMsg := &contextpropagationspb.ContextMetadata{} + err := proto.Unmarshal([]byte(protoValues[0]), protoMsg) + require.NoError(t, err) + require.Equal(t, "test-workflow", protoMsg.Entries[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "test-queue", protoMsg.Entries[contextutil.MetadataKeyWorkflowTaskQueue]) + require.Equal(t, "other-value", protoMsg.Entries["other-key"]) +} + +func TestBuildTrailerPairs_EmitsBothProtoAndLegacyKeys(t *testing.T) { + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: "test-workflow", + contextutil.MetadataKeyWorkflowTaskQueue: "test-queue", + "other-key": "other-value", + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) + + // Proto key present. + require.Contains(t, trailer, protoTrailerKey) + + // Legacy prefixed keys present. + require.Contains(t, trailer, trailerKeyPrefix+contextutil.MetadataKeyWorkflowType) + require.Contains(t, trailer, trailerKeyPrefix+contextutil.MetadataKeyWorkflowTaskQueue) + require.Contains(t, trailer, trailerKeyPrefix+"other-key") + + // Well-known unprefixed keys also present (backward compat). + require.Contains(t, trailer, contextutil.MetadataKeyWorkflowType) + require.Contains(t, trailer, contextutil.MetadataKeyWorkflowTaskQueue) + + // Non-well-known keys only appear with prefix (no unprefixed "other-key"). + require.NotContains(t, trailer, "other-key") +} + +func TestBuildTrailerPairs_EmptyMetadata(t *testing.T) { + allMetadata := map[string]any{} + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + // Even with empty metadata, proto is serialized (valid empty message). + trailer := metadata.Pairs(pairs...) 
+ require.Contains(t, trailer, protoTrailerKey) +} + +func TestBuildTrailerPairs_NonStringValues(t *testing.T) { + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: 12345, + contextutil.MetadataKeyWorkflowTaskQueue: struct{ name string }{name: "queue"}, + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) + + // Proto key present with fmt.Sprint values. + protoValues := trailer[protoTrailerKey] + require.Len(t, protoValues, 1) + + protoMsg := &contextpropagationspb.ContextMetadata{} + err := proto.Unmarshal([]byte(protoValues[0]), protoMsg) + require.NoError(t, err) + require.Equal(t, fmt.Sprint(12345), protoMsg.Entries[contextutil.MetadataKeyWorkflowType]) +} + +func TestBuildTrailerPairs_ControlCharsInValues(t *testing.T) { + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: "workflow\nwith\x00control\rchars", + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) + + // Proto key must be present and correctly round-trip. + protoValues := trailer[protoTrailerKey] + require.Len(t, protoValues, 1) + + protoMsg := &contextpropagationspb.ContextMetadata{} + err := proto.Unmarshal([]byte(protoValues[0]), protoMsg) + require.NoError(t, err) + require.Equal(t, "workflow\nwith\x00control\rchars", protoMsg.Entries[contextutil.MetadataKeyWorkflowType]) + + // Legacy keys must NOT be present for HTTP/2-unsafe values. 
+ require.NotContains(t, trailer, trailerKeyPrefix+contextutil.MetadataKeyWorkflowType) + require.NotContains(t, trailer, contextutil.MetadataKeyWorkflowType) +} + +func TestBuildTrailerPairs_MixedSafeAndUnsafeValues(t *testing.T) { + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: "workflow\nwith\nnewlines", + contextutil.MetadataKeyWorkflowTaskQueue: "safe-queue-name", + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) + + // Proto key carries both entries. + protoValues := trailer[protoTrailerKey] + require.Len(t, protoValues, 1) + protoMsg := &contextpropagationspb.ContextMetadata{} + err := proto.Unmarshal([]byte(protoValues[0]), protoMsg) + require.NoError(t, err) + require.Equal(t, "workflow\nwith\nnewlines", protoMsg.Entries[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "safe-queue-name", protoMsg.Entries[contextutil.MetadataKeyWorkflowTaskQueue]) + + // Unsafe workflow type: legacy keys skipped. + require.NotContains(t, trailer, trailerKeyPrefix+contextutil.MetadataKeyWorkflowType) + + // Safe task queue: legacy keys present. 
+ require.Contains(t, trailer, trailerKeyPrefix+contextutil.MetadataKeyWorkflowTaskQueue) + require.Contains(t, trailer, contextutil.MetadataKeyWorkflowTaskQueue) +} + +func TestIsHTTP2SafeValue(t *testing.T) { + require.True(t, isHTTP2SafeValue("hello world")) + require.True(t, isHTTP2SafeValue("value with\ttab")) + require.True(t, isHTTP2SafeValue("")) + require.False(t, isHTTP2SafeValue("has\nnewline")) + require.False(t, isHTTP2SafeValue("has\x00null")) + require.False(t, isHTTP2SafeValue("has\rcarriage")) + require.False(t, isHTTP2SafeValue("has\x07bell")) + require.False(t, isHTTP2SafeValue("has\x7fDEL")) +} diff --git a/common/rpc/interceptor/trailer_to_context_metadata_interceptor.go b/common/rpc/interceptor/trailer_to_context_metadata_interceptor.go index a69014b6d4a..7594f40478b 100644 --- a/common/rpc/interceptor/trailer_to_context_metadata_interceptor.go +++ b/common/rpc/interceptor/trailer_to_context_metadata_interceptor.go @@ -4,11 +4,13 @@ import ( "context" "strings" + contextpropagationspb "go.temporal.io/server/api/contextpropagation/v1" "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "google.golang.org/grpc" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" ) // TrailerToContextMetadataInterceptor reads metadata from gRPC response trailers @@ -33,32 +35,73 @@ func TrailerToContextMetadataInterceptor(logger log.Logger) grpc.UnaryClientInte err := invoker(ctx, method, req, reply, cc, opts...) - trailerMetadata := make(map[string]string) - propagatedMetadata := make(map[string]string) + trailerMetadata, propagatedMetadata := extractMetadataFromTrailer(ctx, trailer, throttledLogger) - for prefixedKey, values := range trailer { - key, ok := strings.CutPrefix(prefixedKey, trailerKeyPrefix) - if !ok { - // Backward compatibility: accept unprefixed keys from older writers. 
- if prefixedKey != contextutil.MetadataKeyWorkflowType && prefixedKey != contextutil.MetadataKeyWorkflowTaskQueue { - continue + logMetadataPropagationStatus(ctx, method, trailerMetadata, propagatedMetadata, throttledLogger) + + return err + } +} + +// extractMetadataFromTrailer reads context metadata from gRPC response trailers. +// +// It first checks for the proto-encoded "contextmetadata-bin" key, which carries all +// metadata in a single base64-encoded protobuf. If present, this is authoritative. +// Otherwise, it falls back to the legacy per-key format for backward compatibility +// with older writers during rolling deploys. +func extractMetadataFromTrailer( + ctx context.Context, + trailer metadata.MD, + throttledLogger log.ThrottledLogger, +) (trailerMetadata map[string]string, propagatedMetadata map[string]string) { + trailerMetadata = make(map[string]string) + propagatedMetadata = make(map[string]string) + + // Try proto format first (authoritative). + if values := trailer[protoTrailerKey]; len(values) > 0 { + protoMsg := &contextpropagationspb.ContextMetadata{} + err := proto.Unmarshal([]byte(values[0]), protoMsg) + if err != nil { + throttledLogger.Warn("TrailerToContextMetadataInterceptor: Failed to unmarshal proto trailer, falling back to legacy", + tag.Error(err), + ) + } + if err == nil { + for key, value := range protoMsg.GetEntries() { + trailerMetadata[key] = value + if contextutil.ContextMetadataSet(ctx, key, value) { + propagatedMetadata[key] = value } - key = prefixedKey - } - if len(values) == 0 { - continue } + return trailerMetadata, propagatedMetadata + } + } - trailerMetadata[key] = values[0] - if contextutil.ContextMetadataSet(ctx, key, values[0]) { - propagatedMetadata[key] = values[0] + // Fallback: legacy per-key format for backward compatibility with older writers. + for prefixedKey, values := range trailer { + // Skip the proto trailer key itself in the legacy path. 
+ if prefixedKey == protoTrailerKey { + continue + } + key, ok := strings.CutPrefix(prefixedKey, trailerKeyPrefix) + if !ok { + // Backward compatibility: accept unprefixed keys from older writers. + if prefixedKey != contextutil.MetadataKeyWorkflowType && prefixedKey != contextutil.MetadataKeyWorkflowTaskQueue { + continue } + key = prefixedKey + } + if len(values) == 0 { + continue } - logMetadataPropagationStatus(ctx, method, trailerMetadata, propagatedMetadata, throttledLogger) - - return err + trailerMetadata[key] = values[0] + if contextutil.ContextMetadataSet(ctx, key, values[0]) { + propagatedMetadata[key] = values[0] + } } + + return trailerMetadata, propagatedMetadata } func logMetadataPropagationStatus( diff --git a/common/rpc/interceptor/trailer_to_context_metadata_interceptor_test.go b/common/rpc/interceptor/trailer_to_context_metadata_interceptor_test.go index 163bfa358eb..075f70bdc7b 100644 --- a/common/rpc/interceptor/trailer_to_context_metadata_interceptor_test.go +++ b/common/rpc/interceptor/trailer_to_context_metadata_interceptor_test.go @@ -6,11 +6,13 @@ import ( "testing" "github.com/stretchr/testify/require" + contextpropagationspb "go.temporal.io/server/api/contextpropagation/v1" "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/log" "go.temporal.io/server/common/testing/testlogger" "google.golang.org/grpc" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" ) func TestTrailerToContextMetadataInterceptor(t *testing.T) { @@ -416,3 +418,132 @@ func TestLogMetadataPropagationStatus(t *testing.T) { }) } } + +func TestExtractMetadataFromTrailer_PrefersProtoKey(t *testing.T) { + // Proto key contains "proto-workflow" while legacy keys contain "legacy-workflow". + // Reader should prefer the proto key. 
+ protoMsg := &contextpropagationspb.ContextMetadata{ + Entries: map[string]string{ + contextutil.MetadataKeyWorkflowType: "proto-workflow", + contextutil.MetadataKeyWorkflowTaskQueue: "proto-queue", + }, + } + protoBytes, err := proto.Marshal(protoMsg) + require.NoError(t, err) + + trailer := metadata.MD{ + protoTrailerKey: []string{string(protoBytes)}, + trailerKeyPrefix + contextutil.MetadataKeyWorkflowType: []string{"legacy-workflow"}, + trailerKeyPrefix + contextutil.MetadataKeyWorkflowTaskQueue: []string{"legacy-queue"}, + } + + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, propagatedMeta := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError), func() float64 { return 1.0 })) + + require.Equal(t, "proto-workflow", trailerMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "proto-queue", trailerMeta[contextutil.MetadataKeyWorkflowTaskQueue]) + require.Equal(t, "proto-workflow", propagatedMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "proto-queue", propagatedMeta[contextutil.MetadataKeyWorkflowTaskQueue]) + + // Verify context was set with proto values. + val, ok := contextutil.ContextMetadataGet(ctx, contextutil.MetadataKeyWorkflowType) + require.True(t, ok) + require.Equal(t, "proto-workflow", val) +} + +func TestExtractMetadataFromTrailer_FallsBackToLegacy(t *testing.T) { + // No proto key present; reader should fall back to legacy format. 
+ trailer := metadata.MD{ + trailerKeyPrefix + contextutil.MetadataKeyWorkflowType: []string{"legacy-workflow"}, + trailerKeyPrefix + contextutil.MetadataKeyWorkflowTaskQueue: []string{"legacy-queue"}, + } + + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, propagatedMeta := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError), func() float64 { return 1.0 })) + + require.Equal(t, "legacy-workflow", trailerMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "legacy-queue", trailerMeta[contextutil.MetadataKeyWorkflowTaskQueue]) + require.Equal(t, "legacy-workflow", propagatedMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "legacy-queue", propagatedMeta[contextutil.MetadataKeyWorkflowTaskQueue]) +} + +func TestExtractMetadataFromTrailer_ProtoRoundTrip(t *testing.T) { + // Build trailer pairs using the writer, then read them back using the reader. + allMetadata := map[string]any{ + contextutil.MetadataKeyWorkflowType: "test-workflow", + contextutil.MetadataKeyWorkflowTaskQueue: "test-queue", + "custom-key": "custom-value", + } + + cmi := NewContextMetadataInterceptor(true, testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError)) + pairs := cmi.buildTrailerPairs(allMetadata) + trailer := metadata.Pairs(pairs...) 
+ + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, propagatedMeta := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError), func() float64 { return 1.0 })) + + require.Equal(t, "test-workflow", trailerMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "test-queue", trailerMeta[contextutil.MetadataKeyWorkflowTaskQueue]) + require.Equal(t, "custom-value", trailerMeta["custom-key"]) + require.Equal(t, trailerMeta, propagatedMeta) +} + +func TestExtractMetadataFromTrailer_EmptyProtoMetadata(t *testing.T) { + protoMsg := &contextpropagationspb.ContextMetadata{ + Entries: map[string]string{}, + } + protoBytes, err := proto.Marshal(protoMsg) + require.NoError(t, err) + + trailer := metadata.MD{ + protoTrailerKey: []string{string(protoBytes)}, + } + + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, propagatedMeta := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError), func() float64 { return 1.0 })) + + require.Empty(t, trailerMeta) + require.Empty(t, propagatedMeta) +} + +func TestExtractMetadataFromTrailer_InvalidProtoFallsBackToLegacy(t *testing.T) { + // Invalid proto bytes should cause fallback to legacy format. 
+ trailer := metadata.MD{ + protoTrailerKey: []string{"this-is-not-valid-proto"}, + trailerKeyPrefix + contextutil.MetadataKeyWorkflowType: []string{"legacy-workflow"}, + trailerKeyPrefix + contextutil.MetadataKeyWorkflowTaskQueue: []string{"legacy-queue"}, + } + + tl := testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError) + tl.Expect(testlogger.Warn, "TrailerToContextMetadataInterceptor: Failed to unmarshal proto trailer, falling back to legacy") + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, propagatedMeta := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(tl, func() float64 { return 1.0 })) + + require.Equal(t, "legacy-workflow", trailerMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "legacy-queue", trailerMeta[contextutil.MetadataKeyWorkflowTaskQueue]) + require.Equal(t, "legacy-workflow", propagatedMeta[contextutil.MetadataKeyWorkflowType]) + require.Equal(t, "legacy-queue", propagatedMeta[contextutil.MetadataKeyWorkflowTaskQueue]) +} + +func TestExtractMetadataFromTrailer_ControlCharsInProto(t *testing.T) { + protoMsg := &contextpropagationspb.ContextMetadata{ + Entries: map[string]string{ + contextutil.MetadataKeyWorkflowType: "workflow\nwith\x00control\rchars", + }, + } + protoBytes, err := proto.Marshal(protoMsg) + require.NoError(t, err) + + trailer := metadata.MD{ + protoTrailerKey: []string{string(protoBytes)}, + } + + ctx := contextutil.WithMetadataContext(t.Context()) + trailerMeta, _ := extractMetadataFromTrailer(ctx, trailer, log.NewThrottledLogger(testlogger.NewTestLogger(t, testlogger.FailOnAnyUnexpectedError), func() float64 { return 1.0 })) + + require.Equal(t, "workflow\nwith\x00control\rchars", trailerMeta[contextutil.MetadataKeyWorkflowType]) + + val, ok := contextutil.ContextMetadataGet(ctx, contextutil.MetadataKeyWorkflowType) + require.True(t, ok) + require.Equal(t, "workflow\nwith\x00control\rchars", val) +} diff --git 
a/proto/internal/temporal/server/api/contextpropagation/v1/message.proto b/proto/internal/temporal/server/api/contextpropagation/v1/message.proto new file mode 100644 index 00000000000..06c978d7a2b --- /dev/null +++ b/proto/internal/temporal/server/api/contextpropagation/v1/message.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package temporal.server.api.contextpropagation.v1; + +option go_package = "go.temporal.io/server/api/contextpropagation/v1;contextpropagation"; + +// ContextMetadata carries all context metadata key-value pairs in a single +// protobuf message. It is serialized into a gRPC trailer under the +// "contextmetadata-bin" key. The "-bin" suffix causes gRPC to base64-encode +// the value on the wire, making it safe for arbitrary byte sequences +// (including HTTP/2-unsafe control characters in workflow type names). +message ContextMetadata { + map<string, string> entries = 1; +} diff --git a/tests/workflow_type_encoding_test.go b/tests/workflow_type_encoding_test.go new file mode 100644 index 00000000000..23ce83746fb --- /dev/null +++ b/tests/workflow_type_encoding_test.go @@ -0,0 +1,121 @@ +package tests + +import ( + "fmt" + "strings" + "testing" + + sdkclient "go.temporal.io/sdk/client" + "go.temporal.io/sdk/workflow" + "go.temporal.io/server/common/testing/parallelsuite" + "go.temporal.io/server/tests/testcore" +) + +// WorkflowTypeEncodingSuite verifies that workflow type names containing +// arbitrary bytes — including HTTP/2-unsafe control characters and multi-byte +// UTF-8 — can be used end-to-end without breaking gRPC transport. +// +// Workflow type names flow through several layers (mutable state, context +// metadata, gRPC response trailers via ContextMetadataInterceptor) and any +// layer that places raw strings into HTTP/2 headers will reject C0 control +// bytes (0x00-0x1F except HTAB 0x09) and DEL (0x7F). The server-side fix +// serializes all context metadata into a single protobuf message under the +// "contextmetadata-bin" gRPC trailer key.
The "-bin" suffix causes gRPC to +// base64-encode the value on the wire (RFC 4648), making it safe for +// arbitrary byte sequences. +type WorkflowTypeEncodingSuite struct { + parallelsuite.Suite[*WorkflowTypeEncodingSuite] +} + +func TestWorkflowTypeEncodingSuite(t *testing.T) { + parallelsuite.Run(t, &WorkflowTypeEncodingSuite{}) +} + +func (s *WorkflowTypeEncodingSuite) runWithWorkflowType(env *testcore.TestEnv, workflowType string) error { + env.SdkWorker().RegisterWorkflowWithOptions( + func(ctx workflow.Context) error { return nil }, + workflow.RegisterOptions{Name: workflowType}, + ) + + run, err := env.SdkClient().ExecuteWorkflow( + env.Context(), + sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf-trailer"), + TaskQueue: env.WorkerTaskQueue(), + }, + workflowType, + ) + if err != nil { + return err + } + return run.Get(env.Context(), nil) +} + +func (s *WorkflowTypeEncodingSuite) TestPlainASCII() { + s.Run("Succeeds", func(s *WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + s.NoError(s.runWithWorkflowType(env, "PlainAsciiWorkflowType")) + }) +} + +func (s *WorkflowTypeEncodingSuite) TestControlCharsInWorkflowType() { + cases := []struct { + label string + char string + }{ + {"HTAB (safe control char)", "\t"}, + {"newline", "\n"}, + {"carriage return", "\r"}, + {"CRLF", "\r\n"}, + {"NUL", "\x00"}, + {"bell", "\x07"}, + {"escape", "\x1b"}, + {"DEL", "\x7f"}, + } + for _, tc := range cases { + s.Run(fmt.Sprintf("control char %s succeeds", tc.label), func(s *WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + s.NoError(s.runWithWorkflowType(env, "Foo"+tc.char+"Bar")) + }) + } +} + +func (s *WorkflowTypeEncodingSuite) TestAllControlCharsWorkflowType() { + s.Run("only control chars succeeds", func(s *WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + s.NoError(s.runWithWorkflowType(env, "\n\x00\r")) + }) +} + +func (s *WorkflowTypeEncodingSuite) TestLongWorkflowType() { + s.Run("succeeds", func(s 
*WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + longName := strings.Repeat("a", 999) + s.NoError(s.runWithWorkflowType(env, longName)) + }) +} + +func (s *WorkflowTypeEncodingSuite) TestWorkflowTypeEndingInBin() { + s.Run("succeeds", func(s *WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + s.NoError(s.runWithWorkflowType(env, "my-workflow-bin")) + }) +} + +func (s *WorkflowTypeEncodingSuite) TestUTF8WorkflowType() { + cases := []struct { + label string + workflowType string + }{ + {"CJK", "Workflow-日本語"}, + {"emoji", "🚀-workflow"}, + {"accented", "cafe-resume"}, + {"mixed", "uber-naive-🎉-工作流"}, + } + for _, tc := range cases { + s.Run(fmt.Sprintf("UTF-8 %s succeeds", tc.label), func(s *WorkflowTypeEncodingSuite) { + env := testcore.NewEnv(s.T()) + s.NoError(s.runWithWorkflowType(env, tc.workflowType)) + }) + } +} From 2886bf602c11113ffb6d0a5b1053a5d345968d85 Mon Sep 17 00:00:00 2001 From: stuart-wells Date: Mon, 18 May 2026 09:13:15 -0700 Subject: [PATCH 52/73] Supporting default TLS config for remote clusters (#9932) ## What changed? Adding support for a fallback/default TLS section in the client config for remote clusters ## Why? When adding a new server, a user would have to add the new hostname to the config. This change allows for new servers to have a default cert, while still preserving the existing behavior. Closes #9881 ## How did you test it? 
- [x] built - [x] run locally and tested manually - [x] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) --------- Co-authored-by: stuart-wells --- .../rpc/encryption/fixedTLSConfigProvider.go | 5 +- .../encryption/local_store_tls_provider.go | 40 +++- common/rpc/encryption/tls_config_test.go | 173 ++++++++++++++++++ 3 files changed, 212 insertions(+), 6 deletions(-) diff --git a/common/rpc/encryption/fixedTLSConfigProvider.go b/common/rpc/encryption/fixedTLSConfigProvider.go index adeb01a55a5..5cc98e5db76 100644 --- a/common/rpc/encryption/fixedTLSConfigProvider.go +++ b/common/rpc/encryption/fixedTLSConfigProvider.go @@ -41,7 +41,10 @@ func (f *FixedTLSConfigProvider) GetFrontendClientConfig() (*tls.Config, error) // GetRemoteClusterClientConfig implements [TLSConfigProvider.GetRemoteClusterClientConfig]. func (f *FixedTLSConfigProvider) GetRemoteClusterClientConfig(hostname string) (*tls.Config, error) { - return f.RemoteClusterClientConfigs[hostname], nil + if key, ok := matchRemoteClusterKey(hostname, f.RemoteClusterClientConfigs); ok { + return f.RemoteClusterClientConfigs[key], nil + } + return nil, nil } // GetExpiringCerts implements [TLSConfigProvider.GetExpiringCerts]. diff --git a/common/rpc/encryption/local_store_tls_provider.go b/common/rpc/encryption/local_store_tls_provider.go index dab35c79fa8..05dd14431b0 100644 --- a/common/rpc/encryption/local_store_tls_provider.go +++ b/common/rpc/encryption/local_store_tls_provider.go @@ -4,6 +4,9 @@ import ( "crypto/tls" "crypto/x509" "fmt" + "path" + "sort" + "strings" "sync" "time" @@ -61,8 +64,8 @@ func NewLocalStoreTlsProvider(tlsConfig *config.RootTLS, metricsHandler metrics. 
} remoteClusterClientCertProvider := make(map[string]CertProvider) - for hostname, groupTLS := range tlsConfig.RemoteClusters { - remoteClusterClientCertProvider[hostname] = certProviderFactory(&groupTLS, nil, nil, tlsConfig.RefreshInterval, logger) + for key, groupTLS := range tlsConfig.RemoteClusters { + remoteClusterClientCertProvider[key] = certProviderFactory(&groupTLS, nil, nil, tlsConfig.RefreshInterval, logger) } provider := &localStoreTlsProvider{ @@ -139,16 +142,16 @@ func (s *localStoreTlsProvider) GetFrontendClientConfig() (*tls.Config, error) { } func (s *localStoreTlsProvider) GetRemoteClusterClientConfig(hostname string) (*tls.Config, error) { - groupTLS, ok := s.settings.RemoteClusters[hostname] + key, ok := matchRemoteClusterKey(hostname, s.settings.RemoteClusters) if !ok { return nil, nil } - + groupTLS := s.settings.RemoteClusters[key] return s.getOrCreateRemoteClusterClientConfig( hostname, func() (*tls.Config, error) { return newClientTLSConfig( - s.remoteClusterClientCertProvider[hostname], + s.remoteClusterClientCertProvider[key], groupTLS.Client.ServerName, groupTLS.Server.RequireClientAuth, false, @@ -475,3 +478,30 @@ func isSystemWorker(tls *config.RootTLS) bool { len(tls.SystemWorker.Client.RootCAData) > 0 || len(tls.SystemWorker.Client.RootCAFiles) > 0 || tls.SystemWorker.Client.ForceTLS } + +// matchRemoteClusterKey checks exact matches, then finds the match with the most non-wildcard characters +func matchRemoteClusterKey[V any](hostname string, m map[string]V) (string, bool) { + if _, ok := m[hostname]; ok && !strings.Contains(hostname, "*") { + return hostname, true + } + var wildcards []string + for k := range m { + if strings.Contains(k, "*") { + wildcards = append(wildcards, k) + } + } + sort.Slice(wildcards, func(i, j int) bool { + li := len(wildcards[i]) - strings.Count(wildcards[i], "*") + lj := len(wildcards[j]) - strings.Count(wildcards[j], "*") + if li != lj { + return li > lj + } + return wildcards[i] < wildcards[j] + }) + 
for _, k := range wildcards { + if matched, err := path.Match(k, hostname); err == nil && matched { + return k, true + } + } + return "", false +} diff --git a/common/rpc/encryption/tls_config_test.go b/common/rpc/encryption/tls_config_test.go index 12d728be961..feb2f6b5bd5 100644 --- a/common/rpc/encryption/tls_config_test.go +++ b/common/rpc/encryption/tls_config_test.go @@ -1,11 +1,16 @@ package encryption import ( + "crypto/tls" + "crypto/x509" "testing" + "time" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "go.temporal.io/server/common/config" + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/metrics" ) type ( @@ -218,3 +223,171 @@ func (s *tlsConfigTest) TestSystemWorkerTLSConfig() { client.RootCAData = []string{""} s.Error(validateRootTLS(cfg)) } + +// stubCertProvider is a no-op CertProvider for use in unit tests. +type stubCertProvider struct{} + +func (s *stubCertProvider) FetchServerCertificate() (*tls.Certificate, error) { return nil, nil } +func (s *stubCertProvider) FetchClientCAs() (*x509.CertPool, error) { return nil, nil } +func (s *stubCertProvider) FetchClientCertificate(_ bool) (*tls.Certificate, error) { + return nil, nil +} +func (s *stubCertProvider) FetchServerRootCAsForClient(_ bool) (*x509.CertPool, error) { + return nil, nil +} +func (s *stubCertProvider) GetExpiringCerts(_ time.Duration) (expiring CertExpirationMap, expired CertExpirationMap, err error) { + return nil, nil, nil +} + +func stubCertProviderFactory(_ *config.GroupTLS, _ *config.WorkerTLS, _ *config.ClientTLS, _ time.Duration, _ log.Logger) CertProvider { + return &stubCertProvider{} +} + +func newTestTLSProvider(t *testing.T, cfg config.RootTLS) TLSConfigProvider { + t.Helper() + provider, err := NewLocalStoreTlsProvider(&cfg, metrics.NoopMetricsHandler, log.NewTestLogger(), stubCertProviderFactory) + require.NoError(t, err) + return provider +} + +func TestGetRemoteClusterClientConfig_NoConfig(t *testing.T) { + provider := 
newTestTLSProvider(t, config.RootTLS{}) + tlsCfg, err := provider.GetRemoteClusterClientConfig("some-host") + require.NoError(t, err) + require.Nil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_UnknownHostNoDefault(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "cluster-a.example.com": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + tlsCfg, err := provider.GetRemoteClusterClientConfig("unknown-host.example.com") + require.NoError(t, err) + require.Nil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_ExactMatch(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "cluster-a.example.com": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + tlsCfg, err := provider.GetRemoteClusterClientConfig("cluster-a.example.com") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + + // Unknown host with no default → nil + tlsCfg, err = provider.GetRemoteClusterClientConfig("cluster-b.example.com") + require.NoError(t, err) + require.Nil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_StarFallback(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "*": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + tlsCfg, err := provider.GetRemoteClusterClientConfig("any-unknown-host") + require.NoError(t, err) + require.NotNil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_ExactOverStar(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "cluster-a.example.com": {Client: config.ClientTLS{ForceTLS: false}}, + "*": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + // Exact match → nil (ForceTLS: false, so IsClientEnabled() == false) + tlsCfg, err := provider.GetRemoteClusterClientConfig("cluster-a.example.com") + 
require.NoError(t, err) + require.Nil(t, tlsCfg) + + // Unknown host falls back to * (ForceTLS: true) → non-nil + tlsCfg, err = provider.GetRemoteClusterClientConfig("unknown-host") + require.NoError(t, err) + require.NotNil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_WildcardSubdomainMatch(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "*.temporal.cloud": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + tlsCfg, err := provider.GetRemoteClusterClientConfig("cluster-a.temporal.cloud") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + + tlsCfg, err = provider.GetRemoteClusterClientConfig("cluster-b.temporal.cloud") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + + // No match — different domain + tlsCfg, err = provider.GetRemoteClusterClientConfig("random.example.com") + require.NoError(t, err) + require.Nil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_MoreSpecificWildcardWins(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "*.temporal.cloud": {Client: config.ClientTLS{ForceTLS: false}}, + "*": {Client: config.ClientTLS{ForceTLS: true}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + // *.temporal.cloud is more specific, ForceTLS:false → nil + tlsCfg, err := provider.GetRemoteClusterClientConfig("cluster-a.temporal.cloud") + require.NoError(t, err) + require.Nil(t, tlsCfg) + + // Falls through to * catch-all, ForceTLS:true → non-nil + tlsCfg, err = provider.GetRemoteClusterClientConfig("random.example.com") + require.NoError(t, err) + require.NotNil(t, tlsCfg) +} + +func TestGetRemoteClusterClientConfig_ThreeTierPriority(t *testing.T) { + cfg := config.RootTLS{ + RemoteClusters: map[string]config.GroupTLS{ + "cluster-a.temporal.cloud": {Client: config.ClientTLS{ForceTLS: true, ServerName: "exact"}}, + "*.temporal.cloud": {Client: config.ClientTLS{ForceTLS: true, ServerName: "wildcard"}}, + "*": 
{Client: config.ClientTLS{ForceTLS: true, ServerName: "star"}}, + }, + } + provider := newTestTLSProvider(t, cfg) + + // Exact match + tlsCfg, err := provider.GetRemoteClusterClientConfig("cluster-a.temporal.cloud") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + require.Equal(t, "exact", tlsCfg.ServerName) + + // Wildcard subdomain match + tlsCfg, err = provider.GetRemoteClusterClientConfig("cluster-b.temporal.cloud") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + require.Equal(t, "wildcard", tlsCfg.ServerName) + + // Catch-all + tlsCfg, err = provider.GetRemoteClusterClientConfig("random.example.com") + require.NoError(t, err) + require.NotNil(t, tlsCfg) + require.Equal(t, "star", tlsCfg.ServerName) +} From 7b9ecf24646208c2999614600381b341bf1c99ab Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 18 May 2026 13:37:35 -0400 Subject: [PATCH 53/73] Improve chasm Map deserialization/serialization logic (#10270) ## What changed? Improve chasm Map deserialization/serialization logic ## Why? chasm.Map should not be added to DeletedNodes if never persisted. When deserializing, it should always be initialized to avoid excessive nil checks. ## How did you test it? - [X] built - [X] run locally and tested manually - [X] covered by existing tests - [X] added new unit test(s) - [ ] added new functional test(s) --- chasm/tree.go | 28 +++++++++--------- chasm/tree_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 15 deletions(-) diff --git a/chasm/tree.go b/chasm/tree.go index e8771576490..3b9e4cb4fe9 100644 --- a/chasm/tree.go +++ b/chasm/tree.go @@ -893,8 +893,17 @@ func (n *Node) syncSubComponents() error { internalField.Set(reflect.ValueOf(internal)) } case fieldKindSubMap: - if field.val.IsNil() { - // If Map field is nil then delete all collection items nodes and collection node itself. + // Validate map type before doing anything with it. 
+ if !field.val.IsNil() && field.val.Kind() != reflect.Map { + return softassert.UnexpectedInternalErr( + n.logger, + "CHASM map must be of map type", + fmt.Errorf("node %s", n.nodeName)) + } + + if field.val.IsNil() || len(field.val.MapKeys()) == 0 { + // nil or empty map: skip without creating a collection node. + // Any existing collection node will be removed by deleteChildren below. continue } @@ -906,19 +915,6 @@ func (n *Node) syncSubComponents() error { n.children[field.name] = collectionNode } - // Validate map type. - if field.val.Kind() != reflect.Map { - return softassert.UnexpectedInternalErr( - n.logger, - "CHASM map must be of map type", - fmt.Errorf("node %s", n.nodeName)) - } - - if len(field.val.MapKeys()) == 0 { - // If Map field is empty then delete all collection items nodes and collection node itself. - continue - } - mapValT := field.typ.Elem() if mapValT.Kind() != reflect.Struct || genericTypePrefix(mapValT) != chasmFieldTypePrefix { return softassert.UnexpectedInternalErr( @@ -1291,6 +1287,8 @@ func (n *Node) deserializeComponentNode( } mapFieldV.SetMapIndex(mapKeyV, chasmFieldV) } + } else if field.val.IsNil() { + field.val.Set(reflect.MakeMap(field.typ)) } case fieldKindMutableState: field.val.Set(reflect.ValueOf(NewMSPointer(n.backend))) diff --git a/chasm/tree_test.go b/chasm/tree_test.go index bce043594a3..91052b85d71 100644 --- a/chasm/tree_test.go +++ b/chasm/tree_test.go @@ -436,9 +436,81 @@ func (s *nodeSuite) TestCollectionAttributes() { s.Len(mutation.UpdatedNodes, 1, "although root component is not updated, collection is tracked as part of component, therefore root must be updated") s.Len(mutation.DeletedNodes, 3, "collection and 2 items must be deleted") }) + + s.Run("Nil map "+tc.name+" on first transaction produces no deletions", func() { + // A map field that was never set (nil) should not produce any DeletedNodes + // entries when the first transaction is closed — there is nothing in persistence + // to delete. 
+ var nilSerializedNodes map[string]*persistencespb.ChasmNode + rootNode, err := s.newTestTree(nilSerializedNodes) + s.NoError(err) + + rootNode.value = &TestComponent{} // all map fields are nil + rootNode.valueState = valueStateNeedSyncStructure + + mutation, err := rootNode.CloseTransaction() + s.NoError(err) + s.Empty(mutation.DeletedNodes, "no nodes should be deleted for a map that never existed") + }) + + s.Run("Empty (non-nil) map "+tc.name+" on first transaction produces no deletions", func() { + // A map field initialized to an empty (non-nil) map should also not produce + // any DeletedNodes entries — an empty map is equivalent to nil at the + // persistence layer and there is nothing to delete. + var nilSerializedNodes map[string]*persistencespb.ChasmNode + rootNode, err := s.newTestTree(nilSerializedNodes) + s.NoError(err) + + var rootComponent TestComponent + switch tc.mapField { + case "SubComponents": + rootComponent.SubComponents = Map[string, *TestSubComponent1]{} + case "PendingActivities": + rootComponent.PendingActivities = Map[int, *TestSubComponent1]{} + default: + s.Failf("unexpected mapField", "unknown mapField %q in test case", tc.mapField) + } + rootNode.value = &rootComponent + rootNode.valueState = valueStateNeedSyncStructure + + mutation, err := rootNode.CloseTransaction() + s.NoError(err) + s.Empty(mutation.DeletedNodes, "no nodes should be deleted for a newly-created empty map") + }) } } +func (s *nodeSuite) TestMapDeserializeNilToEmpty() { + // Verify that a Map field that was never set deserializes to an empty (non-nil) + // map so callers can range over it without nil checks. 
+ var nilSerializedNodes map[string]*persistencespb.ChasmNode + rootNode, err := s.newTestTree(nilSerializedNodes) + s.NoError(err) + + rootNode.value = &TestComponent{} + rootNode.valueState = valueStateNeedSyncStructure + + mutations, err := rootNode.CloseTransaction() + s.NoError(err) + // Only root is updated; no collection nodes because maps were nil/empty. + s.Len(mutations.UpdatedNodes, 1) + s.Empty(mutations.DeletedNodes) + + persistedNodes := common.CloneProtoMap(mutations.UpdatedNodes) + + rootNode2, err := s.newTestTree(persistedNodes) + s.NoError(err) + + err = rootNode2.deserialize(reflect.TypeFor[*TestComponent]()) + s.NoError(err) + + rootComponent := rootNode2.value.(*TestComponent) + s.NotNil(rootComponent.SubComponents, "SubComponents must be non-nil after deserialization") + s.Empty(rootComponent.SubComponents) + s.NotNil(rootComponent.PendingActivities, "PendingActivities must be non-nil after deserialization") + s.Empty(rootComponent.PendingActivities) +} + func (s *nodeSuite) TestPointerAttributes() { var persistedNodes map[string]*persistencespb.ChasmNode From 69d2f05eb31b37cadc3eba56c07ab9d92d1a4fbc Mon Sep 17 00:00:00 2001 From: Santiago Bricio Rojas <83271398+sanbricio@users.noreply.github.com> Date: Mon, 18 May 2026 21:10:17 +0200 Subject: [PATCH 54/73] fix missing RUnlock in describe (#10077) ## What changed? Added missing versionedQueuesLock.RUnlock() in describe() before the early return when defaultQueue() returns nil. (This can't happen in production.) ## Why? The function acquires RLock at the top but does not use defer; instead, it releases the lock manually at each exit point. One early return path was missing the RUnlock: when buildIds contains "" and the default queue is not yet initialized, the function returned errDefaultQueueNotInit without releasing the lock, leaving the mutex permanently locked and blocking any concurrent writer. ## How did you test it? 
- [X] built --- service/matching/task_queue_partition_manager.go | 1 + 1 file changed, 1 insertion(+) diff --git a/service/matching/task_queue_partition_manager.go b/service/matching/task_queue_partition_manager.go index c06a3be70c0..ed82428c4e6 100644 --- a/service/matching/task_queue_partition_manager.go +++ b/service/matching/task_queue_partition_manager.go @@ -1077,6 +1077,7 @@ func (pm *taskQueuePartitionManagerImpl) describe( if b == "" { dbq := pm.defaultQueue() if dbq == nil { + pm.versionedQueuesLock.RUnlock() return nil, errDefaultQueueNotInit } versions[dbq.QueueKey().Version()] = true From 1cc2e396e91728093f354aa659bdb021469d6eff Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 18 May 2026 16:22:10 -0400 Subject: [PATCH 55/73] Add DeleteExecution history service API (#10119) ## What changed? Add a dedicated DeleteExecution RPC to the history service for deleting CHASM executions by namespace, execution key, and archetype ID. Uses the CHASM engine's DeleteExecution path (terminate-if-running + async DeleteExecutionTask), replacing the ForceDeleteWorkflowExecution workaround in the delete namespace activity, which bypassed the engine. Also adds NewComponentRefByArchetypeID to chasm/ref.go to construct a ComponentRef from a runtime archetype ID without a compile-time type parameter. ## Why? Remove dependency on force deletion API. ## How did you test it? 
- [X] built - [X] run locally and tested manually - [X] covered by existing tests - [X] added new unit test(s) - [X] added new functional test(s) --- .../v1/request_response.go-helpers.pb.go | 74 + api/historyservice/v1/request_response.pb.go | 1243 +++++++++-------- api/historyservice/v1/service.pb.go | 389 +++--- api/historyservice/v1/service_grpc.pb.go | 37 + .../v1/service_grpc.pb.mock.go | 35 + client/history/client_gen.go | 20 + client/history/metric_client_gen.go | 14 + client/history/retryable_client_gen.go | 15 + common/dynamicconfig/constants.go | 9 + .../logtags/history_service_server_gen.go | 7 + .../historyservice/v1/request_response.proto | 13 + .../api/historyservice/v1/service.proto | 4 + service/history/api/deleteexecution/api.go | 30 + service/history/handler.go | 16 + .../deleteexecutions/activities.go | 30 +- .../deleteexecutions/workflow_test.go | 35 +- service/worker/deletenamespace/fx.go | 3 + tests/chasm_test.go | 78 +- 18 files changed, 1268 insertions(+), 784 deletions(-) create mode 100644 service/history/api/deleteexecution/api.go diff --git a/api/historyservice/v1/request_response.go-helpers.pb.go b/api/historyservice/v1/request_response.go-helpers.pb.go index dba8b8c3821..b64e5c3c32e 100644 --- a/api/historyservice/v1/request_response.go-helpers.pb.go +++ b/api/historyservice/v1/request_response.go-helpers.pb.go @@ -4519,6 +4519,80 @@ func (this *ForceDeleteWorkflowExecutionResponse) Equal(that interface{}) bool { return proto.Equal(this, that1) } +// Marshal an object of type DeleteExecutionRequest to the protobuf v3 wire format +func (val *DeleteExecutionRequest) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type DeleteExecutionRequest from the protobuf v3 wire format +func (val *DeleteExecutionRequest) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *DeleteExecutionRequest) Size() int { + 
return proto.Size(val) +} + +// Equal returns whether two DeleteExecutionRequest values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *DeleteExecutionRequest) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *DeleteExecutionRequest + switch t := that.(type) { + case *DeleteExecutionRequest: + that1 = t + case DeleteExecutionRequest: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + +// Marshal an object of type DeleteExecutionResponse to the protobuf v3 wire format +func (val *DeleteExecutionResponse) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type DeleteExecutionResponse from the protobuf v3 wire format +func (val *DeleteExecutionResponse) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *DeleteExecutionResponse) Size() int { + return proto.Size(val) +} + +// Equal returns whether two DeleteExecutionResponse values are equivalent by recursively +// comparing the message's fields. 
+// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *DeleteExecutionResponse) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *DeleteExecutionResponse + switch t := that.(type) { + case *DeleteExecutionResponse: + that1 = t + case DeleteExecutionResponse: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} + // Marshal an object of type GetDLQTasksRequest to the protobuf v3 wire format func (val *GetDLQTasksRequest) Marshal() ([]byte, error) { return proto.Marshal(val) diff --git a/api/historyservice/v1/request_response.pb.go b/api/historyservice/v1/request_response.pb.go index 46d476bcba2..f332c01a9e7 100644 --- a/api/historyservice/v1/request_response.pb.go +++ b/api/historyservice/v1/request_response.pb.go @@ -8104,6 +8104,119 @@ func (x *ForceDeleteWorkflowExecutionResponse) GetResponse() *v118.DeleteWorkflo return nil } +type DeleteExecutionRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + NamespaceId string `protobuf:"bytes,1,opt,name=namespace_id,json=namespaceId,proto3" json:"namespace_id,omitempty"` + Execution *v14.WorkflowExecution `protobuf:"bytes,2,opt,name=execution,proto3" json:"execution,omitempty"` + // (-- api-linter: core::0141::forbidden-types=disabled --) + ArchetypeId uint32 `protobuf:"varint,3,opt,name=archetype_id,json=archetypeId,proto3" json:"archetype_id,omitempty"` + Reason string `protobuf:"bytes,4,opt,name=reason,proto3" json:"reason,omitempty"` + Identity string `protobuf:"bytes,5,opt,name=identity,proto3" json:"identity,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DeleteExecutionRequest) Reset() { + *x = DeleteExecutionRequest{} + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[122] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x 
*DeleteExecutionRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DeleteExecutionRequest) ProtoMessage() {} + +func (x *DeleteExecutionRequest) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[122] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DeleteExecutionRequest.ProtoReflect.Descriptor instead. +func (*DeleteExecutionRequest) Descriptor() ([]byte, []int) { + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{122} +} + +func (x *DeleteExecutionRequest) GetNamespaceId() string { + if x != nil { + return x.NamespaceId + } + return "" +} + +func (x *DeleteExecutionRequest) GetExecution() *v14.WorkflowExecution { + if x != nil { + return x.Execution + } + return nil +} + +func (x *DeleteExecutionRequest) GetArchetypeId() uint32 { + if x != nil { + return x.ArchetypeId + } + return 0 +} + +func (x *DeleteExecutionRequest) GetReason() string { + if x != nil { + return x.Reason + } + return "" +} + +func (x *DeleteExecutionRequest) GetIdentity() string { + if x != nil { + return x.Identity + } + return "" +} + +type DeleteExecutionResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DeleteExecutionResponse) Reset() { + *x = DeleteExecutionResponse{} + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[123] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DeleteExecutionResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DeleteExecutionResponse) ProtoMessage() {} + +func (x *DeleteExecutionResponse) ProtoReflect() protoreflect.Message { + mi := 
&file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[123] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DeleteExecutionResponse.ProtoReflect.Descriptor instead. +func (*DeleteExecutionResponse) Descriptor() ([]byte, []int) { + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{123} +} + type GetDLQTasksRequest struct { state protoimpl.MessageState `protogen:"open.v1"` DlqKey *v119.HistoryDLQKey `protobuf:"bytes,1,opt,name=dlq_key,json=dlqKey,proto3" json:"dlq_key,omitempty"` @@ -8116,7 +8229,7 @@ type GetDLQTasksRequest struct { func (x *GetDLQTasksRequest) Reset() { *x = GetDLQTasksRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[122] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[124] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8128,7 +8241,7 @@ func (x *GetDLQTasksRequest) String() string { func (*GetDLQTasksRequest) ProtoMessage() {} func (x *GetDLQTasksRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[122] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[124] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8141,7 +8254,7 @@ func (x *GetDLQTasksRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetDLQTasksRequest.ProtoReflect.Descriptor instead. 
func (*GetDLQTasksRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{122} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{124} } func (x *GetDLQTasksRequest) GetDlqKey() *v119.HistoryDLQKey { @@ -8178,7 +8291,7 @@ type GetDLQTasksResponse struct { func (x *GetDLQTasksResponse) Reset() { *x = GetDLQTasksResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[123] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[125] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8190,7 +8303,7 @@ func (x *GetDLQTasksResponse) String() string { func (*GetDLQTasksResponse) ProtoMessage() {} func (x *GetDLQTasksResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[123] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[125] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8203,7 +8316,7 @@ func (x *GetDLQTasksResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetDLQTasksResponse.ProtoReflect.Descriptor instead. 
func (*GetDLQTasksResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{123} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{125} } func (x *GetDLQTasksResponse) GetDlqTasks() []*v119.HistoryDLQTask { @@ -8230,7 +8343,7 @@ type DeleteDLQTasksRequest struct { func (x *DeleteDLQTasksRequest) Reset() { *x = DeleteDLQTasksRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[124] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[126] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8242,7 +8355,7 @@ func (x *DeleteDLQTasksRequest) String() string { func (*DeleteDLQTasksRequest) ProtoMessage() {} func (x *DeleteDLQTasksRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[124] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[126] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8255,7 +8368,7 @@ func (x *DeleteDLQTasksRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteDLQTasksRequest.ProtoReflect.Descriptor instead. 
func (*DeleteDLQTasksRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{124} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{126} } func (x *DeleteDLQTasksRequest) GetDlqKey() *v119.HistoryDLQKey { @@ -8282,7 +8395,7 @@ type DeleteDLQTasksResponse struct { func (x *DeleteDLQTasksResponse) Reset() { *x = DeleteDLQTasksResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[125] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[127] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8294,7 +8407,7 @@ func (x *DeleteDLQTasksResponse) String() string { func (*DeleteDLQTasksResponse) ProtoMessage() {} func (x *DeleteDLQTasksResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[125] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[127] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8307,7 +8420,7 @@ func (x *DeleteDLQTasksResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteDLQTasksResponse.ProtoReflect.Descriptor instead. 
func (*DeleteDLQTasksResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{125} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{127} } func (x *DeleteDLQTasksResponse) GetMessagesDeleted() int64 { @@ -8328,7 +8441,7 @@ type ListQueuesRequest struct { func (x *ListQueuesRequest) Reset() { *x = ListQueuesRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[126] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[128] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8340,7 +8453,7 @@ func (x *ListQueuesRequest) String() string { func (*ListQueuesRequest) ProtoMessage() {} func (x *ListQueuesRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[126] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[128] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8353,7 +8466,7 @@ func (x *ListQueuesRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ListQueuesRequest.ProtoReflect.Descriptor instead. 
func (*ListQueuesRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{126} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{128} } func (x *ListQueuesRequest) GetQueueType() int32 { @@ -8387,7 +8500,7 @@ type ListQueuesResponse struct { func (x *ListQueuesResponse) Reset() { *x = ListQueuesResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[127] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[129] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8399,7 +8512,7 @@ func (x *ListQueuesResponse) String() string { func (*ListQueuesResponse) ProtoMessage() {} func (x *ListQueuesResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[127] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[129] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8412,7 +8525,7 @@ func (x *ListQueuesResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ListQueuesResponse.ProtoReflect.Descriptor instead. 
func (*ListQueuesResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{127} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{129} } func (x *ListQueuesResponse) GetQueues() []*ListQueuesResponse_QueueInfo { @@ -8443,7 +8556,7 @@ type AddTasksRequest struct { func (x *AddTasksRequest) Reset() { *x = AddTasksRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[128] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[130] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8455,7 +8568,7 @@ func (x *AddTasksRequest) String() string { func (*AddTasksRequest) ProtoMessage() {} func (x *AddTasksRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[128] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[130] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8468,7 +8581,7 @@ func (x *AddTasksRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use AddTasksRequest.ProtoReflect.Descriptor instead. 
func (*AddTasksRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{128} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{130} } func (x *AddTasksRequest) GetShardId() int32 { @@ -8493,7 +8606,7 @@ type AddTasksResponse struct { func (x *AddTasksResponse) Reset() { *x = AddTasksResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[129] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[131] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8505,7 +8618,7 @@ func (x *AddTasksResponse) String() string { func (*AddTasksResponse) ProtoMessage() {} func (x *AddTasksResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[129] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[131] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8518,7 +8631,7 @@ func (x *AddTasksResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use AddTasksResponse.ProtoReflect.Descriptor instead. 
func (*AddTasksResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{129} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{131} } type ListTasksRequest struct { @@ -8530,7 +8643,7 @@ type ListTasksRequest struct { func (x *ListTasksRequest) Reset() { *x = ListTasksRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[130] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[132] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8542,7 +8655,7 @@ func (x *ListTasksRequest) String() string { func (*ListTasksRequest) ProtoMessage() {} func (x *ListTasksRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[130] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[132] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8555,7 +8668,7 @@ func (x *ListTasksRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ListTasksRequest.ProtoReflect.Descriptor instead. 
func (*ListTasksRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{130} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{132} } func (x *ListTasksRequest) GetRequest() *v118.ListHistoryTasksRequest { @@ -8574,7 +8687,7 @@ type ListTasksResponse struct { func (x *ListTasksResponse) Reset() { *x = ListTasksResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[131] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[133] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8586,7 +8699,7 @@ func (x *ListTasksResponse) String() string { func (*ListTasksResponse) ProtoMessage() {} func (x *ListTasksResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[131] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[133] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8599,7 +8712,7 @@ func (x *ListTasksResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ListTasksResponse.ProtoReflect.Descriptor instead. 
func (*ListTasksResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{131} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{133} } func (x *ListTasksResponse) GetResponse() *v118.ListHistoryTasksResponse { @@ -8633,7 +8746,7 @@ type CompleteNexusOperationChasmRequest struct { func (x *CompleteNexusOperationChasmRequest) Reset() { *x = CompleteNexusOperationChasmRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[132] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8645,7 +8758,7 @@ func (x *CompleteNexusOperationChasmRequest) String() string { func (*CompleteNexusOperationChasmRequest) ProtoMessage() {} func (x *CompleteNexusOperationChasmRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[132] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8658,7 +8771,7 @@ func (x *CompleteNexusOperationChasmRequest) ProtoReflect() protoreflect.Message // Deprecated: Use CompleteNexusOperationChasmRequest.ProtoReflect.Descriptor instead. 
func (*CompleteNexusOperationChasmRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{132} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{134} } func (x *CompleteNexusOperationChasmRequest) GetCompletion() *v120.NexusOperationCompletion { @@ -8747,7 +8860,7 @@ type CompleteNexusOperationChasmResponse struct { func (x *CompleteNexusOperationChasmResponse) Reset() { *x = CompleteNexusOperationChasmResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[133] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[135] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8759,7 +8872,7 @@ func (x *CompleteNexusOperationChasmResponse) String() string { func (*CompleteNexusOperationChasmResponse) ProtoMessage() {} func (x *CompleteNexusOperationChasmResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[133] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[135] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8772,7 +8885,7 @@ func (x *CompleteNexusOperationChasmResponse) ProtoReflect() protoreflect.Messag // Deprecated: Use CompleteNexusOperationChasmResponse.ProtoReflect.Descriptor instead. 
func (*CompleteNexusOperationChasmResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{133} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{135} } type CompleteNexusOperationRequest struct { @@ -8798,7 +8911,7 @@ type CompleteNexusOperationRequest struct { func (x *CompleteNexusOperationRequest) Reset() { *x = CompleteNexusOperationRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[136] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8810,7 +8923,7 @@ func (x *CompleteNexusOperationRequest) String() string { func (*CompleteNexusOperationRequest) ProtoMessage() {} func (x *CompleteNexusOperationRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[136] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8823,7 +8936,7 @@ func (x *CompleteNexusOperationRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CompleteNexusOperationRequest.ProtoReflect.Descriptor instead. 
func (*CompleteNexusOperationRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{134} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{136} } func (x *CompleteNexusOperationRequest) GetCompletion() *v120.NexusOperationCompletion { @@ -8912,7 +9025,7 @@ type CompleteNexusOperationResponse struct { func (x *CompleteNexusOperationResponse) Reset() { *x = CompleteNexusOperationResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[135] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[137] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8924,7 +9037,7 @@ func (x *CompleteNexusOperationResponse) String() string { func (*CompleteNexusOperationResponse) ProtoMessage() {} func (x *CompleteNexusOperationResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[135] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[137] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8937,7 +9050,7 @@ func (x *CompleteNexusOperationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CompleteNexusOperationResponse.ProtoReflect.Descriptor instead. 
func (*CompleteNexusOperationResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{135} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{137} } type InvokeStateMachineMethodRequest struct { @@ -8965,7 +9078,7 @@ type InvokeStateMachineMethodRequest struct { func (x *InvokeStateMachineMethodRequest) Reset() { *x = InvokeStateMachineMethodRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[136] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[138] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -8977,7 +9090,7 @@ func (x *InvokeStateMachineMethodRequest) String() string { func (*InvokeStateMachineMethodRequest) ProtoMessage() {} func (x *InvokeStateMachineMethodRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[136] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[138] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -8990,7 +9103,7 @@ func (x *InvokeStateMachineMethodRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use InvokeStateMachineMethodRequest.ProtoReflect.Descriptor instead. 
func (*InvokeStateMachineMethodRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{136} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{138} } func (x *InvokeStateMachineMethodRequest) GetNamespaceId() string { @@ -9045,7 +9158,7 @@ type InvokeStateMachineMethodResponse struct { func (x *InvokeStateMachineMethodResponse) Reset() { *x = InvokeStateMachineMethodResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[137] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[139] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9057,7 +9170,7 @@ func (x *InvokeStateMachineMethodResponse) String() string { func (*InvokeStateMachineMethodResponse) ProtoMessage() {} func (x *InvokeStateMachineMethodResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[137] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[139] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9070,7 +9183,7 @@ func (x *InvokeStateMachineMethodResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use InvokeStateMachineMethodResponse.ProtoReflect.Descriptor instead. 
func (*InvokeStateMachineMethodResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{137} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{139} } func (x *InvokeStateMachineMethodResponse) GetOutput() []byte { @@ -9089,7 +9202,7 @@ type DeepHealthCheckRequest struct { func (x *DeepHealthCheckRequest) Reset() { *x = DeepHealthCheckRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[138] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[140] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9101,7 +9214,7 @@ func (x *DeepHealthCheckRequest) String() string { func (*DeepHealthCheckRequest) ProtoMessage() {} func (x *DeepHealthCheckRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[138] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[140] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9114,7 +9227,7 @@ func (x *DeepHealthCheckRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DeepHealthCheckRequest.ProtoReflect.Descriptor instead. 
func (*DeepHealthCheckRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{138} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{140} } func (x *DeepHealthCheckRequest) GetHostAddress() string { @@ -9135,7 +9248,7 @@ type DeepHealthCheckResponse struct { func (x *DeepHealthCheckResponse) Reset() { *x = DeepHealthCheckResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[139] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[141] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9147,7 +9260,7 @@ func (x *DeepHealthCheckResponse) String() string { func (*DeepHealthCheckResponse) ProtoMessage() {} func (x *DeepHealthCheckResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[139] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[141] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9160,7 +9273,7 @@ func (x *DeepHealthCheckResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use DeepHealthCheckResponse.ProtoReflect.Descriptor instead. 
func (*DeepHealthCheckResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{139} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{141} } func (x *DeepHealthCheckResponse) GetState() v112.HealthState { @@ -9192,7 +9305,7 @@ type SyncWorkflowStateRequest struct { func (x *SyncWorkflowStateRequest) Reset() { *x = SyncWorkflowStateRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[140] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[142] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9204,7 +9317,7 @@ func (x *SyncWorkflowStateRequest) String() string { func (*SyncWorkflowStateRequest) ProtoMessage() {} func (x *SyncWorkflowStateRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[140] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[142] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9217,7 +9330,7 @@ func (x *SyncWorkflowStateRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use SyncWorkflowStateRequest.ProtoReflect.Descriptor instead. 
func (*SyncWorkflowStateRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{140} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{142} } func (x *SyncWorkflowStateRequest) GetNamespaceId() string { @@ -9271,7 +9384,7 @@ type SyncWorkflowStateResponse struct { func (x *SyncWorkflowStateResponse) Reset() { *x = SyncWorkflowStateResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[141] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[143] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9283,7 +9396,7 @@ func (x *SyncWorkflowStateResponse) String() string { func (*SyncWorkflowStateResponse) ProtoMessage() {} func (x *SyncWorkflowStateResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[141] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[143] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9296,7 +9409,7 @@ func (x *SyncWorkflowStateResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use SyncWorkflowStateResponse.ProtoReflect.Descriptor instead. 
func (*SyncWorkflowStateResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{141} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{143} } func (x *SyncWorkflowStateResponse) GetVersionedTransitionArtifact() *v117.VersionedTransitionArtifact { @@ -9319,7 +9432,7 @@ type UpdateActivityOptionsRequest struct { func (x *UpdateActivityOptionsRequest) Reset() { *x = UpdateActivityOptionsRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[142] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[144] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9331,7 +9444,7 @@ func (x *UpdateActivityOptionsRequest) String() string { func (*UpdateActivityOptionsRequest) ProtoMessage() {} func (x *UpdateActivityOptionsRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[142] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[144] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9344,7 +9457,7 @@ func (x *UpdateActivityOptionsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UpdateActivityOptionsRequest.ProtoReflect.Descriptor instead. 
func (*UpdateActivityOptionsRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{142} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{144} } func (x *UpdateActivityOptionsRequest) GetNamespaceId() string { @@ -9371,7 +9484,7 @@ type UpdateActivityOptionsResponse struct { func (x *UpdateActivityOptionsResponse) Reset() { *x = UpdateActivityOptionsResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[143] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[145] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9383,7 +9496,7 @@ func (x *UpdateActivityOptionsResponse) String() string { func (*UpdateActivityOptionsResponse) ProtoMessage() {} func (x *UpdateActivityOptionsResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[143] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[145] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9396,7 +9509,7 @@ func (x *UpdateActivityOptionsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use UpdateActivityOptionsResponse.ProtoReflect.Descriptor instead. 
func (*UpdateActivityOptionsResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{143} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{145} } func (x *UpdateActivityOptionsResponse) GetActivityOptions() *v123.ActivityOptions { @@ -9417,7 +9530,7 @@ type PauseActivityRequest struct { func (x *PauseActivityRequest) Reset() { *x = PauseActivityRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[144] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[146] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9429,7 +9542,7 @@ func (x *PauseActivityRequest) String() string { func (*PauseActivityRequest) ProtoMessage() {} func (x *PauseActivityRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[144] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[146] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9442,7 +9555,7 @@ func (x *PauseActivityRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PauseActivityRequest.ProtoReflect.Descriptor instead. 
func (*PauseActivityRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{144} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{146} } func (x *PauseActivityRequest) GetNamespaceId() string { @@ -9467,7 +9580,7 @@ type PauseActivityResponse struct { func (x *PauseActivityResponse) Reset() { *x = PauseActivityResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[145] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[147] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9479,7 +9592,7 @@ func (x *PauseActivityResponse) String() string { func (*PauseActivityResponse) ProtoMessage() {} func (x *PauseActivityResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[145] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[147] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9492,7 +9605,7 @@ func (x *PauseActivityResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PauseActivityResponse.ProtoReflect.Descriptor instead. 
func (*PauseActivityResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{145} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{147} } type UnpauseActivityRequest struct { @@ -9506,7 +9619,7 @@ type UnpauseActivityRequest struct { func (x *UnpauseActivityRequest) Reset() { *x = UnpauseActivityRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[146] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[148] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9518,7 +9631,7 @@ func (x *UnpauseActivityRequest) String() string { func (*UnpauseActivityRequest) ProtoMessage() {} func (x *UnpauseActivityRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[146] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[148] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9531,7 +9644,7 @@ func (x *UnpauseActivityRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UnpauseActivityRequest.ProtoReflect.Descriptor instead. 
func (*UnpauseActivityRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{146} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{148} } func (x *UnpauseActivityRequest) GetNamespaceId() string { @@ -9556,7 +9669,7 @@ type UnpauseActivityResponse struct { func (x *UnpauseActivityResponse) Reset() { *x = UnpauseActivityResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[147] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[149] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9568,7 +9681,7 @@ func (x *UnpauseActivityResponse) String() string { func (*UnpauseActivityResponse) ProtoMessage() {} func (x *UnpauseActivityResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[147] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[149] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9581,7 +9694,7 @@ func (x *UnpauseActivityResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use UnpauseActivityResponse.ProtoReflect.Descriptor instead. 
func (*UnpauseActivityResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{147} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{149} } type ResetActivityRequest struct { @@ -9595,7 +9708,7 @@ type ResetActivityRequest struct { func (x *ResetActivityRequest) Reset() { *x = ResetActivityRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[148] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[150] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9607,7 +9720,7 @@ func (x *ResetActivityRequest) String() string { func (*ResetActivityRequest) ProtoMessage() {} func (x *ResetActivityRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[148] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[150] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9620,7 +9733,7 @@ func (x *ResetActivityRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ResetActivityRequest.ProtoReflect.Descriptor instead. 
func (*ResetActivityRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{148} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{150} } func (x *ResetActivityRequest) GetNamespaceId() string { @@ -9645,7 +9758,7 @@ type ResetActivityResponse struct { func (x *ResetActivityResponse) Reset() { *x = ResetActivityResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[149] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[151] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9657,7 +9770,7 @@ func (x *ResetActivityResponse) String() string { func (*ResetActivityResponse) ProtoMessage() {} func (x *ResetActivityResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[149] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[151] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9670,7 +9783,7 @@ func (x *ResetActivityResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ResetActivityResponse.ProtoReflect.Descriptor instead. 
func (*ResetActivityResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{149} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{151} } // (-- api-linter: core::0134::request-mask-required=disabled @@ -9685,7 +9798,7 @@ type UpdateWorkflowExecutionOptionsRequest struct { func (x *UpdateWorkflowExecutionOptionsRequest) Reset() { *x = UpdateWorkflowExecutionOptionsRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[150] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[152] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9697,7 +9810,7 @@ func (x *UpdateWorkflowExecutionOptionsRequest) String() string { func (*UpdateWorkflowExecutionOptionsRequest) ProtoMessage() {} func (x *UpdateWorkflowExecutionOptionsRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[150] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[152] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9710,7 +9823,7 @@ func (x *UpdateWorkflowExecutionOptionsRequest) ProtoReflect() protoreflect.Mess // Deprecated: Use UpdateWorkflowExecutionOptionsRequest.ProtoReflect.Descriptor instead. 
func (*UpdateWorkflowExecutionOptionsRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{150} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{152} } func (x *UpdateWorkflowExecutionOptionsRequest) GetNamespaceId() string { @@ -9737,7 +9850,7 @@ type UpdateWorkflowExecutionOptionsResponse struct { func (x *UpdateWorkflowExecutionOptionsResponse) Reset() { *x = UpdateWorkflowExecutionOptionsResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[151] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[153] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9749,7 +9862,7 @@ func (x *UpdateWorkflowExecutionOptionsResponse) String() string { func (*UpdateWorkflowExecutionOptionsResponse) ProtoMessage() {} func (x *UpdateWorkflowExecutionOptionsResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[151] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[153] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9762,7 +9875,7 @@ func (x *UpdateWorkflowExecutionOptionsResponse) ProtoReflect() protoreflect.Mes // Deprecated: Use UpdateWorkflowExecutionOptionsResponse.ProtoReflect.Descriptor instead. 
func (*UpdateWorkflowExecutionOptionsResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{151} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{153} } func (x *UpdateWorkflowExecutionOptionsResponse) GetWorkflowExecutionOptions() *v15.WorkflowExecutionOptions { @@ -9783,7 +9896,7 @@ type PauseWorkflowExecutionRequest struct { func (x *PauseWorkflowExecutionRequest) Reset() { *x = PauseWorkflowExecutionRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[152] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[154] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9795,7 +9908,7 @@ func (x *PauseWorkflowExecutionRequest) String() string { func (*PauseWorkflowExecutionRequest) ProtoMessage() {} func (x *PauseWorkflowExecutionRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[152] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[154] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9808,7 +9921,7 @@ func (x *PauseWorkflowExecutionRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PauseWorkflowExecutionRequest.ProtoReflect.Descriptor instead. 
func (*PauseWorkflowExecutionRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{152} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{154} } func (x *PauseWorkflowExecutionRequest) GetNamespaceId() string { @@ -9833,7 +9946,7 @@ type PauseWorkflowExecutionResponse struct { func (x *PauseWorkflowExecutionResponse) Reset() { *x = PauseWorkflowExecutionResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[153] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[155] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9845,7 +9958,7 @@ func (x *PauseWorkflowExecutionResponse) String() string { func (*PauseWorkflowExecutionResponse) ProtoMessage() {} func (x *PauseWorkflowExecutionResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[153] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[155] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9858,7 +9971,7 @@ func (x *PauseWorkflowExecutionResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PauseWorkflowExecutionResponse.ProtoReflect.Descriptor instead. 
func (*PauseWorkflowExecutionResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{153} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{155} } type UnpauseWorkflowExecutionRequest struct { @@ -9872,7 +9985,7 @@ type UnpauseWorkflowExecutionRequest struct { func (x *UnpauseWorkflowExecutionRequest) Reset() { *x = UnpauseWorkflowExecutionRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[154] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[156] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9884,7 +9997,7 @@ func (x *UnpauseWorkflowExecutionRequest) String() string { func (*UnpauseWorkflowExecutionRequest) ProtoMessage() {} func (x *UnpauseWorkflowExecutionRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[154] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[156] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9897,7 +10010,7 @@ func (x *UnpauseWorkflowExecutionRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UnpauseWorkflowExecutionRequest.ProtoReflect.Descriptor instead. 
func (*UnpauseWorkflowExecutionRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{154} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{156} } func (x *UnpauseWorkflowExecutionRequest) GetNamespaceId() string { @@ -9922,7 +10035,7 @@ type UnpauseWorkflowExecutionResponse struct { func (x *UnpauseWorkflowExecutionResponse) Reset() { *x = UnpauseWorkflowExecutionResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[155] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[157] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9934,7 +10047,7 @@ func (x *UnpauseWorkflowExecutionResponse) String() string { func (*UnpauseWorkflowExecutionResponse) ProtoMessage() {} func (x *UnpauseWorkflowExecutionResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[155] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[157] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9947,7 +10060,7 @@ func (x *UnpauseWorkflowExecutionResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use UnpauseWorkflowExecutionResponse.ProtoReflect.Descriptor instead. 
func (*UnpauseWorkflowExecutionResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{155} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{157} } type StartNexusOperationRequest struct { @@ -9961,7 +10074,7 @@ type StartNexusOperationRequest struct { func (x *StartNexusOperationRequest) Reset() { *x = StartNexusOperationRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[156] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[158] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -9973,7 +10086,7 @@ func (x *StartNexusOperationRequest) String() string { func (*StartNexusOperationRequest) ProtoMessage() {} func (x *StartNexusOperationRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[156] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[158] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -9986,7 +10099,7 @@ func (x *StartNexusOperationRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use StartNexusOperationRequest.ProtoReflect.Descriptor instead. 
func (*StartNexusOperationRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{156} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{158} } func (x *StartNexusOperationRequest) GetNamespaceId() string { @@ -10019,7 +10132,7 @@ type StartNexusOperationResponse struct { func (x *StartNexusOperationResponse) Reset() { *x = StartNexusOperationResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[157] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[159] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10031,7 +10144,7 @@ func (x *StartNexusOperationResponse) String() string { func (*StartNexusOperationResponse) ProtoMessage() {} func (x *StartNexusOperationResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[157] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[159] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10044,7 +10157,7 @@ func (x *StartNexusOperationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use StartNexusOperationResponse.ProtoReflect.Descriptor instead. 
func (*StartNexusOperationResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{157} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{159} } func (x *StartNexusOperationResponse) GetResponse() *v121.StartOperationResponse { @@ -10065,7 +10178,7 @@ type CancelNexusOperationRequest struct { func (x *CancelNexusOperationRequest) Reset() { *x = CancelNexusOperationRequest{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[158] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[160] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10077,7 +10190,7 @@ func (x *CancelNexusOperationRequest) String() string { func (*CancelNexusOperationRequest) ProtoMessage() {} func (x *CancelNexusOperationRequest) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[158] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[160] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10090,7 +10203,7 @@ func (x *CancelNexusOperationRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CancelNexusOperationRequest.ProtoReflect.Descriptor instead. 
func (*CancelNexusOperationRequest) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{158} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{160} } func (x *CancelNexusOperationRequest) GetNamespaceId() string { @@ -10123,7 +10236,7 @@ type CancelNexusOperationResponse struct { func (x *CancelNexusOperationResponse) Reset() { *x = CancelNexusOperationResponse{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[159] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[161] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10135,7 +10248,7 @@ func (x *CancelNexusOperationResponse) String() string { func (*CancelNexusOperationResponse) ProtoMessage() {} func (x *CancelNexusOperationResponse) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[159] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[161] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10148,7 +10261,7 @@ func (x *CancelNexusOperationResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CancelNexusOperationResponse.ProtoReflect.Descriptor instead. 
func (*CancelNexusOperationResponse) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{159} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{161} } func (x *CancelNexusOperationResponse) GetResponse() *v121.CancelOperationResponse { @@ -10171,7 +10284,7 @@ type ExecuteMultiOperationRequest_Operation struct { func (x *ExecuteMultiOperationRequest_Operation) Reset() { *x = ExecuteMultiOperationRequest_Operation{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[160] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[162] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10183,7 +10296,7 @@ func (x *ExecuteMultiOperationRequest_Operation) String() string { func (*ExecuteMultiOperationRequest_Operation) ProtoMessage() {} func (x *ExecuteMultiOperationRequest_Operation) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[160] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[162] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10255,7 +10368,7 @@ type ExecuteMultiOperationResponse_Response struct { func (x *ExecuteMultiOperationResponse_Response) Reset() { *x = ExecuteMultiOperationResponse_Response{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[161] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[163] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10267,7 +10380,7 @@ func (x *ExecuteMultiOperationResponse_Response) String() string { func (*ExecuteMultiOperationResponse_Response) ProtoMessage() {} func (x *ExecuteMultiOperationResponse_Response) ProtoReflect() 
protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[161] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[163] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10337,7 +10450,7 @@ type ListQueuesResponse_QueueInfo struct { func (x *ListQueuesResponse_QueueInfo) Reset() { *x = ListQueuesResponse_QueueInfo{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[167] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[169] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10349,7 +10462,7 @@ func (x *ListQueuesResponse_QueueInfo) String() string { func (*ListQueuesResponse_QueueInfo) ProtoMessage() {} func (x *ListQueuesResponse_QueueInfo) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[167] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[169] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10362,7 +10475,7 @@ func (x *ListQueuesResponse_QueueInfo) ProtoReflect() protoreflect.Message { // Deprecated: Use ListQueuesResponse_QueueInfo.ProtoReflect.Descriptor instead. 
func (*ListQueuesResponse_QueueInfo) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{127, 0} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{129, 0} } func (x *ListQueuesResponse_QueueInfo) GetQueueName() string { @@ -10400,7 +10513,7 @@ type AddTasksRequest_Task struct { func (x *AddTasksRequest_Task) Reset() { *x = AddTasksRequest_Task{} - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[168] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[170] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -10412,7 +10525,7 @@ func (x *AddTasksRequest_Task) String() string { func (*AddTasksRequest_Task) ProtoMessage() {} func (x *AddTasksRequest_Task) ProtoReflect() protoreflect.Message { - mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[168] + mi := &file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[170] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -10425,7 +10538,7 @@ func (x *AddTasksRequest_Task) ProtoReflect() protoreflect.Message { // Deprecated: Use AddTasksRequest_Task.ProtoReflect.Descriptor instead. 
func (*AddTasksRequest_Task) Descriptor() ([]byte, []int) { - return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{128, 0} + return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZIP(), []int{130, 0} } func (x *AddTasksRequest_Task) GetCategoryId() int32 { @@ -11088,7 +11201,14 @@ const file_temporal_server_api_historyservice_v1_request_response_proto_rawDesc "\farchetype_id\x18\x03 \x01(\rR\varchetypeId\x12]\n" + "\arequest\x18\x02 \x01(\v2C.temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequestR\arequest:#\x92\xc4\x03\x1f*\x1drequest.execution.workflow_id\"\x88\x01\n" + "$ForceDeleteWorkflowExecutionResponse\x12`\n" + - "\bresponse\x18\x01 \x01(\v2D.temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponseR\bresponse\"\xa8\x01\n" + + "\bresponse\x18\x01 \x01(\v2D.temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponseR\bresponse\"\xf8\x01\n" + + "\x16DeleteExecutionRequest\x12!\n" + + "\fnamespace_id\x18\x01 \x01(\tR\vnamespaceId\x12G\n" + + "\texecution\x18\x02 \x01(\v2).temporal.api.common.v1.WorkflowExecutionR\texecution\x12!\n" + + "\farchetype_id\x18\x03 \x01(\rR\varchetypeId\x12\x16\n" + + "\x06reason\x18\x04 \x01(\tR\x06reason\x12\x1a\n" + + "\bidentity\x18\x05 \x01(\tR\bidentity:\x1b\x92\xc4\x03\x17*\x15execution.workflow_id\"\x19\n" + + "\x17DeleteExecutionResponse\"\xa8\x01\n" + "\x12GetDLQTasksRequest\x12E\n" + "\adlq_key\x18\x01 \x01(\v2,.temporal.server.api.common.v1.HistoryDLQKeyR\x06dlqKey\x12\x1b\n" + "\tpage_size\x18\x02 \x01(\x05R\bpageSize\x12&\n" + @@ -11237,7 +11357,7 @@ func file_temporal_server_api_historyservice_v1_request_response_proto_rawDescGZ return file_temporal_server_api_historyservice_v1_request_response_proto_rawDescData } -var file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes = make([]protoimpl.MessageInfo, 169) +var file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes = 
make([]protoimpl.MessageInfo, 171) var file_temporal_server_api_historyservice_v1_request_response_proto_goTypes = []any{ (*RoutingOptions)(nil), // 0: temporal.server.api.historyservice.v1.RoutingOptions (*StartWorkflowExecutionRequest)(nil), // 1: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest @@ -11361,440 +11481,443 @@ var file_temporal_server_api_historyservice_v1_request_response_proto_goTypes = (*GetWorkflowExecutionRawHistoryResponse)(nil), // 119: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse (*ForceDeleteWorkflowExecutionRequest)(nil), // 120: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest (*ForceDeleteWorkflowExecutionResponse)(nil), // 121: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse - (*GetDLQTasksRequest)(nil), // 122: temporal.server.api.historyservice.v1.GetDLQTasksRequest - (*GetDLQTasksResponse)(nil), // 123: temporal.server.api.historyservice.v1.GetDLQTasksResponse - (*DeleteDLQTasksRequest)(nil), // 124: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest - (*DeleteDLQTasksResponse)(nil), // 125: temporal.server.api.historyservice.v1.DeleteDLQTasksResponse - (*ListQueuesRequest)(nil), // 126: temporal.server.api.historyservice.v1.ListQueuesRequest - (*ListQueuesResponse)(nil), // 127: temporal.server.api.historyservice.v1.ListQueuesResponse - (*AddTasksRequest)(nil), // 128: temporal.server.api.historyservice.v1.AddTasksRequest - (*AddTasksResponse)(nil), // 129: temporal.server.api.historyservice.v1.AddTasksResponse - (*ListTasksRequest)(nil), // 130: temporal.server.api.historyservice.v1.ListTasksRequest - (*ListTasksResponse)(nil), // 131: temporal.server.api.historyservice.v1.ListTasksResponse - (*CompleteNexusOperationChasmRequest)(nil), // 132: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest - (*CompleteNexusOperationChasmResponse)(nil), // 133: 
temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse - (*CompleteNexusOperationRequest)(nil), // 134: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest - (*CompleteNexusOperationResponse)(nil), // 135: temporal.server.api.historyservice.v1.CompleteNexusOperationResponse - (*InvokeStateMachineMethodRequest)(nil), // 136: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest - (*InvokeStateMachineMethodResponse)(nil), // 137: temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse - (*DeepHealthCheckRequest)(nil), // 138: temporal.server.api.historyservice.v1.DeepHealthCheckRequest - (*DeepHealthCheckResponse)(nil), // 139: temporal.server.api.historyservice.v1.DeepHealthCheckResponse - (*SyncWorkflowStateRequest)(nil), // 140: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest - (*SyncWorkflowStateResponse)(nil), // 141: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse - (*UpdateActivityOptionsRequest)(nil), // 142: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest - (*UpdateActivityOptionsResponse)(nil), // 143: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse - (*PauseActivityRequest)(nil), // 144: temporal.server.api.historyservice.v1.PauseActivityRequest - (*PauseActivityResponse)(nil), // 145: temporal.server.api.historyservice.v1.PauseActivityResponse - (*UnpauseActivityRequest)(nil), // 146: temporal.server.api.historyservice.v1.UnpauseActivityRequest - (*UnpauseActivityResponse)(nil), // 147: temporal.server.api.historyservice.v1.UnpauseActivityResponse - (*ResetActivityRequest)(nil), // 148: temporal.server.api.historyservice.v1.ResetActivityRequest - (*ResetActivityResponse)(nil), // 149: temporal.server.api.historyservice.v1.ResetActivityResponse - (*UpdateWorkflowExecutionOptionsRequest)(nil), // 150: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest - 
(*UpdateWorkflowExecutionOptionsResponse)(nil), // 151: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse - (*PauseWorkflowExecutionRequest)(nil), // 152: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest - (*PauseWorkflowExecutionResponse)(nil), // 153: temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse - (*UnpauseWorkflowExecutionRequest)(nil), // 154: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest - (*UnpauseWorkflowExecutionResponse)(nil), // 155: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse - (*StartNexusOperationRequest)(nil), // 156: temporal.server.api.historyservice.v1.StartNexusOperationRequest - (*StartNexusOperationResponse)(nil), // 157: temporal.server.api.historyservice.v1.StartNexusOperationResponse - (*CancelNexusOperationRequest)(nil), // 158: temporal.server.api.historyservice.v1.CancelNexusOperationRequest - (*CancelNexusOperationResponse)(nil), // 159: temporal.server.api.historyservice.v1.CancelNexusOperationResponse - (*ExecuteMultiOperationRequest_Operation)(nil), // 160: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation - (*ExecuteMultiOperationResponse_Response)(nil), // 161: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response - nil, // 162: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry - nil, // 163: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry - nil, // 164: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry - nil, // 165: temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry - nil, // 166: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry - (*ListQueuesResponse_QueueInfo)(nil), // 167: temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo - (*AddTasksRequest_Task)(nil), // 
168: temporal.server.api.historyservice.v1.AddTasksRequest.Task - (*v1.StartWorkflowExecutionRequest)(nil), // 169: temporal.api.workflowservice.v1.StartWorkflowExecutionRequest - (*v11.ParentExecutionInfo)(nil), // 170: temporal.server.api.workflow.v1.ParentExecutionInfo - (*timestamppb.Timestamp)(nil), // 171: google.protobuf.Timestamp - (v12.ContinueAsNewInitiator)(0), // 172: temporal.api.enums.v1.ContinueAsNewInitiator - (*v13.Failure)(nil), // 173: temporal.api.failure.v1.Failure - (*v14.Payloads)(nil), // 174: temporal.api.common.v1.Payloads - (*durationpb.Duration)(nil), // 175: google.protobuf.Duration - (*v14.WorkerVersionStamp)(nil), // 176: temporal.api.common.v1.WorkerVersionStamp - (*v11.RootExecutionInfo)(nil), // 177: temporal.server.api.workflow.v1.RootExecutionInfo - (*v15.VersioningOverride)(nil), // 178: temporal.api.workflow.v1.VersioningOverride - (*v16.WorkerDeploymentVersion)(nil), // 179: temporal.api.deployment.v1.WorkerDeploymentVersion - (*v16.InheritedAutoUpgradeInfo)(nil), // 180: temporal.api.deployment.v1.InheritedAutoUpgradeInfo - (*v17.DeclinedTargetVersionUpgrade)(nil), // 181: temporal.api.history.v1.DeclinedTargetVersionUpgrade - (*v18.VectorClock)(nil), // 182: temporal.server.api.clock.v1.VectorClock - (*v1.PollWorkflowTaskQueueResponse)(nil), // 183: temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse - (v12.WorkflowExecutionStatus)(0), // 184: temporal.api.enums.v1.WorkflowExecutionStatus - (*v14.Link)(nil), // 185: temporal.api.common.v1.Link - (*v14.WorkflowExecution)(nil), // 186: temporal.api.common.v1.WorkflowExecution - (*v19.VersionHistoryItem)(nil), // 187: temporal.server.api.history.v1.VersionHistoryItem - (*v110.VersionedTransition)(nil), // 188: temporal.server.api.persistence.v1.VersionedTransition - (*v14.WorkflowType)(nil), // 189: temporal.api.common.v1.WorkflowType - (*v111.TaskQueue)(nil), // 190: temporal.api.taskqueue.v1.TaskQueue - (v112.WorkflowExecutionState)(0), // 191: 
temporal.server.api.enums.v1.WorkflowExecutionState - (*v19.VersionHistories)(nil), // 192: temporal.server.api.history.v1.VersionHistories - (*v15.WorkflowExecutionVersioningInfo)(nil), // 193: temporal.api.workflow.v1.WorkflowExecutionVersioningInfo - (*v19.TransientWorkflowTaskInfo)(nil), // 194: temporal.server.api.history.v1.TransientWorkflowTaskInfo - (*v1.PollWorkflowTaskQueueRequest)(nil), // 195: temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest - (*v113.BuildIdRedirectInfo)(nil), // 196: temporal.server.api.taskqueue.v1.BuildIdRedirectInfo - (*v16.Deployment)(nil), // 197: temporal.api.deployment.v1.Deployment - (*v113.TaskVersionDirective)(nil), // 198: temporal.server.api.taskqueue.v1.TaskVersionDirective - (*v115.Message)(nil), // 199: temporal.api.protocol.v1.Message - (*v17.History)(nil), // 200: temporal.api.history.v1.History - (*v1.PollActivityTaskQueueRequest)(nil), // 201: temporal.api.workflowservice.v1.PollActivityTaskQueueRequest - (*v17.HistoryEvent)(nil), // 202: temporal.api.history.v1.HistoryEvent - (*v14.Priority)(nil), // 203: temporal.api.common.v1.Priority - (*v14.RetryPolicy)(nil), // 204: temporal.api.common.v1.RetryPolicy - (*v1.RespondWorkflowTaskCompletedRequest)(nil), // 205: temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedRequest - (*v1.PollActivityTaskQueueResponse)(nil), // 206: temporal.api.workflowservice.v1.PollActivityTaskQueueResponse - (*v1.RespondWorkflowTaskFailedRequest)(nil), // 207: temporal.api.workflowservice.v1.RespondWorkflowTaskFailedRequest - (*v1.RecordActivityTaskHeartbeatRequest)(nil), // 208: temporal.api.workflowservice.v1.RecordActivityTaskHeartbeatRequest - (*v1.RespondActivityTaskCompletedRequest)(nil), // 209: temporal.api.workflowservice.v1.RespondActivityTaskCompletedRequest - (*v1.RespondActivityTaskFailedRequest)(nil), // 210: temporal.api.workflowservice.v1.RespondActivityTaskFailedRequest - (*v1.RespondActivityTaskCanceledRequest)(nil), // 211: 
temporal.api.workflowservice.v1.RespondActivityTaskCanceledRequest - (*v1.SignalWorkflowExecutionRequest)(nil), // 212: temporal.api.workflowservice.v1.SignalWorkflowExecutionRequest - (*v1.SignalWithStartWorkflowExecutionRequest)(nil), // 213: temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest - (*v1.TerminateWorkflowExecutionRequest)(nil), // 214: temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest - (*v1.ResetWorkflowExecutionRequest)(nil), // 215: temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest - (*v1.RequestCancelWorkflowExecutionRequest)(nil), // 216: temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest - (*v1.DescribeWorkflowExecutionRequest)(nil), // 217: temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest - (*v15.WorkflowExecutionConfig)(nil), // 218: temporal.api.workflow.v1.WorkflowExecutionConfig - (*v15.WorkflowExecutionInfo)(nil), // 219: temporal.api.workflow.v1.WorkflowExecutionInfo - (*v15.PendingActivityInfo)(nil), // 220: temporal.api.workflow.v1.PendingActivityInfo - (*v15.PendingChildExecutionInfo)(nil), // 221: temporal.api.workflow.v1.PendingChildExecutionInfo - (*v15.PendingWorkflowTaskInfo)(nil), // 222: temporal.api.workflow.v1.PendingWorkflowTaskInfo - (*v15.CallbackInfo)(nil), // 223: temporal.api.workflow.v1.CallbackInfo - (*v15.PendingNexusOperationInfo)(nil), // 224: temporal.api.workflow.v1.PendingNexusOperationInfo - (*v15.WorkflowExecutionExtendedInfo)(nil), // 225: temporal.api.workflow.v1.WorkflowExecutionExtendedInfo - (*v14.DataBlob)(nil), // 226: temporal.api.common.v1.DataBlob - (*v11.BaseExecutionInfo)(nil), // 227: temporal.server.api.workflow.v1.BaseExecutionInfo - (*v110.WorkflowMutableState)(nil), // 228: temporal.server.api.persistence.v1.WorkflowMutableState - (*v19.VersionHistory)(nil), // 229: temporal.server.api.history.v1.VersionHistory - (*v116.NamespaceCacheInfo)(nil), // 230: temporal.server.api.namespace.v1.NamespaceCacheInfo - 
(*v110.ShardInfo)(nil), // 231: temporal.server.api.persistence.v1.ShardInfo - (*v117.ReplicationToken)(nil), // 232: temporal.server.api.replication.v1.ReplicationToken - (*v117.ReplicationTaskInfo)(nil), // 233: temporal.server.api.replication.v1.ReplicationTaskInfo - (*v117.ReplicationTask)(nil), // 234: temporal.server.api.replication.v1.ReplicationTask - (*v1.QueryWorkflowRequest)(nil), // 235: temporal.api.workflowservice.v1.QueryWorkflowRequest - (*v1.QueryWorkflowResponse)(nil), // 236: temporal.api.workflowservice.v1.QueryWorkflowResponse - (*v118.ReapplyEventsRequest)(nil), // 237: temporal.server.api.adminservice.v1.ReapplyEventsRequest - (v112.DeadLetterQueueType)(0), // 238: temporal.server.api.enums.v1.DeadLetterQueueType - (*v118.RefreshWorkflowTasksRequest)(nil), // 239: temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest - (*v1.UpdateWorkflowExecutionRequest)(nil), // 240: temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest - (*v1.UpdateWorkflowExecutionResponse)(nil), // 241: temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse - (*v117.SyncReplicationState)(nil), // 242: temporal.server.api.replication.v1.SyncReplicationState - (*v117.WorkflowReplicationMessages)(nil), // 243: temporal.server.api.replication.v1.WorkflowReplicationMessages - (*v1.PollWorkflowExecutionUpdateRequest)(nil), // 244: temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest - (*v1.PollWorkflowExecutionUpdateResponse)(nil), // 245: temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse - (*v1.GetWorkflowExecutionHistoryRequest)(nil), // 246: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest - (*v1.GetWorkflowExecutionHistoryResponse)(nil), // 247: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse - (*v1.GetWorkflowExecutionHistoryReverseRequest)(nil), // 248: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest - 
(*v1.GetWorkflowExecutionHistoryReverseResponse)(nil), // 249: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse - (*v118.GetWorkflowExecutionRawHistoryV2Request)(nil), // 250: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request - (*v118.GetWorkflowExecutionRawHistoryV2Response)(nil), // 251: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response - (*v118.GetWorkflowExecutionRawHistoryRequest)(nil), // 252: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest - (*v118.GetWorkflowExecutionRawHistoryResponse)(nil), // 253: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse - (*v118.DeleteWorkflowExecutionRequest)(nil), // 254: temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest - (*v118.DeleteWorkflowExecutionResponse)(nil), // 255: temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse - (*v119.HistoryDLQKey)(nil), // 256: temporal.server.api.common.v1.HistoryDLQKey - (*v119.HistoryDLQTask)(nil), // 257: temporal.server.api.common.v1.HistoryDLQTask - (*v119.HistoryDLQTaskMetadata)(nil), // 258: temporal.server.api.common.v1.HistoryDLQTaskMetadata - (*v118.ListHistoryTasksRequest)(nil), // 259: temporal.server.api.adminservice.v1.ListHistoryTasksRequest - (*v118.ListHistoryTasksResponse)(nil), // 260: temporal.server.api.adminservice.v1.ListHistoryTasksResponse - (*v120.NexusOperationCompletion)(nil), // 261: temporal.server.api.token.v1.NexusOperationCompletion - (*v14.Payload)(nil), // 262: temporal.api.common.v1.Payload - (*v121.Failure)(nil), // 263: temporal.api.nexus.v1.Failure - (*v110.StateMachineRef)(nil), // 264: temporal.server.api.persistence.v1.StateMachineRef - (v112.HealthState)(0), // 265: temporal.server.api.enums.v1.HealthState - (*v122.HealthCheck)(nil), // 266: temporal.server.api.health.v1.HealthCheck - (*v117.VersionedTransitionArtifact)(nil), // 267: 
temporal.server.api.replication.v1.VersionedTransitionArtifact - (*v1.UpdateActivityOptionsRequest)(nil), // 268: temporal.api.workflowservice.v1.UpdateActivityOptionsRequest - (*v123.ActivityOptions)(nil), // 269: temporal.api.activity.v1.ActivityOptions - (*v1.PauseActivityRequest)(nil), // 270: temporal.api.workflowservice.v1.PauseActivityRequest - (*v1.UnpauseActivityRequest)(nil), // 271: temporal.api.workflowservice.v1.UnpauseActivityRequest - (*v1.ResetActivityRequest)(nil), // 272: temporal.api.workflowservice.v1.ResetActivityRequest - (*v1.UpdateWorkflowExecutionOptionsRequest)(nil), // 273: temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest - (*v15.WorkflowExecutionOptions)(nil), // 274: temporal.api.workflow.v1.WorkflowExecutionOptions - (*v1.PauseWorkflowExecutionRequest)(nil), // 275: temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest - (*v1.UnpauseWorkflowExecutionRequest)(nil), // 276: temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest - (*v121.StartOperationRequest)(nil), // 277: temporal.api.nexus.v1.StartOperationRequest - (*v121.StartOperationResponse)(nil), // 278: temporal.api.nexus.v1.StartOperationResponse - (*v121.CancelOperationRequest)(nil), // 279: temporal.api.nexus.v1.CancelOperationRequest - (*v121.CancelOperationResponse)(nil), // 280: temporal.api.nexus.v1.CancelOperationResponse - (*v114.WorkflowQuery)(nil), // 281: temporal.api.query.v1.WorkflowQuery - (*v117.ReplicationMessages)(nil), // 282: temporal.server.api.replication.v1.ReplicationMessages - (*descriptorpb.MessageOptions)(nil), // 283: google.protobuf.MessageOptions + (*DeleteExecutionRequest)(nil), // 122: temporal.server.api.historyservice.v1.DeleteExecutionRequest + (*DeleteExecutionResponse)(nil), // 123: temporal.server.api.historyservice.v1.DeleteExecutionResponse + (*GetDLQTasksRequest)(nil), // 124: temporal.server.api.historyservice.v1.GetDLQTasksRequest + (*GetDLQTasksResponse)(nil), // 125: 
temporal.server.api.historyservice.v1.GetDLQTasksResponse + (*DeleteDLQTasksRequest)(nil), // 126: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest + (*DeleteDLQTasksResponse)(nil), // 127: temporal.server.api.historyservice.v1.DeleteDLQTasksResponse + (*ListQueuesRequest)(nil), // 128: temporal.server.api.historyservice.v1.ListQueuesRequest + (*ListQueuesResponse)(nil), // 129: temporal.server.api.historyservice.v1.ListQueuesResponse + (*AddTasksRequest)(nil), // 130: temporal.server.api.historyservice.v1.AddTasksRequest + (*AddTasksResponse)(nil), // 131: temporal.server.api.historyservice.v1.AddTasksResponse + (*ListTasksRequest)(nil), // 132: temporal.server.api.historyservice.v1.ListTasksRequest + (*ListTasksResponse)(nil), // 133: temporal.server.api.historyservice.v1.ListTasksResponse + (*CompleteNexusOperationChasmRequest)(nil), // 134: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest + (*CompleteNexusOperationChasmResponse)(nil), // 135: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse + (*CompleteNexusOperationRequest)(nil), // 136: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest + (*CompleteNexusOperationResponse)(nil), // 137: temporal.server.api.historyservice.v1.CompleteNexusOperationResponse + (*InvokeStateMachineMethodRequest)(nil), // 138: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest + (*InvokeStateMachineMethodResponse)(nil), // 139: temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse + (*DeepHealthCheckRequest)(nil), // 140: temporal.server.api.historyservice.v1.DeepHealthCheckRequest + (*DeepHealthCheckResponse)(nil), // 141: temporal.server.api.historyservice.v1.DeepHealthCheckResponse + (*SyncWorkflowStateRequest)(nil), // 142: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest + (*SyncWorkflowStateResponse)(nil), // 143: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse + 
(*UpdateActivityOptionsRequest)(nil), // 144: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest + (*UpdateActivityOptionsResponse)(nil), // 145: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse + (*PauseActivityRequest)(nil), // 146: temporal.server.api.historyservice.v1.PauseActivityRequest + (*PauseActivityResponse)(nil), // 147: temporal.server.api.historyservice.v1.PauseActivityResponse + (*UnpauseActivityRequest)(nil), // 148: temporal.server.api.historyservice.v1.UnpauseActivityRequest + (*UnpauseActivityResponse)(nil), // 149: temporal.server.api.historyservice.v1.UnpauseActivityResponse + (*ResetActivityRequest)(nil), // 150: temporal.server.api.historyservice.v1.ResetActivityRequest + (*ResetActivityResponse)(nil), // 151: temporal.server.api.historyservice.v1.ResetActivityResponse + (*UpdateWorkflowExecutionOptionsRequest)(nil), // 152: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest + (*UpdateWorkflowExecutionOptionsResponse)(nil), // 153: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse + (*PauseWorkflowExecutionRequest)(nil), // 154: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest + (*PauseWorkflowExecutionResponse)(nil), // 155: temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse + (*UnpauseWorkflowExecutionRequest)(nil), // 156: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest + (*UnpauseWorkflowExecutionResponse)(nil), // 157: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse + (*StartNexusOperationRequest)(nil), // 158: temporal.server.api.historyservice.v1.StartNexusOperationRequest + (*StartNexusOperationResponse)(nil), // 159: temporal.server.api.historyservice.v1.StartNexusOperationResponse + (*CancelNexusOperationRequest)(nil), // 160: temporal.server.api.historyservice.v1.CancelNexusOperationRequest + (*CancelNexusOperationResponse)(nil), // 161: 
temporal.server.api.historyservice.v1.CancelNexusOperationResponse + (*ExecuteMultiOperationRequest_Operation)(nil), // 162: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation + (*ExecuteMultiOperationResponse_Response)(nil), // 163: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response + nil, // 164: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry + nil, // 165: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry + nil, // 166: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry + nil, // 167: temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry + nil, // 168: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry + (*ListQueuesResponse_QueueInfo)(nil), // 169: temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo + (*AddTasksRequest_Task)(nil), // 170: temporal.server.api.historyservice.v1.AddTasksRequest.Task + (*v1.StartWorkflowExecutionRequest)(nil), // 171: temporal.api.workflowservice.v1.StartWorkflowExecutionRequest + (*v11.ParentExecutionInfo)(nil), // 172: temporal.server.api.workflow.v1.ParentExecutionInfo + (*timestamppb.Timestamp)(nil), // 173: google.protobuf.Timestamp + (v12.ContinueAsNewInitiator)(0), // 174: temporal.api.enums.v1.ContinueAsNewInitiator + (*v13.Failure)(nil), // 175: temporal.api.failure.v1.Failure + (*v14.Payloads)(nil), // 176: temporal.api.common.v1.Payloads + (*durationpb.Duration)(nil), // 177: google.protobuf.Duration + (*v14.WorkerVersionStamp)(nil), // 178: temporal.api.common.v1.WorkerVersionStamp + (*v11.RootExecutionInfo)(nil), // 179: temporal.server.api.workflow.v1.RootExecutionInfo + (*v15.VersioningOverride)(nil), // 180: temporal.api.workflow.v1.VersioningOverride + (*v16.WorkerDeploymentVersion)(nil), // 181: temporal.api.deployment.v1.WorkerDeploymentVersion + 
(*v16.InheritedAutoUpgradeInfo)(nil), // 182: temporal.api.deployment.v1.InheritedAutoUpgradeInfo + (*v17.DeclinedTargetVersionUpgrade)(nil), // 183: temporal.api.history.v1.DeclinedTargetVersionUpgrade + (*v18.VectorClock)(nil), // 184: temporal.server.api.clock.v1.VectorClock + (*v1.PollWorkflowTaskQueueResponse)(nil), // 185: temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse + (v12.WorkflowExecutionStatus)(0), // 186: temporal.api.enums.v1.WorkflowExecutionStatus + (*v14.Link)(nil), // 187: temporal.api.common.v1.Link + (*v14.WorkflowExecution)(nil), // 188: temporal.api.common.v1.WorkflowExecution + (*v19.VersionHistoryItem)(nil), // 189: temporal.server.api.history.v1.VersionHistoryItem + (*v110.VersionedTransition)(nil), // 190: temporal.server.api.persistence.v1.VersionedTransition + (*v14.WorkflowType)(nil), // 191: temporal.api.common.v1.WorkflowType + (*v111.TaskQueue)(nil), // 192: temporal.api.taskqueue.v1.TaskQueue + (v112.WorkflowExecutionState)(0), // 193: temporal.server.api.enums.v1.WorkflowExecutionState + (*v19.VersionHistories)(nil), // 194: temporal.server.api.history.v1.VersionHistories + (*v15.WorkflowExecutionVersioningInfo)(nil), // 195: temporal.api.workflow.v1.WorkflowExecutionVersioningInfo + (*v19.TransientWorkflowTaskInfo)(nil), // 196: temporal.server.api.history.v1.TransientWorkflowTaskInfo + (*v1.PollWorkflowTaskQueueRequest)(nil), // 197: temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest + (*v113.BuildIdRedirectInfo)(nil), // 198: temporal.server.api.taskqueue.v1.BuildIdRedirectInfo + (*v16.Deployment)(nil), // 199: temporal.api.deployment.v1.Deployment + (*v113.TaskVersionDirective)(nil), // 200: temporal.server.api.taskqueue.v1.TaskVersionDirective + (*v115.Message)(nil), // 201: temporal.api.protocol.v1.Message + (*v17.History)(nil), // 202: temporal.api.history.v1.History + (*v1.PollActivityTaskQueueRequest)(nil), // 203: temporal.api.workflowservice.v1.PollActivityTaskQueueRequest + 
(*v17.HistoryEvent)(nil), // 204: temporal.api.history.v1.HistoryEvent + (*v14.Priority)(nil), // 205: temporal.api.common.v1.Priority + (*v14.RetryPolicy)(nil), // 206: temporal.api.common.v1.RetryPolicy + (*v1.RespondWorkflowTaskCompletedRequest)(nil), // 207: temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedRequest + (*v1.PollActivityTaskQueueResponse)(nil), // 208: temporal.api.workflowservice.v1.PollActivityTaskQueueResponse + (*v1.RespondWorkflowTaskFailedRequest)(nil), // 209: temporal.api.workflowservice.v1.RespondWorkflowTaskFailedRequest + (*v1.RecordActivityTaskHeartbeatRequest)(nil), // 210: temporal.api.workflowservice.v1.RecordActivityTaskHeartbeatRequest + (*v1.RespondActivityTaskCompletedRequest)(nil), // 211: temporal.api.workflowservice.v1.RespondActivityTaskCompletedRequest + (*v1.RespondActivityTaskFailedRequest)(nil), // 212: temporal.api.workflowservice.v1.RespondActivityTaskFailedRequest + (*v1.RespondActivityTaskCanceledRequest)(nil), // 213: temporal.api.workflowservice.v1.RespondActivityTaskCanceledRequest + (*v1.SignalWorkflowExecutionRequest)(nil), // 214: temporal.api.workflowservice.v1.SignalWorkflowExecutionRequest + (*v1.SignalWithStartWorkflowExecutionRequest)(nil), // 215: temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest + (*v1.TerminateWorkflowExecutionRequest)(nil), // 216: temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest + (*v1.ResetWorkflowExecutionRequest)(nil), // 217: temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest + (*v1.RequestCancelWorkflowExecutionRequest)(nil), // 218: temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest + (*v1.DescribeWorkflowExecutionRequest)(nil), // 219: temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest + (*v15.WorkflowExecutionConfig)(nil), // 220: temporal.api.workflow.v1.WorkflowExecutionConfig + (*v15.WorkflowExecutionInfo)(nil), // 221: temporal.api.workflow.v1.WorkflowExecutionInfo + 
(*v15.PendingActivityInfo)(nil), // 222: temporal.api.workflow.v1.PendingActivityInfo + (*v15.PendingChildExecutionInfo)(nil), // 223: temporal.api.workflow.v1.PendingChildExecutionInfo + (*v15.PendingWorkflowTaskInfo)(nil), // 224: temporal.api.workflow.v1.PendingWorkflowTaskInfo + (*v15.CallbackInfo)(nil), // 225: temporal.api.workflow.v1.CallbackInfo + (*v15.PendingNexusOperationInfo)(nil), // 226: temporal.api.workflow.v1.PendingNexusOperationInfo + (*v15.WorkflowExecutionExtendedInfo)(nil), // 227: temporal.api.workflow.v1.WorkflowExecutionExtendedInfo + (*v14.DataBlob)(nil), // 228: temporal.api.common.v1.DataBlob + (*v11.BaseExecutionInfo)(nil), // 229: temporal.server.api.workflow.v1.BaseExecutionInfo + (*v110.WorkflowMutableState)(nil), // 230: temporal.server.api.persistence.v1.WorkflowMutableState + (*v19.VersionHistory)(nil), // 231: temporal.server.api.history.v1.VersionHistory + (*v116.NamespaceCacheInfo)(nil), // 232: temporal.server.api.namespace.v1.NamespaceCacheInfo + (*v110.ShardInfo)(nil), // 233: temporal.server.api.persistence.v1.ShardInfo + (*v117.ReplicationToken)(nil), // 234: temporal.server.api.replication.v1.ReplicationToken + (*v117.ReplicationTaskInfo)(nil), // 235: temporal.server.api.replication.v1.ReplicationTaskInfo + (*v117.ReplicationTask)(nil), // 236: temporal.server.api.replication.v1.ReplicationTask + (*v1.QueryWorkflowRequest)(nil), // 237: temporal.api.workflowservice.v1.QueryWorkflowRequest + (*v1.QueryWorkflowResponse)(nil), // 238: temporal.api.workflowservice.v1.QueryWorkflowResponse + (*v118.ReapplyEventsRequest)(nil), // 239: temporal.server.api.adminservice.v1.ReapplyEventsRequest + (v112.DeadLetterQueueType)(0), // 240: temporal.server.api.enums.v1.DeadLetterQueueType + (*v118.RefreshWorkflowTasksRequest)(nil), // 241: temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest + (*v1.UpdateWorkflowExecutionRequest)(nil), // 242: temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest + 
(*v1.UpdateWorkflowExecutionResponse)(nil), // 243: temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse + (*v117.SyncReplicationState)(nil), // 244: temporal.server.api.replication.v1.SyncReplicationState + (*v117.WorkflowReplicationMessages)(nil), // 245: temporal.server.api.replication.v1.WorkflowReplicationMessages + (*v1.PollWorkflowExecutionUpdateRequest)(nil), // 246: temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest + (*v1.PollWorkflowExecutionUpdateResponse)(nil), // 247: temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse + (*v1.GetWorkflowExecutionHistoryRequest)(nil), // 248: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest + (*v1.GetWorkflowExecutionHistoryResponse)(nil), // 249: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse + (*v1.GetWorkflowExecutionHistoryReverseRequest)(nil), // 250: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest + (*v1.GetWorkflowExecutionHistoryReverseResponse)(nil), // 251: temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse + (*v118.GetWorkflowExecutionRawHistoryV2Request)(nil), // 252: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request + (*v118.GetWorkflowExecutionRawHistoryV2Response)(nil), // 253: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response + (*v118.GetWorkflowExecutionRawHistoryRequest)(nil), // 254: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest + (*v118.GetWorkflowExecutionRawHistoryResponse)(nil), // 255: temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse + (*v118.DeleteWorkflowExecutionRequest)(nil), // 256: temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest + (*v118.DeleteWorkflowExecutionResponse)(nil), // 257: temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse + (*v119.HistoryDLQKey)(nil), // 258: temporal.server.api.common.v1.HistoryDLQKey + 
(*v119.HistoryDLQTask)(nil), // 259: temporal.server.api.common.v1.HistoryDLQTask + (*v119.HistoryDLQTaskMetadata)(nil), // 260: temporal.server.api.common.v1.HistoryDLQTaskMetadata + (*v118.ListHistoryTasksRequest)(nil), // 261: temporal.server.api.adminservice.v1.ListHistoryTasksRequest + (*v118.ListHistoryTasksResponse)(nil), // 262: temporal.server.api.adminservice.v1.ListHistoryTasksResponse + (*v120.NexusOperationCompletion)(nil), // 263: temporal.server.api.token.v1.NexusOperationCompletion + (*v14.Payload)(nil), // 264: temporal.api.common.v1.Payload + (*v121.Failure)(nil), // 265: temporal.api.nexus.v1.Failure + (*v110.StateMachineRef)(nil), // 266: temporal.server.api.persistence.v1.StateMachineRef + (v112.HealthState)(0), // 267: temporal.server.api.enums.v1.HealthState + (*v122.HealthCheck)(nil), // 268: temporal.server.api.health.v1.HealthCheck + (*v117.VersionedTransitionArtifact)(nil), // 269: temporal.server.api.replication.v1.VersionedTransitionArtifact + (*v1.UpdateActivityOptionsRequest)(nil), // 270: temporal.api.workflowservice.v1.UpdateActivityOptionsRequest + (*v123.ActivityOptions)(nil), // 271: temporal.api.activity.v1.ActivityOptions + (*v1.PauseActivityRequest)(nil), // 272: temporal.api.workflowservice.v1.PauseActivityRequest + (*v1.UnpauseActivityRequest)(nil), // 273: temporal.api.workflowservice.v1.UnpauseActivityRequest + (*v1.ResetActivityRequest)(nil), // 274: temporal.api.workflowservice.v1.ResetActivityRequest + (*v1.UpdateWorkflowExecutionOptionsRequest)(nil), // 275: temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest + (*v15.WorkflowExecutionOptions)(nil), // 276: temporal.api.workflow.v1.WorkflowExecutionOptions + (*v1.PauseWorkflowExecutionRequest)(nil), // 277: temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest + (*v1.UnpauseWorkflowExecutionRequest)(nil), // 278: temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest + (*v121.StartOperationRequest)(nil), // 279: 
temporal.api.nexus.v1.StartOperationRequest + (*v121.StartOperationResponse)(nil), // 280: temporal.api.nexus.v1.StartOperationResponse + (*v121.CancelOperationRequest)(nil), // 281: temporal.api.nexus.v1.CancelOperationRequest + (*v121.CancelOperationResponse)(nil), // 282: temporal.api.nexus.v1.CancelOperationResponse + (*v114.WorkflowQuery)(nil), // 283: temporal.api.query.v1.WorkflowQuery + (*v117.ReplicationMessages)(nil), // 284: temporal.server.api.replication.v1.ReplicationMessages + (*descriptorpb.MessageOptions)(nil), // 285: google.protobuf.MessageOptions } var file_temporal_server_api_historyservice_v1_request_response_proto_depIdxs = []int32{ - 169, // 0: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.start_request:type_name -> temporal.api.workflowservice.v1.StartWorkflowExecutionRequest - 170, // 1: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.parent_execution_info:type_name -> temporal.server.api.workflow.v1.ParentExecutionInfo - 171, // 2: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.workflow_execution_expiration_time:type_name -> google.protobuf.Timestamp - 172, // 3: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.continue_as_new_initiator:type_name -> temporal.api.enums.v1.ContinueAsNewInitiator - 173, // 4: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.continued_failure:type_name -> temporal.api.failure.v1.Failure - 174, // 5: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.last_completion_result:type_name -> temporal.api.common.v1.Payloads - 175, // 6: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.first_workflow_task_backoff:type_name -> google.protobuf.Duration - 176, // 7: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.source_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp - 177, // 8: 
temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.root_execution_info:type_name -> temporal.server.api.workflow.v1.RootExecutionInfo - 178, // 9: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.versioning_override:type_name -> temporal.api.workflow.v1.VersioningOverride - 179, // 10: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.inherited_pinned_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 180, // 11: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.inherited_auto_upgrade_info:type_name -> temporal.api.deployment.v1.InheritedAutoUpgradeInfo - 181, // 12: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.declined_target_version_upgrade:type_name -> temporal.api.history.v1.DeclinedTargetVersionUpgrade - 175, // 13: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.initial_skipped_duration:type_name -> google.protobuf.Duration - 182, // 14: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 183, // 15: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.eager_workflow_task:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse - 184, // 16: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus - 185, // 17: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.link:type_name -> temporal.api.common.v1.Link - 186, // 18: temporal.server.api.historyservice.v1.GetMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 187, // 19: temporal.server.api.historyservice.v1.GetMutableStateRequest.version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem - 188, // 20: temporal.server.api.historyservice.v1.GetMutableStateRequest.versioned_transition:type_name -> 
temporal.server.api.persistence.v1.VersionedTransition - 186, // 21: temporal.server.api.historyservice.v1.GetMutableStateResponse.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 189, // 22: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType - 190, // 23: temporal.server.api.historyservice.v1.GetMutableStateResponse.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 190, // 24: temporal.server.api.historyservice.v1.GetMutableStateResponse.sticky_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 175, // 25: temporal.server.api.historyservice.v1.GetMutableStateResponse.sticky_task_queue_schedule_to_start_timeout:type_name -> google.protobuf.Duration - 191, // 26: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState - 184, // 27: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus - 192, // 28: temporal.server.api.historyservice.v1.GetMutableStateResponse.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories - 176, // 29: temporal.server.api.historyservice.v1.GetMutableStateResponse.most_recent_worker_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp - 188, // 30: temporal.server.api.historyservice.v1.GetMutableStateResponse.transition_history:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 193, // 31: temporal.server.api.historyservice.v1.GetMutableStateResponse.versioning_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionVersioningInfo - 194, // 32: temporal.server.api.historyservice.v1.GetMutableStateResponse.transient_or_speculative_tasks:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo - 186, // 33: temporal.server.api.historyservice.v1.PollMutableStateRequest.execution:type_name -> 
temporal.api.common.v1.WorkflowExecution - 187, // 34: temporal.server.api.historyservice.v1.PollMutableStateRequest.version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem - 186, // 35: temporal.server.api.historyservice.v1.PollMutableStateResponse.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 189, // 36: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType - 190, // 37: temporal.server.api.historyservice.v1.PollMutableStateResponse.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 190, // 38: temporal.server.api.historyservice.v1.PollMutableStateResponse.sticky_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 175, // 39: temporal.server.api.historyservice.v1.PollMutableStateResponse.sticky_task_queue_schedule_to_start_timeout:type_name -> google.protobuf.Duration - 192, // 40: temporal.server.api.historyservice.v1.PollMutableStateResponse.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories - 191, // 41: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState - 184, // 42: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus - 186, // 43: temporal.server.api.historyservice.v1.ResetStickyTaskQueueRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 160, // 44: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.operations:type_name -> temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation - 161, // 45: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.responses:type_name -> temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response - 186, // 46: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 195, // 47: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.poll_request:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest - 182, // 48: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 196, // 49: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.build_id_redirect_info:type_name -> temporal.server.api.taskqueue.v1.BuildIdRedirectInfo - 197, // 50: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.scheduled_deployment:type_name -> temporal.api.deployment.v1.Deployment - 198, // 51: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.version_directive:type_name -> temporal.server.api.taskqueue.v1.TaskVersionDirective - 179, // 52: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.target_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 189, // 53: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType - 194, // 54: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.transient_workflow_task:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo - 190, // 55: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.workflow_execution_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 171, // 56: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 57: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.started_time:type_name -> google.protobuf.Timestamp - 162, // 58: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.queries:type_name -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry - 182, // 59: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 199, // 60: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.messages:type_name -> temporal.api.protocol.v1.Message - 200, // 61: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.history:type_name -> temporal.api.history.v1.History - 200, // 62: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.raw_history:type_name -> temporal.api.history.v1.History - 189, // 63: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.workflow_type:type_name -> temporal.api.common.v1.WorkflowType - 194, // 64: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.transient_workflow_task:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo - 190, // 65: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.workflow_execution_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue - 171, // 66: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 67: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.started_time:type_name -> google.protobuf.Timestamp - 163, // 68: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.queries:type_name -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry - 182, // 69: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 199, // 70: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.messages:type_name -> temporal.api.protocol.v1.Message - 200, // 71: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.history:type_name -> temporal.api.history.v1.History - 186, // 72: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 201, // 73: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.poll_request:type_name -> temporal.api.workflowservice.v1.PollActivityTaskQueueRequest - 182, // 74: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 196, // 75: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.build_id_redirect_info:type_name -> temporal.server.api.taskqueue.v1.BuildIdRedirectInfo - 197, // 76: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.scheduled_deployment:type_name -> temporal.api.deployment.v1.Deployment - 198, // 77: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.version_directive:type_name -> temporal.server.api.taskqueue.v1.TaskVersionDirective - 202, // 78: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.scheduled_event:type_name -> temporal.api.history.v1.HistoryEvent - 171, // 79: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.started_time:type_name -> google.protobuf.Timestamp - 171, // 80: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.current_attempt_scheduled_time:type_name -> google.protobuf.Timestamp - 174, // 81: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.heartbeat_details:type_name -> temporal.api.common.v1.Payloads - 189, // 82: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.workflow_type:type_name -> 
temporal.api.common.v1.WorkflowType - 182, // 83: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 203, // 84: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.priority:type_name -> temporal.api.common.v1.Priority - 204, // 85: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.retry_policy:type_name -> temporal.api.common.v1.RetryPolicy - 205, // 86: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedRequest.complete_request:type_name -> temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedRequest + 171, // 0: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.start_request:type_name -> temporal.api.workflowservice.v1.StartWorkflowExecutionRequest + 172, // 1: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.parent_execution_info:type_name -> temporal.server.api.workflow.v1.ParentExecutionInfo + 173, // 2: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.workflow_execution_expiration_time:type_name -> google.protobuf.Timestamp + 174, // 3: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.continue_as_new_initiator:type_name -> temporal.api.enums.v1.ContinueAsNewInitiator + 175, // 4: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.continued_failure:type_name -> temporal.api.failure.v1.Failure + 176, // 5: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.last_completion_result:type_name -> temporal.api.common.v1.Payloads + 177, // 6: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.first_workflow_task_backoff:type_name -> google.protobuf.Duration + 178, // 7: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.source_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp + 179, // 8: 
temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.root_execution_info:type_name -> temporal.server.api.workflow.v1.RootExecutionInfo + 180, // 9: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.versioning_override:type_name -> temporal.api.workflow.v1.VersioningOverride + 181, // 10: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.inherited_pinned_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 182, // 11: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.inherited_auto_upgrade_info:type_name -> temporal.api.deployment.v1.InheritedAutoUpgradeInfo + 183, // 12: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.declined_target_version_upgrade:type_name -> temporal.api.history.v1.DeclinedTargetVersionUpgrade + 177, // 13: temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest.initial_skipped_duration:type_name -> google.protobuf.Duration + 184, // 14: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 185, // 15: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.eager_workflow_task:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse + 186, // 16: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus + 187, // 17: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse.link:type_name -> temporal.api.common.v1.Link + 188, // 18: temporal.server.api.historyservice.v1.GetMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 189, // 19: temporal.server.api.historyservice.v1.GetMutableStateRequest.version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem + 190, // 20: temporal.server.api.historyservice.v1.GetMutableStateRequest.versioned_transition:type_name -> 
temporal.server.api.persistence.v1.VersionedTransition + 188, // 21: temporal.server.api.historyservice.v1.GetMutableStateResponse.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 191, // 22: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType + 192, // 23: temporal.server.api.historyservice.v1.GetMutableStateResponse.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 192, // 24: temporal.server.api.historyservice.v1.GetMutableStateResponse.sticky_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 177, // 25: temporal.server.api.historyservice.v1.GetMutableStateResponse.sticky_task_queue_schedule_to_start_timeout:type_name -> google.protobuf.Duration + 193, // 26: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState + 186, // 27: temporal.server.api.historyservice.v1.GetMutableStateResponse.workflow_status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus + 194, // 28: temporal.server.api.historyservice.v1.GetMutableStateResponse.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories + 178, // 29: temporal.server.api.historyservice.v1.GetMutableStateResponse.most_recent_worker_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp + 190, // 30: temporal.server.api.historyservice.v1.GetMutableStateResponse.transition_history:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 195, // 31: temporal.server.api.historyservice.v1.GetMutableStateResponse.versioning_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionVersioningInfo + 196, // 32: temporal.server.api.historyservice.v1.GetMutableStateResponse.transient_or_speculative_tasks:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo + 188, // 33: temporal.server.api.historyservice.v1.PollMutableStateRequest.execution:type_name -> 
temporal.api.common.v1.WorkflowExecution + 189, // 34: temporal.server.api.historyservice.v1.PollMutableStateRequest.version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem + 188, // 35: temporal.server.api.historyservice.v1.PollMutableStateResponse.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 191, // 36: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType + 192, // 37: temporal.server.api.historyservice.v1.PollMutableStateResponse.task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 192, // 38: temporal.server.api.historyservice.v1.PollMutableStateResponse.sticky_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 177, // 39: temporal.server.api.historyservice.v1.PollMutableStateResponse.sticky_task_queue_schedule_to_start_timeout:type_name -> google.protobuf.Duration + 194, // 40: temporal.server.api.historyservice.v1.PollMutableStateResponse.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories + 193, // 41: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState + 186, // 42: temporal.server.api.historyservice.v1.PollMutableStateResponse.workflow_status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus + 188, // 43: temporal.server.api.historyservice.v1.ResetStickyTaskQueueRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 162, // 44: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.operations:type_name -> temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation + 163, // 45: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.responses:type_name -> temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response + 188, // 46: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 197, // 47: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.poll_request:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest + 184, // 48: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 198, // 49: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.build_id_redirect_info:type_name -> temporal.server.api.taskqueue.v1.BuildIdRedirectInfo + 199, // 50: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.scheduled_deployment:type_name -> temporal.api.deployment.v1.Deployment + 200, // 51: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.version_directive:type_name -> temporal.server.api.taskqueue.v1.TaskVersionDirective + 181, // 52: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedRequest.target_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 191, // 53: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.workflow_type:type_name -> temporal.api.common.v1.WorkflowType + 196, // 54: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.transient_workflow_task:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo + 192, // 55: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.workflow_execution_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 173, // 56: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 57: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.started_time:type_name -> google.protobuf.Timestamp + 164, // 58: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.queries:type_name -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry + 184, // 59: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 201, // 60: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.messages:type_name -> temporal.api.protocol.v1.Message + 202, // 61: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.history:type_name -> temporal.api.history.v1.History + 202, // 62: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.raw_history:type_name -> temporal.api.history.v1.History + 191, // 63: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.workflow_type:type_name -> temporal.api.common.v1.WorkflowType + 196, // 64: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.transient_workflow_task:type_name -> temporal.server.api.history.v1.TransientWorkflowTaskInfo + 192, // 65: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.workflow_execution_task_queue:type_name -> temporal.api.taskqueue.v1.TaskQueue + 173, // 66: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 67: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.started_time:type_name -> google.protobuf.Timestamp + 165, // 68: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.queries:type_name -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry + 184, // 69: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 201, // 70: 
temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.messages:type_name -> temporal.api.protocol.v1.Message + 202, // 71: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.history:type_name -> temporal.api.history.v1.History + 188, // 72: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 203, // 73: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.poll_request:type_name -> temporal.api.workflowservice.v1.PollActivityTaskQueueRequest + 184, // 74: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 198, // 75: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.build_id_redirect_info:type_name -> temporal.server.api.taskqueue.v1.BuildIdRedirectInfo + 199, // 76: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.scheduled_deployment:type_name -> temporal.api.deployment.v1.Deployment + 200, // 77: temporal.server.api.historyservice.v1.RecordActivityTaskStartedRequest.version_directive:type_name -> temporal.server.api.taskqueue.v1.TaskVersionDirective + 204, // 78: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.scheduled_event:type_name -> temporal.api.history.v1.HistoryEvent + 173, // 79: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.started_time:type_name -> google.protobuf.Timestamp + 173, // 80: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.current_attempt_scheduled_time:type_name -> google.protobuf.Timestamp + 176, // 81: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.heartbeat_details:type_name -> temporal.api.common.v1.Payloads + 191, // 82: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.workflow_type:type_name -> 
temporal.api.common.v1.WorkflowType + 184, // 83: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 205, // 84: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.priority:type_name -> temporal.api.common.v1.Priority + 206, // 85: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse.retry_policy:type_name -> temporal.api.common.v1.RetryPolicy + 207, // 86: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedRequest.complete_request:type_name -> temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedRequest 12, // 87: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse.started_response:type_name -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse - 206, // 88: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse.activity_tasks:type_name -> temporal.api.workflowservice.v1.PollActivityTaskQueueResponse - 183, // 89: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse.new_workflow_task:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse - 207, // 90: temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedRequest.failed_request:type_name -> temporal.api.workflowservice.v1.RespondWorkflowTaskFailedRequest - 186, // 91: temporal.server.api.historyservice.v1.IsWorkflowTaskValidRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 92: temporal.server.api.historyservice.v1.IsWorkflowTaskValidRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 208, // 93: temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatRequest.heartbeat_request:type_name -> temporal.api.workflowservice.v1.RecordActivityTaskHeartbeatRequest - 209, // 94: temporal.server.api.historyservice.v1.RespondActivityTaskCompletedRequest.complete_request:type_name -> 
temporal.api.workflowservice.v1.RespondActivityTaskCompletedRequest - 210, // 95: temporal.server.api.historyservice.v1.RespondActivityTaskFailedRequest.failed_request:type_name -> temporal.api.workflowservice.v1.RespondActivityTaskFailedRequest - 211, // 96: temporal.server.api.historyservice.v1.RespondActivityTaskCanceledRequest.cancel_request:type_name -> temporal.api.workflowservice.v1.RespondActivityTaskCanceledRequest - 186, // 97: temporal.server.api.historyservice.v1.IsActivityTaskValidRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 98: temporal.server.api.historyservice.v1.IsActivityTaskValidRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 212, // 99: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.signal_request:type_name -> temporal.api.workflowservice.v1.SignalWorkflowExecutionRequest - 186, // 100: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 185, // 101: temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse.link:type_name -> temporal.api.common.v1.Link - 213, // 102: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest.signal_with_start_request:type_name -> temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest - 185, // 103: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse.signal_link:type_name -> temporal.api.common.v1.Link - 186, // 104: temporal.server.api.historyservice.v1.RemoveSignalMutableStateRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 214, // 105: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.terminate_request:type_name -> temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest - 186, // 106: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.external_workflow_execution:type_name 
-> temporal.api.common.v1.WorkflowExecution - 186, // 107: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 215, // 108: temporal.server.api.historyservice.v1.ResetWorkflowExecutionRequest.reset_request:type_name -> temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest - 216, // 109: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.cancel_request:type_name -> temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest - 186, // 110: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 111: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 112: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.child_clock:type_name -> temporal.server.api.clock.v1.VectorClock - 182, // 113: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.parent_clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 114: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 115: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 116: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 117: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 202, // 118: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.completion_event:type_name -> temporal.api.history.v1.HistoryEvent - 182, // 119: 
temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 186, // 120: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 121: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 182, // 122: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 217, // 123: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest - 218, // 124: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.execution_config:type_name -> temporal.api.workflow.v1.WorkflowExecutionConfig - 219, // 125: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_execution_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionInfo - 220, // 126: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_activities:type_name -> temporal.api.workflow.v1.PendingActivityInfo - 221, // 127: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_children:type_name -> temporal.api.workflow.v1.PendingChildExecutionInfo - 222, // 128: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_workflow_task:type_name -> temporal.api.workflow.v1.PendingWorkflowTaskInfo - 223, // 129: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.callbacks:type_name -> temporal.api.workflow.v1.CallbackInfo - 224, // 130: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_nexus_operations:type_name -> temporal.api.workflow.v1.PendingNexusOperationInfo - 225, // 131: 
temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_extended_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionExtendedInfo - 186, // 132: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 187, // 133: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.version_history_items:type_name -> temporal.server.api.history.v1.VersionHistoryItem - 226, // 134: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.events:type_name -> temporal.api.common.v1.DataBlob - 226, // 135: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.new_run_events:type_name -> temporal.api.common.v1.DataBlob - 227, // 136: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo - 228, // 137: temporal.server.api.historyservice.v1.ReplicateWorkflowStateRequest.workflow_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 171, // 138: temporal.server.api.historyservice.v1.SyncShardStatusRequest.status_time:type_name -> google.protobuf.Timestamp - 171, // 139: temporal.server.api.historyservice.v1.SyncActivityRequest.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 140: temporal.server.api.historyservice.v1.SyncActivityRequest.started_time:type_name -> google.protobuf.Timestamp - 171, // 141: temporal.server.api.historyservice.v1.SyncActivityRequest.last_heartbeat_time:type_name -> google.protobuf.Timestamp - 174, // 142: temporal.server.api.historyservice.v1.SyncActivityRequest.details:type_name -> temporal.api.common.v1.Payloads - 173, // 143: temporal.server.api.historyservice.v1.SyncActivityRequest.last_failure:type_name -> temporal.api.failure.v1.Failure - 229, // 144: temporal.server.api.historyservice.v1.SyncActivityRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 227, // 
145: temporal.server.api.historyservice.v1.SyncActivityRequest.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo - 171, // 146: temporal.server.api.historyservice.v1.SyncActivityRequest.first_scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 147: temporal.server.api.historyservice.v1.SyncActivityRequest.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 175, // 148: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_initial_interval:type_name -> google.protobuf.Duration - 175, // 149: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_maximum_interval:type_name -> google.protobuf.Duration + 208, // 88: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse.activity_tasks:type_name -> temporal.api.workflowservice.v1.PollActivityTaskQueueResponse + 185, // 89: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse.new_workflow_task:type_name -> temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse + 209, // 90: temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedRequest.failed_request:type_name -> temporal.api.workflowservice.v1.RespondWorkflowTaskFailedRequest + 188, // 91: temporal.server.api.historyservice.v1.IsWorkflowTaskValidRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 184, // 92: temporal.server.api.historyservice.v1.IsWorkflowTaskValidRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 210, // 93: temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatRequest.heartbeat_request:type_name -> temporal.api.workflowservice.v1.RecordActivityTaskHeartbeatRequest + 211, // 94: temporal.server.api.historyservice.v1.RespondActivityTaskCompletedRequest.complete_request:type_name -> temporal.api.workflowservice.v1.RespondActivityTaskCompletedRequest + 212, // 95: temporal.server.api.historyservice.v1.RespondActivityTaskFailedRequest.failed_request:type_name -> 
temporal.api.workflowservice.v1.RespondActivityTaskFailedRequest + 213, // 96: temporal.server.api.historyservice.v1.RespondActivityTaskCanceledRequest.cancel_request:type_name -> temporal.api.workflowservice.v1.RespondActivityTaskCanceledRequest + 188, // 97: temporal.server.api.historyservice.v1.IsActivityTaskValidRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 184, // 98: temporal.server.api.historyservice.v1.IsActivityTaskValidRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 214, // 99: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.signal_request:type_name -> temporal.api.workflowservice.v1.SignalWorkflowExecutionRequest + 188, // 100: temporal.server.api.historyservice.v1.SignalWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 187, // 101: temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse.link:type_name -> temporal.api.common.v1.Link + 215, // 102: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionRequest.signal_with_start_request:type_name -> temporal.api.workflowservice.v1.SignalWithStartWorkflowExecutionRequest + 187, // 103: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse.signal_link:type_name -> temporal.api.common.v1.Link + 188, // 104: temporal.server.api.historyservice.v1.RemoveSignalMutableStateRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 216, // 105: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.terminate_request:type_name -> temporal.api.workflowservice.v1.TerminateWorkflowExecutionRequest + 188, // 106: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 107: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionRequest.workflow_execution:type_name -> 
temporal.api.common.v1.WorkflowExecution + 217, // 108: temporal.server.api.historyservice.v1.ResetWorkflowExecutionRequest.reset_request:type_name -> temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest + 218, // 109: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.cancel_request:type_name -> temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest + 188, // 110: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionRequest.external_workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 111: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 184, // 112: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.child_clock:type_name -> temporal.server.api.clock.v1.VectorClock + 184, // 113: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskRequest.parent_clock:type_name -> temporal.server.api.clock.v1.VectorClock + 188, // 114: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 184, // 115: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 188, // 116: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 117: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 204, // 118: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.completion_event:type_name -> temporal.api.history.v1.HistoryEvent + 184, // 119: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 188, // 120: 
temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.parent_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 121: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.child_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 184, // 122: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedRequest.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 219, // 123: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest + 220, // 124: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.execution_config:type_name -> temporal.api.workflow.v1.WorkflowExecutionConfig + 221, // 125: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_execution_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionInfo + 222, // 126: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_activities:type_name -> temporal.api.workflow.v1.PendingActivityInfo + 223, // 127: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_children:type_name -> temporal.api.workflow.v1.PendingChildExecutionInfo + 224, // 128: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_workflow_task:type_name -> temporal.api.workflow.v1.PendingWorkflowTaskInfo + 225, // 129: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.callbacks:type_name -> temporal.api.workflow.v1.CallbackInfo + 226, // 130: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.pending_nexus_operations:type_name -> temporal.api.workflow.v1.PendingNexusOperationInfo + 227, // 131: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse.workflow_extended_info:type_name -> temporal.api.workflow.v1.WorkflowExecutionExtendedInfo + 
188, // 132: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 189, // 133: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.version_history_items:type_name -> temporal.server.api.history.v1.VersionHistoryItem + 228, // 134: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.events:type_name -> temporal.api.common.v1.DataBlob + 228, // 135: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.new_run_events:type_name -> temporal.api.common.v1.DataBlob + 229, // 136: temporal.server.api.historyservice.v1.ReplicateEventsV2Request.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo + 230, // 137: temporal.server.api.historyservice.v1.ReplicateWorkflowStateRequest.workflow_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState + 173, // 138: temporal.server.api.historyservice.v1.SyncShardStatusRequest.status_time:type_name -> google.protobuf.Timestamp + 173, // 139: temporal.server.api.historyservice.v1.SyncActivityRequest.scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 140: temporal.server.api.historyservice.v1.SyncActivityRequest.started_time:type_name -> google.protobuf.Timestamp + 173, // 141: temporal.server.api.historyservice.v1.SyncActivityRequest.last_heartbeat_time:type_name -> google.protobuf.Timestamp + 176, // 142: temporal.server.api.historyservice.v1.SyncActivityRequest.details:type_name -> temporal.api.common.v1.Payloads + 175, // 143: temporal.server.api.historyservice.v1.SyncActivityRequest.last_failure:type_name -> temporal.api.failure.v1.Failure + 231, // 144: temporal.server.api.historyservice.v1.SyncActivityRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 229, // 145: temporal.server.api.historyservice.v1.SyncActivityRequest.base_execution_info:type_name -> temporal.server.api.workflow.v1.BaseExecutionInfo + 173, // 146: 
temporal.server.api.historyservice.v1.SyncActivityRequest.first_scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 147: temporal.server.api.historyservice.v1.SyncActivityRequest.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 177, // 148: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_initial_interval:type_name -> google.protobuf.Duration + 177, // 149: temporal.server.api.historyservice.v1.SyncActivityRequest.retry_maximum_interval:type_name -> google.protobuf.Duration 64, // 150: temporal.server.api.historyservice.v1.SyncActivitiesRequest.activities_info:type_name -> temporal.server.api.historyservice.v1.ActivitySyncInfo - 171, // 151: temporal.server.api.historyservice.v1.ActivitySyncInfo.scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 152: temporal.server.api.historyservice.v1.ActivitySyncInfo.started_time:type_name -> google.protobuf.Timestamp - 171, // 153: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_heartbeat_time:type_name -> google.protobuf.Timestamp - 174, // 154: temporal.server.api.historyservice.v1.ActivitySyncInfo.details:type_name -> temporal.api.common.v1.Payloads - 173, // 155: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_failure:type_name -> temporal.api.failure.v1.Failure - 229, // 156: temporal.server.api.historyservice.v1.ActivitySyncInfo.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 171, // 157: temporal.server.api.historyservice.v1.ActivitySyncInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp - 171, // 158: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 175, // 159: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_initial_interval:type_name -> google.protobuf.Duration - 175, // 160: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_maximum_interval:type_name -> google.protobuf.Duration - 186, // 161: 
temporal.server.api.historyservice.v1.DescribeMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 228, // 162: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.cache_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 228, // 163: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.database_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState - 186, // 164: temporal.server.api.historyservice.v1.DescribeHistoryHostRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution - 230, // 165: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse.namespace_cache:type_name -> temporal.server.api.namespace.v1.NamespaceCacheInfo - 231, // 166: temporal.server.api.historyservice.v1.GetShardResponse.shard_info:type_name -> temporal.server.api.persistence.v1.ShardInfo - 171, // 167: temporal.server.api.historyservice.v1.RemoveTaskRequest.visibility_time:type_name -> google.protobuf.Timestamp - 232, // 168: temporal.server.api.historyservice.v1.GetReplicationMessagesRequest.tokens:type_name -> temporal.server.api.replication.v1.ReplicationToken - 164, // 169: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.shard_messages:type_name -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry - 233, // 170: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest.task_infos:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo - 234, // 171: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask - 235, // 172: temporal.server.api.historyservice.v1.QueryWorkflowRequest.request:type_name -> temporal.api.workflowservice.v1.QueryWorkflowRequest - 236, // 173: temporal.server.api.historyservice.v1.QueryWorkflowResponse.response:type_name -> 
temporal.api.workflowservice.v1.QueryWorkflowResponse - 237, // 174: temporal.server.api.historyservice.v1.ReapplyEventsRequest.request:type_name -> temporal.server.api.adminservice.v1.ReapplyEventsRequest - 238, // 175: temporal.server.api.historyservice.v1.GetDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 238, // 176: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 234, // 177: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask - 233, // 178: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks_info:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo - 238, // 179: temporal.server.api.historyservice.v1.PurgeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 238, // 180: temporal.server.api.historyservice.v1.MergeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType - 239, // 181: temporal.server.api.historyservice.v1.RefreshWorkflowTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest - 186, // 182: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 173, // 151: temporal.server.api.historyservice.v1.ActivitySyncInfo.scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 152: temporal.server.api.historyservice.v1.ActivitySyncInfo.started_time:type_name -> google.protobuf.Timestamp + 173, // 153: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_heartbeat_time:type_name -> google.protobuf.Timestamp + 176, // 154: temporal.server.api.historyservice.v1.ActivitySyncInfo.details:type_name -> temporal.api.common.v1.Payloads + 175, // 155: 
temporal.server.api.historyservice.v1.ActivitySyncInfo.last_failure:type_name -> temporal.api.failure.v1.Failure + 231, // 156: temporal.server.api.historyservice.v1.ActivitySyncInfo.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 173, // 157: temporal.server.api.historyservice.v1.ActivitySyncInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp + 173, // 158: temporal.server.api.historyservice.v1.ActivitySyncInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 177, // 159: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_initial_interval:type_name -> google.protobuf.Duration + 177, // 160: temporal.server.api.historyservice.v1.ActivitySyncInfo.retry_maximum_interval:type_name -> google.protobuf.Duration + 188, // 161: temporal.server.api.historyservice.v1.DescribeMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 230, // 162: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.cache_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState + 230, // 163: temporal.server.api.historyservice.v1.DescribeMutableStateResponse.database_mutable_state:type_name -> temporal.server.api.persistence.v1.WorkflowMutableState + 188, // 164: temporal.server.api.historyservice.v1.DescribeHistoryHostRequest.workflow_execution:type_name -> temporal.api.common.v1.WorkflowExecution + 232, // 165: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse.namespace_cache:type_name -> temporal.server.api.namespace.v1.NamespaceCacheInfo + 233, // 166: temporal.server.api.historyservice.v1.GetShardResponse.shard_info:type_name -> temporal.server.api.persistence.v1.ShardInfo + 173, // 167: temporal.server.api.historyservice.v1.RemoveTaskRequest.visibility_time:type_name -> google.protobuf.Timestamp + 234, // 168: temporal.server.api.historyservice.v1.GetReplicationMessagesRequest.tokens:type_name -> 
temporal.server.api.replication.v1.ReplicationToken + 166, // 169: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.shard_messages:type_name -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry + 235, // 170: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesRequest.task_infos:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo + 236, // 171: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask + 237, // 172: temporal.server.api.historyservice.v1.QueryWorkflowRequest.request:type_name -> temporal.api.workflowservice.v1.QueryWorkflowRequest + 238, // 173: temporal.server.api.historyservice.v1.QueryWorkflowResponse.response:type_name -> temporal.api.workflowservice.v1.QueryWorkflowResponse + 239, // 174: temporal.server.api.historyservice.v1.ReapplyEventsRequest.request:type_name -> temporal.server.api.adminservice.v1.ReapplyEventsRequest + 240, // 175: temporal.server.api.historyservice.v1.GetDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 240, // 176: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 236, // 177: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks:type_name -> temporal.server.api.replication.v1.ReplicationTask + 235, // 178: temporal.server.api.historyservice.v1.GetDLQMessagesResponse.replication_tasks_info:type_name -> temporal.server.api.replication.v1.ReplicationTaskInfo + 240, // 179: temporal.server.api.historyservice.v1.PurgeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 240, // 180: temporal.server.api.historyservice.v1.MergeDLQMessagesRequest.type:type_name -> temporal.server.api.enums.v1.DeadLetterQueueType + 241, // 181: 
temporal.server.api.historyservice.v1.RefreshWorkflowTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.RefreshWorkflowTasksRequest + 188, // 182: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution 96, // 183: temporal.server.api.historyservice.v1.GetReplicationStatusResponse.shards:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus - 171, // 184: temporal.server.api.historyservice.v1.ShardReplicationStatus.shard_local_time:type_name -> google.protobuf.Timestamp - 165, // 185: temporal.server.api.historyservice.v1.ShardReplicationStatus.remote_clusters:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry - 166, // 186: temporal.server.api.historyservice.v1.ShardReplicationStatus.handover_namespaces:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry - 171, // 187: temporal.server.api.historyservice.v1.ShardReplicationStatus.max_replication_task_visibility_time:type_name -> google.protobuf.Timestamp - 171, // 188: temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster.acked_task_visibility_time:type_name -> google.protobuf.Timestamp - 186, // 189: temporal.server.api.historyservice.v1.RebuildMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 186, // 190: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 226, // 191: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.history_batches:type_name -> temporal.api.common.v1.DataBlob - 229, // 192: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory - 186, // 193: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.execution:type_name -> 
temporal.api.common.v1.WorkflowExecution - 171, // 194: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_start_time:type_name -> google.protobuf.Timestamp - 171, // 195: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_close_time:type_name -> google.protobuf.Timestamp - 240, // 196: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest - 241, // 197: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse.response:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse - 242, // 198: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesRequest.sync_replication_state:type_name -> temporal.server.api.replication.v1.SyncReplicationState - 243, // 199: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse.messages:type_name -> temporal.server.api.replication.v1.WorkflowReplicationMessages - 244, // 200: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest.request:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest - 245, // 201: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse.response:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse - 246, // 202: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest - 247, // 203: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse - 200, // 204: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.history:type_name -> temporal.api.history.v1.History - 247, // 205: 
temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponseWithRaw.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse - 248, // 206: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest - 249, // 207: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse - 250, // 208: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request - 251, // 209: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response - 252, // 210: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest - 253, // 211: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse - 254, // 212: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest.request:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest - 255, // 213: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse.response:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse - 256, // 214: temporal.server.api.historyservice.v1.GetDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey - 257, // 215: temporal.server.api.historyservice.v1.GetDLQTasksResponse.dlq_tasks:type_name -> temporal.server.api.common.v1.HistoryDLQTask - 256, // 216: 
temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey - 258, // 217: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.inclusive_max_task_metadata:type_name -> temporal.server.api.common.v1.HistoryDLQTaskMetadata - 167, // 218: temporal.server.api.historyservice.v1.ListQueuesResponse.queues:type_name -> temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo - 168, // 219: temporal.server.api.historyservice.v1.AddTasksRequest.tasks:type_name -> temporal.server.api.historyservice.v1.AddTasksRequest.Task - 259, // 220: temporal.server.api.historyservice.v1.ListTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksRequest - 260, // 221: temporal.server.api.historyservice.v1.ListTasksResponse.response:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksResponse - 261, // 222: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion - 262, // 223: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.success:type_name -> temporal.api.common.v1.Payload - 173, // 224: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.failure:type_name -> temporal.api.failure.v1.Failure - 171, // 225: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.close_time:type_name -> google.protobuf.Timestamp - 185, // 226: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.links:type_name -> temporal.api.common.v1.Link - 171, // 227: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.start_time:type_name -> google.protobuf.Timestamp - 261, // 228: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion - 262, // 229: 
temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.success:type_name -> temporal.api.common.v1.Payload - 263, // 230: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.failure:type_name -> temporal.api.nexus.v1.Failure - 171, // 231: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.start_time:type_name -> google.protobuf.Timestamp - 185, // 232: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.links:type_name -> temporal.api.common.v1.Link - 264, // 233: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef - 265, // 234: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.state:type_name -> temporal.server.api.enums.v1.HealthState - 266, // 235: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.checks:type_name -> temporal.server.api.health.v1.HealthCheck - 186, // 236: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution - 188, // 237: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 192, // 238: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories - 267, // 239: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse.versioned_transition_artifact:type_name -> temporal.server.api.replication.v1.VersionedTransitionArtifact - 268, // 240: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateActivityOptionsRequest - 269, // 241: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse.activity_options:type_name -> temporal.api.activity.v1.ActivityOptions - 270, // 242: 
temporal.server.api.historyservice.v1.PauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PauseActivityRequest - 271, // 243: temporal.server.api.historyservice.v1.UnpauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UnpauseActivityRequest - 272, // 244: temporal.server.api.historyservice.v1.ResetActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.ResetActivityRequest - 273, // 245: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest - 274, // 246: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse.workflow_execution_options:type_name -> temporal.api.workflow.v1.WorkflowExecutionOptions - 275, // 247: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest.pause_request:type_name -> temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest - 276, // 248: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest.unpause_request:type_name -> temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest - 277, // 249: temporal.server.api.historyservice.v1.StartNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.StartOperationRequest - 278, // 250: temporal.server.api.historyservice.v1.StartNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.StartOperationResponse - 279, // 251: temporal.server.api.historyservice.v1.CancelNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.CancelOperationRequest - 280, // 252: temporal.server.api.historyservice.v1.CancelNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.CancelOperationResponse - 1, // 253: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest - 105, // 254: 
temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest - 2, // 255: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse - 106, // 256: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse - 281, // 257: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery - 281, // 258: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery - 282, // 259: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry.value:type_name -> temporal.server.api.replication.v1.ReplicationMessages - 98, // 260: temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry.value:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster - 97, // 261: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry.value:type_name -> temporal.server.api.historyservice.v1.HandoverNamespaceInfo - 226, // 262: temporal.server.api.historyservice.v1.AddTasksRequest.Task.blob:type_name -> temporal.api.common.v1.DataBlob - 283, // 263: temporal.server.api.historyservice.v1.routing:extendee -> google.protobuf.MessageOptions - 0, // 264: temporal.server.api.historyservice.v1.routing:type_name -> temporal.server.api.historyservice.v1.RoutingOptions - 265, // [265:265] is the sub-list for method output_type - 265, // [265:265] is the sub-list for method input_type - 264, // [264:265] is the sub-list for extension type_name - 263, // [263:264] is the sub-list for extension 
extendee - 0, // [0:263] is the sub-list for field type_name + 173, // 184: temporal.server.api.historyservice.v1.ShardReplicationStatus.shard_local_time:type_name -> google.protobuf.Timestamp + 167, // 185: temporal.server.api.historyservice.v1.ShardReplicationStatus.remote_clusters:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry + 168, // 186: temporal.server.api.historyservice.v1.ShardReplicationStatus.handover_namespaces:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry + 173, // 187: temporal.server.api.historyservice.v1.ShardReplicationStatus.max_replication_task_visibility_time:type_name -> google.protobuf.Timestamp + 173, // 188: temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster.acked_task_visibility_time:type_name -> google.protobuf.Timestamp + 188, // 189: temporal.server.api.historyservice.v1.RebuildMutableStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 188, // 190: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 228, // 191: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.history_batches:type_name -> temporal.api.common.v1.DataBlob + 231, // 192: temporal.server.api.historyservice.v1.ImportWorkflowExecutionRequest.version_history:type_name -> temporal.server.api.history.v1.VersionHistory + 188, // 193: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 173, // 194: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_start_time:type_name -> google.protobuf.Timestamp + 173, // 195: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordRequest.workflow_close_time:type_name -> google.protobuf.Timestamp + 242, // 196: 
temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest.request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest + 243, // 197: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse.response:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse + 244, // 198: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesRequest.sync_replication_state:type_name -> temporal.server.api.replication.v1.SyncReplicationState + 245, // 199: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse.messages:type_name -> temporal.server.api.replication.v1.WorkflowReplicationMessages + 246, // 200: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateRequest.request:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest + 247, // 201: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse.response:type_name -> temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse + 248, // 202: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest + 249, // 203: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse + 202, // 204: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse.history:type_name -> temporal.api.history.v1.History + 249, // 205: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponseWithRaw.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse + 250, // 206: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest.request:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseRequest + 251, // 207: 
temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse.response:type_name -> temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryReverseResponse + 252, // 208: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Request + 253, // 209: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryV2Response + 254, // 210: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest.request:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryRequest + 255, // 211: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse.response:type_name -> temporal.server.api.adminservice.v1.GetWorkflowExecutionRawHistoryResponse + 256, // 212: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest.request:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionRequest + 257, // 213: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse.response:type_name -> temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse + 188, // 214: temporal.server.api.historyservice.v1.DeleteExecutionRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 258, // 215: temporal.server.api.historyservice.v1.GetDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey + 259, // 216: temporal.server.api.historyservice.v1.GetDLQTasksResponse.dlq_tasks:type_name -> temporal.server.api.common.v1.HistoryDLQTask + 258, // 217: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.dlq_key:type_name -> temporal.server.api.common.v1.HistoryDLQKey + 260, // 218: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest.inclusive_max_task_metadata:type_name -> 
temporal.server.api.common.v1.HistoryDLQTaskMetadata + 169, // 219: temporal.server.api.historyservice.v1.ListQueuesResponse.queues:type_name -> temporal.server.api.historyservice.v1.ListQueuesResponse.QueueInfo + 170, // 220: temporal.server.api.historyservice.v1.AddTasksRequest.tasks:type_name -> temporal.server.api.historyservice.v1.AddTasksRequest.Task + 261, // 221: temporal.server.api.historyservice.v1.ListTasksRequest.request:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksRequest + 262, // 222: temporal.server.api.historyservice.v1.ListTasksResponse.response:type_name -> temporal.server.api.adminservice.v1.ListHistoryTasksResponse + 263, // 223: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion + 264, // 224: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.success:type_name -> temporal.api.common.v1.Payload + 175, // 225: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.failure:type_name -> temporal.api.failure.v1.Failure + 173, // 226: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.close_time:type_name -> google.protobuf.Timestamp + 187, // 227: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.links:type_name -> temporal.api.common.v1.Link + 173, // 228: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest.start_time:type_name -> google.protobuf.Timestamp + 263, // 229: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.completion:type_name -> temporal.server.api.token.v1.NexusOperationCompletion + 264, // 230: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.success:type_name -> temporal.api.common.v1.Payload + 265, // 231: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.failure:type_name -> temporal.api.nexus.v1.Failure + 173, // 232: 
temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.start_time:type_name -> google.protobuf.Timestamp + 187, // 233: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest.links:type_name -> temporal.api.common.v1.Link + 266, // 234: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef + 267, // 235: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.state:type_name -> temporal.server.api.enums.v1.HealthState + 268, // 236: temporal.server.api.historyservice.v1.DeepHealthCheckResponse.checks:type_name -> temporal.server.api.health.v1.HealthCheck + 188, // 237: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.execution:type_name -> temporal.api.common.v1.WorkflowExecution + 190, // 238: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 194, // 239: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest.version_histories:type_name -> temporal.server.api.history.v1.VersionHistories + 269, // 240: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse.versioned_transition_artifact:type_name -> temporal.server.api.replication.v1.VersionedTransitionArtifact + 270, // 241: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateActivityOptionsRequest + 271, // 242: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse.activity_options:type_name -> temporal.api.activity.v1.ActivityOptions + 272, // 243: temporal.server.api.historyservice.v1.PauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.PauseActivityRequest + 273, // 244: temporal.server.api.historyservice.v1.UnpauseActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.UnpauseActivityRequest + 274, // 245: 
temporal.server.api.historyservice.v1.ResetActivityRequest.frontend_request:type_name -> temporal.api.workflowservice.v1.ResetActivityRequest + 275, // 246: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsRequest.update_request:type_name -> temporal.api.workflowservice.v1.UpdateWorkflowExecutionOptionsRequest + 276, // 247: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse.workflow_execution_options:type_name -> temporal.api.workflow.v1.WorkflowExecutionOptions + 277, // 248: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest.pause_request:type_name -> temporal.api.workflowservice.v1.PauseWorkflowExecutionRequest + 278, // 249: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest.unpause_request:type_name -> temporal.api.workflowservice.v1.UnpauseWorkflowExecutionRequest + 279, // 250: temporal.server.api.historyservice.v1.StartNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.StartOperationRequest + 280, // 251: temporal.server.api.historyservice.v1.StartNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.StartOperationResponse + 281, // 252: temporal.server.api.historyservice.v1.CancelNexusOperationRequest.request:type_name -> temporal.api.nexus.v1.CancelOperationRequest + 282, // 253: temporal.server.api.historyservice.v1.CancelNexusOperationResponse.response:type_name -> temporal.api.nexus.v1.CancelOperationResponse + 1, // 254: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.start_workflow:type_name -> temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest + 105, // 255: temporal.server.api.historyservice.v1.ExecuteMultiOperationRequest.Operation.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionRequest + 2, // 256: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.start_workflow:type_name -> 
temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse + 106, // 257: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse.Response.update_workflow:type_name -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse + 283, // 258: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery + 283, // 259: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponseWithRawHistory.QueriesEntry.value:type_name -> temporal.api.query.v1.WorkflowQuery + 284, // 260: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse.ShardMessagesEntry.value:type_name -> temporal.server.api.replication.v1.ReplicationMessages + 98, // 261: temporal.server.api.historyservice.v1.ShardReplicationStatus.RemoteClustersEntry.value:type_name -> temporal.server.api.historyservice.v1.ShardReplicationStatusPerCluster + 97, // 262: temporal.server.api.historyservice.v1.ShardReplicationStatus.HandoverNamespacesEntry.value:type_name -> temporal.server.api.historyservice.v1.HandoverNamespaceInfo + 228, // 263: temporal.server.api.historyservice.v1.AddTasksRequest.Task.blob:type_name -> temporal.api.common.v1.DataBlob + 285, // 264: temporal.server.api.historyservice.v1.routing:extendee -> google.protobuf.MessageOptions + 0, // 265: temporal.server.api.historyservice.v1.routing:type_name -> temporal.server.api.historyservice.v1.RoutingOptions + 266, // [266:266] is the sub-list for method output_type + 266, // [266:266] is the sub-list for method input_type + 265, // [265:266] is the sub-list for extension type_name + 264, // [264:265] is the sub-list for extension extendee + 0, // [0:264] is the sub-list for field type_name } func init() { file_temporal_server_api_historyservice_v1_request_response_proto_init() } @@ -11808,19 +11931,19 @@ func file_temporal_server_api_historyservice_v1_request_response_proto_init() { 
file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[108].OneofWrappers = []any{ (*StreamWorkflowReplicationMessagesResponse_Messages)(nil), } - file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[132].OneofWrappers = []any{ + file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134].OneofWrappers = []any{ (*CompleteNexusOperationChasmRequest_Success)(nil), (*CompleteNexusOperationChasmRequest_Failure)(nil), } - file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[134].OneofWrappers = []any{ + file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[136].OneofWrappers = []any{ (*CompleteNexusOperationRequest_Success)(nil), (*CompleteNexusOperationRequest_Failure)(nil), } - file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[160].OneofWrappers = []any{ + file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[162].OneofWrappers = []any{ (*ExecuteMultiOperationRequest_Operation_StartWorkflow)(nil), (*ExecuteMultiOperationRequest_Operation_UpdateWorkflow)(nil), } - file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[161].OneofWrappers = []any{ + file_temporal_server_api_historyservice_v1_request_response_proto_msgTypes[163].OneofWrappers = []any{ (*ExecuteMultiOperationResponse_Response_StartWorkflow)(nil), (*ExecuteMultiOperationResponse_Response_UpdateWorkflow)(nil), } @@ -11830,7 +11953,7 @@ func file_temporal_server_api_historyservice_v1_request_response_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_api_historyservice_v1_request_response_proto_rawDesc), len(file_temporal_server_api_historyservice_v1_request_response_proto_rawDesc)), NumEnums: 0, - NumMessages: 169, + NumMessages: 171, NumExtensions: 1, NumServices: 0, }, diff --git a/api/historyservice/v1/service.pb.go b/api/historyservice/v1/service.pb.go index 
634ddb51a15..ece13b2ef31 100644 --- a/api/historyservice/v1/service.pb.go +++ b/api/historyservice/v1/service.pb.go @@ -26,7 +26,7 @@ var File_temporal_server_api_historyservice_v1_service_proto protoreflect.FileDe const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\n" + - "3temporal/server/api/historyservice/v1/service.proto\x12%temporal.server.api.historyservice.v1\x1a0temporal/server/api/common/v1/api_category.proto\x1a.temporal.server.api.historyservice.v1.GetMutableStateResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\x9b\x01\n" + @@ -87,7 +87,8 @@ const file_temporal_server_api_historyservice_v1_service_proto_rawDesc = "" + "\"GetWorkflowExecutionHistoryReverse\x12P.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseRequest\x1aQ.temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\xcb\x01\n" + " GetWorkflowExecutionRawHistoryV2\x12N.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request\x1aO.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response\"\x06\x8a\xb5\x18\x02\b\x01\x12\xc5\x01\n" + "\x1eGetWorkflowExecutionRawHistory\x12L.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest\x1aM.temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\xbf\x01\n" + - "\x1cForceDeleteWorkflowExecution\x12J.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest\x1aK.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\x8c\x01\n" + + "\x1cForceDeleteWorkflowExecution\x12J.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest\x1aK.temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\x98\x01\n" + + 
"\x0fDeleteExecution\x12=.temporal.server.api.historyservice.v1.DeleteExecutionRequest\x1a>.temporal.server.api.historyservice.v1.DeleteExecutionResponse\"\x06\x8a\xb5\x18\x02\b\x01\x12\x8c\x01\n" + "\vGetDLQTasks\x129.temporal.server.api.historyservice.v1.GetDLQTasksRequest\x1a:.temporal.server.api.historyservice.v1.GetDLQTasksResponse\"\x06\x8a\xb5\x18\x02\b\x03\x12\x95\x01\n" + "\x0eDeleteDLQTasks\x12<.temporal.server.api.historyservice.v1.DeleteDLQTasksRequest\x1a=.temporal.server.api.historyservice.v1.DeleteDLQTasksResponse\"\x06\x8a\xb5\x18\x02\b\x03\x12\x89\x01\n" + "\n" + @@ -167,100 +168,102 @@ var file_temporal_server_api_historyservice_v1_service_proto_goTypes = []any{ (*GetWorkflowExecutionRawHistoryV2Request)(nil), // 55: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request (*GetWorkflowExecutionRawHistoryRequest)(nil), // 56: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest (*ForceDeleteWorkflowExecutionRequest)(nil), // 57: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest - (*GetDLQTasksRequest)(nil), // 58: temporal.server.api.historyservice.v1.GetDLQTasksRequest - (*DeleteDLQTasksRequest)(nil), // 59: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest - (*ListQueuesRequest)(nil), // 60: temporal.server.api.historyservice.v1.ListQueuesRequest - (*AddTasksRequest)(nil), // 61: temporal.server.api.historyservice.v1.AddTasksRequest - (*ListTasksRequest)(nil), // 62: temporal.server.api.historyservice.v1.ListTasksRequest - (*CompleteNexusOperationRequest)(nil), // 63: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest - (*CompleteNexusOperationChasmRequest)(nil), // 64: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest - (*InvokeStateMachineMethodRequest)(nil), // 65: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest - (*DeepHealthCheckRequest)(nil), // 66: 
temporal.server.api.historyservice.v1.DeepHealthCheckRequest - (*SyncWorkflowStateRequest)(nil), // 67: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest - (*UpdateActivityOptionsRequest)(nil), // 68: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest - (*PauseActivityRequest)(nil), // 69: temporal.server.api.historyservice.v1.PauseActivityRequest - (*UnpauseActivityRequest)(nil), // 70: temporal.server.api.historyservice.v1.UnpauseActivityRequest - (*ResetActivityRequest)(nil), // 71: temporal.server.api.historyservice.v1.ResetActivityRequest - (*PauseWorkflowExecutionRequest)(nil), // 72: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest - (*UnpauseWorkflowExecutionRequest)(nil), // 73: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest - (*StartNexusOperationRequest)(nil), // 74: temporal.server.api.historyservice.v1.StartNexusOperationRequest - (*CancelNexusOperationRequest)(nil), // 75: temporal.server.api.historyservice.v1.CancelNexusOperationRequest - (*StartWorkflowExecutionResponse)(nil), // 76: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse - (*GetMutableStateResponse)(nil), // 77: temporal.server.api.historyservice.v1.GetMutableStateResponse - (*PollMutableStateResponse)(nil), // 78: temporal.server.api.historyservice.v1.PollMutableStateResponse - (*ResetStickyTaskQueueResponse)(nil), // 79: temporal.server.api.historyservice.v1.ResetStickyTaskQueueResponse - (*RecordWorkflowTaskStartedResponse)(nil), // 80: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse - (*RecordActivityTaskStartedResponse)(nil), // 81: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse - (*RespondWorkflowTaskCompletedResponse)(nil), // 82: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse - (*RespondWorkflowTaskFailedResponse)(nil), // 83: temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedResponse - 
(*IsWorkflowTaskValidResponse)(nil), // 84: temporal.server.api.historyservice.v1.IsWorkflowTaskValidResponse - (*RecordActivityTaskHeartbeatResponse)(nil), // 85: temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatResponse - (*RespondActivityTaskCompletedResponse)(nil), // 86: temporal.server.api.historyservice.v1.RespondActivityTaskCompletedResponse - (*RespondActivityTaskFailedResponse)(nil), // 87: temporal.server.api.historyservice.v1.RespondActivityTaskFailedResponse - (*RespondActivityTaskCanceledResponse)(nil), // 88: temporal.server.api.historyservice.v1.RespondActivityTaskCanceledResponse - (*IsActivityTaskValidResponse)(nil), // 89: temporal.server.api.historyservice.v1.IsActivityTaskValidResponse - (*SignalWorkflowExecutionResponse)(nil), // 90: temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse - (*SignalWithStartWorkflowExecutionResponse)(nil), // 91: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse - (*ExecuteMultiOperationResponse)(nil), // 92: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse - (*RemoveSignalMutableStateResponse)(nil), // 93: temporal.server.api.historyservice.v1.RemoveSignalMutableStateResponse - (*TerminateWorkflowExecutionResponse)(nil), // 94: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionResponse - (*DeleteWorkflowExecutionResponse)(nil), // 95: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionResponse - (*ResetWorkflowExecutionResponse)(nil), // 96: temporal.server.api.historyservice.v1.ResetWorkflowExecutionResponse - (*UpdateWorkflowExecutionOptionsResponse)(nil), // 97: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse - (*RequestCancelWorkflowExecutionResponse)(nil), // 98: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionResponse - (*ScheduleWorkflowTaskResponse)(nil), // 99: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskResponse - 
(*VerifyFirstWorkflowTaskScheduledResponse)(nil), // 100: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledResponse - (*RecordChildExecutionCompletedResponse)(nil), // 101: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedResponse - (*VerifyChildExecutionCompletionRecordedResponse)(nil), // 102: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedResponse - (*DescribeWorkflowExecutionResponse)(nil), // 103: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse - (*ReplicateEventsV2Response)(nil), // 104: temporal.server.api.historyservice.v1.ReplicateEventsV2Response - (*ReplicateWorkflowStateResponse)(nil), // 105: temporal.server.api.historyservice.v1.ReplicateWorkflowStateResponse - (*SyncShardStatusResponse)(nil), // 106: temporal.server.api.historyservice.v1.SyncShardStatusResponse - (*SyncActivityResponse)(nil), // 107: temporal.server.api.historyservice.v1.SyncActivityResponse - (*DescribeMutableStateResponse)(nil), // 108: temporal.server.api.historyservice.v1.DescribeMutableStateResponse - (*DescribeHistoryHostResponse)(nil), // 109: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse - (*CloseShardResponse)(nil), // 110: temporal.server.api.historyservice.v1.CloseShardResponse - (*GetShardResponse)(nil), // 111: temporal.server.api.historyservice.v1.GetShardResponse - (*RemoveTaskResponse)(nil), // 112: temporal.server.api.historyservice.v1.RemoveTaskResponse - (*GetReplicationMessagesResponse)(nil), // 113: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse - (*GetDLQReplicationMessagesResponse)(nil), // 114: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse - (*QueryWorkflowResponse)(nil), // 115: temporal.server.api.historyservice.v1.QueryWorkflowResponse - (*ReapplyEventsResponse)(nil), // 116: temporal.server.api.historyservice.v1.ReapplyEventsResponse - (*GetDLQMessagesResponse)(nil), // 117: 
temporal.server.api.historyservice.v1.GetDLQMessagesResponse - (*PurgeDLQMessagesResponse)(nil), // 118: temporal.server.api.historyservice.v1.PurgeDLQMessagesResponse - (*MergeDLQMessagesResponse)(nil), // 119: temporal.server.api.historyservice.v1.MergeDLQMessagesResponse - (*RefreshWorkflowTasksResponse)(nil), // 120: temporal.server.api.historyservice.v1.RefreshWorkflowTasksResponse - (*GenerateLastHistoryReplicationTasksResponse)(nil), // 121: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksResponse - (*GetReplicationStatusResponse)(nil), // 122: temporal.server.api.historyservice.v1.GetReplicationStatusResponse - (*RebuildMutableStateResponse)(nil), // 123: temporal.server.api.historyservice.v1.RebuildMutableStateResponse - (*ImportWorkflowExecutionResponse)(nil), // 124: temporal.server.api.historyservice.v1.ImportWorkflowExecutionResponse - (*DeleteWorkflowVisibilityRecordResponse)(nil), // 125: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse - (*UpdateWorkflowExecutionResponse)(nil), // 126: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse - (*PollWorkflowExecutionUpdateResponse)(nil), // 127: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse - (*StreamWorkflowReplicationMessagesResponse)(nil), // 128: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse - (*GetWorkflowExecutionHistoryResponse)(nil), // 129: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse - (*GetWorkflowExecutionHistoryReverseResponse)(nil), // 130: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse - (*GetWorkflowExecutionRawHistoryV2Response)(nil), // 131: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response - (*GetWorkflowExecutionRawHistoryResponse)(nil), // 132: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse - 
(*ForceDeleteWorkflowExecutionResponse)(nil), // 133: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse - (*GetDLQTasksResponse)(nil), // 134: temporal.server.api.historyservice.v1.GetDLQTasksResponse - (*DeleteDLQTasksResponse)(nil), // 135: temporal.server.api.historyservice.v1.DeleteDLQTasksResponse - (*ListQueuesResponse)(nil), // 136: temporal.server.api.historyservice.v1.ListQueuesResponse - (*AddTasksResponse)(nil), // 137: temporal.server.api.historyservice.v1.AddTasksResponse - (*ListTasksResponse)(nil), // 138: temporal.server.api.historyservice.v1.ListTasksResponse - (*CompleteNexusOperationResponse)(nil), // 139: temporal.server.api.historyservice.v1.CompleteNexusOperationResponse - (*CompleteNexusOperationChasmResponse)(nil), // 140: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse - (*InvokeStateMachineMethodResponse)(nil), // 141: temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse - (*DeepHealthCheckResponse)(nil), // 142: temporal.server.api.historyservice.v1.DeepHealthCheckResponse - (*SyncWorkflowStateResponse)(nil), // 143: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse - (*UpdateActivityOptionsResponse)(nil), // 144: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse - (*PauseActivityResponse)(nil), // 145: temporal.server.api.historyservice.v1.PauseActivityResponse - (*UnpauseActivityResponse)(nil), // 146: temporal.server.api.historyservice.v1.UnpauseActivityResponse - (*ResetActivityResponse)(nil), // 147: temporal.server.api.historyservice.v1.ResetActivityResponse - (*PauseWorkflowExecutionResponse)(nil), // 148: temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse - (*UnpauseWorkflowExecutionResponse)(nil), // 149: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse - (*StartNexusOperationResponse)(nil), // 150: temporal.server.api.historyservice.v1.StartNexusOperationResponse - 
(*CancelNexusOperationResponse)(nil), // 151: temporal.server.api.historyservice.v1.CancelNexusOperationResponse + (*DeleteExecutionRequest)(nil), // 58: temporal.server.api.historyservice.v1.DeleteExecutionRequest + (*GetDLQTasksRequest)(nil), // 59: temporal.server.api.historyservice.v1.GetDLQTasksRequest + (*DeleteDLQTasksRequest)(nil), // 60: temporal.server.api.historyservice.v1.DeleteDLQTasksRequest + (*ListQueuesRequest)(nil), // 61: temporal.server.api.historyservice.v1.ListQueuesRequest + (*AddTasksRequest)(nil), // 62: temporal.server.api.historyservice.v1.AddTasksRequest + (*ListTasksRequest)(nil), // 63: temporal.server.api.historyservice.v1.ListTasksRequest + (*CompleteNexusOperationRequest)(nil), // 64: temporal.server.api.historyservice.v1.CompleteNexusOperationRequest + (*CompleteNexusOperationChasmRequest)(nil), // 65: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest + (*InvokeStateMachineMethodRequest)(nil), // 66: temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest + (*DeepHealthCheckRequest)(nil), // 67: temporal.server.api.historyservice.v1.DeepHealthCheckRequest + (*SyncWorkflowStateRequest)(nil), // 68: temporal.server.api.historyservice.v1.SyncWorkflowStateRequest + (*UpdateActivityOptionsRequest)(nil), // 69: temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest + (*PauseActivityRequest)(nil), // 70: temporal.server.api.historyservice.v1.PauseActivityRequest + (*UnpauseActivityRequest)(nil), // 71: temporal.server.api.historyservice.v1.UnpauseActivityRequest + (*ResetActivityRequest)(nil), // 72: temporal.server.api.historyservice.v1.ResetActivityRequest + (*PauseWorkflowExecutionRequest)(nil), // 73: temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest + (*UnpauseWorkflowExecutionRequest)(nil), // 74: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest + (*StartNexusOperationRequest)(nil), // 75: 
temporal.server.api.historyservice.v1.StartNexusOperationRequest + (*CancelNexusOperationRequest)(nil), // 76: temporal.server.api.historyservice.v1.CancelNexusOperationRequest + (*StartWorkflowExecutionResponse)(nil), // 77: temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse + (*GetMutableStateResponse)(nil), // 78: temporal.server.api.historyservice.v1.GetMutableStateResponse + (*PollMutableStateResponse)(nil), // 79: temporal.server.api.historyservice.v1.PollMutableStateResponse + (*ResetStickyTaskQueueResponse)(nil), // 80: temporal.server.api.historyservice.v1.ResetStickyTaskQueueResponse + (*RecordWorkflowTaskStartedResponse)(nil), // 81: temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse + (*RecordActivityTaskStartedResponse)(nil), // 82: temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse + (*RespondWorkflowTaskCompletedResponse)(nil), // 83: temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse + (*RespondWorkflowTaskFailedResponse)(nil), // 84: temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedResponse + (*IsWorkflowTaskValidResponse)(nil), // 85: temporal.server.api.historyservice.v1.IsWorkflowTaskValidResponse + (*RecordActivityTaskHeartbeatResponse)(nil), // 86: temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatResponse + (*RespondActivityTaskCompletedResponse)(nil), // 87: temporal.server.api.historyservice.v1.RespondActivityTaskCompletedResponse + (*RespondActivityTaskFailedResponse)(nil), // 88: temporal.server.api.historyservice.v1.RespondActivityTaskFailedResponse + (*RespondActivityTaskCanceledResponse)(nil), // 89: temporal.server.api.historyservice.v1.RespondActivityTaskCanceledResponse + (*IsActivityTaskValidResponse)(nil), // 90: temporal.server.api.historyservice.v1.IsActivityTaskValidResponse + (*SignalWorkflowExecutionResponse)(nil), // 91: temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse + 
(*SignalWithStartWorkflowExecutionResponse)(nil), // 92: temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse + (*ExecuteMultiOperationResponse)(nil), // 93: temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse + (*RemoveSignalMutableStateResponse)(nil), // 94: temporal.server.api.historyservice.v1.RemoveSignalMutableStateResponse + (*TerminateWorkflowExecutionResponse)(nil), // 95: temporal.server.api.historyservice.v1.TerminateWorkflowExecutionResponse + (*DeleteWorkflowExecutionResponse)(nil), // 96: temporal.server.api.historyservice.v1.DeleteWorkflowExecutionResponse + (*ResetWorkflowExecutionResponse)(nil), // 97: temporal.server.api.historyservice.v1.ResetWorkflowExecutionResponse + (*UpdateWorkflowExecutionOptionsResponse)(nil), // 98: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse + (*RequestCancelWorkflowExecutionResponse)(nil), // 99: temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionResponse + (*ScheduleWorkflowTaskResponse)(nil), // 100: temporal.server.api.historyservice.v1.ScheduleWorkflowTaskResponse + (*VerifyFirstWorkflowTaskScheduledResponse)(nil), // 101: temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledResponse + (*RecordChildExecutionCompletedResponse)(nil), // 102: temporal.server.api.historyservice.v1.RecordChildExecutionCompletedResponse + (*VerifyChildExecutionCompletionRecordedResponse)(nil), // 103: temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedResponse + (*DescribeWorkflowExecutionResponse)(nil), // 104: temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse + (*ReplicateEventsV2Response)(nil), // 105: temporal.server.api.historyservice.v1.ReplicateEventsV2Response + (*ReplicateWorkflowStateResponse)(nil), // 106: temporal.server.api.historyservice.v1.ReplicateWorkflowStateResponse + (*SyncShardStatusResponse)(nil), // 107: 
temporal.server.api.historyservice.v1.SyncShardStatusResponse + (*SyncActivityResponse)(nil), // 108: temporal.server.api.historyservice.v1.SyncActivityResponse + (*DescribeMutableStateResponse)(nil), // 109: temporal.server.api.historyservice.v1.DescribeMutableStateResponse + (*DescribeHistoryHostResponse)(nil), // 110: temporal.server.api.historyservice.v1.DescribeHistoryHostResponse + (*CloseShardResponse)(nil), // 111: temporal.server.api.historyservice.v1.CloseShardResponse + (*GetShardResponse)(nil), // 112: temporal.server.api.historyservice.v1.GetShardResponse + (*RemoveTaskResponse)(nil), // 113: temporal.server.api.historyservice.v1.RemoveTaskResponse + (*GetReplicationMessagesResponse)(nil), // 114: temporal.server.api.historyservice.v1.GetReplicationMessagesResponse + (*GetDLQReplicationMessagesResponse)(nil), // 115: temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse + (*QueryWorkflowResponse)(nil), // 116: temporal.server.api.historyservice.v1.QueryWorkflowResponse + (*ReapplyEventsResponse)(nil), // 117: temporal.server.api.historyservice.v1.ReapplyEventsResponse + (*GetDLQMessagesResponse)(nil), // 118: temporal.server.api.historyservice.v1.GetDLQMessagesResponse + (*PurgeDLQMessagesResponse)(nil), // 119: temporal.server.api.historyservice.v1.PurgeDLQMessagesResponse + (*MergeDLQMessagesResponse)(nil), // 120: temporal.server.api.historyservice.v1.MergeDLQMessagesResponse + (*RefreshWorkflowTasksResponse)(nil), // 121: temporal.server.api.historyservice.v1.RefreshWorkflowTasksResponse + (*GenerateLastHistoryReplicationTasksResponse)(nil), // 122: temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksResponse + (*GetReplicationStatusResponse)(nil), // 123: temporal.server.api.historyservice.v1.GetReplicationStatusResponse + (*RebuildMutableStateResponse)(nil), // 124: temporal.server.api.historyservice.v1.RebuildMutableStateResponse + (*ImportWorkflowExecutionResponse)(nil), // 125: 
temporal.server.api.historyservice.v1.ImportWorkflowExecutionResponse + (*DeleteWorkflowVisibilityRecordResponse)(nil), // 126: temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse + (*UpdateWorkflowExecutionResponse)(nil), // 127: temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse + (*PollWorkflowExecutionUpdateResponse)(nil), // 128: temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse + (*StreamWorkflowReplicationMessagesResponse)(nil), // 129: temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse + (*GetWorkflowExecutionHistoryResponse)(nil), // 130: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse + (*GetWorkflowExecutionHistoryReverseResponse)(nil), // 131: temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse + (*GetWorkflowExecutionRawHistoryV2Response)(nil), // 132: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response + (*GetWorkflowExecutionRawHistoryResponse)(nil), // 133: temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse + (*ForceDeleteWorkflowExecutionResponse)(nil), // 134: temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse + (*DeleteExecutionResponse)(nil), // 135: temporal.server.api.historyservice.v1.DeleteExecutionResponse + (*GetDLQTasksResponse)(nil), // 136: temporal.server.api.historyservice.v1.GetDLQTasksResponse + (*DeleteDLQTasksResponse)(nil), // 137: temporal.server.api.historyservice.v1.DeleteDLQTasksResponse + (*ListQueuesResponse)(nil), // 138: temporal.server.api.historyservice.v1.ListQueuesResponse + (*AddTasksResponse)(nil), // 139: temporal.server.api.historyservice.v1.AddTasksResponse + (*ListTasksResponse)(nil), // 140: temporal.server.api.historyservice.v1.ListTasksResponse + (*CompleteNexusOperationResponse)(nil), // 141: temporal.server.api.historyservice.v1.CompleteNexusOperationResponse + 
(*CompleteNexusOperationChasmResponse)(nil), // 142: temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse + (*InvokeStateMachineMethodResponse)(nil), // 143: temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse + (*DeepHealthCheckResponse)(nil), // 144: temporal.server.api.historyservice.v1.DeepHealthCheckResponse + (*SyncWorkflowStateResponse)(nil), // 145: temporal.server.api.historyservice.v1.SyncWorkflowStateResponse + (*UpdateActivityOptionsResponse)(nil), // 146: temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse + (*PauseActivityResponse)(nil), // 147: temporal.server.api.historyservice.v1.PauseActivityResponse + (*UnpauseActivityResponse)(nil), // 148: temporal.server.api.historyservice.v1.UnpauseActivityResponse + (*ResetActivityResponse)(nil), // 149: temporal.server.api.historyservice.v1.ResetActivityResponse + (*PauseWorkflowExecutionResponse)(nil), // 150: temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse + (*UnpauseWorkflowExecutionResponse)(nil), // 151: temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse + (*StartNexusOperationResponse)(nil), // 152: temporal.server.api.historyservice.v1.StartNexusOperationResponse + (*CancelNexusOperationResponse)(nil), // 153: temporal.server.api.historyservice.v1.CancelNexusOperationResponse } var file_temporal_server_api_historyservice_v1_service_proto_depIdxs = []int32{ 0, // 0: temporal.server.api.historyservice.v1.HistoryService.StartWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.StartWorkflowExecutionRequest @@ -321,102 +324,104 @@ var file_temporal_server_api_historyservice_v1_service_proto_depIdxs = []int32{ 55, // 55: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistoryV2:input_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Request 56, // 56: 
temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistory:input_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryRequest 57, // 57: temporal.server.api.historyservice.v1.HistoryService.ForceDeleteWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionRequest - 58, // 58: temporal.server.api.historyservice.v1.HistoryService.GetDLQTasks:input_type -> temporal.server.api.historyservice.v1.GetDLQTasksRequest - 59, // 59: temporal.server.api.historyservice.v1.HistoryService.DeleteDLQTasks:input_type -> temporal.server.api.historyservice.v1.DeleteDLQTasksRequest - 60, // 60: temporal.server.api.historyservice.v1.HistoryService.ListQueues:input_type -> temporal.server.api.historyservice.v1.ListQueuesRequest - 61, // 61: temporal.server.api.historyservice.v1.HistoryService.AddTasks:input_type -> temporal.server.api.historyservice.v1.AddTasksRequest - 62, // 62: temporal.server.api.historyservice.v1.HistoryService.ListTasks:input_type -> temporal.server.api.historyservice.v1.ListTasksRequest - 63, // 63: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperation:input_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationRequest - 64, // 64: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperationChasm:input_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest - 65, // 65: temporal.server.api.historyservice.v1.HistoryService.InvokeStateMachineMethod:input_type -> temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest - 66, // 66: temporal.server.api.historyservice.v1.HistoryService.DeepHealthCheck:input_type -> temporal.server.api.historyservice.v1.DeepHealthCheckRequest - 67, // 67: temporal.server.api.historyservice.v1.HistoryService.SyncWorkflowState:input_type -> temporal.server.api.historyservice.v1.SyncWorkflowStateRequest - 68, // 68: 
temporal.server.api.historyservice.v1.HistoryService.UpdateActivityOptions:input_type -> temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest - 69, // 69: temporal.server.api.historyservice.v1.HistoryService.PauseActivity:input_type -> temporal.server.api.historyservice.v1.PauseActivityRequest - 70, // 70: temporal.server.api.historyservice.v1.HistoryService.UnpauseActivity:input_type -> temporal.server.api.historyservice.v1.UnpauseActivityRequest - 71, // 71: temporal.server.api.historyservice.v1.HistoryService.ResetActivity:input_type -> temporal.server.api.historyservice.v1.ResetActivityRequest - 72, // 72: temporal.server.api.historyservice.v1.HistoryService.PauseWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest - 73, // 73: temporal.server.api.historyservice.v1.HistoryService.UnpauseWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest - 74, // 74: temporal.server.api.historyservice.v1.HistoryService.StartNexusOperation:input_type -> temporal.server.api.historyservice.v1.StartNexusOperationRequest - 75, // 75: temporal.server.api.historyservice.v1.HistoryService.CancelNexusOperation:input_type -> temporal.server.api.historyservice.v1.CancelNexusOperationRequest - 76, // 76: temporal.server.api.historyservice.v1.HistoryService.StartWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse - 77, // 77: temporal.server.api.historyservice.v1.HistoryService.GetMutableState:output_type -> temporal.server.api.historyservice.v1.GetMutableStateResponse - 78, // 78: temporal.server.api.historyservice.v1.HistoryService.PollMutableState:output_type -> temporal.server.api.historyservice.v1.PollMutableStateResponse - 79, // 79: temporal.server.api.historyservice.v1.HistoryService.ResetStickyTaskQueue:output_type -> temporal.server.api.historyservice.v1.ResetStickyTaskQueueResponse - 80, // 80: 
temporal.server.api.historyservice.v1.HistoryService.RecordWorkflowTaskStarted:output_type -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse - 81, // 81: temporal.server.api.historyservice.v1.HistoryService.RecordActivityTaskStarted:output_type -> temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse - 82, // 82: temporal.server.api.historyservice.v1.HistoryService.RespondWorkflowTaskCompleted:output_type -> temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse - 83, // 83: temporal.server.api.historyservice.v1.HistoryService.RespondWorkflowTaskFailed:output_type -> temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedResponse - 84, // 84: temporal.server.api.historyservice.v1.HistoryService.IsWorkflowTaskValid:output_type -> temporal.server.api.historyservice.v1.IsWorkflowTaskValidResponse - 85, // 85: temporal.server.api.historyservice.v1.HistoryService.RecordActivityTaskHeartbeat:output_type -> temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatResponse - 86, // 86: temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskCompleted:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskCompletedResponse - 87, // 87: temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskFailed:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskFailedResponse - 88, // 88: temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskCanceled:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskCanceledResponse - 89, // 89: temporal.server.api.historyservice.v1.HistoryService.IsActivityTaskValid:output_type -> temporal.server.api.historyservice.v1.IsActivityTaskValidResponse - 90, // 90: temporal.server.api.historyservice.v1.HistoryService.SignalWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse - 91, // 91: 
temporal.server.api.historyservice.v1.HistoryService.SignalWithStartWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse - 92, // 92: temporal.server.api.historyservice.v1.HistoryService.ExecuteMultiOperation:output_type -> temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse - 93, // 93: temporal.server.api.historyservice.v1.HistoryService.RemoveSignalMutableState:output_type -> temporal.server.api.historyservice.v1.RemoveSignalMutableStateResponse - 94, // 94: temporal.server.api.historyservice.v1.HistoryService.TerminateWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.TerminateWorkflowExecutionResponse - 95, // 95: temporal.server.api.historyservice.v1.HistoryService.DeleteWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.DeleteWorkflowExecutionResponse - 96, // 96: temporal.server.api.historyservice.v1.HistoryService.ResetWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ResetWorkflowExecutionResponse - 97, // 97: temporal.server.api.historyservice.v1.HistoryService.UpdateWorkflowExecutionOptions:output_type -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse - 98, // 98: temporal.server.api.historyservice.v1.HistoryService.RequestCancelWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionResponse - 99, // 99: temporal.server.api.historyservice.v1.HistoryService.ScheduleWorkflowTask:output_type -> temporal.server.api.historyservice.v1.ScheduleWorkflowTaskResponse - 100, // 100: temporal.server.api.historyservice.v1.HistoryService.VerifyFirstWorkflowTaskScheduled:output_type -> temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledResponse - 101, // 101: temporal.server.api.historyservice.v1.HistoryService.RecordChildExecutionCompleted:output_type -> temporal.server.api.historyservice.v1.RecordChildExecutionCompletedResponse - 102, // 
102: temporal.server.api.historyservice.v1.HistoryService.VerifyChildExecutionCompletionRecorded:output_type -> temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedResponse - 103, // 103: temporal.server.api.historyservice.v1.HistoryService.DescribeWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse - 104, // 104: temporal.server.api.historyservice.v1.HistoryService.ReplicateEventsV2:output_type -> temporal.server.api.historyservice.v1.ReplicateEventsV2Response - 105, // 105: temporal.server.api.historyservice.v1.HistoryService.ReplicateWorkflowState:output_type -> temporal.server.api.historyservice.v1.ReplicateWorkflowStateResponse - 106, // 106: temporal.server.api.historyservice.v1.HistoryService.SyncShardStatus:output_type -> temporal.server.api.historyservice.v1.SyncShardStatusResponse - 107, // 107: temporal.server.api.historyservice.v1.HistoryService.SyncActivity:output_type -> temporal.server.api.historyservice.v1.SyncActivityResponse - 108, // 108: temporal.server.api.historyservice.v1.HistoryService.DescribeMutableState:output_type -> temporal.server.api.historyservice.v1.DescribeMutableStateResponse - 109, // 109: temporal.server.api.historyservice.v1.HistoryService.DescribeHistoryHost:output_type -> temporal.server.api.historyservice.v1.DescribeHistoryHostResponse - 110, // 110: temporal.server.api.historyservice.v1.HistoryService.CloseShard:output_type -> temporal.server.api.historyservice.v1.CloseShardResponse - 111, // 111: temporal.server.api.historyservice.v1.HistoryService.GetShard:output_type -> temporal.server.api.historyservice.v1.GetShardResponse - 112, // 112: temporal.server.api.historyservice.v1.HistoryService.RemoveTask:output_type -> temporal.server.api.historyservice.v1.RemoveTaskResponse - 113, // 113: temporal.server.api.historyservice.v1.HistoryService.GetReplicationMessages:output_type -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse - 
114, // 114: temporal.server.api.historyservice.v1.HistoryService.GetDLQReplicationMessages:output_type -> temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse - 115, // 115: temporal.server.api.historyservice.v1.HistoryService.QueryWorkflow:output_type -> temporal.server.api.historyservice.v1.QueryWorkflowResponse - 116, // 116: temporal.server.api.historyservice.v1.HistoryService.ReapplyEvents:output_type -> temporal.server.api.historyservice.v1.ReapplyEventsResponse - 117, // 117: temporal.server.api.historyservice.v1.HistoryService.GetDLQMessages:output_type -> temporal.server.api.historyservice.v1.GetDLQMessagesResponse - 118, // 118: temporal.server.api.historyservice.v1.HistoryService.PurgeDLQMessages:output_type -> temporal.server.api.historyservice.v1.PurgeDLQMessagesResponse - 119, // 119: temporal.server.api.historyservice.v1.HistoryService.MergeDLQMessages:output_type -> temporal.server.api.historyservice.v1.MergeDLQMessagesResponse - 120, // 120: temporal.server.api.historyservice.v1.HistoryService.RefreshWorkflowTasks:output_type -> temporal.server.api.historyservice.v1.RefreshWorkflowTasksResponse - 121, // 121: temporal.server.api.historyservice.v1.HistoryService.GenerateLastHistoryReplicationTasks:output_type -> temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksResponse - 122, // 122: temporal.server.api.historyservice.v1.HistoryService.GetReplicationStatus:output_type -> temporal.server.api.historyservice.v1.GetReplicationStatusResponse - 123, // 123: temporal.server.api.historyservice.v1.HistoryService.RebuildMutableState:output_type -> temporal.server.api.historyservice.v1.RebuildMutableStateResponse - 124, // 124: temporal.server.api.historyservice.v1.HistoryService.ImportWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ImportWorkflowExecutionResponse - 125, // 125: temporal.server.api.historyservice.v1.HistoryService.DeleteWorkflowVisibilityRecord:output_type -> 
temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse - 126, // 126: temporal.server.api.historyservice.v1.HistoryService.UpdateWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse - 127, // 127: temporal.server.api.historyservice.v1.HistoryService.PollWorkflowExecutionUpdate:output_type -> temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse - 128, // 128: temporal.server.api.historyservice.v1.HistoryService.StreamWorkflowReplicationMessages:output_type -> temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse - 129, // 129: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionHistory:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse - 130, // 130: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionHistoryReverse:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse - 131, // 131: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistoryV2:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response - 132, // 132: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistory:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse - 133, // 133: temporal.server.api.historyservice.v1.HistoryService.ForceDeleteWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse - 134, // 134: temporal.server.api.historyservice.v1.HistoryService.GetDLQTasks:output_type -> temporal.server.api.historyservice.v1.GetDLQTasksResponse - 135, // 135: temporal.server.api.historyservice.v1.HistoryService.DeleteDLQTasks:output_type -> temporal.server.api.historyservice.v1.DeleteDLQTasksResponse - 136, // 136: temporal.server.api.historyservice.v1.HistoryService.ListQueues:output_type -> 
temporal.server.api.historyservice.v1.ListQueuesResponse - 137, // 137: temporal.server.api.historyservice.v1.HistoryService.AddTasks:output_type -> temporal.server.api.historyservice.v1.AddTasksResponse - 138, // 138: temporal.server.api.historyservice.v1.HistoryService.ListTasks:output_type -> temporal.server.api.historyservice.v1.ListTasksResponse - 139, // 139: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperation:output_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationResponse - 140, // 140: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperationChasm:output_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse - 141, // 141: temporal.server.api.historyservice.v1.HistoryService.InvokeStateMachineMethod:output_type -> temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse - 142, // 142: temporal.server.api.historyservice.v1.HistoryService.DeepHealthCheck:output_type -> temporal.server.api.historyservice.v1.DeepHealthCheckResponse - 143, // 143: temporal.server.api.historyservice.v1.HistoryService.SyncWorkflowState:output_type -> temporal.server.api.historyservice.v1.SyncWorkflowStateResponse - 144, // 144: temporal.server.api.historyservice.v1.HistoryService.UpdateActivityOptions:output_type -> temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse - 145, // 145: temporal.server.api.historyservice.v1.HistoryService.PauseActivity:output_type -> temporal.server.api.historyservice.v1.PauseActivityResponse - 146, // 146: temporal.server.api.historyservice.v1.HistoryService.UnpauseActivity:output_type -> temporal.server.api.historyservice.v1.UnpauseActivityResponse - 147, // 147: temporal.server.api.historyservice.v1.HistoryService.ResetActivity:output_type -> temporal.server.api.historyservice.v1.ResetActivityResponse - 148, // 148: temporal.server.api.historyservice.v1.HistoryService.PauseWorkflowExecution:output_type -> 
temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse - 149, // 149: temporal.server.api.historyservice.v1.HistoryService.UnpauseWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse - 150, // 150: temporal.server.api.historyservice.v1.HistoryService.StartNexusOperation:output_type -> temporal.server.api.historyservice.v1.StartNexusOperationResponse - 151, // 151: temporal.server.api.historyservice.v1.HistoryService.CancelNexusOperation:output_type -> temporal.server.api.historyservice.v1.CancelNexusOperationResponse - 76, // [76:152] is the sub-list for method output_type - 0, // [0:76] is the sub-list for method input_type + 58, // 58: temporal.server.api.historyservice.v1.HistoryService.DeleteExecution:input_type -> temporal.server.api.historyservice.v1.DeleteExecutionRequest + 59, // 59: temporal.server.api.historyservice.v1.HistoryService.GetDLQTasks:input_type -> temporal.server.api.historyservice.v1.GetDLQTasksRequest + 60, // 60: temporal.server.api.historyservice.v1.HistoryService.DeleteDLQTasks:input_type -> temporal.server.api.historyservice.v1.DeleteDLQTasksRequest + 61, // 61: temporal.server.api.historyservice.v1.HistoryService.ListQueues:input_type -> temporal.server.api.historyservice.v1.ListQueuesRequest + 62, // 62: temporal.server.api.historyservice.v1.HistoryService.AddTasks:input_type -> temporal.server.api.historyservice.v1.AddTasksRequest + 63, // 63: temporal.server.api.historyservice.v1.HistoryService.ListTasks:input_type -> temporal.server.api.historyservice.v1.ListTasksRequest + 64, // 64: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperation:input_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationRequest + 65, // 65: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperationChasm:input_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationChasmRequest + 66, // 66: 
temporal.server.api.historyservice.v1.HistoryService.InvokeStateMachineMethod:input_type -> temporal.server.api.historyservice.v1.InvokeStateMachineMethodRequest + 67, // 67: temporal.server.api.historyservice.v1.HistoryService.DeepHealthCheck:input_type -> temporal.server.api.historyservice.v1.DeepHealthCheckRequest + 68, // 68: temporal.server.api.historyservice.v1.HistoryService.SyncWorkflowState:input_type -> temporal.server.api.historyservice.v1.SyncWorkflowStateRequest + 69, // 69: temporal.server.api.historyservice.v1.HistoryService.UpdateActivityOptions:input_type -> temporal.server.api.historyservice.v1.UpdateActivityOptionsRequest + 70, // 70: temporal.server.api.historyservice.v1.HistoryService.PauseActivity:input_type -> temporal.server.api.historyservice.v1.PauseActivityRequest + 71, // 71: temporal.server.api.historyservice.v1.HistoryService.UnpauseActivity:input_type -> temporal.server.api.historyservice.v1.UnpauseActivityRequest + 72, // 72: temporal.server.api.historyservice.v1.HistoryService.ResetActivity:input_type -> temporal.server.api.historyservice.v1.ResetActivityRequest + 73, // 73: temporal.server.api.historyservice.v1.HistoryService.PauseWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.PauseWorkflowExecutionRequest + 74, // 74: temporal.server.api.historyservice.v1.HistoryService.UnpauseWorkflowExecution:input_type -> temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionRequest + 75, // 75: temporal.server.api.historyservice.v1.HistoryService.StartNexusOperation:input_type -> temporal.server.api.historyservice.v1.StartNexusOperationRequest + 76, // 76: temporal.server.api.historyservice.v1.HistoryService.CancelNexusOperation:input_type -> temporal.server.api.historyservice.v1.CancelNexusOperationRequest + 77, // 77: temporal.server.api.historyservice.v1.HistoryService.StartWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.StartWorkflowExecutionResponse + 78, // 78: 
temporal.server.api.historyservice.v1.HistoryService.GetMutableState:output_type -> temporal.server.api.historyservice.v1.GetMutableStateResponse + 79, // 79: temporal.server.api.historyservice.v1.HistoryService.PollMutableState:output_type -> temporal.server.api.historyservice.v1.PollMutableStateResponse + 80, // 80: temporal.server.api.historyservice.v1.HistoryService.ResetStickyTaskQueue:output_type -> temporal.server.api.historyservice.v1.ResetStickyTaskQueueResponse + 81, // 81: temporal.server.api.historyservice.v1.HistoryService.RecordWorkflowTaskStarted:output_type -> temporal.server.api.historyservice.v1.RecordWorkflowTaskStartedResponse + 82, // 82: temporal.server.api.historyservice.v1.HistoryService.RecordActivityTaskStarted:output_type -> temporal.server.api.historyservice.v1.RecordActivityTaskStartedResponse + 83, // 83: temporal.server.api.historyservice.v1.HistoryService.RespondWorkflowTaskCompleted:output_type -> temporal.server.api.historyservice.v1.RespondWorkflowTaskCompletedResponse + 84, // 84: temporal.server.api.historyservice.v1.HistoryService.RespondWorkflowTaskFailed:output_type -> temporal.server.api.historyservice.v1.RespondWorkflowTaskFailedResponse + 85, // 85: temporal.server.api.historyservice.v1.HistoryService.IsWorkflowTaskValid:output_type -> temporal.server.api.historyservice.v1.IsWorkflowTaskValidResponse + 86, // 86: temporal.server.api.historyservice.v1.HistoryService.RecordActivityTaskHeartbeat:output_type -> temporal.server.api.historyservice.v1.RecordActivityTaskHeartbeatResponse + 87, // 87: temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskCompleted:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskCompletedResponse + 88, // 88: temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskFailed:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskFailedResponse + 89, // 89: 
temporal.server.api.historyservice.v1.HistoryService.RespondActivityTaskCanceled:output_type -> temporal.server.api.historyservice.v1.RespondActivityTaskCanceledResponse + 90, // 90: temporal.server.api.historyservice.v1.HistoryService.IsActivityTaskValid:output_type -> temporal.server.api.historyservice.v1.IsActivityTaskValidResponse + 91, // 91: temporal.server.api.historyservice.v1.HistoryService.SignalWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.SignalWorkflowExecutionResponse + 92, // 92: temporal.server.api.historyservice.v1.HistoryService.SignalWithStartWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.SignalWithStartWorkflowExecutionResponse + 93, // 93: temporal.server.api.historyservice.v1.HistoryService.ExecuteMultiOperation:output_type -> temporal.server.api.historyservice.v1.ExecuteMultiOperationResponse + 94, // 94: temporal.server.api.historyservice.v1.HistoryService.RemoveSignalMutableState:output_type -> temporal.server.api.historyservice.v1.RemoveSignalMutableStateResponse + 95, // 95: temporal.server.api.historyservice.v1.HistoryService.TerminateWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.TerminateWorkflowExecutionResponse + 96, // 96: temporal.server.api.historyservice.v1.HistoryService.DeleteWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.DeleteWorkflowExecutionResponse + 97, // 97: temporal.server.api.historyservice.v1.HistoryService.ResetWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ResetWorkflowExecutionResponse + 98, // 98: temporal.server.api.historyservice.v1.HistoryService.UpdateWorkflowExecutionOptions:output_type -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionOptionsResponse + 99, // 99: temporal.server.api.historyservice.v1.HistoryService.RequestCancelWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.RequestCancelWorkflowExecutionResponse + 100, // 100: 
temporal.server.api.historyservice.v1.HistoryService.ScheduleWorkflowTask:output_type -> temporal.server.api.historyservice.v1.ScheduleWorkflowTaskResponse + 101, // 101: temporal.server.api.historyservice.v1.HistoryService.VerifyFirstWorkflowTaskScheduled:output_type -> temporal.server.api.historyservice.v1.VerifyFirstWorkflowTaskScheduledResponse + 102, // 102: temporal.server.api.historyservice.v1.HistoryService.RecordChildExecutionCompleted:output_type -> temporal.server.api.historyservice.v1.RecordChildExecutionCompletedResponse + 103, // 103: temporal.server.api.historyservice.v1.HistoryService.VerifyChildExecutionCompletionRecorded:output_type -> temporal.server.api.historyservice.v1.VerifyChildExecutionCompletionRecordedResponse + 104, // 104: temporal.server.api.historyservice.v1.HistoryService.DescribeWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.DescribeWorkflowExecutionResponse + 105, // 105: temporal.server.api.historyservice.v1.HistoryService.ReplicateEventsV2:output_type -> temporal.server.api.historyservice.v1.ReplicateEventsV2Response + 106, // 106: temporal.server.api.historyservice.v1.HistoryService.ReplicateWorkflowState:output_type -> temporal.server.api.historyservice.v1.ReplicateWorkflowStateResponse + 107, // 107: temporal.server.api.historyservice.v1.HistoryService.SyncShardStatus:output_type -> temporal.server.api.historyservice.v1.SyncShardStatusResponse + 108, // 108: temporal.server.api.historyservice.v1.HistoryService.SyncActivity:output_type -> temporal.server.api.historyservice.v1.SyncActivityResponse + 109, // 109: temporal.server.api.historyservice.v1.HistoryService.DescribeMutableState:output_type -> temporal.server.api.historyservice.v1.DescribeMutableStateResponse + 110, // 110: temporal.server.api.historyservice.v1.HistoryService.DescribeHistoryHost:output_type -> temporal.server.api.historyservice.v1.DescribeHistoryHostResponse + 111, // 111: 
temporal.server.api.historyservice.v1.HistoryService.CloseShard:output_type -> temporal.server.api.historyservice.v1.CloseShardResponse + 112, // 112: temporal.server.api.historyservice.v1.HistoryService.GetShard:output_type -> temporal.server.api.historyservice.v1.GetShardResponse + 113, // 113: temporal.server.api.historyservice.v1.HistoryService.RemoveTask:output_type -> temporal.server.api.historyservice.v1.RemoveTaskResponse + 114, // 114: temporal.server.api.historyservice.v1.HistoryService.GetReplicationMessages:output_type -> temporal.server.api.historyservice.v1.GetReplicationMessagesResponse + 115, // 115: temporal.server.api.historyservice.v1.HistoryService.GetDLQReplicationMessages:output_type -> temporal.server.api.historyservice.v1.GetDLQReplicationMessagesResponse + 116, // 116: temporal.server.api.historyservice.v1.HistoryService.QueryWorkflow:output_type -> temporal.server.api.historyservice.v1.QueryWorkflowResponse + 117, // 117: temporal.server.api.historyservice.v1.HistoryService.ReapplyEvents:output_type -> temporal.server.api.historyservice.v1.ReapplyEventsResponse + 118, // 118: temporal.server.api.historyservice.v1.HistoryService.GetDLQMessages:output_type -> temporal.server.api.historyservice.v1.GetDLQMessagesResponse + 119, // 119: temporal.server.api.historyservice.v1.HistoryService.PurgeDLQMessages:output_type -> temporal.server.api.historyservice.v1.PurgeDLQMessagesResponse + 120, // 120: temporal.server.api.historyservice.v1.HistoryService.MergeDLQMessages:output_type -> temporal.server.api.historyservice.v1.MergeDLQMessagesResponse + 121, // 121: temporal.server.api.historyservice.v1.HistoryService.RefreshWorkflowTasks:output_type -> temporal.server.api.historyservice.v1.RefreshWorkflowTasksResponse + 122, // 122: temporal.server.api.historyservice.v1.HistoryService.GenerateLastHistoryReplicationTasks:output_type -> temporal.server.api.historyservice.v1.GenerateLastHistoryReplicationTasksResponse + 123, // 123: 
temporal.server.api.historyservice.v1.HistoryService.GetReplicationStatus:output_type -> temporal.server.api.historyservice.v1.GetReplicationStatusResponse + 124, // 124: temporal.server.api.historyservice.v1.HistoryService.RebuildMutableState:output_type -> temporal.server.api.historyservice.v1.RebuildMutableStateResponse + 125, // 125: temporal.server.api.historyservice.v1.HistoryService.ImportWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ImportWorkflowExecutionResponse + 126, // 126: temporal.server.api.historyservice.v1.HistoryService.DeleteWorkflowVisibilityRecord:output_type -> temporal.server.api.historyservice.v1.DeleteWorkflowVisibilityRecordResponse + 127, // 127: temporal.server.api.historyservice.v1.HistoryService.UpdateWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.UpdateWorkflowExecutionResponse + 128, // 128: temporal.server.api.historyservice.v1.HistoryService.PollWorkflowExecutionUpdate:output_type -> temporal.server.api.historyservice.v1.PollWorkflowExecutionUpdateResponse + 129, // 129: temporal.server.api.historyservice.v1.HistoryService.StreamWorkflowReplicationMessages:output_type -> temporal.server.api.historyservice.v1.StreamWorkflowReplicationMessagesResponse + 130, // 130: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionHistory:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryResponse + 131, // 131: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionHistoryReverse:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionHistoryReverseResponse + 132, // 132: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistoryV2:output_type -> temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryV2Response + 133, // 133: temporal.server.api.historyservice.v1.HistoryService.GetWorkflowExecutionRawHistory:output_type -> 
temporal.server.api.historyservice.v1.GetWorkflowExecutionRawHistoryResponse + 134, // 134: temporal.server.api.historyservice.v1.HistoryService.ForceDeleteWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.ForceDeleteWorkflowExecutionResponse + 135, // 135: temporal.server.api.historyservice.v1.HistoryService.DeleteExecution:output_type -> temporal.server.api.historyservice.v1.DeleteExecutionResponse + 136, // 136: temporal.server.api.historyservice.v1.HistoryService.GetDLQTasks:output_type -> temporal.server.api.historyservice.v1.GetDLQTasksResponse + 137, // 137: temporal.server.api.historyservice.v1.HistoryService.DeleteDLQTasks:output_type -> temporal.server.api.historyservice.v1.DeleteDLQTasksResponse + 138, // 138: temporal.server.api.historyservice.v1.HistoryService.ListQueues:output_type -> temporal.server.api.historyservice.v1.ListQueuesResponse + 139, // 139: temporal.server.api.historyservice.v1.HistoryService.AddTasks:output_type -> temporal.server.api.historyservice.v1.AddTasksResponse + 140, // 140: temporal.server.api.historyservice.v1.HistoryService.ListTasks:output_type -> temporal.server.api.historyservice.v1.ListTasksResponse + 141, // 141: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperation:output_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationResponse + 142, // 142: temporal.server.api.historyservice.v1.HistoryService.CompleteNexusOperationChasm:output_type -> temporal.server.api.historyservice.v1.CompleteNexusOperationChasmResponse + 143, // 143: temporal.server.api.historyservice.v1.HistoryService.InvokeStateMachineMethod:output_type -> temporal.server.api.historyservice.v1.InvokeStateMachineMethodResponse + 144, // 144: temporal.server.api.historyservice.v1.HistoryService.DeepHealthCheck:output_type -> temporal.server.api.historyservice.v1.DeepHealthCheckResponse + 145, // 145: temporal.server.api.historyservice.v1.HistoryService.SyncWorkflowState:output_type -> 
temporal.server.api.historyservice.v1.SyncWorkflowStateResponse + 146, // 146: temporal.server.api.historyservice.v1.HistoryService.UpdateActivityOptions:output_type -> temporal.server.api.historyservice.v1.UpdateActivityOptionsResponse + 147, // 147: temporal.server.api.historyservice.v1.HistoryService.PauseActivity:output_type -> temporal.server.api.historyservice.v1.PauseActivityResponse + 148, // 148: temporal.server.api.historyservice.v1.HistoryService.UnpauseActivity:output_type -> temporal.server.api.historyservice.v1.UnpauseActivityResponse + 149, // 149: temporal.server.api.historyservice.v1.HistoryService.ResetActivity:output_type -> temporal.server.api.historyservice.v1.ResetActivityResponse + 150, // 150: temporal.server.api.historyservice.v1.HistoryService.PauseWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.PauseWorkflowExecutionResponse + 151, // 151: temporal.server.api.historyservice.v1.HistoryService.UnpauseWorkflowExecution:output_type -> temporal.server.api.historyservice.v1.UnpauseWorkflowExecutionResponse + 152, // 152: temporal.server.api.historyservice.v1.HistoryService.StartNexusOperation:output_type -> temporal.server.api.historyservice.v1.StartNexusOperationResponse + 153, // 153: temporal.server.api.historyservice.v1.HistoryService.CancelNexusOperation:output_type -> temporal.server.api.historyservice.v1.CancelNexusOperationResponse + 77, // [77:154] is the sub-list for method output_type + 0, // [0:77] is the sub-list for method input_type 0, // [0:0] is the sub-list for extension type_name 0, // [0:0] is the sub-list for extension extendee 0, // [0:0] is the sub-list for field type_name diff --git a/api/historyservice/v1/service_grpc.pb.go b/api/historyservice/v1/service_grpc.pb.go index ddc94fcfafa..8400c0a89eb 100644 --- a/api/historyservice/v1/service_grpc.pb.go +++ b/api/historyservice/v1/service_grpc.pb.go @@ -78,6 +78,7 @@ const ( HistoryService_GetWorkflowExecutionRawHistoryV2_FullMethodName = 
"/temporal.server.api.historyservice.v1.HistoryService/GetWorkflowExecutionRawHistoryV2" HistoryService_GetWorkflowExecutionRawHistory_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/GetWorkflowExecutionRawHistory" HistoryService_ForceDeleteWorkflowExecution_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/ForceDeleteWorkflowExecution" + HistoryService_DeleteExecution_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/DeleteExecution" HistoryService_GetDLQTasks_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/GetDLQTasks" HistoryService_DeleteDLQTasks_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/DeleteDLQTasks" HistoryService_ListQueues_FullMethodName = "/temporal.server.api.historyservice.v1.HistoryService/ListQueues" @@ -295,6 +296,7 @@ type HistoryServiceClient interface { GetWorkflowExecutionRawHistoryV2(ctx context.Context, in *GetWorkflowExecutionRawHistoryV2Request, opts ...grpc.CallOption) (*GetWorkflowExecutionRawHistoryV2Response, error) GetWorkflowExecutionRawHistory(ctx context.Context, in *GetWorkflowExecutionRawHistoryRequest, opts ...grpc.CallOption) (*GetWorkflowExecutionRawHistoryResponse, error) ForceDeleteWorkflowExecution(ctx context.Context, in *ForceDeleteWorkflowExecutionRequest, opts ...grpc.CallOption) (*ForceDeleteWorkflowExecutionResponse, error) + DeleteExecution(ctx context.Context, in *DeleteExecutionRequest, opts ...grpc.CallOption) (*DeleteExecutionResponse, error) GetDLQTasks(ctx context.Context, in *GetDLQTasksRequest, opts ...grpc.CallOption) (*GetDLQTasksResponse, error) DeleteDLQTasks(ctx context.Context, in *DeleteDLQTasksRequest, opts ...grpc.CallOption) (*DeleteDLQTasksResponse, error) ListQueues(ctx context.Context, in *ListQueuesRequest, opts ...grpc.CallOption) (*ListQueuesResponse, error) @@ -941,6 +943,15 @@ func (c *historyServiceClient) ForceDeleteWorkflowExecution(ctx context.Context, return out, nil } 
+func (c *historyServiceClient) DeleteExecution(ctx context.Context, in *DeleteExecutionRequest, opts ...grpc.CallOption) (*DeleteExecutionResponse, error) { + out := new(DeleteExecutionResponse) + err := c.cc.Invoke(ctx, HistoryService_DeleteExecution_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *historyServiceClient) GetDLQTasks(ctx context.Context, in *GetDLQTasksRequest, opts ...grpc.CallOption) (*GetDLQTasksResponse, error) { out := new(GetDLQTasksResponse) err := c.cc.Invoke(ctx, HistoryService_GetDLQTasks_FullMethodName, in, out, opts...) @@ -1300,6 +1311,7 @@ type HistoryServiceServer interface { GetWorkflowExecutionRawHistoryV2(context.Context, *GetWorkflowExecutionRawHistoryV2Request) (*GetWorkflowExecutionRawHistoryV2Response, error) GetWorkflowExecutionRawHistory(context.Context, *GetWorkflowExecutionRawHistoryRequest) (*GetWorkflowExecutionRawHistoryResponse, error) ForceDeleteWorkflowExecution(context.Context, *ForceDeleteWorkflowExecutionRequest) (*ForceDeleteWorkflowExecutionResponse, error) + DeleteExecution(context.Context, *DeleteExecutionRequest) (*DeleteExecutionResponse, error) GetDLQTasks(context.Context, *GetDLQTasksRequest) (*GetDLQTasksResponse, error) DeleteDLQTasks(context.Context, *DeleteDLQTasksRequest) (*DeleteDLQTasksResponse, error) ListQueues(context.Context, *ListQueuesRequest) (*ListQueuesResponse, error) @@ -1573,6 +1585,9 @@ func (UnimplementedHistoryServiceServer) GetWorkflowExecutionRawHistory(context. 
func (UnimplementedHistoryServiceServer) ForceDeleteWorkflowExecution(context.Context, *ForceDeleteWorkflowExecutionRequest) (*ForceDeleteWorkflowExecutionResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method ForceDeleteWorkflowExecution not implemented") } +func (UnimplementedHistoryServiceServer) DeleteExecution(context.Context, *DeleteExecutionRequest) (*DeleteExecutionResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method DeleteExecution not implemented") +} func (UnimplementedHistoryServiceServer) GetDLQTasks(context.Context, *GetDLQTasksRequest) (*GetDLQTasksResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetDLQTasks not implemented") } @@ -2692,6 +2707,24 @@ func _HistoryService_ForceDeleteWorkflowExecution_Handler(srv interface{}, ctx c return interceptor(ctx, in, info, handler) } +func _HistoryService_DeleteExecution_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DeleteExecutionRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(HistoryServiceServer).DeleteExecution(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: HistoryService_DeleteExecution_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(HistoryServiceServer).DeleteExecution(ctx, req.(*DeleteExecutionRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _HistoryService_GetDLQTasks_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(GetDLQTasksRequest) if err := dec(in); err != nil { @@ -3251,6 +3284,10 @@ var HistoryService_ServiceDesc = grpc.ServiceDesc{ MethodName: "ForceDeleteWorkflowExecution", Handler: _HistoryService_ForceDeleteWorkflowExecution_Handler, }, + { + MethodName: 
"DeleteExecution", + Handler: _HistoryService_DeleteExecution_Handler, + }, { MethodName: "GetDLQTasks", Handler: _HistoryService_GetDLQTasks_Handler, diff --git a/api/historyservicemock/v1/service_grpc.pb.mock.go b/api/historyservicemock/v1/service_grpc.pb.mock.go index 581b7f844bf..bf216105dbd 100644 --- a/api/historyservicemock/v1/service_grpc.pb.mock.go +++ b/api/historyservicemock/v1/service_grpc.pb.mock.go @@ -183,6 +183,26 @@ func (mr *MockHistoryServiceClientMockRecorder) DeleteDLQTasks(ctx, in any, opts return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteDLQTasks", reflect.TypeOf((*MockHistoryServiceClient)(nil).DeleteDLQTasks), varargs...) } +// DeleteExecution mocks base method. +func (m *MockHistoryServiceClient) DeleteExecution(ctx context.Context, in *historyservice.DeleteExecutionRequest, opts ...grpc.CallOption) (*historyservice.DeleteExecutionResponse, error) { + m.ctrl.T.Helper() + varargs := []any{ctx, in} + for _, a := range opts { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "DeleteExecution", varargs...) + ret0, _ := ret[0].(*historyservice.DeleteExecutionResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DeleteExecution indicates an expected call of DeleteExecution. +func (mr *MockHistoryServiceClientMockRecorder) DeleteExecution(ctx, in any, opts ...any) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]any{ctx, in}, opts...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteExecution", reflect.TypeOf((*MockHistoryServiceClient)(nil).DeleteExecution), varargs...) +} + // DeleteWorkflowExecution mocks base method. 
func (m *MockHistoryServiceClient) DeleteWorkflowExecution(ctx context.Context, in *historyservice.DeleteWorkflowExecutionRequest, opts ...grpc.CallOption) (*historyservice.DeleteWorkflowExecutionResponse, error) { m.ctrl.T.Helper() @@ -1830,6 +1850,21 @@ func (mr *MockHistoryServiceServerMockRecorder) DeleteDLQTasks(arg0, arg1 any) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteDLQTasks", reflect.TypeOf((*MockHistoryServiceServer)(nil).DeleteDLQTasks), arg0, arg1) } +// DeleteExecution mocks base method. +func (m *MockHistoryServiceServer) DeleteExecution(arg0 context.Context, arg1 *historyservice.DeleteExecutionRequest) (*historyservice.DeleteExecutionResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteExecution", arg0, arg1) + ret0, _ := ret[0].(*historyservice.DeleteExecutionResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DeleteExecution indicates an expected call of DeleteExecution. +func (mr *MockHistoryServiceServerMockRecorder) DeleteExecution(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteExecution", reflect.TypeOf((*MockHistoryServiceServer)(nil).DeleteExecution), arg0, arg1) +} + // DeleteWorkflowExecution mocks base method. 
func (m *MockHistoryServiceServer) DeleteWorkflowExecution(arg0 context.Context, arg1 *historyservice.DeleteWorkflowExecutionRequest) (*historyservice.DeleteWorkflowExecutionResponse, error) { m.ctrl.T.Helper() diff --git a/client/history/client_gen.go b/client/history/client_gen.go index dc631c3c52a..494b6135e9b 100644 --- a/client/history/client_gen.go +++ b/client/history/client_gen.go @@ -135,6 +135,26 @@ func (c *clientImpl) DeleteDLQTasks( return response, nil } +func (c *clientImpl) DeleteExecution( + ctx context.Context, + request *historyservice.DeleteExecutionRequest, + opts ...grpc.CallOption, +) (*historyservice.DeleteExecutionResponse, error) { + shardID := c.shardIDFromWorkflowID(request.GetNamespaceId(), request.GetExecution().GetWorkflowId()) + var response *historyservice.DeleteExecutionResponse + op := func(ctx context.Context, client historyservice.HistoryServiceClient) error { + var err error + ctx, cancel := c.createContext(ctx) + defer cancel() + response, err = client.DeleteExecution(ctx, request, opts...) + return err + } + if err := c.executeWithRedirect(ctx, shardID, op); err != nil { + return nil, err + } + return response, nil +} + func (c *clientImpl) DeleteWorkflowExecution( ctx context.Context, request *historyservice.DeleteWorkflowExecutionRequest, diff --git a/client/history/metric_client_gen.go b/client/history/metric_client_gen.go index bbfe7511d8a..767aaf576c7 100644 --- a/client/history/metric_client_gen.go +++ b/client/history/metric_client_gen.go @@ -93,6 +93,20 @@ func (c *metricClient) DeleteDLQTasks( return c.client.DeleteDLQTasks(ctx, request, opts...) 
} +func (c *metricClient) DeleteExecution( + ctx context.Context, + request *historyservice.DeleteExecutionRequest, + opts ...grpc.CallOption, +) (_ *historyservice.DeleteExecutionResponse, retError error) { + + metricsHandler, startTime := c.startMetricsRecording(ctx, "HistoryClientDeleteExecution") + defer func() { + c.finishMetricsRecording(metricsHandler, startTime, retError) + }() + + return c.client.DeleteExecution(ctx, request, opts...) +} + func (c *metricClient) DeleteWorkflowExecution( ctx context.Context, request *historyservice.DeleteWorkflowExecutionRequest, diff --git a/client/history/retryable_client_gen.go b/client/history/retryable_client_gen.go index 2582a04f0a6..7a608e08a10 100644 --- a/client/history/retryable_client_gen.go +++ b/client/history/retryable_client_gen.go @@ -116,6 +116,21 @@ func (c *retryableClient) DeleteDLQTasks( return resp, err } +func (c *retryableClient) DeleteExecution( + ctx context.Context, + request *historyservice.DeleteExecutionRequest, + opts ...grpc.CallOption, +) (*historyservice.DeleteExecutionResponse, error) { + var resp *historyservice.DeleteExecutionResponse + op := func(ctx context.Context) error { + var err error + resp, err = c.client.DeleteExecution(ctx, request, opts...) 
+ return err + } + err := backoff.ThrottleRetryContext(ctx, op, c.policy, c.isRetryable) + return resp, err +} + func (c *retryableClient) DeleteWorkflowExecution( ctx context.Context, request *historyservice.DeleteWorkflowExecutionRequest, diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 30bc93c04a2..267f5e1fd5a 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -1079,6 +1079,15 @@ to allow waiting on the "Accepted" lifecycle stage.`, `FrontendEnableWorkerVersioningRuleAPIs enables worker versioning in workflow progress APIs.`, ) + DeleteNamespaceUseChasmDeleteExecution = NewGlobalBoolSetting( + "frontend.deleteNamespaceUseChasmDeleteExecution", + false, + `DeleteNamespaceUseChasmDeleteExecution controls whether the delete namespace workflow uses the +DeleteExecution history service API (CHASM engine path) for non-workflow CHASM executions, instead +of ForceDeleteWorkflowExecution. Only enable after all history and worker services have been upgraded +to a version that supports the DeleteExecution API.`, + ) + DeleteNamespaceDeleteActivityRPS = NewGlobalIntSetting( "frontend.deleteNamespaceDeleteActivityRPS", 100, diff --git a/common/rpc/interceptor/logtags/history_service_server_gen.go b/common/rpc/interceptor/logtags/history_service_server_gen.go index 06b436d6d0e..e79170db9cf 100644 --- a/common/rpc/interceptor/logtags/history_service_server_gen.go +++ b/common/rpc/interceptor/logtags/history_service_server_gen.go @@ -45,6 +45,13 @@ func (wt *WorkflowTags) extractFromHistoryServiceServerMessage(message any) []ta return nil case *historyservice.DeleteDLQTasksResponse: return nil + case *historyservice.DeleteExecutionRequest: + return []tag.Tag{ + tag.WorkflowID(r.GetExecution().GetWorkflowId()), + tag.WorkflowRunID(r.GetExecution().GetRunId()), + } + case *historyservice.DeleteExecutionResponse: + return nil case *historyservice.DeleteWorkflowExecutionRequest: return []tag.Tag{ 
tag.WorkflowID(r.GetWorkflowExecution().GetWorkflowId()), diff --git a/proto/internal/temporal/server/api/historyservice/v1/request_response.proto b/proto/internal/temporal/server/api/historyservice/v1/request_response.proto index 7269a7ca33b..ba91e5a008f 100644 --- a/proto/internal/temporal/server/api/historyservice/v1/request_response.proto +++ b/proto/internal/temporal/server/api/historyservice/v1/request_response.proto @@ -1120,6 +1120,19 @@ message ForceDeleteWorkflowExecutionResponse { temporal.server.api.adminservice.v1.DeleteWorkflowExecutionResponse response = 1; } +message DeleteExecutionRequest { + option (routing).workflow_id = "execution.workflow_id"; + + string namespace_id = 1; + temporal.api.common.v1.WorkflowExecution execution = 2; + // (-- api-linter: core::0141::forbidden-types=disabled --) + uint32 archetype_id = 3; + string reason = 4; + string identity = 5; +} + +message DeleteExecutionResponse {} + message GetDLQTasksRequest { option (routing).any_host = true; diff --git a/proto/internal/temporal/server/api/historyservice/v1/service.proto b/proto/internal/temporal/server/api/historyservice/v1/service.proto index 4dcd9ce0779..dd27cbd34d6 100644 --- a/proto/internal/temporal/server/api/historyservice/v1/service.proto +++ b/proto/internal/temporal/server/api/historyservice/v1/service.proto @@ -371,6 +371,10 @@ service HistoryService { option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; } + rpc DeleteExecution(DeleteExecutionRequest) returns (DeleteExecutionResponse) { + option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_STANDARD; + } + rpc GetDLQTasks(GetDLQTasksRequest) returns (GetDLQTasksResponse) { option (temporal.server.api.common.v1.api_category).category = API_CATEGORY_SYSTEM; } diff --git a/service/history/api/deleteexecution/api.go b/service/history/api/deleteexecution/api.go new file mode 100644 index 00000000000..89284763dfc --- /dev/null +++ 
b/service/history/api/deleteexecution/api.go @@ -0,0 +1,30 @@ +package deleteexecution + +import ( + "context" + + "go.temporal.io/server/api/historyservice/v1" + "go.temporal.io/server/chasm" +) + +func Invoke( + ctx context.Context, + chasmEngine chasm.Engine, + request *historyservice.DeleteExecutionRequest, +) (*historyservice.DeleteExecutionResponse, error) { + key := chasm.ExecutionKey{ + NamespaceID: request.GetNamespaceId(), + BusinessID: request.GetExecution().GetWorkflowId(), + RunID: request.GetExecution().GetRunId(), + } + ref := chasm.NewComponentRefByArchetypeID(key, request.GetArchetypeId()) + if err := chasmEngine.DeleteExecution(ctx, ref, chasm.DeleteExecutionRequest{ + TerminateComponentRequest: chasm.TerminateComponentRequest{ + Reason: request.GetReason(), + Identity: request.GetIdentity(), + }, + }); err != nil { + return nil, err + } + return &historyservice.DeleteExecutionResponse{}, nil +} diff --git a/service/history/handler.go b/service/history/handler.go index 2248f7c6a12..41c36f1e2a3 100644 --- a/service/history/handler.go +++ b/service/history/handler.go @@ -54,6 +54,7 @@ import ( "go.temporal.io/server/components/nexusoperations" "go.temporal.io/server/service/history/api" "go.temporal.io/server/service/history/api/deletedlqtasks" + "go.temporal.io/server/service/history/api/deleteexecution" "go.temporal.io/server/service/history/api/forcedeleteworkflowexecution" "go.temporal.io/server/service/history/api/getdlqtasks" "go.temporal.io/server/service/history/api/listqueues" @@ -2019,6 +2020,21 @@ func (h *Handler) ForceDeleteWorkflowExecution( ) } +func (h *Handler) DeleteExecution( + ctx context.Context, + request *historyservice.DeleteExecutionRequest, +) (*historyservice.DeleteExecutionResponse, error) { + namespaceID := namespace.ID(request.GetNamespaceId()) + if err := api.ValidateNamespaceUUID(namespaceID); err != nil { + return nil, err + } + h.logger.Info("DeleteExecution requested", + 
tag.WorkflowNamespaceID(request.GetNamespaceId()), + tag.WorkflowID(request.GetExecution().GetWorkflowId()), + tag.WorkflowRunID(request.GetExecution().GetRunId())) + return deleteexecution.Invoke(ctx, h.chasmEngine, request) +} + func (h *Handler) GetDLQTasks( ctx context.Context, request *historyservice.GetDLQTasksRequest, diff --git a/service/worker/deletenamespace/deleteexecutions/activities.go b/service/worker/deletenamespace/deleteexecutions/activities.go index 6cfc666b352..b49d9a30a14 100644 --- a/service/worker/deletenamespace/deleteexecutions/activities.go +++ b/service/worker/deletenamespace/deleteexecutions/activities.go @@ -28,7 +28,8 @@ type ( visibilityManager manager.VisibilityManager historyClient historyservice.HistoryServiceClient - deleteActivityRPS dynamicconfig.TypedSubscribable[int] + deleteActivityRPS dynamicconfig.TypedSubscribable[int] + useChasmDeleteExecution dynamicconfig.BoolPropertyFn metricsHandler metrics.Handler logger log.Logger @@ -67,15 +68,17 @@ func NewActivities( visibilityManager manager.VisibilityManager, historyClient historyservice.HistoryServiceClient, deleteActivityRPS dynamicconfig.TypedSubscribable[int], + useChasmDeleteExecution dynamicconfig.BoolPropertyFn, metricsHandler metrics.Handler, logger log.Logger, ) *Activities { return &Activities{ - visibilityManager: visibilityManager, - historyClient: historyClient, - deleteActivityRPS: deleteActivityRPS, - metricsHandler: metricsHandler, - logger: logger, + visibilityManager: visibilityManager, + historyClient: historyClient, + deleteActivityRPS: deleteActivityRPS, + useChasmDeleteExecution: useChasmDeleteExecution, + metricsHandler: metricsHandler, + logger: logger, } } @@ -195,17 +198,18 @@ func (a *Activities) DeleteExecutionsActivity(ctx context.Context, params Delete NamespaceId: params.NamespaceID.String(), WorkflowExecution: execution.Execution, }) + } else if a.useChasmDeleteExecution() { + _, err = a.historyClient.DeleteExecution(ctx, 
&historyservice.DeleteExecutionRequest{ + NamespaceId: params.NamespaceID.String(), + Execution: execution.Execution, + ArchetypeId: archetypeID, + Reason: "Namespace delete", + }) } else { - // NOTE: ForceDeleteWorkflowExecution is NOT design as a API to be consumed programmatically, - // and only performs best effort deletion on execution histories. - // It works for CHASM now as CHASM executions don't have any history events, so as long as this API, - // returns nil error, it means we have successfully deleted the mutable state and visibility records. _, err = a.historyClient.ForceDeleteWorkflowExecution(ctx, &historyservice.ForceDeleteWorkflowExecutionRequest{ NamespaceId: params.NamespaceID.String(), - ArchetypeId: archetypeID, + ArchetypeId: uint32(archetypeID), Request: &adminservice.DeleteWorkflowExecutionRequest{ - // Namespace and Archetype fields are not required since we are calling history - // service directly. Execution: execution.Execution, }, }) diff --git a/service/worker/deletenamespace/deleteexecutions/workflow_test.go b/service/worker/deletenamespace/deleteexecutions/workflow_test.go index 9c72e3e2826..5b50e0d5d5f 100644 --- a/service/worker/deletenamespace/deleteexecutions/workflow_test.go +++ b/service/worker/deletenamespace/deleteexecutions/workflow_test.go @@ -18,7 +18,6 @@ import ( "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/testsuite" "go.temporal.io/sdk/workflow" - "go.temporal.io/server/api/adminservice/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/api/historyservicemock/v1" "go.temporal.io/server/common/log" @@ -94,8 +93,9 @@ func Test_DeleteExecutionsWorkflow_NoActivityMocks_NoExecutions(t *testing.T) { deleteActivityRPS: func(callback func(int)) (v int, cancel func()) { return 100, func() {} }, - metricsHandler: nil, - logger: nil, + useChasmDeleteExecution: func() bool { return false }, + metricsHandler: nil, + logger: nil, } la := &LocalActivities{ visibilityManager: visibilityManager, @@ -318,8 
+318,9 @@ func Test_DeleteExecutionsWorkflow_NoActivityMocks_ManyExecutions(t *testing.T) deleteActivityRPS: func(callback func(int)) (v int, cancel func()) { return 100, func() {} }, - metricsHandler: metrics.NoopMetricsHandler, - logger: log.NewTestLogger(), + useChasmDeleteExecution: func() bool { return false }, + metricsHandler: metrics.NoopMetricsHandler, + logger: log.NewTestLogger(), } la := &LocalActivities{ visibilityManager: visibilityManager, @@ -395,19 +396,17 @@ func Test_DeleteExecutionsWorkflow_NoActivityMocks_ChasmExecutions(t *testing.T) }, nil).Times(2) historyClient := historyservicemock.NewMockHistoryServiceClient(ctrl) - historyClient.EXPECT().ForceDeleteWorkflowExecution(gomock.Any(), &historyservice.ForceDeleteWorkflowExecutionRequest{ + historyClient.EXPECT().DeleteExecution(gomock.Any(), &historyservice.DeleteExecutionRequest{ NamespaceId: "namespace-id", + Execution: execution1, ArchetypeId: uint32(archetypeID1), - Request: &adminservice.DeleteWorkflowExecutionRequest{ - Execution: execution1, - }, + Reason: "Namespace delete", }).Return(nil, nil).Times(1) - historyClient.EXPECT().ForceDeleteWorkflowExecution(gomock.Any(), &historyservice.ForceDeleteWorkflowExecutionRequest{ + historyClient.EXPECT().DeleteExecution(gomock.Any(), &historyservice.DeleteExecutionRequest{ NamespaceId: "namespace-id", + Execution: execution2, ArchetypeId: uint32(archetypeID2), - Request: &adminservice.DeleteWorkflowExecutionRequest{ - Execution: execution2, - }, + Reason: "Namespace delete", }).Return(nil, nil).Times(1) a := &Activities{ @@ -416,8 +415,9 @@ func Test_DeleteExecutionsWorkflow_NoActivityMocks_ChasmExecutions(t *testing.T) deleteActivityRPS: func(callback func(int)) (v int, cancel func()) { return 100, func() {} }, - metricsHandler: metrics.NoopMetricsHandler, - logger: log.NewTestLogger(), + useChasmDeleteExecution: func() bool { return true }, + metricsHandler: metrics.NoopMetricsHandler, + logger: log.NewTestLogger(), } la := &LocalActivities{ 
visibilityManager: visibilityManager, @@ -515,8 +515,9 @@ func Test_DeleteExecutionsWorkflow_NoActivityMocks_HistoryClientError(t *testing deleteActivityRPS: func(callback func(int)) (v int, cancel func()) { return 100, func() {} }, - metricsHandler: metrics.NoopMetricsHandler, - logger: log.NewTestLogger(), + useChasmDeleteExecution: func() bool { return false }, + metricsHandler: metrics.NoopMetricsHandler, + logger: log.NewTestLogger(), } la := &LocalActivities{ visibilityManager: visibilityManager, diff --git a/service/worker/deletenamespace/fx.go b/service/worker/deletenamespace/fx.go index 122aca21447..1275c5bf7da 100644 --- a/service/worker/deletenamespace/fx.go +++ b/service/worker/deletenamespace/fx.go @@ -36,6 +36,7 @@ type ( allowDeleteNamespaceIfNexusEndpointTarget dynamicconfig.BoolPropertyFn nexusEndpointListDefaultPageSize dynamicconfig.IntPropertyFn deleteActivityRPS dynamicconfig.TypedSubscribable[int] + useChasmDeleteExecution dynamicconfig.BoolPropertyFn namespaceCacheRefreshInterval dynamicconfig.DurationPropertyFn } componentParams struct { @@ -69,6 +70,7 @@ func newComponent( allowDeleteNamespaceIfNexusEndpointTarget: dynamicconfig.AllowDeleteNamespaceIfNexusEndpointTarget.Get(params.DynamicCollection), nexusEndpointListDefaultPageSize: dynamicconfig.NexusEndpointListDefaultPageSize.Get(params.DynamicCollection), deleteActivityRPS: dynamicconfig.DeleteNamespaceDeleteActivityRPS.Subscribe(params.DynamicCollection), + useChasmDeleteExecution: dynamicconfig.DeleteNamespaceUseChasmDeleteExecution.Get(params.DynamicCollection), namespaceCacheRefreshInterval: dynamicconfig.NamespaceCacheRefreshInterval.Get(params.DynamicCollection), } } @@ -131,6 +133,7 @@ func (wc *deleteNamespaceComponent) deleteExecutionsActivities() *deleteexecutio wc.visibilityManager, wc.historyClient, wc.deleteActivityRPS, + wc.useChasmDeleteExecution, wc.metricsHandler, wc.logger, ) diff --git a/tests/chasm_test.go b/tests/chasm_test.go index 7d68ab6bf72..e4c973dbad4 100644 
--- a/tests/chasm_test.go +++ b/tests/chasm_test.go @@ -3,6 +3,7 @@ package tests import ( "context" "crypto/rand" + "errors" "fmt" "strconv" "testing" @@ -12,6 +13,8 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + "go.temporal.io/api/operatorservice/v1" "go.temporal.io/api/serviceerror" workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" @@ -21,10 +24,13 @@ import ( "go.temporal.io/server/chasm/lib/tests/gen/testspb/v1" "go.temporal.io/server/common/debug" "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/namespace" "go.temporal.io/server/common/payload" "go.temporal.io/server/common/searchattribute/sadefs" + "go.temporal.io/server/common/testing/await" "go.temporal.io/server/common/testing/testvars" "go.temporal.io/server/tests/testcore" + "google.golang.org/protobuf/types/known/durationpb" ) const ( @@ -53,8 +59,9 @@ func TestChasmTestSuite(t *testing.T) { func (s *ChasmTestSuite) SetupSuite() { s.FunctionalTestBase.SetupSuiteWithCluster( testcore.WithDynamicConfigOverrides(map[dynamicconfig.Key]any{ - dynamicconfig.EnableChasm.Key(): true, - dynamicconfig.VisibilityEnableUnifiedQueryConverter.Key(): s.enableUnifiedQueryConverter, + dynamicconfig.EnableChasm.Key(): true, + dynamicconfig.VisibilityEnableUnifiedQueryConverter.Key(): s.enableUnifiedQueryConverter, + dynamicconfig.DeleteNamespaceUseChasmDeleteExecution.Key(): true, }), ) @@ -1000,4 +1007,71 @@ func (s *ChasmTestSuite) TestPayloadStore_ApproximateExecutionSize() { s.InDelta(adminDescResp.DatabaseMutableState.Size(), currentApproxSize, sizeDelta) } +// TestNamespaceDelete_WithChasmExecutions verifies that running CHASM executions are cleaned +// up when their namespace is deleted, exercising the DeleteExecution history service API. 
+func (s *ChasmTestSuite) TestNamespaceDelete_WithChasmExecutions() { + tv := testvars.New(s.T()) + + // Register a fresh namespace for this test. + namespaceName := "ns-chasm-delete-" + tv.Any().String()[:8] + _, err := s.FrontendClient().RegisterNamespace(testcore.NewContext(), &workflowservice.RegisterNamespaceRequest{ + Namespace: namespaceName, + WorkflowExecutionRetentionPeriod: durationpb.New(24 * time.Hour), + HistoryArchivalState: enumspb.ARCHIVAL_STATE_DISABLED, + VisibilityArchivalState: enumspb.ARCHIVAL_STATE_DISABLED, + }) + s.NoError(err) + + descResp, err := s.FrontendClient().DescribeNamespace(testcore.NewContext(), &workflowservice.DescribeNamespaceRequest{ + Namespace: namespaceName, + }) + s.NoError(err) + nsID := namespace.ID(descResp.GetNamespaceInfo().GetId()) + + // Create running CHASM executions in the new namespace. + const numExecutions = 3 + for range numExecutions { + _, err = tests.NewPayloadStoreHandler(s.chasmContext, tests.NewPayloadStoreRequest{ + NamespaceID: nsID, + StoreID: tv.Any().String(), + IDReusePolicy: chasm.BusinessIDReusePolicyRejectDuplicate, + IDConflictPolicy: chasm.BusinessIDConflictPolicyFail, + }) + s.NoError(err) + } + + // Wait for visibility records to appear. + visQuery := fmt.Sprintf("TemporalNamespaceDivision = '%d'", tests.ArchetypeID) + await.Require(testcore.NewContext(), s.T(), func(t *await.T) { + resp, err := s.FrontendClient().ListWorkflowExecutions(t.Context(), &workflowservice.ListWorkflowExecutionsRequest{ + Namespace: namespaceName, + PageSize: 10, + Query: visQuery, + }) + require.NoError(t, err) + require.Len(t, resp.Executions, numExecutions) + }, testcore.WaitForESToSettle, 100*time.Millisecond) + + // Delete the namespace, which should trigger DeleteExecution for all CHASM executions. 
+ _, err = s.OperatorClient().DeleteNamespace(testcore.NewContext(), &operatorservice.DeleteNamespaceRequest{ + Namespace: namespaceName, + }) + s.NoError(err) + + // Verify all CHASM executions are cleaned up from visibility. + await.Require(testcore.NewContext(), s.T(), func(t *await.T) { + resp, err := s.FrontendClient().ListWorkflowExecutions(t.Context(), &workflowservice.ListWorkflowExecutionsRequest{ + Namespace: namespaceName, + PageSize: 10, + Query: visQuery, + }) + var notFound *serviceerror.NamespaceNotFound + if errors.As(err, &notFound) { + return // namespace fully deleted is also acceptable + } + require.NoError(t, err) + require.Empty(t, resp.Executions) + }, 20*time.Second*debug.TimeoutMultiplier, time.Second) +} + // TODO: More tests here... From dfbdd298c9a753c37952e9b662e4da30f05f266e Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Tue, 19 May 2026 09:15:35 -0700 Subject: [PATCH 56/73] Emit matching_timeout outcome for Nexus matching dispatch errors (#10325) When the matching client returns an error from DispatchNexusTask in StartOperation and CancelOperation, tag the metrics with outcome=matching_timeout instead of falling through to the default internal_error outcome. This lets us distinguish failures originating from the matching dispatch (typically transient timeouts) from other internal errors, which are considered more severe. --- service/frontend/nexus_handler.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/service/frontend/nexus_handler.go b/service/frontend/nexus_handler.go index c35b3f72f39..98710d504ba 100644 --- a/service/frontend/nexus_handler.go +++ b/service/frontend/nexus_handler.go @@ -448,6 +448,7 @@ func (h *nexusHandler) StartOperation( // RPC.
response, err := h.matchingClient.DispatchNexusTask(ctx, request) if err != nil { + oc.metricsHandler = oc.metricsHandler.WithTags(metrics.OutcomeTag("matching_timeout")) oc.logger.Error("received error from matching service for Nexus StartOperation request", tag.Error(err)) return nil, commonnexus.ConvertGRPCError(err, false) } @@ -665,6 +666,7 @@ func (h *nexusHandler) CancelOperation(ctx context.Context, service, operation, // RPC. response, err := h.matchingClient.DispatchNexusTask(ctx, request) if err != nil { + oc.metricsHandler = oc.metricsHandler.WithTags(metrics.OutcomeTag("matching_timeout")) oc.logger.Error("received error from matching service for Nexus CancelOperation request", tag.Error(err)) return commonnexus.ConvertGRPCError(err, false) } From d8a11039924b065fb3b782164ed6b2017bd3fa50 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 19 May 2026 09:55:21 -0700 Subject: [PATCH 57/73] Block usage of internal worker controller per-namespace task queue (#10289) ## What changed? Add `WorkerControllerPerNSWorkerTaskQueue` ("temporal-sys-worker-controller-per-ns-tq") to the set of internal per-namespace task queues and treat it the same as `PerNSWorkerTaskQueue` in `IsInternalPerNsTaskQueue`, so user workflows cannot start workflows, schedule activities, start child workflows, continue-as-new, or update activity options targeting it. ## Why? The Worker Controller internal task queue has the same security concerns as the shared one, so treating it the same seems appropriate. ## How did you test it? 
- [X] built - [X] run locally and tested manually - [X] covered by existing tests - [X] added new unit test(s) - [X] added new functional test(s) --- chasm/lib/activity/validator_test.go | 4 ++-- common/primitives/task_queues.go | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/chasm/lib/activity/validator_test.go b/chasm/lib/activity/validator_test.go index 7ebc7415223..d0781d5ac2a 100644 --- a/chasm/lib/activity/validator_test.go +++ b/chasm/lib/activity/validator_test.go @@ -313,7 +313,7 @@ func TestStandaloneActivityTaskQueueValidations(t *testing.T) { } func TestEmbeddedActivityTaskQueueValidations(t *testing.T) { - t.Run("Allow PerNSWorkerTaskQueue TaskQueue", func(t *testing.T) { + t.Run("Allow PerNSWorkerTaskQueue TaskQueue on the same TaskQueue", func(t *testing.T) { options := &activitypb.ActivityOptions{ TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, ScheduleToCloseTimeout: durationpb.New(10 * time.Second), @@ -332,7 +332,7 @@ func TestEmbeddedActivityTaskQueueValidations(t *testing.T) { require.NoError(t, err) }) - t.Run("Disallow PerNSWorkerTaskQueue TaskQueue", func(t *testing.T) { + t.Run("Disallow PerNSWorkerTaskQueue TaskQueue from non-internal TaskQueue", func(t *testing.T) { options := &activitypb.ActivityOptions{ TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, ScheduleToCloseTimeout: durationpb.New(10 * time.Second), diff --git a/common/primitives/task_queues.go b/common/primitives/task_queues.go index 9b88f9733de..0c6c049f11a 100644 --- a/common/primitives/task_queues.go +++ b/common/primitives/task_queues.go @@ -10,8 +10,11 @@ import ( // all internal task queues shall be defined here such that we enhance security on top of them const ( - DefaultWorkerTaskQueue = "default-worker-tq" - PerNSWorkerTaskQueue = "temporal-sys-per-ns-tq" + DefaultWorkerTaskQueue = "default-worker-tq" + PerNSWorkerTaskQueue = "temporal-sys-per-ns-tq" + 
WorkerControllerPerNSWorkerTaskQueue = "temporal-sys-worker-controller-per-ns-tq" + internalTaskQueuePrefix = "temporal-sys-" + internalTaskQueuePerNSPrefix = "temporal-sys-per-ns-" MigrationActivityTQ = "temporal-sys-migration-activity-tq" AddSearchAttributesActivityTQ = "temporal-sys-add-search-attributes-activity-tq" @@ -34,15 +37,15 @@ func IsInternalTaskQueueKind(kind enumspb.TaskQueueKind) bool { return false } -const internalTaskQueuePrefix = "temporal-sys-" - // IsInternalTaskQueue returns true if the task queue name belongs to an internal system task queue. func IsInternalTaskQueue(taskQueue string) bool { return strings.HasPrefix(taskQueue, internalTaskQueuePrefix) } +// IsInternalPerNsTaskQueue returns true if the task queue name belongs to a per-namespace internal system worker func IsInternalPerNsTaskQueue(taskQueue string) bool { - return taskQueue == PerNSWorkerTaskQueue + // TODO: remove WorkerControllerPerNSWorkerTaskQueue once it has been updated to match the prefix + return strings.HasPrefix(taskQueue, internalTaskQueuePerNSPrefix) || taskQueue == WorkerControllerPerNSWorkerTaskQueue } // CheckInternalPerNsTaskQueueAllowed tries to block the usage of internal per-namespace task queue for illegal cases. From 7ddfc53e35e1b1d03d7bcc57b831ed8d8e8b1157 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Tue, 19 May 2026 12:31:14 -0700 Subject: [PATCH 58/73] Add test cluster DC redirection policy option (#10328) ## What changed? Pass custom `DCRedirectionPolicy` directly instead of going through `WithFxOptionsForService`. ## Why? We want to eliminate `WithFxOptionsForService` as it is blocking us from migrating away from the `onebox.go` approach (which duplicates the fx setup) since we don't want to expose an equivalent method in `temporal/fx.go`. 
--- tests/testcore/functional_test_base.go | 8 ++++++++ tests/testcore/onebox.go | 11 ++++++++--- tests/testcore/test_cluster.go | 2 ++ tests/xdc/failover_test.go | 7 +------ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/testcore/functional_test_base.go b/tests/testcore/functional_test_base.go index 7d6832dc500..cde853344f6 100644 --- a/tests/testcore/functional_test_base.go +++ b/tests/testcore/functional_test_base.go @@ -95,6 +95,7 @@ type ( // TestClusterParams contains the variables which are used to configure test cluster via the TestClusterOption type. TestClusterParams struct { ServiceOptions map[primitives.ServiceName][]fx.Option + DCRedirectionPolicy config.DCRedirectionPolicy DynamicConfigOverrides map[dynamicconfig.Key]any ArchivalEnabled bool EnableMTLS bool @@ -131,6 +132,12 @@ func WithFxOptionsForService(serviceName primitives.ServiceName, options ...fx.O } } +func WithDCRedirectionPolicy(policy config.DCRedirectionPolicy) TestClusterOption { + return func(params *TestClusterParams) { + params.DCRedirectionPolicy = policy + } +} + func WithDynamicConfigOverrides(overrides map[dynamicconfig.Key]any) TestClusterOption { return func(params *TestClusterParams) { if params.DynamicConfigOverrides == nil { @@ -284,6 +291,7 @@ func (s *FunctionalTestBase) setupCluster(options ...TestClusterOption) { HistoryConfig: HistoryConfig{ NumHistoryShards: cmp.Or(params.NumHistoryShards, 4), }, + DCRedirectionPolicy: params.DCRedirectionPolicy, DynamicConfigOverrides: params.DynamicConfigOverrides, ServiceFxOptions: params.ServiceOptions, EnableMetricsCapture: true, diff --git a/tests/testcore/onebox.go b/tests/testcore/onebox.go index 68f23f03eec..3c591db3374 100644 --- a/tests/testcore/onebox.go +++ b/tests/testcore/onebox.go @@ -109,6 +109,7 @@ type ( esClient esclient.Client mockAdminClient map[string]adminservice.AdminServiceClient namespaceReplicationTaskExecutor nsreplication.TaskExecutor + dcRedirectionPolicy config.DCRedirectionPolicy 
tlsConfigProvider *encryption.FixedTLSConfigProvider captureMetricsHandler *metricstest.CaptureHandler hostsByProtocolByService map[transferProtocol]map[primitives.ServiceName]static.Hosts @@ -171,6 +172,7 @@ type ( ESClient esclient.Client MockAdminClient map[string]adminservice.AdminServiceClient NamespaceReplicationTaskExecutor nsreplication.TaskExecutor + DCRedirectionPolicy config.DCRedirectionPolicy DynamicConfigOverrides map[dynamicconfig.Key]any TLSConfigProvider *encryption.FixedTLSConfigProvider CaptureMetricsHandler *metricstest.CaptureHandler @@ -216,6 +218,7 @@ func newTemporal(t *testing.T, params *TemporalParams) *TemporalImpl { workerConfig: params.WorkerConfig, mockAdminClient: params.MockAdminClient, namespaceReplicationTaskExecutor: params.NamespaceReplicationTaskExecutor, + dcRedirectionPolicy: params.DCRedirectionPolicy, tlsConfigProvider: params.TLSConfigProvider, captureMetricsHandler: params.CaptureMetricsHandler, dcClient: dynamicconfig.NewMemoryClient(), @@ -378,7 +381,7 @@ func (c *TemporalImpl) startFrontend() { fx.Provide(c.frontendConfigProvider), fx.Provide(func() listenHostPort { return listenHostPort(host) }), fx.Provide(func() httpPort { return mustPortFromAddress(c.FrontendHTTPAddress()) }), - fx.Provide(func() config.DCRedirectionPolicy { return config.DCRedirectionPolicy{} }), + fx.Provide(func() config.DCRedirectionPolicy { return c.dcRedirectionPolicy }), fx.Provide(func() log.Logger { return logger }), fx.Provide(func() log.ThrottledLogger { return logger }), fx.Provide(func() resource.NamespaceLogger { return logger }), @@ -475,7 +478,7 @@ func (c *TemporalImpl) startHistory() { fx.Provide(c.GetMetricsHandler), fx.Provide(func() listenHostPort { return listenHostPort(host) }), fx.Provide(func() httpPort { return mustPortFromAddress(c.FrontendHTTPAddress()) }), - fx.Provide(func() config.DCRedirectionPolicy { return config.DCRedirectionPolicy{} }), + fx.Provide(func() config.DCRedirectionPolicy { return c.dcRedirectionPolicy 
}), fx.Provide(func() log.Logger { return logger }), fx.Provide(func() log.ThrottledLogger { return logger }), fx.Provide(c.newRPCFactory), @@ -626,7 +629,7 @@ func (c *TemporalImpl) startWorker() { fx.Provide(c.GetMetricsHandler), fx.Provide(func() listenHostPort { return listenHostPort(host) }), fx.Provide(func() httpPort { return mustPortFromAddress(c.FrontendHTTPAddress()) }), - fx.Provide(func() config.DCRedirectionPolicy { return config.DCRedirectionPolicy{} }), + fx.Provide(func() config.DCRedirectionPolicy { return c.dcRedirectionPolicy }), fx.Provide(func() log.Logger { return logger }), fx.Provide(func() log.ThrottledLogger { return logger }), fx.Provide(c.newRPCFactory), @@ -748,6 +751,7 @@ func (c *TemporalImpl) frontendConfigProvider() *config.Config { }, }, }, + DCRedirectionPolicy: c.dcRedirectionPolicy, ExporterConfig: telemetry.ExportConfig{ CustomExporters: c.spanExporters, }, @@ -761,6 +765,7 @@ func (c *TemporalImpl) configProvider(serviceName primitives.ServiceName) *confi RPC: config.RPC{}, }, }, + DCRedirectionPolicy: c.dcRedirectionPolicy, ExporterConfig: telemetry.ExportConfig{ CustomExporters: c.spanExporters, }, diff --git a/tests/testcore/test_cluster.go b/tests/testcore/test_cluster.go index 27850223e43..e1a903c8f48 100644 --- a/tests/testcore/test_cluster.go +++ b/tests/testcore/test_cluster.go @@ -85,6 +85,7 @@ type ( ESConfig *esclient.Config MockAdminClient map[string]adminservice.AdminServiceClient FaultInjection *config.FaultInjection + DCRedirectionPolicy config.DCRedirectionPolicy DynamicConfigOverrides map[dynamicconfig.Key]any EnableMTLS bool EnableMetricsCapture bool @@ -340,6 +341,7 @@ func newClusterWithPersistenceTestBaseFactory( WorkerConfig: clusterConfig.WorkerConfig, MockAdminClient: clusterConfig.MockAdminClient, NamespaceReplicationTaskExecutor: nsreplication.NewTaskExecutor(clusterConfig.ClusterMetadata.CurrentClusterName, testBase.MetadataManager, nsreplication.NewNoopDataMerger(), 
nsreplication.NewDefaultAdmitter(), logger), + DCRedirectionPolicy: clusterConfig.DCRedirectionPolicy, DynamicConfigOverrides: clusterConfig.DynamicConfigOverrides, TLSConfigProvider: tlsConfigProvider, ServiceFxOptions: clusterConfig.ServiceFxOptions, diff --git a/tests/xdc/failover_test.go b/tests/xdc/failover_test.go index 6af351a7d9c..72111e11f55 100644 --- a/tests/xdc/failover_test.go +++ b/tests/xdc/failover_test.go @@ -35,7 +35,6 @@ import ( "go.temporal.io/server/common/primitives" "go.temporal.io/server/service/worker/migration" "go.temporal.io/server/tests/testcore" - "go.uber.org/fx" "google.golang.org/protobuf/types/known/durationpb" ) @@ -2742,11 +2741,7 @@ func TestFuncClustersWithRedirectionTestSuite(t *testing.T) { func (s *FunctionalClustersWithRedirectionTestSuite) SetupSuite() { s.setupSuite( - testcore.WithFxOptionsForService(primitives.FrontendService, - fx.Decorate(func(_ config.DCRedirectionPolicy) config.DCRedirectionPolicy { - return config.DCRedirectionPolicy{Policy: "all-apis-forwarding"} - }), - ), + testcore.WithDCRedirectionPolicy(config.DCRedirectionPolicy{Policy: "all-apis-forwarding"}), ) } From e59d6da4911260e1d39a6352ed48b4b211b5a5a7 Mon Sep 17 00:00:00 2001 From: Yaniv Kaul Date: Tue, 19 May 2026 23:53:03 +0300 Subject: [PATCH 59/73] perf: cache tally metrics handler scopes and WithTags handlers to reduce allocations (#9620) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Cache `WithTags()` child handlers via `sync.Map` to eliminate repeated `tagsToMap()`, `scope.Tagged()`, and handler struct allocations on the hot path - Cache `scope.Tagged()` results per unique inline tag combination in `cachedTaggedScope()`, bounded to 1024 entries with graceful degradation - Normalize excluded tags before cache key computation so high-cardinality excluded values (e.g. 
`activityType`) share a single cache entry, preventing unbounded cache growth ## Design Two complementary caching layers in `tallyMetricsHandler`: 1. **`childCache`** (`sync.Map`): caches entire handler subtrees returned by `WithTags()`. On cache hit: zero allocations. 2. **`scopeCache`** (`sync.Map` + `atomic.Int64` size bound): caches `tally.Scope` objects returned by `scope.Tagged()` for inline tags passed to `Counter`/`Gauge`/`Timer`/`Histogram` `Record()` calls. Bounded to 1024 entries; beyond that, scopes are created but not cached. Both caches use `LoadOrStore` for safe concurrent access. Tag normalization via `normalizeTagsForCaching()` ensures excluded tag variants collapse to the same cache key. The normalization has a zero-alloc fast path when no tags need substitution. ## Allocation Reduction (pprof alloc_space, 5min ScyllaDB workload) ### Commit 1: WithTags handler cache | Metric | Before | After | Reduction | |--------|--------|-------|-----------| | WithTags cumulative | 1,930 MB | 316 MB | -83.6% | | Total server allocs | 18,030 MB | 16,481 MB | -8.6% | ### Commit 2: Scope cache for inline tags | Metric | Before | After | Reduction | |--------|--------|-------|-----------| | tagsToMap.func1 | 1,101 MB | 0 MB | -100% | | tally Subscope | 1,012 MB | 0 MB | -100% | | Total server allocs | 18,465 MB | 16,511 MB | -10.6% | ## Benchmark (omes throughput_stress, mc150, 5 min) Host networking, i7-1270P 4 cores/component, inter-run data resets: | Database | Baseline | After commit 1 | After commit 2 | |----------|----------|----------------|----------------| | Cassandra | 280 | 294 (+5.0%) | 270 (-3.6%) | | ScyllaDB | 290 | 296 (+2.1%) | 298 (+2.8%) | Note: Throughput variance at mc150 is ~5-10%. The allocation reduction is confirmed by pprof but throughput gains are within noise at this concurrency level. 
## Testing - Unit tests for all 4 metric types (Counter, Gauge, Timer, Histogram) with inline tags - Concurrency tests with race detector (32 goroutines × 100 iterations) - Cache bound enforcement test - Exclude-tag normalization tests (merge, allowed values, zero-alloc fast path) - Independent per-handler scope cache verification - All existing tests continue to pass --- ## v2 — addressing review feedback All 6 review comments addressed. Rebased on `origin/main`, squashed into a single commit. ### Changes 1. **`tagsCacheKey`: Use `strings.Builder` with `Grow` pre-allocation** — replaced manual `[]byte` construction with `strings.Builder`. A sizing pass pre-computes the exact capacity via `Grow()` to avoid internal reallocation (1 alloc/op). 2. **`tagsCacheKey`: Remove single-tag special case, uniform `\x00` separator** — removed the `len(tags) == 1` branch. Every tag pair now unconditionally appends `\x00` after both key and value, making the format uniform and the code simpler. 3. **`normalizeTagsForCaching`: Use `slices.Clone`** — replaced `make([]Tag, len(tags))` + `copy(tags[:i])` with `slices.Clone(tags)`, which copies the entire slice upfront. This eliminates the `if normalized != nil { normalized[i] = t }` guard for unchanged tags after the clone point. 4. **Extract shared `normalizeTag` function** — the exclude-tag check was duplicated between `normalizeTagsForCaching` and the `convert` closure in `tagsToMap`. Extracted `normalizeTag(tag Tag, excl excludeTags) (Tag, bool)` used by both, removing the duplication. 5. **Bound `childCache` to `scopeCacheMaxSize`** — `childCache` (used by `WithTags`) was previously unbounded. Applied the same bounding strategy as `scopeCache`: atomic counter + stop caching beyond 1024 entries. Added `childCacheSize atomic.Int64` field and `TestWithTags_BoundedChildCacheSize` test. 
### Micro-benchmark results (cached vs uncached) ``` goos: linux, goarch: amd64, cpu: 12th Gen Intel(R) Core(TM) i7-1270P ns/op B/op allocs/op CounterRecord_Uncached 344 592 3 CounterRecord_CachedScope 93 48 2 ← 3.7x faster, 12x less memory WithTags_Uncached 325 592 3 WithTags_CacheHit 50 16 1 ← 6.5x faster, 37x less memory TagsCacheKey_SingleTag 29 24 1 TagsCacheKey_ThreeTags 49 64 1 ``` ### Not changed (deliberate) - **Cache key is order-sensitive / does not deduplicate keys** — Tally internally canonicalizes tag maps (sorted keys, rightmost precedence), so the cache key could theoretically miss on reordered-but-equivalent tag sets. Verified across 100+ call sites: tag ordering is fully consistent and duplicate keys never appear in the codebase. Adding sort+dedup to the hot path would add cost without real-world benefit. --- common/metrics/config.go | 3 + common/metrics/tally_metrics_handler.go | 252 ++++++-- common/metrics/tally_metrics_handler_test.go | 572 ++++++++++++++++++- 3 files changed, 767 insertions(+), 60 deletions(-) diff --git a/common/metrics/config.go b/common/metrics/config.go index 1d98cc598fc..cc5bf481a36 100644 --- a/common/metrics/config.go +++ b/common/metrics/config.go @@ -66,6 +66,9 @@ type ( // (instead of milliseconds). // This config only takes effect when using prometheus via opentelemetry framework RecordTimerInSeconds bool `yaml:"recordTimerInSeconds"` + // TagsCacheMaxSize controls the maximum number of entries in the metrics + // tag cache. When the cache is full, all entries are cleared. Default: 10000. 
+ TagsCacheMaxSize int `yaml:"tagsCacheMaxSize"` } // StatsdConfig contains the config items for statsd metrics reporter diff --git a/common/metrics/tally_metrics_handler.go b/common/metrics/tally_metrics_handler.go index 71d15e80ce0..4f0b4ef6c58 100644 --- a/common/metrics/tally_metrics_handler.go +++ b/common/metrics/tally_metrics_handler.go @@ -1,12 +1,24 @@ package metrics import ( + "encoding/binary" + "slices" + "strings" + "sync" "time" "github.com/uber-go/tally/v4" "go.temporal.io/server/common/log" ) +// defaultTagsCacheMaxSize is the default upper bound on cached scope/handler entries. +const defaultTagsCacheMaxSize = 10000 + +type histogramCacheKey struct { + name string + unit MetricUnit +} + var sanitizer = tally.NewSanitizer(tally.SanitizeOptions{ NameCharacters: tally.ValidCharacters{Ranges: tally.AlphanumericRange, Characters: tally.UnderscoreCharacters}, KeyCharacters: tally.ValidCharacters{Ranges: tally.AlphanumericRange, Characters: tally.UnderscoreCharacters}, @@ -14,6 +26,70 @@ var sanitizer = tally.NewSanitizer(tally.SanitizeOptions{ ReplacementCharacter: '_', }) +// sharedScopeCache is a bounded cache shared across all tallyMetricsHandler +// instances in a handler tree. When the cache reaches its size limit, all +// entries are cleared (clear-on-overflow) to bound memory usage. 
+type sharedScopeCache struct { + scopes map[string]tally.Scope + handlers map[string]*tallyMetricsHandler + mu sync.RWMutex + maxSize int +} + +func newSharedScopeCache(maxSize int) *sharedScopeCache { + return &sharedScopeCache{ + maxSize: maxSize, + scopes: make(map[string]tally.Scope), + handlers: make(map[string]*tallyMetricsHandler), + } +} + +func (c *sharedScopeCache) loadOrStoreScope(key string, create func() tally.Scope) tally.Scope { + c.mu.RLock() + if s, ok := c.scopes[key]; ok { + c.mu.RUnlock() + return s + } + c.mu.RUnlock() + + s := create() + + c.mu.Lock() + defer c.mu.Unlock() + // Double-check: another goroutine may have inserted while we were creating. + if existing, ok := c.scopes[key]; ok { + return existing + } + if len(c.scopes) >= c.maxSize { + clear(c.scopes) + } + c.scopes[key] = s + return s +} + +func (c *sharedScopeCache) loadOrStoreHandler(key string, create func() *tallyMetricsHandler) *tallyMetricsHandler { + c.mu.RLock() + if h, ok := c.handlers[key]; ok { + c.mu.RUnlock() + return h + } + c.mu.RUnlock() + + h := create() + + c.mu.Lock() + defer c.mu.Unlock() + // Double-check: another goroutine may have inserted while we were creating. 
+ if existing, ok := c.handlers[key]; ok { + return existing + } + if len(c.handlers) >= c.maxSize { + clear(c.handlers) + } + c.handlers[key] = h + return h +} + type ( excludeTags map[string]map[string]struct{} @@ -21,6 +97,12 @@ type ( scope tally.Scope perUnitBuckets map[MetricUnit]tally.Buckets excludeTags excludeTags + cache *sharedScopeCache + scopeKey string // unique prefix for this handler in the shared cache + counters sync.Map // metric name -> CounterIface + gauges sync.Map // metric name -> GaugeIface + timers sync.Map // metric name -> TimerIface + histograms sync.Map // metric name + unit -> HistogramIface } ) @@ -33,65 +115,159 @@ func NewTallyMetricsHandler(cfg ClientConfig, scope tally.Scope) *tallyMetricsHa perUnitBuckets[MetricUnit(unit)] = tally.ValueBuckets(boundariesList) } + maxSize := cfg.TagsCacheMaxSize + if maxSize <= 0 { + maxSize = defaultTagsCacheMaxSize + } + return &tallyMetricsHandler{ scope: scope, perUnitBuckets: perUnitBuckets, excludeTags: configExcludeTags(cfg), + cache: newSharedScopeCache(maxSize), + scopeKey: "", } } -// WithTags creates a new MetricProvder with provided []Tag -// Tags are merged with registered Tags from the source MetricsHandler +// tagsCacheKey builds a compact string key from a tag slice for use as a +// map lookup key. +func tagsCacheKey(tags []Tag) string { + size := 0 + for i := range tags { + size += len(tags[i].Key) + len(tags[i].Value) + 2*binary.MaxVarintLen64 + } + var sb strings.Builder + sb.Grow(size) + for _, t := range tags { + appendCacheKeyPart(&sb, t.Key) + appendCacheKeyPart(&sb, t.Value) + } + return sb.String() +} + +func appendCacheKeyPart(sb *strings.Builder, value string) { + var lenBuf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(lenBuf[:], uint64(len(value))) + _, _ = sb.Write(lenBuf[:n]) + sb.WriteString(value) +} + +// WithTags creates a new MetricProvider with provided []Tag +// Tags are merged with registered Tags from the source MetricsHandler. 
+// Handlers are cached by tag combination so repeated calls avoid allocations. func (tmh *tallyMetricsHandler) WithTags(tags ...Tag) Handler { - return &tallyMetricsHandler{ - scope: tmh.scope.Tagged(tagsToMap(tags, tmh.excludeTags)), - perUnitBuckets: tmh.perUnitBuckets, - excludeTags: tmh.excludeTags, + if len(tags) == 0 { + return tmh } + normalizedKey := tagsCacheKey(normalizeTagsForCaching(tags, tmh.excludeTags)) + key := tmh.scopeKey + normalizedKey + return tmh.cache.loadOrStoreHandler(key, func() *tallyMetricsHandler { + return &tallyMetricsHandler{ + scope: tmh.scope.Tagged(tagsToMap(tags, tmh.excludeTags)), + perUnitBuckets: tmh.perUnitBuckets, + excludeTags: tmh.excludeTags, + cache: tmh.cache, + scopeKey: key, + } + }) +} + +// cachedTaggedScope returns a tally.Scope tagged with the given tags, caching +// the result so that repeated calls with the same tag combination avoid +// allocating a new map and tally scope lookup. Tags are normalized through +// excludeTags before cache key computation so that different raw values which +// map to the same excluded placeholder share a single cache entry. +func (tmh *tallyMetricsHandler) cachedTaggedScope(tags []Tag) tally.Scope { + if len(tags) == 0 { + return tmh.scope + } + key := tmh.scopeKey + tagsCacheKey(normalizeTagsForCaching(tags, tmh.excludeTags)) + return tmh.cache.loadOrStoreScope(key, func() tally.Scope { + return tmh.scope.Tagged(tagsToMap(tags, tmh.excludeTags)) + }) +} + +// normalizeTag applies excludeTags substitution to a single tag. +// Returns the (possibly modified) tag and whether it was normalized. +func normalizeTag(t Tag, excl excludeTags) (Tag, bool) { + if vals, ok := excl[t.Key]; ok { + if _, ok := vals[t.Value]; !ok { + return Tag{Key: t.Key, Value: tagExcludedValue}, true + } + } + return t, false +} + +// normalizeTagsForCaching applies excludeTags substitution to produce +// canonical tag values for cache key computation. 
Returns the original slice +// unchanged if no tags need normalization (zero-alloc fast path). +func normalizeTagsForCaching(tags []Tag, excl excludeTags) []Tag { + if len(excl) == 0 { + return tags + } + var normalized []Tag + for i, t := range tags { + nt, changed := normalizeTag(t, excl) + if changed { + if normalized == nil { + normalized = slices.Clone(tags) + } + normalized[i] = nt + } + } + if normalized != nil { + return normalized + } + return tags } // Counter obtains a counter for the given name. func (tmh *tallyMetricsHandler) Counter(counter string) CounterIface { - return CounterFunc(func(i int64, t ...Tag) { - scope := tmh.scope - if len(t) > 0 { - scope = tmh.scope.Tagged(tagsToMap(t, tmh.excludeTags)) - } - scope.Counter(counter).Inc(i) + if v, ok := tmh.counters.Load(counter); ok { + return v.(CounterIface) //nolint:revive // type-safe: only CounterIface is stored + } + c := CounterFunc(func(i int64, t ...Tag) { + tmh.cachedTaggedScope(t).Counter(counter).Inc(i) }) + actual, _ := tmh.counters.LoadOrStore(counter, c) + return actual.(CounterIface) //nolint:revive // type-safe: only CounterIface is stored } // Gauge obtains a gauge for the given name. func (tmh *tallyMetricsHandler) Gauge(gauge string) GaugeIface { - return GaugeFunc(func(f float64, t ...Tag) { - scope := tmh.scope - if len(t) > 0 { - scope = tmh.scope.Tagged(tagsToMap(t, tmh.excludeTags)) - } - scope.Gauge(gauge).Update(f) + if v, ok := tmh.gauges.Load(gauge); ok { + return v.(GaugeIface) //nolint:revive // type-safe: only GaugeIface is stored + } + g := GaugeFunc(func(f float64, t ...Tag) { + tmh.cachedTaggedScope(t).Gauge(gauge).Update(f) }) + actual, _ := tmh.gauges.LoadOrStore(gauge, g) + return actual.(GaugeIface) //nolint:revive // type-safe: only GaugeIface is stored } // Timer obtains a timer for the given name. 
func (tmh *tallyMetricsHandler) Timer(timer string) TimerIface { - return TimerFunc(func(d time.Duration, t ...Tag) { - scope := tmh.scope - if len(t) > 0 { - scope = tmh.scope.Tagged(tagsToMap(t, tmh.excludeTags)) - } - scope.Timer(timer).Record(d) + if v, ok := tmh.timers.Load(timer); ok { + return v.(TimerIface) //nolint:revive // type-safe: only TimerIface is stored + } + ti := TimerFunc(func(d time.Duration, t ...Tag) { + tmh.cachedTaggedScope(t).Timer(timer).Record(d) }) + actual, _ := tmh.timers.LoadOrStore(timer, ti) + return actual.(TimerIface) //nolint:revive // type-safe: only TimerIface is stored } // Histogram obtains a histogram for the given name. func (tmh *tallyMetricsHandler) Histogram(histogram string, unit MetricUnit) HistogramIface { - return HistogramFunc(func(i int64, t ...Tag) { - scope := tmh.scope - if len(t) > 0 { - scope = tmh.scope.Tagged(tagsToMap(t, tmh.excludeTags)) - } - scope.Histogram(histogram, tmh.perUnitBuckets[unit]).RecordValue(float64(i)) + key := histogramCacheKey{name: histogram, unit: unit} + if v, ok := tmh.histograms.Load(key); ok { + return v.(HistogramIface) //nolint:revive // type-safe: only HistogramIface is stored + } + h := HistogramFunc(func(i int64, t ...Tag) { + tmh.cachedTaggedScope(t).Histogram(histogram, tmh.perUnitBuckets[unit]).RecordValue(float64(i)) }) + actual, _ := tmh.histograms.LoadOrStore(key, h) + return actual.(HistogramIface) //nolint:revive // type-safe: only HistogramIface is stored } func (*tallyMetricsHandler) Stop(log.Logger) {} @@ -110,21 +286,9 @@ func tagsToMap(t1 []Tag, e excludeTags) map[string]string { } m := make(map[string]string, len(t1)) - - convert := func(tag Tag) { - if vals, ok := e[tag.Key]; ok { - if _, ok := vals[tag.Value]; !ok { - m[tag.Key] = tagExcludedValue - return - } - } - - m[tag.Key] = tag.Value - } - for i := range t1 { - convert(t1[i]) + nt, _ := normalizeTag(t1[i], e) + m[nt.Key] = nt.Value } - return m } diff --git a/common/metrics/tally_metrics_handler_test.go 
b/common/metrics/tally_metrics_handler_test.go index 27e90c1aeb4..c33b0777fac 100644 --- a/common/metrics/tally_metrics_handler_test.go +++ b/common/metrics/tally_metrics_handler_test.go @@ -2,11 +2,13 @@ package metrics import ( "math" + "strconv" + "sync" "testing" "time" "github.com/google/uuid" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/uber-go/tally/v4" ) @@ -34,41 +36,41 @@ func TestTallyScope(t *testing.T) { snap := scope.Snapshot() counters, gauges, timers, histograms := snap.Counters(), snap.Gauges(), snap.Timers(), snap.Histograms() - assert.EqualValues(t, 8, counters["test.hits+"].Value()) - assert.EqualValues(t, map[string]string{}, counters["test.hits+"].Tags()) + require.EqualValues(t, 8, counters["test.hits+"].Value()) + require.Equal(t, map[string]string{}, counters["test.hits+"].Tags()) - assert.EqualValues(t, 11, counters["test.hits-tagged+taskqueue=__sticky__"].Value()) - assert.EqualValues(t, map[string]string{"taskqueue": "__sticky__"}, counters["test.hits-tagged+taskqueue=__sticky__"].Tags()) + require.EqualValues(t, 11, counters["test.hits-tagged+taskqueue=__sticky__"].Value()) + require.Equal(t, map[string]string{"taskqueue": "__sticky__"}, counters["test.hits-tagged+taskqueue=__sticky__"].Tags()) - assert.EqualValues(t, 14, counters["test.hits-tagged-excluded+taskqueue="+tagExcludedValue].Value()) - assert.EqualValues(t, map[string]string{"taskqueue": tagExcludedValue}, counters["test.hits-tagged-excluded+taskqueue="+tagExcludedValue].Tags()) + require.EqualValues(t, 14, counters["test.hits-tagged-excluded+taskqueue="+tagExcludedValue].Value()) + require.Equal(t, map[string]string{"taskqueue": tagExcludedValue}, counters["test.hits-tagged-excluded+taskqueue="+tagExcludedValue].Tags()) - assert.EqualValues(t, float64(-100), gauges["test.temp+location=Mare Imbrium"].Value()) - assert.EqualValues(t, map[string]string{ + require.InDelta(t, float64(-100), gauges["test.temp+location=Mare 
Imbrium"].Value(), 0.01) + require.Equal(t, map[string]string{ "location": "Mare Imbrium", }, gauges["test.temp+location=Mare Imbrium"].Tags()) - assert.EqualValues(t, []time.Duration{ + require.Equal(t, []time.Duration{ 1248 * time.Millisecond, 5255 * time.Millisecond, }, timers["test.latency+"].Values()) - assert.EqualValues(t, map[string]string{}, timers["test.latency+"].Tags()) + require.Equal(t, map[string]string{}, timers["test.latency+"].Tags()) - assert.EqualValues(t, map[float64]int64{ + require.Equal(t, map[float64]int64{ 1024: 0, 2048: 0, math.MaxFloat64: 1, }, histograms["test.transmission+"].Values()) - assert.EqualValues(t, map[time.Duration]int64(nil), histograms["test.transmission+"].Durations()) - assert.EqualValues(t, map[string]string{}, histograms["test.transmission+"].Tags()) + require.Equal(t, map[time.Duration]int64(nil), histograms["test.transmission+"].Durations()) + require.Equal(t, map[string]string{}, histograms["test.transmission+"].Tags()) newTaggedHandler := mp.WithTags(NamespaceTag(uuid.NewString())) recordTallyMetrics(newTaggedHandler) snap = scope.Snapshot() counters = snap.Counters() - assert.EqualValues(t, 11, counters["test.hits-tagged+taskqueue=__sticky__"].Value()) - assert.EqualValues(t, map[string]string{"taskqueue": "__sticky__"}, counters["test.hits-tagged+taskqueue=__sticky__"].Tags()) + require.EqualValues(t, 11, counters["test.hits-tagged+taskqueue=__sticky__"].Value()) + require.Equal(t, map[string]string{"taskqueue": "__sticky__"}, counters["test.hits-tagged+taskqueue=__sticky__"].Tags()) } func recordTallyMetrics(h Handler) { @@ -87,3 +89,541 @@ func recordTallyMetrics(h Handler) { hitsTaggedCounter.Record(11, UnsafeTaskQueueTag("__sticky__")) hitsTaggedExcludedCounter.Record(14, UnsafeTaskQueueTag("filtered")) } + +func TestWithTags_EmptyTagsReturnsSelf(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + got := h.WithTags() + 
require.Same(t, h, got, "WithTags() with no args should return the same handler") +} + +func TestWithTags_CacheHitReturnsSamePointer(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h1 := h.WithTags(OperationTag("op1")) + h2 := h.WithTags(OperationTag("op1")) + require.Same(t, h1, h2, "repeated WithTags with identical args should return the same handler") +} + +func TestWithTags_DifferentTagsReturnDifferentHandlers(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h1 := h.WithTags(OperationTag("op1")) + h2 := h.WithTags(OperationTag("op2")) + require.NotSame(t, h1, h2, "WithTags with different values must produce different handlers") + + // Different keys with same value. + h3 := h.WithTags(StringTag("key_a", "val")) + h4 := h.WithTags(StringTag("key_b", "val")) + require.NotSame(t, h3, h4, "WithTags with different keys must produce different handlers") +} + +func TestWithTags_CachedHandlerRecordsMetricsCorrectly(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + tagged := h.WithTags(StringTag("env", "prod")) + + // Record via first call. + tagged.Counter("requests").Record(5) + + // Record via second (cached) call. + cached := h.WithTags(StringTag("env", "prod")) + cached.Counter("requests").Record(3) + + snap := scope.Snapshot() + c := snap.Counters()["test.requests+env=prod"] + require.NotNil(t, c) + require.EqualValues(t, 8, c.Value()) + require.Equal(t, map[string]string{"env": "prod"}, c.Tags()) +} + +func TestWithTags_MultipleTagsCacheCorrectly(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + tags := []Tag{OperationTag("op1"), StringTag("env", "staging")} + h1 := h.WithTags(tags...) + h2 := h.WithTags(tags...) 
+ require.Same(t, h1, h2, "multi-tag WithTags should be cached") + + h1.Counter("hits").Record(1) + h2.Counter("hits").Record(2) + snap := scope.Snapshot() + c := snap.Counters()["test.hits+env=staging,operation=op1"] + require.NotNil(t, c) + require.EqualValues(t, 3, c.Value()) +} + +func TestWithTags_TagOrderMatters(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + // Different ordering of the same two tags should be separate cache + // entries (the tally scope will merge them, but the cache keys differ). + h1 := h.WithTags(StringTag("a", "1"), StringTag("b", "2")) + h2 := h.WithTags(StringTag("b", "2"), StringTag("a", "1")) + + // They must both work — record via each. + h1.Counter("c").Record(1) + h2.Counter("c").Record(1) + + snap := scope.Snapshot() + c := snap.Counters()["test.c+a=1,b=2"] + require.NotNil(t, c) + require.EqualValues(t, 2, c.Value()) +} + +func TestWithTags_ChildCachesAreIndependent(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + child := h.WithTags(OperationTag("parent_op")) + grandchild := child.WithTags(StringTag("env", "dev")) + + // Grandchild should be cached on child, not on root. + grandchild2 := child.WithTags(StringTag("env", "dev")) + require.Same(t, grandchild, grandchild2) + + // Root should not have the grandchild cached. + fromRoot := h.WithTags(StringTag("env", "dev")) + require.NotSame(t, grandchild, fromRoot, "child and root caches should be independent") +} + +func TestWithTags_ExcludeTagsStillApply(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + // "activityType" is in excludeTags with empty allow-list, so any value + // should be replaced with tagExcludedValue. 
+ tagged := h.WithTags(ActivityTypeTag("MyActivity")) + tagged.Counter("hits").Record(1) + + snap := scope.Snapshot() + c := snap.Counters()["test.hits+activityType="+tagExcludedValue] + require.NotNil(t, c, "excluded tag value should be sanitized") + require.EqualValues(t, 1, c.Value()) +} + +func TestWithTags_ExcludedTagsShareChildHandler(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + // Different excluded-tag values should produce the same cached child handler, + // preventing unbounded childCache growth from high-cardinality excluded tags. + h1 := h.WithTags(ActivityTypeTag("TypeA")) + h2 := h.WithTags(ActivityTypeTag("TypeB")) + h3 := h.WithTags(ActivityTypeTag("TypeC")) + require.Same(t, h1, h2, "excluded tag variants should share the same child handler") + require.Same(t, h2, h3, "excluded tag variants should share the same child handler") + + // Verify the child handler still records metrics correctly. + h1.Counter("hits").Record(1) + h2.Counter("hits").Record(2) + h3.Counter("hits").Record(4) + + snap := scope.Snapshot() + c := snap.Counters()["test.hits+activityType="+tagExcludedValue] + require.NotNil(t, c) + require.EqualValues(t, 7, c.Value()) +} + +func TestWithTags_ConcurrentAccess(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + const goroutines = 32 + const iterations = 100 + var wg sync.WaitGroup + handlers := make([]Handler, goroutines) + + wg.Add(goroutines) + for i := range goroutines { + go func(idx int) { + defer wg.Done() + var last Handler + for range iterations { + last = h.WithTags(OperationTag("concurrent_op")) + last.Counter("concurrent_count").Record(1) + } + handlers[idx] = last + }(i) + } + wg.Wait() + + // All goroutines should have received the same cached handler. 
+ for i := 1; i < goroutines; i++ { + require.Same(t, handlers[0], handlers[i], + "all goroutines should get the same cached handler") + } + + snap := scope.Snapshot() + c := snap.Counters()["test.concurrent_count+operation=concurrent_op"] + require.NotNil(t, c) + require.Equal(t, int64(goroutines*iterations), c.Value()) +} + +func TestTagsCacheKey(t *testing.T) { + tests := []struct { + name string + a, b []Tag + same bool + }{ + { + name: "identical single tags", + a: []Tag{{Key: "op", Value: "foo"}}, + b: []Tag{{Key: "op", Value: "foo"}}, + same: true, + }, + { + name: "different values", + a: []Tag{{Key: "op", Value: "foo"}}, + b: []Tag{{Key: "op", Value: "bar"}}, + same: false, + }, + { + name: "different keys", + a: []Tag{{Key: "op", Value: "x"}}, + b: []Tag{{Key: "ns", Value: "x"}}, + same: false, + }, + { + name: "identical multi tags", + a: []Tag{{Key: "a", Value: "1"}, {Key: "b", Value: "2"}}, + b: []Tag{{Key: "a", Value: "1"}, {Key: "b", Value: "2"}}, + same: true, + }, + { + name: "different ordering", + a: []Tag{{Key: "a", Value: "1"}, {Key: "b", Value: "2"}}, + b: []Tag{{Key: "b", Value: "2"}, {Key: "a", Value: "1"}}, + same: false, + }, + { + name: "single vs multi", + a: []Tag{{Key: "a", Value: "1"}}, + b: []Tag{{Key: "a", Value: "1"}, {Key: "b", Value: "2"}}, + same: false, + }, + { + name: "key boundary ambiguity", + a: []Tag{{Key: "ab", Value: "c"}}, + b: []Tag{{Key: "a", Value: "bc"}}, + same: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ka := tagsCacheKey(tt.a) + kb := tagsCacheKey(tt.b) + if tt.same { + require.Equal(t, ka, kb) + } else { + require.NotEqual(t, ka, kb) + } + }) + } +} + +func TestScopeCache_CounterWithInlineTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + c := h.Counter("requests") + c.Record(1, StringTag("status", "ok")) + c.Record(2, StringTag("status", "ok")) + c.Record(5, StringTag("status", "err")) 
+ + snap := scope.Snapshot() + ok := snap.Counters()["test.requests+status=ok"] + require.NotNil(t, ok) + require.EqualValues(t, 3, ok.Value()) + + errC := snap.Counters()["test.requests+status=err"] + require.NotNil(t, errC) + require.EqualValues(t, 5, errC.Value()) +} + +func TestScopeCache_GaugeWithInlineTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + g := h.Gauge("temp") + g.Record(42.0, StringTag("location", "cpu")) + g.Record(99.0, StringTag("location", "cpu")) + + snap := scope.Snapshot() + gauge := snap.Gauges()["test.temp+location=cpu"] + require.NotNil(t, gauge) + require.InDelta(t, 99.0, gauge.Value(), 0.01) +} + +func TestScopeCache_TimerWithInlineTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + ti := h.Timer("latency") + ti.Record(100*time.Millisecond, StringTag("op", "read")) + ti.Record(200*time.Millisecond, StringTag("op", "read")) + + snap := scope.Snapshot() + timer := snap.Timers()["test.latency+op=read"] + require.NotNil(t, timer) + require.Equal(t, []time.Duration{100 * time.Millisecond, 200 * time.Millisecond}, timer.Values()) +} + +func TestScopeCache_HistogramWithInlineTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + hist := h.Histogram("size", Bytes) + hist.Record(512, StringTag("type", "payload")) + hist.Record(4096, StringTag("type", "payload")) + + snap := scope.Snapshot() + histo := snap.Histograms()["test.size+type=payload"] + require.NotNil(t, histo) + require.Equal(t, map[float64]int64{ + 1024: 1, + 2048: 0, + math.MaxFloat64: 1, + }, histo.Values()) +} + +func TestScopeCache_NoTagsUsesBaseScope(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h.Counter("hits").Record(7) + + snap := 
scope.Snapshot() + c := snap.Counters()["test.hits+"] + require.NotNil(t, c) + require.EqualValues(t, 7, c.Value()) +} + +func TestScopeCache_ExcludeTagsApply(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h.Counter("hits").Record(1, ActivityTypeTag("MyActivity")) + + snap := scope.Snapshot() + c := snap.Counters()["test.hits+activityType="+tagExcludedValue] + require.NotNil(t, c, "excluded tag value should be sanitized via scope cache") + require.EqualValues(t, 1, c.Value()) +} + +func TestScopeCache_IndependentPerHandler(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + child1 := h.WithTags(OperationTag("op1")) + child2 := h.WithTags(OperationTag("op2")) + + child1.Counter("hits").Record(1, StringTag("env", "prod")) + child2.Counter("hits").Record(2, StringTag("env", "prod")) + + snap := scope.Snapshot() + c1 := snap.Counters()["test.hits+env=prod,operation=op1"] + require.NotNil(t, c1) + require.EqualValues(t, 1, c1.Value()) + + c2 := snap.Counters()["test.hits+env=prod,operation=op2"] + require.NotNil(t, c2) + require.EqualValues(t, 2, c2.Value()) +} + +func TestScopeCache_ConcurrentRecordWithTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + const goroutines = 32 + const iterations = 100 + var wg sync.WaitGroup + wg.Add(goroutines) + for range goroutines { + go func() { + defer wg.Done() + c := h.Counter("concurrent") + for range iterations { + c.Record(1, StringTag("shard", "0")) + } + }() + } + wg.Wait() + + snap := scope.Snapshot() + c := snap.Counters()["test.concurrent+shard=0"] + require.NotNil(t, c) + require.Equal(t, int64(goroutines*iterations), c.Value()) +} + +func TestScopeCache_ExcludedTagsMergeInCache(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := 
NewTallyMetricsHandler(defaultConfig, scope) + + // "activityType" has empty allow-list, so all values are excluded. + // Different raw values should normalize to the same cache entry. + h.Counter("hits").Record(1, ActivityTypeTag("TypeA")) + h.Counter("hits").Record(2, ActivityTypeTag("TypeB")) + h.Counter("hits").Record(4, ActivityTypeTag("TypeC")) + + snap := scope.Snapshot() + c := snap.Counters()["test.hits+activityType="+tagExcludedValue] + require.NotNil(t, c) + require.EqualValues(t, 7, c.Value(), "all excluded tag values should map to the same counter") + + // Verify only one cache entry was created, not three. + h.cache.mu.Lock() + scopeCount := len(h.cache.scopes) + h.cache.mu.Unlock() + require.Equal(t, 1, scopeCount, + "excluded tags with different raw values should share a single cache entry") +} + +func TestScopeCache_BoundedSize(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + cfg := defaultConfig + cfg.TagsCacheMaxSize = 100 + h := NewTallyMetricsHandler(cfg, scope) + + // Fill the cache to the limit. + for i := range 100 { + h.Counter("c").Record(1, StringTag("id", strconv.Itoa(i))) + } + h.cache.mu.Lock() + require.Len(t, h.cache.scopes, 100) + h.cache.mu.Unlock() + + // Beyond the limit, cache is cleared and new entry is stored. + h.Counter("c").Record(1, StringTag("id", "overflow")) + h.cache.mu.Lock() + require.LessOrEqual(t, len(h.cache.scopes), 2, + "scope cache should have been cleared on overflow") + h.cache.mu.Unlock() + + snap := scope.Snapshot() + c := snap.Counters()["test.c+id=overflow"] + require.NotNil(t, c, "metrics should work even after cache clear") + require.EqualValues(t, 1, c.Value()) + + // Entries are re-cached after clear. 
+ h.Counter("c").Record(1, StringTag("id", "0")) + snap = scope.Snapshot() + c0 := snap.Counters()["test.c+id=0"] + require.NotNil(t, c0) + require.EqualValues(t, 2, c0.Value()) +} + +func TestWithTags_BoundedChildCacheSize(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + cfg := defaultConfig + cfg.TagsCacheMaxSize = 100 + h := NewTallyMetricsHandler(cfg, scope) + + // Fill the handler cache to the limit. + for i := range 100 { + h.WithTags(StringTag("id", strconv.Itoa(i))) + } + h.cache.mu.Lock() + require.Len(t, h.cache.handlers, 100) + h.cache.mu.Unlock() + + // Beyond the limit, WithTags still works; cache is cleared. + overflow := h.WithTags(StringTag("id", "overflow")) + h.cache.mu.Lock() + require.LessOrEqual(t, len(h.cache.handlers), 2, + "handler cache should have been cleared on overflow") + h.cache.mu.Unlock() + + // The handler still works correctly. + overflow.Counter("hits").Record(1) + snap := scope.Snapshot() + c := snap.Counters()["test.hits+id=overflow"] + require.NotNil(t, c) + require.EqualValues(t, 1, c.Value()) + + // Cached entries are re-cached on next access. 
+ cached := h.WithTags(StringTag("id", "0")) + cached2 := h.WithTags(StringTag("id", "0")) + require.Same(t, cached, cached2, "re-cached entries should hit") +} + +func TestNormalizeTagsForCaching(t *testing.T) { + excl := excludeTags{ + "activityType": {}, // empty allow-list: exclude all + "taskqueue": {"__sticky__": struct{}{}}, // allow only __sticky__ + } + + t.Run("no excluded tags returns original slice", func(t *testing.T) { + tags := []Tag{{Key: "env", Value: "prod"}} + result := normalizeTagsForCaching(tags, excl) + require.Same(t, &tags[0], &result[0], "should return the same slice when no normalization needed") + }) + + t.Run("excluded tag gets normalized", func(t *testing.T) { + tags := []Tag{{Key: "activityType", Value: "MyActivity"}} + result := normalizeTagsForCaching(tags, excl) + require.Equal(t, tagExcludedValue, result[0].Value) + }) + + t.Run("allowed tag value is not normalized", func(t *testing.T) { + tags := []Tag{{Key: "taskqueue", Value: "__sticky__"}} + result := normalizeTagsForCaching(tags, excl) + require.Same(t, &tags[0], &result[0], "allowed value should not trigger normalization") + }) + + t.Run("mixed tags normalize only excluded ones", func(t *testing.T) { + tags := []Tag{ + {Key: "env", Value: "prod"}, + {Key: "activityType", Value: "DoSomething"}, + {Key: "taskqueue", Value: "non-sticky"}, + } + result := normalizeTagsForCaching(tags, excl) + require.Equal(t, "prod", result[0].Value) + require.Equal(t, tagExcludedValue, result[1].Value) + require.Equal(t, tagExcludedValue, result[2].Value) + }) + + t.Run("empty excludeTags returns original", func(t *testing.T) { + tags := []Tag{{Key: "anything", Value: "val"}} + result := normalizeTagsForCaching(tags, nil) + require.Same(t, &tags[0], &result[0]) + }) +} + +func TestTagsCacheKey_NoCollisionsForEmbeddedNulls(t *testing.T) { + tagsA := []Tag{{Key: "a", Value: "\x00b"}} + tagsB := []Tag{{Key: "a\x00", Value: "b"}} + + require.NotEqual(t, tagsCacheKey(tagsA), tagsCacheKey(tagsB)) +} 
+ +func TestWithTags_DistinguishesEmbeddedNullTags(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h1 := h.WithTags(StringTag("a", "\x00b")) + h2 := h.WithTags(StringTag("a\x00", "b")) + + require.NotSame(t, h1, h2) +} + +func TestHistogram_CacheKeyDistinguishesNameAndUnit(t *testing.T) { + scope := tally.NewTestScope("test", map[string]string{}) + h := NewTallyMetricsHandler(defaultConfig, scope) + + h.Histogram("ab", MetricUnit("c")) + h.Histogram("ab\x00c", MetricUnit("")) + + count := 0 + h.histograms.Range(func(_, _ any) bool { + count++ + return true + }) + require.Equal(t, 2, count) +} From 48dfba7ba635b944cd81aa66db1678f367571615 Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Tue, 19 May 2026 15:42:45 -0600 Subject: [PATCH 60/73] tests: migrate activity_api_rules_test.go to TestEnv (#10329) ## What changed? WISOTT ## Why? Part of our migration to `TestEnv` to speed up tests and reduce flakes. ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [X] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks Tests only --- tests/activity_api_rules_test.go | 241 ++++++++++++++----------------- 1 file changed, 107 insertions(+), 134 deletions(-) diff --git a/tests/activity_api_rules_test.go b/tests/activity_api_rules_test.go index 89e5db6e572..9dbd5914d81 100644 --- a/tests/activity_api_rules_test.go +++ b/tests/activity_api_rules_test.go @@ -1,7 +1,6 @@ package tests import ( - "context" "errors" "fmt" "sync/atomic" @@ -10,7 +9,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" commonpb "go.temporal.io/api/common/v1" rulespb "go.temporal.io/api/rules/v1" "go.temporal.io/api/serviceerror" @@ -19,24 +17,17 @@ import ( "go.temporal.io/sdk/temporal" "go.temporal.io/sdk/workflow" "go.temporal.io/server/common/dynamicconfig" - "go.temporal.io/server/common/log" + "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/util" "go.temporal.io/server/tests/testcore" ) type ActivityApiRulesClientTestSuite struct { - testcore.FunctionalTestBase - - initialRetryInterval time.Duration - scheduleToCloseTimeout time.Duration - startToCloseTimeout time.Duration - - activityRetryPolicy *temporal.RetryPolicy + parallelsuite.Suite[*ActivityApiRulesClientTestSuite] } func TestActivityApiRulesClientTestSuite(t *testing.T) { - s := new(ActivityApiRulesClientTestSuite) - suite.Run(t, s) + parallelsuite.Run(t, &ActivityApiRulesClientTestSuite{}) } type internalRulesTestWorkflow struct { @@ -51,21 +42,17 @@ type internalRulesTestWorkflow struct { activityCompleteCn chan struct{} activityFailedCn chan struct{} - testSuite *testcore.FunctionalTestBase - logger log.Logger - ctx context.Context + env *testcore.TestEnv } -func newInternalRulesTestWorkflow(ctx context.Context, testSuite *testcore.FunctionalTestBase, logger log.Logger) 
*internalRulesTestWorkflow { +func newInternalRulesTestWorkflow(env *testcore.TestEnv) *internalRulesTestWorkflow { wf := &internalRulesTestWorkflow{ initialRetryInterval: 1 * time.Second, scheduleToCloseTimeout: 30 * time.Minute, startToCloseTimeout: 15 * time.Minute, activityCompleteCn: make(chan struct{}), activityFailedCn: make(chan struct{}), - testSuite: testSuite, - ctx: ctx, - logger: logger, + env: env, } wf.activityRetryPolicy = &temporal.RetryPolicy{ InitialInterval: wf.initialRetryInterval, @@ -102,11 +89,11 @@ func (w *internalRulesTestWorkflow) ActivityFuncForRetryActivity() (string, erro w.startedActivityCount.Add(1) if !w.letActivitySucceed.Load() { - w.testSuite.WaitForChannel(w.ctx, w.activityFailedCn) + w.env.WaitForChannel(w.activityFailedCn) activityErr := errors.New("bad-luck-please-retry") return "", activityErr } - w.testSuite.WaitForChannel(w.ctx, w.activityCompleteCn) + w.env.WaitForChannel(w.activityCompleteCn) return "done!", nil } @@ -117,37 +104,28 @@ func (w *internalRulesTestWorkflow) ActivityFuncForRetryTask() (string, error) { activityErr := errors.New("bad-luck-please-retry") return "", activityErr } - w.testSuite.WaitForChannel(w.ctx, w.activityCompleteCn) + w.env.WaitForChannel(w.activityCompleteCn) return "done!", nil } func (w *internalRulesTestWorkflow) ActivityFuncForPrePause() (string, error) { w.startedActivityCount.Add(1) - w.testSuite.WaitForChannel(w.ctx, w.activityCompleteCn) + w.env.WaitForChannel(w.activityCompleteCn) return "done!", nil } -func (s *ActivityApiRulesClientTestSuite) SetupTest() { - s.FunctionalTestBase.SetupTest() - - s.OverrideDynamicConfig(dynamicconfig.WorkflowRulesAPIsEnabled, true) - - s.initialRetryInterval = 1 * time.Second - s.scheduleToCloseTimeout = 30 * time.Minute - s.startToCloseTimeout = 15 * time.Minute - - s.activityRetryPolicy = &temporal.RetryPolicy{ - InitialInterval: s.initialRetryInterval, - BackoffCoefficient: 1, - } +func (s *ActivityApiRulesClientTestSuite) newTestEnv() 
*testcore.TestEnv { + return testcore.NewEnv(s.T(), + testcore.WithDynamicConfig(dynamicconfig.WorkflowRulesAPIsEnabled, true), + ) } -func (s *ActivityApiRulesClientTestSuite) createWorkflow(ctx context.Context, workflowFn WorkflowFunction) sdkclient.WorkflowRun { +func (s *ActivityApiRulesClientTestSuite) createWorkflow(env *testcore.TestEnv, workflowFn WorkflowFunction) sdkclient.WorkflowRun { workflowOptions := sdkclient.StartWorkflowOptions{ ID: testcore.RandomizeStr("wf_id-" + s.T().Name()), - TaskQueue: s.TaskQueue(), + TaskQueue: env.WorkerTaskQueue(), } - workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, workflowOptions, workflowFn) + workflowRun, err := env.SdkClient().ExecuteWorkflow(s.Context(), workflowOptions, workflowFn) s.NoError(err) s.NotNil(workflowRun) @@ -155,12 +133,11 @@ func (s *ActivityApiRulesClientTestSuite) createWorkflow(ctx context.Context, wo } func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() + env := s.newTestEnv() // Initial state - no rules - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) s.NoError(err) s.NotNil(nsResp) @@ -170,15 +147,15 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { ruleID1 := "pause-activity-rule-1" activityType := "ActivityFunc" - createRuleRequest := s.createPauseRuleRequest(activityType, ruleID1) - createRuleResponse, err := s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleRequest := s.createPauseRuleRequest(env.Namespace().String(), activityType, ruleID1) + createRuleResponse, err := env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) s.NoError(err) s.NotNil(createRuleResponse) 
// verify that frontend has updated namespaces s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -188,7 +165,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { }, 5*time.Second, 200*time.Millisecond) // create a second rule with the same ID - createRuleResponse, err = s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleResponse, err = env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) var invalidArgument *serviceerror.InvalidArgument s.Error(err) s.ErrorAs(err, &invalidArgument) @@ -197,14 +174,14 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { // create a second rule with a different ID ruleID2 := "pause-activity-rule-2" createRuleRequest.Spec.Id = ruleID2 - createRuleResponse, err = s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleResponse, err = env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) s.NoError(err) s.NotNil(createRuleResponse) // verify that frontend has updated namespaces s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -216,16 +193,16 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { }, 5*time.Second, 200*time.Millisecond) // get rule by ID - describeRuleResponse, err := s.FrontendClient().DescribeWorkflowRule(ctx, 
&workflowservice.DescribeWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + describeRuleResponse, err := env.FrontendClient().DescribeWorkflowRule(s.Context(), &workflowservice.DescribeWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID1, }) s.NoError(err) s.NotNil(describeRuleResponse) s.Equal(ruleID1, describeRuleResponse.Rule.Spec.Id) - describeRuleResponse, err = s.FrontendClient().DescribeWorkflowRule(ctx, &workflowservice.DescribeWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + describeRuleResponse, err = env.FrontendClient().DescribeWorkflowRule(s.Context(), &workflowservice.DescribeWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID2, }) s.NoError(err) @@ -233,8 +210,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { s.Equal(ruleID2, describeRuleResponse.Rule.Spec.Id) // delete rule 1 - deleteRuleResponse, err := s.FrontendClient().DeleteWorkflowRule(ctx, &workflowservice.DeleteWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + deleteRuleResponse, err := env.FrontendClient().DeleteWorkflowRule(s.Context(), &workflowservice.DeleteWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID1, }) s.NoError(err) @@ -242,8 +219,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { // verify that frontend has updated namespaces s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -254,8 +231,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { }, 5*time.Second, 200*time.Millisecond) // delete rule 2 - deleteRuleResponse, err = s.FrontendClient().DeleteWorkflowRule(ctx, 
&workflowservice.DeleteWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + deleteRuleResponse, err = env.FrontendClient().DeleteWorkflowRule(s.Context(), &workflowservice.DeleteWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID2, }) s.NoError(err) @@ -263,8 +240,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { // verify that frontend has updated namespaces and all rules are deleted s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -273,14 +250,13 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_CRUD() { } func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() + env := s.newTestEnv() - testWorkflow := newInternalRulesTestWorkflow(ctx, &s.FunctionalTestBase, s.Logger) - s.SdkWorker().RegisterWorkflow(testWorkflow.WorkflowFuncForRetryActivity) - s.SdkWorker().RegisterActivity(testWorkflow.ActivityFuncForRetryActivity) + testWorkflow := newInternalRulesTestWorkflow(env) + env.SdkWorker().RegisterWorkflow(testWorkflow.WorkflowFuncForRetryActivity) + env.SdkWorker().RegisterActivity(testWorkflow.ActivityFuncForRetryActivity) - workflowRun := s.createWorkflow(ctx, testWorkflow.WorkflowFuncForRetryActivity) + workflowRun := s.createWorkflow(env, testWorkflow.WorkflowFuncForRetryActivity) // wait for activity to start s.EventuallyWithT(func(t *assert.CollectT) { @@ -290,15 +266,15 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // create rule to pause activity ruleID := "pause-activity" activityType := 
"ActivityFuncForRetryActivity" - createRuleRequest := s.createPauseRuleRequest(activityType, ruleID) - createRuleResponse, err := s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleRequest := s.createPauseRuleRequest(env.Namespace().String(), activityType, ruleID) + createRuleResponse, err := env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) s.NoError(err) s.NotNil(createRuleResponse) // verify that frontend has updated namespaces s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -308,14 +284,14 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // Let namespace config propagate. 
// There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 4*time.Second) + err = util.InterruptibleSleep(s.Context(), 4*time.Second) s.NoError(err) testWorkflow.activityFailedCn <- struct{}{} // check that activity was paused by the rule s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -324,7 +300,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { }, 2*time.Second, 200*time.Millisecond) // make sure activity pause info is set - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) s.NoError(err) s.Len(description.PendingActivities, 1) s.True(description.PendingActivities[0].Paused) @@ -337,8 +313,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { testWorkflow.letActivitySucceed.Store(true) // remove the rule so it didn't interfere with the activity - deleteRuleResponse, err := s.FrontendClient().DeleteWorkflowRule(ctx, &workflowservice.DeleteWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + deleteRuleResponse, err := env.FrontendClient().DeleteWorkflowRule(s.Context(), &workflowservice.DeleteWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID, }) s.NoError(err) @@ -346,8 +322,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // make sure there is no rules s.EventuallyWithT(func(t 
*assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -356,12 +332,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // Let namespace config propagate. // There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 4*time.Second) + err = util.InterruptibleSleep(s.Context(), 4*time.Second) s.NoError(err) // unpause the activity - _, err = s.FrontendClient().UnpauseActivity(ctx, &workflowservice.UnpauseActivityRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivity(s.Context(), &workflowservice.UnpauseActivityRequest{ + Namespace: env.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowRun.GetID(), }, @@ -371,7 +347,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // wait for activity to be unpaused s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -384,13 +360,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryActivity() { // wait for workflow to finish var out string - err = workflowRun.Get(ctx, &out) + err = workflowRun.Get(s.Context(), &out) s.NoError(err) } func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { - 
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() + env := s.newTestEnv() // overall test execution plan: // 1. start workflow @@ -405,21 +380,20 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // 8. Let activity complete // 9. Wait for workflow to finish - testRetryTaskWorkflow := newInternalRulesTestWorkflow(ctx, &s.FunctionalTestBase, s.Logger) + testRetryTaskWorkflow := newInternalRulesTestWorkflow(env) // set much longer retry interval to make sure that activity is retried at least once - s.initialRetryInterval = 4 * time.Second - s.activityRetryPolicy.InitialInterval = s.initialRetryInterval + testRetryTaskWorkflow.activityRetryPolicy.InitialInterval = 4 * time.Second - s.SdkWorker().RegisterWorkflow(testRetryTaskWorkflow.WorkflowFuncForRetryTask) - s.SdkWorker().RegisterActivity(testRetryTaskWorkflow.ActivityFuncForRetryTask) + env.SdkWorker().RegisterWorkflow(testRetryTaskWorkflow.WorkflowFuncForRetryTask) + env.SdkWorker().RegisterActivity(testRetryTaskWorkflow.ActivityFuncForRetryTask) // 1. Start workflow - workflowRun := s.createWorkflow(ctx, testRetryTaskWorkflow.WorkflowFuncForRetryTask) + workflowRun := s.createWorkflow(env, testRetryTaskWorkflow.WorkflowFuncForRetryTask) // 2. Wait for activity to start and fail exactly once s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, int32(1), testRetryTaskWorkflow.startedActivityCount.Load()) @@ -428,15 +402,15 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // 3. 
Create rule to pause activity ruleID := "pause-activity" activityType := "ActivityFuncForRetryTask" - createRuleRequest := s.createPauseRuleRequest(activityType, ruleID) - createRuleResponse, err := s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleRequest := s.createPauseRuleRequest(env.Namespace().String(), activityType, ruleID) + createRuleResponse, err := env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) s.NoError(err) s.NotNil(createRuleResponse) // 4. verify that frontend has updated namespaces s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -446,12 +420,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // Let namespace config propagate. // There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 2*time.Second) + err = util.InterruptibleSleep(s.Context(), 2*time.Second) s.NoError(err) // 5. wait for activity to be paused by rule. 
This should happen in the activity retry task s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -460,7 +434,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { }, 5*time.Second, 200*time.Millisecond) // make sure activity pause info is set - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) s.NoError(err) s.Len(description.PendingActivities, 1) s.True(description.PendingActivities[0].Paused) @@ -473,8 +447,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { testRetryTaskWorkflow.letActivitySucceed.Store(true) // remove the rule so it didn't interfere with the activity - deleteRuleResponse, err := s.FrontendClient().DeleteWorkflowRule(ctx, &workflowservice.DeleteWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + deleteRuleResponse, err := env.FrontendClient().DeleteWorkflowRule(s.Context(), &workflowservice.DeleteWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID, }) s.NoError(err) @@ -482,8 +456,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // make sure there is no rules s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + 
Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -492,12 +466,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // Let namespace config propagate. // There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 2*time.Second) + err = util.InterruptibleSleep(s.Context(), 2*time.Second) s.NoError(err) // unpause the activity. this will also trigger the activity - _, err = s.FrontendClient().UnpauseActivity(ctx, &workflowservice.UnpauseActivityRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivity(s.Context(), &workflowservice.UnpauseActivityRequest{ + Namespace: env.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowRun.GetID(), }, @@ -507,7 +481,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { // wait for activity to be unpaused s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -519,13 +493,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_RetryTask() { testRetryTaskWorkflow.activityCompleteCn <- struct{}{} // wait for workflow to finish var out string - err = workflowRun.Get(ctx, &out) + err = workflowRun.Get(s.Context(), &out) s.NoError(err) } func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() + env := s.newTestEnv() // overall test execution plan: // 1. 
create rule to pause activity @@ -541,23 +514,23 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 11. Let activity complete // 12. Wait for workflow to finish - testRetryTaskWorkflow := newInternalRulesTestWorkflow(ctx, &s.FunctionalTestBase, s.Logger) + testRetryTaskWorkflow := newInternalRulesTestWorkflow(env) - s.SdkWorker().RegisterWorkflow(testRetryTaskWorkflow.WorkflowFuncForPrePause) - s.SdkWorker().RegisterActivity(testRetryTaskWorkflow.ActivityFuncForPrePause) + env.SdkWorker().RegisterWorkflow(testRetryTaskWorkflow.WorkflowFuncForPrePause) + env.SdkWorker().RegisterActivity(testRetryTaskWorkflow.ActivityFuncForPrePause) // 1. Create rule to pause activity ruleID := "pause-activity" activityType := "ActivityFuncForPrePause" - createRuleRequest := s.createPauseRuleRequest(activityType, ruleID) - createRuleResponse, err := s.FrontendClient().CreateWorkflowRule(ctx, createRuleRequest) + createRuleRequest := s.createPauseRuleRequest(env.Namespace().String(), activityType, ruleID) + createRuleResponse, err := env.FrontendClient().CreateWorkflowRule(s.Context(), createRuleRequest) s.NoError(err) s.NotNil(createRuleResponse) // 2. Verify that frontend has updated namespaces and rules are available s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -568,15 +541,15 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 3. Let namespace config propagate to the history service. 
// There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 2*time.Second) + err = util.InterruptibleSleep(s.Context(), 2*time.Second) s.NoError(err) // 4. Start workflow - workflowRun := s.createWorkflow(ctx, testRetryTaskWorkflow.WorkflowFuncForPrePause) + workflowRun := s.createWorkflow(env, testRetryTaskWorkflow.WorkflowFuncForPrePause) // 5. Wait for activity to be paused by rule. This should happen in the recording activity task started s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -588,7 +561,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { }, 5*time.Second, 200*time.Millisecond) // make sure activity pause info is set - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) s.NoError(err) s.Len(description.PendingActivities, 1) s.True(description.PendingActivities[0].Paused) @@ -596,8 +569,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { s.Equal(ruleID, description.PendingActivities[0].PauseInfo.GetRule().GetRuleId()) // 6. 
Remove the rule so it didn't interfere with the activity - deleteRuleResponse, err := s.FrontendClient().DeleteWorkflowRule(ctx, &workflowservice.DeleteWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + deleteRuleResponse, err := env.FrontendClient().DeleteWorkflowRule(s.Context(), &workflowservice.DeleteWorkflowRuleRequest{ + Namespace: env.Namespace().String(), RuleId: ruleID, }) s.NoError(err) @@ -605,8 +578,8 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 7. Make sure there is no rules in frontend s.EventuallyWithT(func(t *assert.CollectT) { - nsResp, err := s.FrontendClient().ListWorkflowRules(ctx, &workflowservice.ListWorkflowRulesRequest{ - Namespace: s.Namespace().String(), + nsResp, err := env.FrontendClient().ListWorkflowRules(s.Context(), &workflowservice.ListWorkflowRulesRequest{ + Namespace: env.Namespace().String(), }) require.NoError(t, err) require.NotNil(t, nsResp) @@ -615,12 +588,12 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 8. Let namespace config changes propagate to the history service. // There is no good way to check if the namespace config has propagated to the history service - err = util.InterruptibleSleep(ctx, 2*time.Second) + err = util.InterruptibleSleep(s.Context(), 2*time.Second) s.NoError(err) // 9. Unpause the activity. this will also trigger the activity - _, err = s.FrontendClient().UnpauseActivity(ctx, &workflowservice.UnpauseActivityRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().UnpauseActivity(s.Context(), &workflowservice.UnpauseActivityRequest{ + Namespace: env.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowRun.GetID(), }, @@ -630,7 +603,7 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 10. 
Wait for activity to be unpaused s.EventuallyWithT(func(t *assert.CollectT) { - description, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowRun.GetID(), workflowRun.GetRunID()) + description, err := env.SdkClient().DescribeWorkflowExecution(s.Context(), workflowRun.GetID(), workflowRun.GetRunID()) require.NoError(t, err) require.Len(t, description.PendingActivities, 1) require.Equal(t, activityType, description.PendingActivities[0].GetActivityType().GetName()) @@ -643,15 +616,15 @@ func (s *ActivityApiRulesClientTestSuite) TestActivityRulesApi_PrePause() { // 12. Wait for workflow to finish var out string - err = workflowRun.Get(ctx, &out) + err = workflowRun.Get(s.Context(), &out) s.NoError(err) } func (s *ActivityApiRulesClientTestSuite) createPauseRuleRequest( - activityType string, ruleID string, + namespace, activityType, ruleID string, ) *workflowservice.CreateWorkflowRuleRequest { createRuleRequest := &workflowservice.CreateWorkflowRuleRequest{ - Namespace: s.Namespace().String(), + Namespace: namespace, Spec: &rulespb.WorkflowRuleSpec{ Id: ruleID, Trigger: &rulespb.WorkflowRuleSpec_ActivityStart{ From cd87889af492930608a08a9a92378b65bf74fd9a Mon Sep 17 00:00:00 2001 From: Rodrigo Zhou Date: Tue, 19 May 2026 15:05:09 -0700 Subject: [PATCH 61/73] [CHASM Visibility] Filter nil values from memo (#10335) ## What changed? Remove keys with `nil` value from the memo when instantiating CHASM Visibility. ## Why? `nil` value is used to remove keys from the memo, and it needs to match the behavior in workflows. ## How did you test it? 
- [x] built - [ ] run locally and tested manually - [x] covered by existing tests - [x] added new unit test(s) - [ ] added new functional test(s) ## Potential risks --- chasm/visibility.go | 14 ++++++++++---- chasm/visibility_test.go | 11 ++++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/chasm/visibility.go b/chasm/visibility.go index 50f74530ddc..2258528924f 100644 --- a/chasm/visibility.go +++ b/chasm/visibility.go @@ -169,10 +169,13 @@ func NewVisibilityWithData( &commonpb.SearchAttributes{IndexedFields: filteredSA}, ) } - if len(customMemo) != 0 { + + // Filter out nil/empty payload values for memo. + filteredMemo := payload.MergeMapOfPayload(nil, customMemo) + if len(filteredMemo) != 0 { visibility.Memo = NewDataField( mutableContext, - &commonpb.Memo{Fields: customMemo}, + &commonpb.Memo{Fields: filteredMemo}, ) } @@ -307,7 +310,10 @@ func (v *Visibility) ReplaceCustomMemo( mutableContext MutableContext, customMemo map[string]*commonpb.Payload, ) { - if len(customMemo) == 0 { + // Filter out nil/empty payload values for memo. 
+ filteredMemo := payload.MergeMapOfPayload(nil, customMemo) + + if len(filteredMemo) == 0 { _, ok := v.Memo.TryGet(mutableContext) if !ok { // Already empty, no-op @@ -318,7 +324,7 @@ func (v *Visibility) ReplaceCustomMemo( } else { v.Memo = NewDataField( mutableContext, - &commonpb.Memo{Fields: customMemo}, + &commonpb.Memo{Fields: filteredMemo}, ) } diff --git a/chasm/visibility_test.go b/chasm/visibility_test.go index 4308791e60d..bfe4c6dfab1 100644 --- a/chasm/visibility_test.go +++ b/chasm/visibility_test.go @@ -139,7 +139,7 @@ func (s *visibilitySuite) TestNewVisibilityWithData_FilterNilSearchAttributes() "nilKey1": nil, "nilKey2": nil, } - // Memo with 1 valid and 2 nil values - nil values should NOT be filtered out + // Memo with 1 valid and 2 nil values - nil values should be filtered out customMemo := map[string]*commonpb.Payload{ stringKey: s.mustEncode(stringVal), "nilKey1": nil, @@ -149,8 +149,8 @@ func (s *visibilitySuite) TestNewVisibilityWithData_FilterNilSearchAttributes() // SA should have only 1 field (nil values filtered out) s.Len(visibility.SA.Get(s.mockContext).IndexedFields, 1) s.NotNil(visibility.SA.Get(s.mockContext).IndexedFields[stringKey]) - // Memo should have all 3 fields (nil values NOT filtered) - s.Len(visibility.Memo.Get(s.mockContext).Fields, 3) + // Memo should have only 1 field (nil values filtered out) + s.Len(visibility.Memo.Get(s.mockContext).Fields, 1) s.NotNil(visibility.Memo.Get(s.mockContext).Fields[stringKey]) } @@ -314,8 +314,9 @@ func (s *visibilitySuite) TestReplaceCustomMemo() { s.visibility.ReplaceCustomMemo( s.mockMutableContext, map[string]*commonpb.Payload{ - floatKey: s.mustEncode(floatVal), - byteKey: s.mustEncode(byteVal), + floatKey: s.mustEncode(floatVal), + byteKey: s.mustEncode(byteVal), + stringKey: nil, // nil value must be filtered out }, ) s.Len(s.mockMutableContext.Tasks, 2) From f1fe14b1f5ff0d9b59d74cf80a98d7c3fd0db651 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 19 May 2026 15:31:15 
-0700 Subject: [PATCH 62/73] Callback for workflow update support (#9614) ## What changed? Added support for Nexus workflow update completion callbacks via CHASM. This allows a Nexus caller to be notified when a workflow update completes by attaching completion callbacks to the update request. ## Why? Nexus operations that target workflow updates need a way to receive completion notifications. Without this, a Nexus caller that sends an update has no async mechanism to learn when the update finishes. Completion callbacks enable the same async notification pattern that already exists for workflow-level Nexus operations. ## How did you test it? - [ ] built - [x] run locally and tested manually - [ ] covered by existing tests - [x] added new unit test(s) - [x] added new functional test(s) ## Potential risks Touches speculative workflow updates, which are always hard to reason about. Tried to compensate with lots of test coverage. Note: Needs this API PR https://github.com/temporalio/api/pull/742/changes --- > [!NOTE] > **High Risk** > Touches workflow update state machine and mutable state event handling to persist/trigger per-update callbacks, including close/retry/reset paths, which is complex and can affect correctness of update outcomes and callback delivery. > > **Overview** > Adds **workflow update completion callbacks** via CHASM so Nexus callers can register callbacks on `UpdateWorkflowExecution` and have them fired on update completion or workflow close. > > This introduces a `WorkflowUpdate` CHASM component with new `UpdateState` protobuf (including persisted `rejection_failure`), stores update callbacks under `Workflow.Updates`, and extends callback processing to handle *update-level* callbacks on update completion, rejection (including reset/reapply), and on run transitions (retry/timeout/continue-as-new) where update callbacks must fire even if workflow-level callbacks are inherited.
> > It also adds dynamic config gates/limits (`EnableWorkflowUpdateCallbacks`, `MaxCallbacksPerUpdateID`), updates `DescribeWorkflow` to surface update callback triggers, extends mutable state/history builder APIs to carry per-update callback options in `WorkflowExecutionOptionsUpdated`, and adds `Update.AttachCallbacks` logic to persist/flush callbacks (including buffering while `stateSent`, request-id dedup, and stricter validation requiring `request_id` when callbacks are present). > > Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit 4484fee104d73344ef5370c468008440c7df1c6a. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot). --------- Co-authored-by: long-nt-tran --- .../v1/update_state.go-helpers.pb.go | 43 + .../gen/workflowpb/v1/update_state.pb.go | 137 ++ chasm/lib/workflow/library.go | 1 + .../lib/workflow/proto/v1/update_state.proto | 14 + chasm/lib/workflow/workflow.go | 178 ++- chasm/lib/workflow/workflow_update.go | 67 + chasm/ms_pointer.go | 5 + chasm/node_backend_mock.go | 12 + chasm/tree.go | 5 + common/dynamicconfig/constants.go | 12 + go.mod | 3 +- go.sum | 6 +- service/frontend/namespace_handler.go | 1 + service/frontend/service.go | 2 + service/history/api/describeworkflow/api.go | 46 +- service/history/api/pollupdate/api_test.go | 4 + service/history/api/startworkflow/api.go | 1 + service/history/api/updateworkflow/api.go | 47 + .../history/api/updateworkflowoptions/api.go | 2 +- .../api/updateworkflowoptions/api_test.go | 4 +- service/history/configs/config.go | 10 +- .../history/historybuilder/event_factory.go | 2 + .../history/historybuilder/history_builder.go | 2 + .../history_builder_categorization_test.go | 2 +- service/history/interfaces/mutable_state.go | 5 +- .../history/interfaces/mutable_state_mock.go | 24 +- service/history/ndc/events_reapplier_test.go | 2 + service/history/ndc/workflow_resetter.go | 1 + service/history/ndc/workflow_resetter_test.go | 
2 + .../history/workflow/mutable_state_impl.go | 481 ++++-- .../workflow/mutable_state_impl_test.go | 14 +- .../history/workflow/update/abort_reason.go | 18 +- .../workflow/update/errors_failures.go | 2 +- .../history/workflow/update/export_test.go | 3 +- service/history/workflow/update/store.go | 30 + .../workflow/update/store_mock_test.go | 46 +- service/history/workflow/update/update.go | 178 ++- .../history/workflow/update/update_test.go | 409 +++++ service/history/workflow/update/validation.go | 10 + tests/nexus_workflow_update_test.go | 1359 +++++++++++++++++ tests/update_workflow_sdk_test.go | 96 ++ 41 files changed, 3092 insertions(+), 194 deletions(-) create mode 100644 chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go create mode 100644 chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go create mode 100644 chasm/lib/workflow/proto/v1/update_state.proto create mode 100644 chasm/lib/workflow/workflow_update.go create mode 100644 tests/nexus_workflow_update_test.go diff --git a/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go new file mode 100644 index 00000000000..7dd8ceec129 --- /dev/null +++ b/chasm/lib/workflow/gen/workflowpb/v1/update_state.go-helpers.pb.go @@ -0,0 +1,43 @@ +// Code generated by protoc-gen-go-helpers. DO NOT EDIT. 
+package workflowpb + +import ( + "google.golang.org/protobuf/proto" +) + +// Marshal an object of type UpdateState to the protobuf v3 wire format +func (val *UpdateState) Marshal() ([]byte, error) { + return proto.Marshal(val) +} + +// Unmarshal an object of type UpdateState from the protobuf v3 wire format +func (val *UpdateState) Unmarshal(buf []byte) error { + return proto.Unmarshal(buf, val) +} + +// Size returns the size of the object, in bytes, once serialized +func (val *UpdateState) Size() int { + return proto.Size(val) +} + +// Equal returns whether two UpdateState values are equivalent by recursively +// comparing the message's fields. +// For more information see the documentation for +// https://pkg.go.dev/google.golang.org/protobuf/proto#Equal +func (this *UpdateState) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + var that1 *UpdateState + switch t := that.(type) { + case *UpdateState: + that1 = t + case UpdateState: + that1 = &t + default: + return false + } + + return proto.Equal(this, that1) +} diff --git a/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go b/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go new file mode 100644 index 00000000000..422b8bf5a69 --- /dev/null +++ b/chasm/lib/workflow/gen/workflowpb/v1/update_state.pb.go @@ -0,0 +1,137 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// plugins: +// protoc-gen-go +// protoc +// source: temporal/server/chasm/lib/workflow/proto/v1/update_state.proto + +package workflowpb + +import ( + reflect "reflect" + sync "sync" + unsafe "unsafe" + + v1 "go.temporal.io/api/failure/v1" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type UpdateState struct { + state protoimpl.MessageState `protogen:"open.v1"` + UpdateId string `protobuf:"bytes,1,opt,name=update_id,json=updateId,proto3" json:"update_id,omitempty"` + // Populated when the update was rejected by a validator. + // Used to resolve the update outcome for callbacks on rejected updates. + RejectionFailure *v1.Failure `protobuf:"bytes,2,opt,name=rejection_failure,json=rejectionFailure,proto3" json:"rejection_failure,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateState) Reset() { + *x = UpdateState{} + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateState) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateState) ProtoMessage() {} + +func (x *UpdateState) ProtoReflect() protoreflect.Message { + mi := &file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateState.ProtoReflect.Descriptor instead. 
+func (*UpdateState) Descriptor() ([]byte, []int) { + return file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescGZIP(), []int{0} +} + +func (x *UpdateState) GetUpdateId() string { + if x != nil { + return x.UpdateId + } + return "" +} + +func (x *UpdateState) GetRejectionFailure() *v1.Failure { + if x != nil { + return x.RejectionFailure + } + return nil +} + +var File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto protoreflect.FileDescriptor + +const file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc = "" + + "\n" + + ">temporal/server/chasm/lib/workflow/proto/v1/update_state.proto\x12+temporal.server.chasm.lib.workflow.proto.v1\x1a%temporal/api/failure/v1/message.proto\"y\n" + + "\vUpdateState\x12\x1b\n" + + "\tupdate_id\x18\x01 \x01(\tR\bupdateId\x12M\n" + + "\x11rejection_failure\x18\x02 \x01(\v2 .temporal.api.failure.v1.FailureR\x10rejectionFailureBDZBgo.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpbb\x06proto3" + +var ( + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescOnce sync.Once + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData []byte +) + +func file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescGZIP() []byte { + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescOnce.Do(func() { + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc), len(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc))) + }) + return file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDescData +} + +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes = 
[]any{ + (*UpdateState)(nil), // 0: temporal.server.chasm.lib.workflow.proto.v1.UpdateState + (*v1.Failure)(nil), // 1: temporal.api.failure.v1.Failure +} +var file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs = []int32{ + 1, // 0: temporal.server.chasm.lib.workflow.proto.v1.UpdateState.rejection_failure:type_name -> temporal.api.failure.v1.Failure + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_init() } +func file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_init() { + if File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc), len(file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_rawDesc)), + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes, + DependencyIndexes: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs, + MessageInfos: file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_msgTypes, + }.Build() + File_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto = out.File + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_goTypes = nil + file_temporal_server_chasm_lib_workflow_proto_v1_update_state_proto_depIdxs = nil +} diff --git a/chasm/lib/workflow/library.go b/chasm/lib/workflow/library.go index 759baf124bb..88be1d5b864 100644 --- 
a/chasm/lib/workflow/library.go +++ b/chasm/lib/workflow/library.go @@ -46,6 +46,7 @@ func (l *library) Components() []*chasm.RegistrableComponent { chasm.NewRegistrableComponent[*Workflow](chasm.WorkflowComponentName, chasm.WithContextValues(map[any]any{ ctxKeyWorkflowContext: &workflowContext{registry: l.registry}, })), + chasm.NewRegistrableComponent[*WorkflowUpdate]("update"), } } diff --git a/chasm/lib/workflow/proto/v1/update_state.proto b/chasm/lib/workflow/proto/v1/update_state.proto new file mode 100644 index 00000000000..8d1e67bea3d --- /dev/null +++ b/chasm/lib/workflow/proto/v1/update_state.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +package temporal.server.chasm.lib.workflow.proto.v1; + +import "temporal/api/failure/v1/message.proto"; + +option go_package = "go.temporal.io/server/chasm/lib/workflow/gen/workflowpb;workflowpb"; + +message UpdateState { + string update_id = 1; + // Populated when the update was rejected by a validator. + // Used to resolve the update outcome for callbacks on rejected updates. + temporal.api.failure.v1.Failure rejection_failure = 2; +} diff --git a/chasm/lib/workflow/workflow.go b/chasm/lib/workflow/workflow.go index 8e3148cc03a..2b5409a4147 100644 --- a/chasm/lib/workflow/workflow.go +++ b/chasm/lib/workflow/workflow.go @@ -4,6 +4,7 @@ import ( "fmt" commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" "go.temporal.io/api/serviceerror" "go.temporal.io/server/chasm" @@ -35,6 +36,9 @@ type Workflow struct { // IncomingSignals map is used to track incoming signals, keyed by request ID, // to allow DescribeWorkflow to resolve RequestIDRef signal backlinks. IncomingSignals chasm.Map[string, *chasmworkflowpb.IncomingSignalData] + + // Updates indexed by update ID, used to store the update components. 
+ Updates chasm.Map[string, *WorkflowUpdate] } func NewWorkflow( @@ -68,35 +72,92 @@ func (w *Workflow) Terminate( return chasm.TerminateComponentResponse{}, serviceerror.NewInternal("workflow root Terminate should not be called") } -// AddCompletionCallbacks creates completion callbacks using the CHASM implementation. -// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. -func (w *Workflow) AddCompletionCallbacks( - ctx chasm.MutableContext, - eventTime *timestamppb.Timestamp, - requestID string, - completionCallbacks []*commonpb.Callback, - maxCallbacksPerWorkflow int, -) error { - // Check CHASM max callbacks limit - currentCallbackCount := len(w.Callbacks) - if len(completionCallbacks)+currentCallbackCount > maxCallbacksPerWorkflow { +// ProcessCloseCallbacks triggers "WorkflowClosed" callbacks using the CHASM implementation. +// It schedules all workflow-level and update-level callbacks that are in STANDBY state. +func (w *Workflow) ProcessCloseCallbacks(ctx chasm.MutableContext) error { + if err := callback.ScheduleStandbyCallbacks(ctx, w.Callbacks); err != nil { + return err + } + return w.ProcessAllUpdateCloseCallbacks(ctx) +} + +// ProcessAllUpdateCloseCallbacks triggers callbacks for all updates without touching +// workflow-level callbacks. This is used when the workflow is continuing to a new run +// (ContinueAsNew, retry, cron): workflow-level callbacks are inherited by the new run, +// but update callbacks must fire now because the update was aborted on the old run. +func (w *Workflow) ProcessAllUpdateCloseCallbacks(ctx chasm.MutableContext) error { + for _, updateField := range w.Updates { + if err := callback.ScheduleStandbyCallbacks(ctx, updateField.Get(ctx).Callbacks); err != nil { + return err + } + } + return nil +} + +// ProcessUpdateCallbacks triggers callbacks for a single updateID if exists. 
+func (w *Workflow) ProcessUpdateCallbacks(ctx chasm.MutableContext, updateID string) error { + update, exists := w.Updates[updateID] + if !exists { + return serviceerror.NewNotFoundf("update with ID %s not found", updateID) + } + return callback.ScheduleStandbyCallbacks(ctx, update.Get(ctx).Callbacks) +} + +// RejectUpdate stores the rejection failure on the WorkflowUpdate component and +// fires any pending callbacks. This is used when a reapplied update (after reset) +// is rejected by the worker's validator - the callbacks need to deliver the +// rejection failure to the caller. +func (w *Workflow) RejectUpdate(ctx chasm.MutableContext, updateID string, rejectionFailure *failurepb.Failure) error { + updateField, exists := w.Updates[updateID] + if !exists { + return nil // no callbacks registered for this update + } + + upd := updateField.Get(ctx) + upd.RejectionFailure = rejectionFailure + + return callback.ScheduleStandbyCallbacks(ctx, upd.Callbacks) +} + +// totalCallbackCount returns the total number of callbacks across workflow-level +// and all update-level callback maps. +func (w *Workflow) totalCallbackCount(ctx chasm.Context) int { + count := len(w.Callbacks) + for _, updateField := range w.Updates { + count += len(updateField.Get(ctx).Callbacks) + } + return count +} + +// checkWorkflowCallbackLimit returns an error if adding newCount callbacks would +// exceed the per-workflow maximum. 
+func (w *Workflow) checkWorkflowCallbackLimit(ctx chasm.Context, newCount, maxCallbacksPerWorkflow int) error { + current := w.totalCallbackCount(ctx) + if newCount+current > maxCallbacksPerWorkflow { return serviceerror.NewFailedPreconditionf( "cannot attach more than %d callbacks to a workflow (%d callbacks already attached)", maxCallbacksPerWorkflow, - currentCallbackCount, + current, ) } + return nil +} - // Initialize map if needed - if w.Callbacks == nil { - w.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks)) - } - - // Add each callback - for idx, cb := range completionCallbacks { - chasmCB := &callbackspb.Callback{ - Links: cb.GetLinks(), - } +// addCallbacksToMap converts common callbacks to CHASM callback components and +// inserts them into the target map, keyed by "-". +// +// All callbacks are validated up front, so target is not mutated unless every +// callback can be converted successfully (atomic from the caller's POV). +func addCallbacksToMap( + ctx chasm.MutableContext, + target chasm.Map[string, *callback.Callback], + requestID string, + eventTime *timestamppb.Timestamp, + completionCallbacks []*commonpb.Callback, +) error { + chasmCBs := make([]*callbackspb.Callback, len(completionCallbacks)) + for i, cb := range completionCallbacks { + chasmCB := &callbackspb.Callback{Links: cb.GetLinks()} switch variant := cb.Variant.(type) { case *commonpb.Callback_Nexus_: chasmCB.Variant = &callbackspb.Callback_Nexus_{ @@ -108,19 +169,84 @@ func (w *Workflow) AddCompletionCallbacks( default: return serviceerror.NewInvalidArgumentf("unsupported callback variant: %T", variant) } + chasmCBs[i] = chasmCB + } + for idx, chasmCB := range chasmCBs { // requestID (unique per API call) + idx (position within the request) ensures unique, idempotent callback IDs. // Unlike HSM callbacks, CHASM replicates entire trees rather than replaying events, so deterministic // cross-cluster IDs based on event version are not needed. 
id := fmt.Sprintf("%s-%d", requestID, idx) - - // Create and add callback + if _, exists := target[id]; exists { + // Already registered, skip to avoid overwriting. + continue + } callbackObj := callback.NewCallback(requestID, eventTime, &callbackspb.CallbackState{}, chasmCB) - w.Callbacks[id] = chasm.NewComponentField(ctx, callbackObj) + target[id] = chasm.NewComponentField(ctx, callbackObj) } return nil } +// AddCompletionCallbacks creates completion callbacks using the CHASM implementation. +// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. +func (w *Workflow) AddCompletionCallbacks( + ctx chasm.MutableContext, + eventTime *timestamppb.Timestamp, + requestID string, + completionCallbacks []*commonpb.Callback, + maxCallbacksPerWorkflow int, +) error { + if err := w.checkWorkflowCallbackLimit(ctx, len(completionCallbacks), maxCallbacksPerWorkflow); err != nil { + return err + } + + if w.Callbacks == nil { + w.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks)) + } + + return addCallbacksToMap(ctx, w.Callbacks, requestID, eventTime, completionCallbacks) +} + +// AddUpdateCompletionCallbacks creates completion callbacks using the CHASM implementation. +// maxCallbacksPerWorkflow is the configured maximum number of callbacks allowed per workflow. +// maxCallbacksPerUpdateID is the configured maximum number of callbacks allowed per update ID. 
+func (w *Workflow) AddUpdateCompletionCallbacks( + ctx chasm.MutableContext, + eventTime *timestamppb.Timestamp, + updateID string, + requestID string, + completionCallbacks []*commonpb.Callback, + maxCallbacksPerWorkflow int, + maxCallbacksPerUpdateID int, +) error { + if err := w.checkWorkflowCallbackLimit(ctx, len(completionCallbacks), maxCallbacksPerWorkflow); err != nil { + return err + } + + if w.Updates == nil { + w.Updates = make(chasm.Map[string, *WorkflowUpdate], 1) + } + if _, ok := w.Updates[updateID]; !ok { + workflowUpdateObj := NewWorkflowUpdate(ctx, updateID, w.MSPointer) + workflowUpdateObj.Callbacks = make(chasm.Map[string, *callback.Callback], len(completionCallbacks)) + w.Updates[updateID] = chasm.NewComponentField(ctx, workflowUpdateObj) + } + + update := w.Updates[updateID].Get(ctx) + + currentCallbackCount := len(update.Callbacks) + if len(completionCallbacks)+currentCallbackCount > maxCallbacksPerUpdateID { + return serviceerror.NewFailedPreconditionf( + "cannot attach more than %d callbacks to update %q (%d callbacks already attached)", + maxCallbacksPerUpdateID, + updateID, + currentCallbackCount, + ) + } + + return addCallbacksToMap(ctx, update.Callbacks, requestID, eventTime, completionCallbacks) +} + // addAndApplyHistoryEvent adds a history event to the workflow and applies the corresponding event definition, // looked up by Go type. This is the preferred way to add and apply events as it provides go-to-definition navigation. 
func addAndApplyHistoryEvent[D EventDefinition]( diff --git a/chasm/lib/workflow/workflow_update.go b/chasm/lib/workflow/workflow_update.go new file mode 100644 index 00000000000..ff99af32c2c --- /dev/null +++ b/chasm/lib/workflow/workflow_update.go @@ -0,0 +1,67 @@ +package workflow + +import ( + "github.com/nexus-rpc/sdk-go/nexus" + "go.temporal.io/server/chasm" + "go.temporal.io/server/chasm/lib/callback" + "go.temporal.io/server/chasm/lib/workflow/gen/workflowpb/v1" + commonnexus "go.temporal.io/server/common/nexus" + "go.temporal.io/server/common/nexus/nexusrpc" +) + +type WorkflowUpdate struct { + chasm.UnimplementedComponent + + *workflowpb.UpdateState + + // MSPointer is a special in-memory field for accessing the underlying mutable state. + chasm.MSPointer + + // Callbacks stores the completion callbacks attached to this update, keyed by callback ID. + // NewWorkflowUpdate leaves it nil; it must be allocated before entries are inserted. + Callbacks chasm.Map[string, *callback.Callback] +} + +// NewWorkflowUpdate returns a WorkflowUpdate component whose UpdateState carries updateID and which +// accesses the mutable state through msPointer. The context argument is unused and Callbacks is left nil. +func NewWorkflowUpdate( + _ chasm.MutableContext, updateID string, msPointer chasm.MSPointer, +) *WorkflowUpdate { + return &WorkflowUpdate{ + UpdateState: &workflowpb.UpdateState{ + UpdateId: updateID, + }, + MSPointer: msPointer, + } +} + +// LifecycleState always reports chasm.LifecycleStateRunning; it does not inspect the update's state. +func (u *WorkflowUpdate) LifecycleState( + _ chasm.Context, +) chasm.LifecycleState { + return chasm.LifecycleStateRunning +} + +// GetNexusCompletion builds the Nexus completion options for this update and the given requestID. +// When a rejection failure is recorded it is converted into a failed nexus.OperationError and returned +// in the options; otherwise the completion is looked up through GetNexusUpdateCompletion. +func (u *WorkflowUpdate) GetNexusCompletion( + ctx chasm.Context, + requestID string, +) (nexusrpc.CompleteOperationOptions, error) { + // If the update was rejected, return the rejection failure directly instead + // of looking up a completion event that doesn't exist. 
+ if rf := u.GetRejectionFailure(); rf != nil { + f, err := commonnexus.TemporalFailureToNexusFailure(rf) + if err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + opErr := &nexus.OperationError{ + Message: "update rejected", + State: nexus.OperationStateFailed, + Cause: &nexus.FailureError{Failure: f}, + } + if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + return nexusrpc.CompleteOperationOptions{ + Error: opErr, + }, nil + } + + // No rejection recorded: retrieve the completion data for (UpdateId, requestID) from the underlying mutable state via the embedded MSPointer. + return u.GetNexusUpdateCompletion(ctx, u.UpdateId, requestID) +} diff --git a/chasm/ms_pointer.go b/chasm/ms_pointer.go index 301da259c9e..cb2c3cf35ad 100644 --- a/chasm/ms_pointer.go +++ b/chasm/ms_pointer.go @@ -56,3 +56,8 @@ func (m MSPointer) GetNexusCompletion(ctx Context, requestID string) (nexusrpc.C func (m MSPointer) GetWorkflowTypeName() string { return m.backend.GetExecutionInfo().GetWorkflowTypeName() } + +// GetNexusUpdateCompletion retrieves the Nexus operation completion data for the given update ID and request ID from the underlying mutable state. 
+func (m MSPointer) GetNexusUpdateCompletion(ctx Context, updateID string, requestID string) (nexusrpc.CompleteOperationOptions, error) { + return m.backend.GetNexusUpdateCompletion(ctx.goContext(), updateID, requestID) +} diff --git a/chasm/node_backend_mock.go b/chasm/node_backend_mock.go index 6ddc1a815c1..0fcc7faba92 100644 --- a/chasm/node_backend_mock.go +++ b/chasm/node_backend_mock.go @@ -32,6 +32,7 @@ type MockNodeBackend struct { HandleUpdateWorkflowStateStatus func(state enumsspb.WorkflowExecutionState, status enumspb.WorkflowExecutionStatus) (bool, error) HandleIsWorkflow func() bool HandleGetNexusCompletion func(ctx context.Context, requestID string) (nexusrpc.CompleteOperationOptions, error) + HandleGetNexusUpdateCompletion func(ctx context.Context, updateID string, requestID string) (nexusrpc.CompleteOperationOptions, error) HandleAddHistoryEvent func(t enumspb.EventType, setAttributes func(*historypb.HistoryEvent)) *historypb.HistoryEvent HandleLoadHistoryEvent func(ctx context.Context, token []byte) (*historypb.HistoryEvent, error) HandleGenerateEventLoadToken func(event *historypb.HistoryEvent) ([]byte, error) @@ -231,6 +232,17 @@ func (m *MockNodeBackend) EndpointRegistry() EndpointRegistry { return nil } +func (m *MockNodeBackend) GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, +) (nexusrpc.CompleteOperationOptions, error) { + if m.HandleGetNexusUpdateCompletion != nil { + return m.HandleGetNexusUpdateCompletion(ctx, updateID, requestID) + } + return nexusrpc.CompleteOperationOptions{}, nil +} + func (m *MockNodeBackend) NumTasksAdded() int { m.mu.Lock() defer m.mu.Unlock() diff --git a/chasm/tree.go b/chasm/tree.go index 3b9e4cb4fe9..540af938509 100644 --- a/chasm/tree.go +++ b/chasm/tree.go @@ -220,6 +220,11 @@ type ( ctx context.Context, requestID string, ) (nexusrpc.CompleteOperationOptions, error) + GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, + ) 
(nexusrpc.CompleteOperationOptions, error) EndpointRegistry() EndpointRegistry } diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 267f5e1fd5a..04557e0d1ae 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -1019,6 +1019,11 @@ so forwarding by endpoint ID will not work out of the box.`, 32, `MaxCallbacksPerWorkflow is the maximum number of callbacks that can be attached to a workflow.`, ) + MaxCallbacksPerUpdateID = NewNamespaceIntSetting( + "system.maxCallbacksPerUpdateID", + 32, + `MaxCallbacksPerUpdateID is the maximum number of callbacks that can be attached to a single update ID.`, + ) FrontendLinkMaxSize = NewNamespaceIntSetting( "frontend.linkMaxSize", 4000, // Links may include a workflow ID and namespace name, both of which are limited to a length of 1000. @@ -2975,6 +2980,13 @@ map to enable DescribeWorkflow to resolve RequestIDRef signal backlinks. Require Only enable once all servers in the fleet have been upgraded to a version that understands the IncomingSignals CHASM field.`, ) + EnableWorkflowUpdateCallbacks = NewNamespaceBoolSetting( + "history.enableUpdateCallbacks", + false, + `Controls whether completion callbacks are created for workflow updates using +the CHASM implementation. 
When disabled, new update callbacks will not be registered, +but existing callbacks will still be processed and fired.`, + ) VersionMembershipCacheTTL = NewGlobalDurationSetting( "history.versionMembershipCacheTTL", diff --git a/go.mod b/go.mod index 7e264e7ba5d..2192ce7eaa9 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 - go.temporal.io/api v1.62.12 + go.temporal.io/api v1.62.13-0.20260519214255-11907b499103 go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 go.temporal.io/sdk v1.41.1 go.uber.org/fx v1.24.0 @@ -99,6 +99,7 @@ require ( github.com/go-openapi/swag/typeutils v0.26.0 // indirect github.com/go-openapi/swag/yamlutils v0.26.0 // indirect github.com/hashicorp/go-version v1.9.0 // indirect + github.com/nexus-rpc/nexus-proto-annotations v0.1.0 // indirect go.opentelemetry.io/collector/featuregate v1.56.0 // indirect ) diff --git a/go.sum b/go.sum index d93212758de..b5753456f6e 100644 --- a/go.sum +++ b/go.sum @@ -319,6 +319,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/nexus-rpc/nexus-proto-annotations v0.1.0 h1:2fELd+9sqUtNu6Fg//pw8YFsxOvp8vZ8hfP0nHhNI80= +github.com/nexus-rpc/nexus-proto-annotations v0.1.0/go.mod h1:n3UjF1bPCW8llR8tHvbxJ+27yPWrhpo8w/Yg1IOuY0Y= github.com/nexus-rpc/sdk-go v0.6.0 h1:QRgnP2zTbxEbiyWG/aXH8uSC5LV/Mg1fqb19jb4DBlo= github.com/nexus-rpc/sdk-go v0.6.0/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= @@ -469,8 +471,8 
@@ go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:R go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= -go.temporal.io/api v1.62.12 h1:627rVnItegQmrszg1bH4vfyc/1uNo5qCereCNkvZefw= -go.temporal.io/api v1.62.12/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.62.13-0.20260519214255-11907b499103 h1:mPaS2+VdLF+TEcQ7nbAqjFIJPPmLzS+Tr0qDmrzvlG0= +go.temporal.io/api v1.62.13-0.20260519214255-11907b499103/go.mod h1:0k75tRljEuELWGeXjEZZO7zYqBln4+1FrG6+IMOMy7Q= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2 h1:1hKeH3GyR6YD6LKMHGCZ76t6h1Sgha0hXVQBxWi3dlQ= go.temporal.io/auto-scaled-workers v0.0.0-20260407181057-edd947d743d2/go.mod h1:T8dnzVPeO+gaUTj9eDgm/lT2lZH4+JXNvrGaQGyVi50= go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA= diff --git a/service/frontend/namespace_handler.go b/service/frontend/namespace_handler.go index 087bc5f7de9..25a7c27cc24 100644 --- a/service/frontend/namespace_handler.go +++ b/service/frontend/namespace_handler.go @@ -906,6 +906,7 @@ func (d *namespaceHandler) createResponse( StandaloneNexusOperation: d.config.EnableChasm(info.Name) && d.config.StandaloneNexusOperationsEnabled(info.Name), WorkerPollCompleteOnShutdown: d.config.EnableCancelWorkerPollsOnShutdown(info.Name), WorkerCommands: d.config.WorkerCommandsEnabled(info.Name), + WorkflowUpdateCallbacks: d.config.EnableWorkflowUpdateCallbacks(info.Name), PollerAutoscaling: true, }, Limits: &namespacepb.NamespaceInfo_Limits{ diff --git a/service/frontend/service.go b/service/frontend/service.go index 0a743002912..dacde0b073d 100644 --- a/service/frontend/service.go +++ 
b/service/frontend/service.go @@ -191,6 +191,7 @@ type Config struct { EnableUpdateWorkflowExecution dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableUpdateWorkflowExecutionAsyncAccepted dynamicconfig.BoolPropertyFnWithNamespaceFilter + EnableWorkflowUpdateCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute dynamicconfig.IntPropertyFnWithNamespaceFilter EnableWorkerVersioningData dynamicconfig.BoolPropertyFnWithNamespaceFilter @@ -367,6 +368,7 @@ func NewConfig( EnableUpdateWorkflowExecution: dynamicconfig.FrontendEnableUpdateWorkflowExecution.Get(dc), EnableUpdateWorkflowExecutionAsyncAccepted: dynamicconfig.FrontendEnableUpdateWorkflowExecutionAsyncAccepted.Get(dc), + EnableWorkflowUpdateCallbacks: dynamicconfig.EnableWorkflowUpdateCallbacks.Get(dc), NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute: dynamicconfig.NumConsecutiveWorkflowTaskProblemsToTriggerSearchAttribute.Get(dc), EnableWorkerVersioningData: dynamicconfig.FrontendEnableWorkerVersioningDataAPIs.Get(dc), diff --git a/service/history/api/describeworkflow/api.go b/service/history/api/describeworkflow/api.go index ca52e2ab638..938e007ed82 100644 --- a/service/history/api/describeworkflow/api.go +++ b/service/history/api/describeworkflow/api.go @@ -510,7 +510,11 @@ func buildCallbackInfosFromChasm( for _, field := range wf.Callbacks { callback := field.Get(chasmCtx) - callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, outboundQueueCBPool) + trigger := &workflowpb.CallbackInfo_Trigger{ + Variant: &workflowpb.CallbackInfo_Trigger_WorkflowClosed{}, + } + + callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, trigger, outboundQueueCBPool) if err != nil { logger.Error( "failed to build callback info from CHASM callback", @@ -526,6 +530,38 @@ func buildCallbackInfosFromChasm( } result = append(result, callbackInfo) } + // Collect update callbacks + for updateID, ufield := range 
wf.Updates { + updates := ufield.Get(chasmCtx) + + for _, ucfield := range updates.Callbacks { + callback := ucfield.Get(chasmCtx) + + trigger := &workflowpb.CallbackInfo_Trigger{ + Variant: &workflowpb.CallbackInfo_Trigger_UpdateWorkflowExecutionCompleted{ + UpdateWorkflowExecutionCompleted: &workflowpb.CallbackInfo_UpdateWorkflowExecutionCompleted{ + UpdateId: updateID, + }, + }, + } + + callbackInfo, err := buildCallbackInfoFromChasm(ctx, namespaceID, callback, trigger, outboundQueueCBPool) + if err != nil { + logger.Error( + "failed to build callback info from CHASM update callback", + tag.WorkflowNamespaceID(namespaceID.String()), + tag.WorkflowID(executionInfo.WorkflowId), + tag.WorkflowRunID(executionState.RunId), + tag.Error(err), + ) + return nil, serviceerror.NewInternal("failed to construct describe response") + } + if callbackInfo == nil { + continue + } + result = append(result, callbackInfo) + } + } return result, nil } @@ -535,6 +571,7 @@ func buildCallbackInfoFromChasm( ctx context.Context, namespaceID namespace.ID, callback *chasmcallback.Callback, + trigger *workflowpb.CallbackInfo_Trigger, outboundQueueCBPool *circuitbreakerpool.OutboundQueueCircuitBreakerPool, ) (*workflowpb.CallbackInfo, error) { // Create a circuit breaker state checker function @@ -547,7 +584,7 @@ func buildCallbackInfoFromChasm( return cb.State() != gobreaker.StateClosed } - return buildChasmCallbackInfo(ctx, namespaceID.String(), callback, circuitBreakerState) + return buildChasmCallbackInfo(ctx, namespaceID.String(), callback, trigger, circuitBreakerState) } // buildChasmCallbackInfo converts a single CHASM callback to API CallbackInfo format. 
@@ -556,6 +593,7 @@ func buildChasmCallbackInfo( ctx context.Context, namespaceID string, cb *chasmcallback.Callback, + trigger *workflowpb.CallbackInfo_Trigger, circuitBreakerState func(destination string) bool, ) (*workflowpb.CallbackInfo, error) { nexusVariant := cb.GetCallback().GetNexus() @@ -595,10 +633,6 @@ func buildChasmCallbackInfo( } } - trigger := &workflowpb.CallbackInfo_Trigger{ - Variant: &workflowpb.CallbackInfo_Trigger_WorkflowClosed{}, - } - return &workflowpb.CallbackInfo{ Callback: cbSpec, Trigger: trigger, diff --git a/service/history/api/pollupdate/api_test.go b/service/history/api/pollupdate/api_test.go index 29c78f6cdf9..55c37e01b64 100644 --- a/service/history/api/pollupdate/api_test.go +++ b/service/history/api/pollupdate/api_test.go @@ -58,6 +58,10 @@ type ( func (mockUpdateEventStore) OnAfterCommit(f func(context.Context)) { f(context.TODO()) } func (mockUpdateEventStore) OnAfterRollback(f func(context.Context)) {} func (mockUpdateEventStore) CanAddEvent() bool { return true } +func (mockUpdateEventStore) RejectWorkflowExecutionUpdate(string, *failurepb.Failure) error { + return nil +} +func (mockUpdateEventStore) HasRequestID(string) bool { return false } func (m mockWFConsistencyChecker) GetWorkflowLease( ctx context.Context, diff --git a/service/history/api/startworkflow/api.go b/service/history/api/startworkflow/api.go index 5127311a50e..ee5e1b131f9 100644 --- a/service/history/api/startworkflow/api.go +++ b/service/history/api/startworkflow/api.go @@ -694,6 +694,7 @@ func (s *Starter) handleUseExistingWorkflowOnConflictOptions( "", // identity nil, // priority nil, // timeSkippingConfig + nil, // workflowUpdateOptions ) return api.UpdateWorkflowWithoutWorkflowTask, err }, diff --git a/service/history/api/updateworkflow/api.go b/service/history/api/updateworkflow/api.go index 9ed6d8993f2..4e283b297a2 100644 --- a/service/history/api/updateworkflow/api.go +++ b/service/history/api/updateworkflow/api.go @@ -165,6 +165,17 @@ func (u 
*Updater) ApplyRequest( return nil, err } + callbacksAttached, err := u.upd.AttachCallbacks(updateRequest, workflow.WithEffects(effect.Immediate(ctx), ms)) + if err != nil { + return nil, err + } + if callbacksAttached { + return &api.UpdateWorkflowAction{ + Noop: false, + CreateWorkflowTask: false, + }, nil + } + // If WT is scheduled, but not started, updates will be attached to it, when WT is started. // If WT has already started, new speculative WT will be created when started WT completes. // If update is duplicate, then WT for this update was already created. @@ -263,6 +274,42 @@ func (u *Updater) OnSuccess( return nil, err } resp := u.CreateResponse(u.wfKey, status.Outcome, status.Stage) + + // Attach a link to the response. For accepted/completed updates, use a WorkflowEvent link + // with a RequestIdReference pointing to the accepted event. For rejected updates (stage + // COMPLETED with a failure outcome and no acceptance), use a Workflow link since rejected + // updates don't write any event to history. + requestID := u.req.GetRequest().GetRequest().GetRequestId() + if status.Outcome.GetFailure() != nil && status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED { + // Rejected update: no event in history, link to the workflow itself. + resp.Response.Link = &commonpb.Link{ + Variant: &commonpb.Link_Workflow_{ + Workflow: &commonpb.Link_Workflow{ + Namespace: u.req.Request.Namespace, + WorkflowId: u.wfKey.WorkflowID, + RunId: u.wfKey.RunID, + Reason: "Update rejected", + }, + }, + } + } else if status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED || status.Stage == enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED { + // Accepted or completed update: link to the accepted event. 
+ resp.Response.Link = &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: &commonpb.Link_WorkflowEvent{ + Namespace: u.req.Request.Namespace, + WorkflowId: u.wfKey.WorkflowID, + RunId: u.wfKey.RunID, + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: requestID, + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED, + }, + }, + }, + }, + } + } return resp, nil } diff --git a/service/history/api/updateworkflowoptions/api.go b/service/history/api/updateworkflowoptions/api.go index a2defe8f766..43204a5f40d 100644 --- a/service/history/api/updateworkflowoptions/api.go +++ b/service/history/api/updateworkflowoptions/api.go @@ -178,7 +178,7 @@ func MergeAndApply( if mergedOpts.GetVersioningOverride() == nil { unsetOverride = true } - _, err = ms.AddWorkflowExecutionOptionsUpdatedEvent(mergedOpts.GetVersioningOverride(), unsetOverride, "", nil, nil, identity, mergedOpts.GetPriority(), mergedOpts.GetTimeSkippingConfig()) + _, err = ms.AddWorkflowExecutionOptionsUpdatedEvent(mergedOpts.GetVersioningOverride(), unsetOverride, "", nil, nil, identity, mergedOpts.GetPriority(), mergedOpts.GetTimeSkippingConfig(), nil) if err != nil { return nil, hasChanges, err } diff --git a/service/history/api/updateworkflowoptions/api_test.go b/service/history/api/updateworkflowoptions/api_test.go index 07d31ec7289..8143b52cdd2 100644 --- a/service/history/api/updateworkflowoptions/api_test.go +++ b/service/history/api/updateworkflowoptions/api_test.go @@ -304,7 +304,7 @@ func (s *updateWorkflowOptionsSuite) TestInvoke_Success() { ).Return(&matchingservice.CheckTaskQueueVersionMembershipResponse{ IsMember: true, }, nil) - s.currentMutableState.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(expectedOverrideOptions.VersioningOverride, false, "", nil, nil, "", expectedOverrideOptions.Priority, expectedOverrideOptions.TimeSkippingConfig).Return(&historypb.HistoryEvent{}, nil) 
+ s.currentMutableState.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(expectedOverrideOptions.VersioningOverride, false, "", nil, nil, "", expectedOverrideOptions.Priority, expectedOverrideOptions.TimeSkippingConfig, nil).Return(&historypb.HistoryEvent{}, nil) s.currentContext.EXPECT().UpdateWorkflowExecutionAsActive(gomock.Any(), s.shardContext).Return(nil) updateReq := &historyservice.UpdateWorkflowExecutionOptionsRequest{ @@ -498,7 +498,7 @@ func TestMergeAndApply_TimeSkippingConfig(t *testing.T) { Config: tc.initialConfig, }, }).AnyTimes() - ms.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, "", nil, gomock.Any()).Return(&historypb.HistoryEvent{}, nil) + ms.EXPECT().AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, "", nil, gomock.Any(), gomock.Any()).Return(&historypb.HistoryEvent{}, nil) result, hasChanges, err := MergeAndApply(ms, tc.updateOptions, tc.updateMask, "") require.NoError(t, err) diff --git a/service/history/configs/config.go b/service/history/configs/config.go index 57cc6ae1e1d..e8e7c592eb4 100644 --- a/service/history/configs/config.go +++ b/service/history/configs/config.go @@ -70,9 +70,11 @@ type Config struct { EnableTransitionHistory dynamicconfig.BoolPropertyFnWithNamespaceFilter MaxCallbacksPerWorkflow dynamicconfig.IntPropertyFnWithNamespaceFilter MaxCallbacksPerExecution dynamicconfig.IntPropertyFnWithNamespaceFilter + MaxCallbacksPerUpdateID dynamicconfig.IntPropertyFnWithNamespaceFilter EnableChasm dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMSignalBacklinks dynamicconfig.BoolPropertyFnWithNamespaceFilter + EnableWorkflowUpdateCallbacks dynamicconfig.BoolPropertyFnWithNamespaceFilter ChasmMaxInMemoryPureTasks dynamicconfig.IntPropertyFn EnableCHASMSchedulerCreation dynamicconfig.BoolPropertyFnWithNamespaceFilter EnableCHASMSchedulerMigration dynamicconfig.BoolPropertyFnWithNamespaceFilter @@ -495,15 +497,17 @@ 
func NewConfig( EnableTransitionHistory: dynamicconfig.EnableTransitionHistory.Get(dc), MaxCallbacksPerWorkflow: dynamicconfig.MaxCallbacksPerWorkflow.Get(dc), MaxCallbacksPerExecution: callback.MaxPerExecution.Get(dc), + MaxCallbacksPerUpdateID: dynamicconfig.MaxCallbacksPerUpdateID.Get(dc), EnableChasm: dynamicconfig.EnableChasm.Get(dc), ChasmMaxInMemoryPureTasks: dynamicconfig.ChasmMaxInMemoryPureTasks.Get(dc), EnableCHASMSchedulerCreation: dynamicconfig.EnableCHASMSchedulerCreation.Get(dc), EnableCHASMSchedulerMigration: dynamicconfig.EnableCHASMSchedulerMigration.Get(dc), - EnableCHASMCallbacks: dynamicconfig.EnableCHASMCallbacks.Get(dc), - EnableCHASMSignalBacklinks: dynamicconfig.EnableCHASMSignalBacklinks.Get(dc), - ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), + EnableCHASMCallbacks: dynamicconfig.EnableCHASMCallbacks.Get(dc), + EnableCHASMSignalBacklinks: dynamicconfig.EnableCHASMSignalBacklinks.Get(dc), + ExternalPayloadsEnabled: dynamicconfig.ExternalPayloadsEnabled.Get(dc), + EnableWorkflowUpdateCallbacks: dynamicconfig.EnableWorkflowUpdateCallbacks.Get(dc), EventsShardLevelCacheMaxSizeBytes: dynamicconfig.EventsCacheMaxSizeBytes.Get(dc), // 512KB EventsHostLevelCacheMaxSizeBytes: dynamicconfig.EventsHostLevelCacheMaxSizeBytes.Get(dc), // 256MB diff --git a/service/history/historybuilder/event_factory.go b/service/history/historybuilder/event_factory.go index 0d2cdb04695..04c51ecf37c 100644 --- a/service/history/historybuilder/event_factory.go +++ b/service/history/historybuilder/event_factory.go @@ -407,6 +407,7 @@ func (b *EventFactory) CreateWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) *historypb.HistoryEvent { event := b.createHistoryEvent(enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, b.timeSource.Now()) 
event.Attributes = &historypb.HistoryEvent_WorkflowExecutionOptionsUpdatedEventAttributes{ @@ -418,6 +419,7 @@ func (b *EventFactory) CreateWorkflowExecutionOptionsUpdatedEvent( Identity: identity, Priority: priority, TimeSkippingConfig: timeSkippingConfig, + WorkflowUpdateOptions: workflowUpdateOptions, }, } event.Links = links diff --git a/service/history/historybuilder/history_builder.go b/service/history/historybuilder/history_builder.go index 35416a48396..bf69f2fc5ca 100644 --- a/service/history/historybuilder/history_builder.go +++ b/service/history/historybuilder/history_builder.go @@ -475,6 +475,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) *historypb.HistoryEvent { event := b.EventFactory.CreateWorkflowExecutionOptionsUpdatedEvent( worker_versioning.ConvertOverrideToV32(versioningOverride), @@ -485,6 +486,7 @@ func (b *HistoryBuilder) AddWorkflowExecutionOptionsUpdatedEvent( identity, priority, timeSkippingConfig, + workflowUpdateOptions, ) event, _ = b.EventStore.add(event) return event diff --git a/service/history/historybuilder/history_builder_categorization_test.go b/service/history/historybuilder/history_builder_categorization_test.go index 14a9029a1c0..450212c90d5 100644 --- a/service/history/historybuilder/history_builder_categorization_test.go +++ b/service/history/historybuilder/history_builder_categorization_test.go @@ -208,7 +208,7 @@ func TestHistoryBuilder_FlushBufferToCurrentBatch(t *testing.T) { t.Errorf("expected 1 event in memBufferBatch got %d", len(hb.memBufferBatch)) } // add another event to memBufferBatch - hb.AddWorkflowExecutionOptionsUpdatedEvent(nil, false, "request-id-1", nil, nil, "", nil, nil) + hb.AddWorkflowExecutionOptionsUpdatedEvent(nil, false, "request-id-1", nil, nil, "", nil, nil, nil) 
if len(hb.memBufferBatch) != 2 { t.Errorf("expected 2 event in memBufferBatch got %d", len(hb.memBufferBatch)) } diff --git a/service/history/interfaces/mutable_state.go b/service/history/interfaces/mutable_state.go index a0b85253cf7..2698a95a663 100644 --- a/service/history/interfaces/mutable_state.go +++ b/service/history/interfaces/mutable_state.go @@ -127,10 +127,11 @@ type ( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) (*historypb.HistoryEvent, error) - AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID string, acceptedRequestMessageId string, acceptedRequestSequencingEventId int64, acceptedRequest *updatepb.Request) (*historypb.HistoryEvent, error) + AddWorkflowExecutionUpdateAcceptedEvent(updateID string, acceptedRequestMessageID string, acceptedRequestSequencingEventID int64, acceptedRequest *updatepb.Request) (*historypb.HistoryEvent, error) AddWorkflowExecutionUpdateCompletedEvent(acceptedEventID int64, updResp *updatepb.Response) (*historypb.HistoryEvent, error) - RejectWorkflowExecutionUpdate(protocolInstanceID string, updRejection *updatepb.Rejection) error + RejectWorkflowExecutionUpdate(updateID string, failure *failurepb.Failure) error AddWorkflowExecutionUpdateAdmittedEvent(request *updatepb.Request, origin enumspb.UpdateAdmittedEventOrigin) (*historypb.HistoryEvent, error) ApplyWorkflowExecutionUpdateAdmittedEvent(event *historypb.HistoryEvent, batchId int64) error VisitUpdates(visitor func(updID string, updInfo *persistencespb.UpdateInfo)) diff --git a/service/history/interfaces/mutable_state_mock.go b/service/history/interfaces/mutable_state_mock.go index 20378e67ac4..684b648df54 100644 --- a/service/history/interfaces/mutable_state_mock.go +++ b/service/history/interfaces/mutable_state_mock.go @@ -678,18 +678,18 @@ func (mr *MockMutableStateMockRecorder) 
AddWorkflowExecutionCanceledEvent(arg0, } // AddWorkflowExecutionOptionsUpdatedEvent mocks base method. -func (m *MockMutableState) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride *workflow.VersioningOverride, unsetVersioningOverride bool, attachRequestID string, attachCompletionCallbacks []*common.Callback, links []*common.Link, identity string, priority *common.Priority, timeSkippingConfig *workflow.TimeSkippingConfig) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride *workflow.VersioningOverride, unsetVersioningOverride bool, attachRequestID string, attachCompletionCallbacks []*common.Callback, links []*common.Link, identity string, priority *common.Priority, timeSkippingConfig *workflow.TimeSkippingConfig, workflowUpdateOptions []*history.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionOptionsUpdatedEvent", versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig) + ret := m.ctrl.Call(m, "AddWorkflowExecutionOptionsUpdatedEvent", versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionOptionsUpdatedEvent indicates an expected call of AddWorkflowExecutionOptionsUpdatedEvent. 
-func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionOptionsUpdatedEvent(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionOptionsUpdatedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionOptionsUpdatedEvent), versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionOptionsUpdatedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionOptionsUpdatedEvent), versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) } // AddWorkflowExecutionPausedEvent mocks base method. @@ -813,18 +813,18 @@ func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUnpausedEvent(identi } // AddWorkflowExecutionUpdateAcceptedEvent mocks base method. 
-func (m *MockMutableState) AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId string, acceptedRequestSequencingEventId int64, acceptedRequest *update.Request) (*history.HistoryEvent, error) { +func (m *MockMutableState) AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID string, acceptedRequestSequencingEventID int64, acceptedRequest *update.Request) (*history.HistoryEvent, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "AddWorkflowExecutionUpdateAcceptedEvent", protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + ret := m.ctrl.Call(m, "AddWorkflowExecutionUpdateAcceptedEvent", updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) ret0, _ := ret[0].(*history.HistoryEvent) ret1, _ := ret[1].(error) return ret0, ret1 } // AddWorkflowExecutionUpdateAcceptedEvent indicates an expected call of AddWorkflowExecutionUpdateAcceptedEvent. -func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionUpdateAcceptedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionUpdateAcceptedEvent), protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddWorkflowExecutionUpdateAcceptedEvent", reflect.TypeOf((*MockMutableState)(nil).AddWorkflowExecutionUpdateAcceptedEvent), updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) } // AddWorkflowExecutionUpdateAdmittedEvent mocks base method. 
@@ -3431,17 +3431,17 @@ func (mr *MockMutableStateMockRecorder) RegenerateActivityRetryTask(ai, newSched } // RejectWorkflowExecutionUpdate mocks base method. -func (m *MockMutableState) RejectWorkflowExecutionUpdate(protocolInstanceID string, updRejection *update.Rejection) error { +func (m *MockMutableState) RejectWorkflowExecutionUpdate(updateID string, arg1 *failure.Failure) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "RejectWorkflowExecutionUpdate", protocolInstanceID, updRejection) + ret := m.ctrl.Call(m, "RejectWorkflowExecutionUpdate", updateID, arg1) ret0, _ := ret[0].(error) return ret0 } // RejectWorkflowExecutionUpdate indicates an expected call of RejectWorkflowExecutionUpdate. -func (mr *MockMutableStateMockRecorder) RejectWorkflowExecutionUpdate(protocolInstanceID, updRejection any) *gomock.Call { +func (mr *MockMutableStateMockRecorder) RejectWorkflowExecutionUpdate(updateID, arg1 any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RejectWorkflowExecutionUpdate", reflect.TypeOf((*MockMutableState)(nil).RejectWorkflowExecutionUpdate), protocolInstanceID, updRejection) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RejectWorkflowExecutionUpdate", reflect.TypeOf((*MockMutableState)(nil).RejectWorkflowExecutionUpdate), updateID, arg1) } // RemoveSpeculativeWorkflowTaskTimeoutTask mocks base method. 
diff --git a/service/history/ndc/events_reapplier_test.go b/service/history/ndc/events_reapplier_test.go index 2f5995dc160..8608083e52b 100644 --- a/service/history/ndc/events_reapplier_test.go +++ b/service/history/ndc/events_reapplier_test.go @@ -115,6 +115,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_WorkflowExec attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() msCurrent.EXPECT().IsWorkflowPendingOnWorkflowTaskBackoff().Return(true) @@ -163,6 +164,7 @@ func (s *nDCEventReapplicationSuite) TestReapplyEvents_AppliedEvent_WorkflowExec attr.GetIdentity(), attr.GetPriority(), timeSkippingConfig, + attr.GetWorkflowUpdateOptions(), ).Return(event, nil) msCurrent.EXPECT().HSM().Return(s.hsmNode).AnyTimes() msCurrent.EXPECT().IsWorkflowPendingOnWorkflowTaskBackoff().Return(true) diff --git a/service/history/ndc/workflow_resetter.go b/service/history/ndc/workflow_resetter.go index 26d7fb1c537..f7459c15555 100644 --- a/service/history/ndc/workflow_resetter.go +++ b/service/history/ndc/workflow_resetter.go @@ -972,6 +972,7 @@ func reapplyEvents( attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ); err != nil { return reappliedEvents, err } diff --git a/service/history/ndc/workflow_resetter_test.go b/service/history/ndc/workflow_resetter_test.go index 4da5bcfc064..54c8f1343c8 100644 --- a/service/history/ndc/workflow_resetter_test.go +++ b/service/history/ndc/workflow_resetter_test.go @@ -1207,6 +1207,7 @@ func (s *workflowResetterSuite) TestReapplyEvents() { attr.GetIdentity(), attr.GetPriority(), attr.GetTimeSkippingConfig(), + attr.GetWorkflowUpdateOptions(), ).Return(&historypb.HistoryEvent{}, nil) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_SIGNALED: attr := event.GetWorkflowExecutionSignaledEventAttributes() @@ -1737,6 +1738,7 @@ func (s 
*workflowResetterSuite) TestReapplyEvents_WorkflowOptionsUpdated_WithTim attr.GetIdentity(), attr.GetPriority(), timeSkippingConfig, + attr.GetWorkflowUpdateOptions(), ).Return(&historypb.HistoryEvent{}, nil) appliedEvents, err := reapplyEvents(context.Background(), ms, nil, smReg, []*historypb.HistoryEvent{event}, nil, "", true) diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index e5d410475cf..2bfad286d93 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -35,7 +35,6 @@ import ( tokenspb "go.temporal.io/server/api/token/v1" workflowspb "go.temporal.io/server/api/workflow/v1" "go.temporal.io/server/chasm" - "go.temporal.io/server/chasm/lib/callback" chasmworkflow "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" "go.temporal.io/server/common/backoff" @@ -740,6 +739,112 @@ func (ms *MutableStateImpl) EndpointRegistry() chasm.EndpointRegistry { return ms.endpointRegistry } +func (ms *MutableStateImpl) GetNexusUpdateCompletion( + ctx context.Context, + updateID string, + requestID string, +) (_ nexusrpc.CompleteOperationOptions, err error) { + var closeTime time.Time + cevent, err := ms.getUpdateOutcomeEvent(ctx, updateID) + var outcome *updatepb.Outcome + if err != nil { + // If the workflow is complete but the update outcome is missing we need to respond to all callbacks + ce, errCE := ms.GetCompletionEvent(ctx) + if errors.Is(errCE, ErrMissingWorkflowCompletionEvent) { + return nexusrpc.CompleteOperationOptions{}, err + } else if errCE != nil { + return nexusrpc.CompleteOperationOptions{}, errCE + } + outcome = &updatepb.Outcome{ + Value: &updatepb.Outcome_Failure{ + Failure: common.CloneProto(update.AcceptedUpdateCompletedWorkflowFailure), + }, + } + closeTime = ce.GetEventTime().AsTime() + } else { + outcome = cevent.GetWorkflowExecutionUpdateCompletedEventAttributes().GetOutcome() + closeTime = 
cevent.GetEventTime().AsTime() + } + + // Create a RequestIdReference link for the update callback. This is preferred over an + // EventReference link because the requestID is always available, whereas the accepted + // event ID may not be resolvable (e.g., when the workflow completed before the update). + // Note: rejected updates are removed from mutable state, so this code path is only + // reachable for accepted/completed updates. + link := &commonpb.Link_WorkflowEvent{ + Namespace: ms.namespaceEntry.Name().String(), + WorkflowId: ms.executionInfo.WorkflowId, + RunId: ms.executionState.RunId, + } + requestIDInfo, exists := ms.executionState.RequestIds[requestID] + if exists { + link.Reference = &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: requestID, + EventType: requestIDInfo.GetEventType(), + }, + } + } + startLink := commonnexus.ConvertLinkWorkflowEventToNexusLink(link) + + startTime := ms.executionState.GetStartTime().AsTime() + links := []nexus.Link{startLink} + + if outcome.GetSuccess() != nil { + return nexusCompleteOperationSuccess(outcome.GetSuccess(), startTime, closeTime, links), nil + } else if outcome.GetFailure() != nil { + return nexusCompleteOperationFailure(outcome.GetFailure(), nexus.OperationStateFailed, "operation failed", startTime, closeTime, links) + } + return nexusrpc.CompleteOperationOptions{}, serviceerror.NewInternalf("unknown update outcome for update ID: %s", updateID) +} + +// nexusCompleteOperationSuccess constructs a successful CompleteOperationOptions from the given payloads. +// Only the first payload is used since Nexus does not support multi-value returns. 
+func nexusCompleteOperationSuccess( + result *commonpb.Payloads, + startTime, closeTime time.Time, + links []nexus.Link, +) nexusrpc.CompleteOperationOptions { + var p *commonpb.Payload + if payloads := result.GetPayloads(); len(payloads) > 0 { + p = payloads[0] + } + return nexusrpc.CompleteOperationOptions{ + Result: p, + StartTime: startTime, + CloseTime: closeTime, + Links: links, + } +} + +// nexusCompleteOperationFailure constructs a failed CompleteOperationOptions from the given failure. +func nexusCompleteOperationFailure( + f *failurepb.Failure, + state nexus.OperationState, + message string, + startTime, closeTime time.Time, + links []nexus.Link, +) (nexusrpc.CompleteOperationOptions, error) { + nexusFailure, err := commonnexus.TemporalFailureToNexusFailure(f) + if err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + opErr := &nexus.OperationError{ + Message: message, + State: state, + Cause: &nexus.FailureError{Failure: nexusFailure}, + } + if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { + return nexusrpc.CompleteOperationOptions{}, err + } + return nexusrpc.CompleteOperationOptions{ + Error: opErr, + StartTime: startTime, + CloseTime: closeTime, + Links: links, + }, nil +} + // GetNexusCompletion converts a workflow completion event into a [nexus.OperationCompletion]. // Completions may be sent to arbitrary third parties, we intentionally do not include any termination reasons, and // expose only failure messages. 
@@ -781,118 +886,57 @@ func (ms *MutableStateImpl) GetNexusCompletion( } startLink := commonnexus.ConvertLinkWorkflowEventToNexusLink(link) + startTime := ms.executionState.GetStartTime().AsTime() + closeTime := ce.GetEventTime().AsTime() + links := []nexus.Link{startLink} + switch ce.GetEventType() { case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_COMPLETED: - payloads := ce.GetWorkflowExecutionCompletedEventAttributes().GetResult().GetPayloads() - var p *commonpb.Payload // default to nil, the payload serializer converts nil to Nexus nil Content. - if len(payloads) > 0 { - // All of our SDKs support returning a single value from workflows, we can safely ignore the - // rest of the payloads. Additionally, even if a workflow could return more than a single value, - // Nexus does not support it. - p = payloads[0] - } - return nexusrpc.CompleteOperationOptions{ - Result: p, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + return nexusCompleteOperationSuccess( + ce.GetWorkflowExecutionCompletedEventAttributes().GetResult(), + startTime, closeTime, links, + ), nil case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_FAILED: - f, err := commonnexus.TemporalFailureToNexusFailure(ce.GetWorkflowExecutionFailedEventAttributes().GetFailure()) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - Message: "operation failed", - State: nexus.OperationStateFailed, - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + return nexusCompleteOperationFailure( + 
ce.GetWorkflowExecutionFailedEventAttributes().GetFailure(), + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_CANCELED: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation canceled", - FailureInfo: &failurepb.Failure_CanceledFailureInfo{ - CanceledFailureInfo: &failurepb.CanceledFailureInfo{ - Details: ce.GetWorkflowExecutionCanceledEventAttributes().GetDetails(), + return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation canceled", + FailureInfo: &failurepb.Failure_CanceledFailureInfo{ + CanceledFailureInfo: &failurepb.CanceledFailureInfo{ + Details: ce.GetWorkflowExecutionCanceledEventAttributes().GetDetails(), + }, }, }, - }) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateCanceled, - Message: "operation canceled", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateCanceled, "operation canceled", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TERMINATED: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation terminated", - FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ - TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{}, + return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation terminated", + FailureInfo: &failurepb.Failure_TerminatedFailureInfo{ + TerminatedFailureInfo: &failurepb.TerminatedFailureInfo{}, + }, }, - }) - if err != nil { - return 
nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateFailed, - Message: "operation failed", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TIMED_OUT: - f, err := commonnexus.TemporalFailureToNexusFailure(&failurepb.Failure{ - Message: "operation exceeded internal timeout", - FailureInfo: &failurepb.Failure_TimeoutFailureInfo{ - TimeoutFailureInfo: &failurepb.TimeoutFailureInfo{ - // Not filling in timeout type and other information, it's not particularly interesting to a Nexus - // caller. 
+ return nexusCompleteOperationFailure( + &failurepb.Failure{ + Message: "operation exceeded internal timeout", + FailureInfo: &failurepb.Failure_TimeoutFailureInfo{ + TimeoutFailureInfo: &failurepb.TimeoutFailureInfo{}, }, }, - }) - if err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - opErr := &nexus.OperationError{ - State: nexus.OperationStateFailed, - Message: "operation failed", - Cause: &nexus.FailureError{Failure: f}, - } - if err := nexusrpc.MarkAsWrapperError(nexusrpc.DefaultFailureConverter(), opErr); err != nil { - return nexusrpc.CompleteOperationOptions{}, err - } - return nexusrpc.CompleteOperationOptions{ - Error: opErr, - StartTime: ms.executionState.GetStartTime().AsTime(), - CloseTime: ce.GetEventTime().AsTime(), - Links: []nexus.Link{startLink}, - }, nil + nexus.OperationStateFailed, "operation failed", + startTime, closeTime, links, + ) } return nexusrpc.CompleteOperationOptions{}, serviceerror.NewInternalf("invalid workflow execution status: %v", ce.GetEventType()) } @@ -1467,6 +1511,17 @@ func (ms *MutableStateImpl) GetUpdateOutcome( ctx context.Context, updateID string, ) (*updatepb.Outcome, error) { + event, err := ms.getUpdateOutcomeEvent(ctx, updateID) + if err != nil { + return nil, err + } + return event.GetWorkflowExecutionUpdateCompletedEventAttributes().GetOutcome(), nil +} + +func (ms *MutableStateImpl) getUpdateOutcomeEvent( + ctx context.Context, + updateID string, +) (*historypb.HistoryEvent, error) { if ms.executionInfo.UpdateInfos == nil { return nil, serviceerror.NewNotFound("update not found") } @@ -1493,11 +1548,10 @@ func (ms *MutableStateImpl) GetUpdateOutcome( if err != nil { return nil, err } - attrs := event.GetWorkflowExecutionUpdateCompletedEventAttributes() - if attrs == nil { + if event.GetWorkflowExecutionUpdateCompletedEventAttributes() == nil { return nil, serviceerror.NewInternal("event pointer does not reference an update completed event") } - return attrs.GetOutcome(), nil + return event, nil 
} func (ms *MutableStateImpl) GetActivityScheduledEvent( @@ -3237,6 +3291,43 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUnpausedEvent(event *historypb return ms.updatePauseInfoSearchAttribute() } +func (ms *MutableStateImpl) addUpdateCallbacks( + event *historypb.HistoryEvent, + updateID string, + requestID string, + updateCallbacks []*commonpb.Callback, +) error { + if len(updateCallbacks) == 0 { + return nil + } + if ms.chasmCallbacksEnabled() && ms.config.EnableWorkflowUpdateCallbacks(ms.GetNamespaceEntry().Name().String()) { + // Initialize chasm tree once for new workflows. + // Using context.Background() because this is done outside an actual request context and the + // chasmworkflow.NewWorkflow does not actually use it currently. + ms.EnsureChasmWorkflowComponent(context.Background()) + return ms.addUpdateCallbacksChasm(event, updateID, requestID, updateCallbacks) + } + + return nil +} + +func (ms *MutableStateImpl) addUpdateCallbacksChasm( + event *historypb.HistoryEvent, + updateID string, + requestID string, + updateCallbacks []*commonpb.Callback, +) error { + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + + nsName := ms.GetNamespaceEntry().Name().String() + maxCallbacksPerWorkflow := ms.config.MaxCallbacksPerWorkflow(nsName) + maxCallbacksPerUpdateID := ms.config.MaxCallbacksPerUpdateID(nsName) + return wf.AddUpdateCompletionCallbacks(ctx, event.EventTime, updateID, requestID, updateCallbacks, maxCallbacksPerWorkflow, maxCallbacksPerUpdateID) +} + func (ms *MutableStateImpl) addCompletionCallbacks( event *historypb.HistoryEvent, requestID string, @@ -4743,7 +4834,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionFailedEvent( if attrs.RetryState != enumspb.RETRY_STATE_IN_PROGRESS { return ms.processCloseCallbacks() } - return nil + // Workflow-level callbacks are inherited by the retry run, but update callbacks + // must fire now because the update was aborted on the old run. 
+ return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddTimeoutWorkflowEvent( @@ -4791,7 +4884,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionTimedoutEvent( if attrs.RetryState != enumspb.RETRY_STATE_IN_PROGRESS { return ms.processCloseCallbacks() } - return nil + // Workflow-level callbacks are inherited by the retry run, but update callbacks + // must fire now because the update was aborted on the old run. + return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddWorkflowExecutionCancelRequestedEvent( @@ -5452,22 +5547,44 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateAdmittedEvent(event *his ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} ms.writeEventToCache(event) - return nil + + // Store completion callbacks from the update request at admission time. + // This is needed for the reset/reapply case where the UpdateAccepted event + // may have a nil AcceptedRequest (because the UpdateAdmitted event already + // contains the request), causing callbacks to be lost at acceptance time. 
+ requestID := attrs.GetRequest().GetRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + return ms.addUpdateCallbacks( + event, + updateID, + requestID, + attrs.GetRequest().GetCompletionCallbacks(), + ) } func (ms *MutableStateImpl) AddWorkflowExecutionUpdateAcceptedEvent( - protocolInstanceID string, - acceptedRequestMessageId string, - acceptedRequestSequencingEventId int64, + updateID string, + acceptedRequestMessageID string, + acceptedRequestSequencingEventID int64, acceptedRequest *updatepb.Request, ) (*historypb.HistoryEvent, error) { if err := ms.checkMutability(tag.WorkflowActionUpdateAccepted); err != nil { return nil, err } - event := ms.hBuilder.AddWorkflowExecutionUpdateAcceptedEvent(protocolInstanceID, acceptedRequestMessageId, acceptedRequestSequencingEventId, acceptedRequest) + event := ms.hBuilder.AddWorkflowExecutionUpdateAcceptedEvent(updateID, acceptedRequestMessageID, acceptedRequestSequencingEventID, acceptedRequest) if err := ms.ApplyWorkflowExecutionUpdateAcceptedEvent(event); err != nil { return nil, err } + // Add links from Nexus callbacks to the event. + callbacksLinks := make([]*commonpb.Link, 0) + for _, cb := range acceptedRequest.GetCompletionCallbacks() { + if cb.GetNexus() != nil { + callbacksLinks = append(callbacksLinks, cb.GetLinks()...) + } + } + event.Links = callbacksLinks return event, nil } @@ -5505,6 +5622,27 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateAcceptedEvent( ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} ms.writeEventToCache(event) + // Add update completion callbacks. + // This is the primary path for registering callbacks — AcceptedRequest is + // present in the normal flow. The exception is the reset/reapply case where + // callbacks are registered at admission time instead (because the + // UpdateAccepted event has a nil AcceptedRequest after reset). 
In that case, + // addCallbacksToMap is a no-op since the requestID-indexed keys already + // exist from the admitted event. + if attrs.GetAcceptedRequest() != nil { + requestID := attrs.GetAcceptedRequest().GetRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + if err := ms.addUpdateCallbacks( + event, + updateID, + requestID, + attrs.GetAcceptedRequest().GetCompletionCallbacks(), + ); err != nil { + return err + } + } return nil } @@ -5551,13 +5689,60 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateCompletedEvent( sizeDelta = ui.Size() - sizeBefore ms.approximateSize += sizeDelta ms.updateInfoUpdated[updateID] = struct{}{} + if ms.ChasmEnabled() { + if err := ms.processUpdateCallbacks(updateID); err != nil { + return err + } + } ms.writeEventToCache(event) return nil } -func (ms *MutableStateImpl) RejectWorkflowExecutionUpdate(_ string, _ *updatepb.Rejection) error { - // TODO (alex-update): This method is noop because we don't currently write rejections to the history. - return nil +func (ms *MutableStateImpl) RejectWorkflowExecutionUpdate(updateID string, wfFailure *failurepb.Failure) error { + if !ms.chasmCallbacksEnabled() { + return nil + } + + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + + // Return early if there are no CHASM update callbacks for this update. + if _, ok := wf.Updates[updateID]; !ok { + return nil + } + + // Store the rejection failure and fire the callbacks. + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + return wf.RejectUpdate(ctx, updateID, wfFailure) +} + +// processUpdateCallbacks triggers "UpdateFinished" callbacks using the CHASM implementation. 
+func (ms *MutableStateImpl) processUpdateCallbacks(updateID string) error { + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + + // Return early if there are no chasm callbacks to process for this update ID. + if len(wf.Updates) == 0 { + return nil + } + if _, ok := wf.Updates[updateID]; !ok { + return nil + } + + // If there are callbacks to process, create a writable workflow component. + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + + return wf.ProcessUpdateCallbacks(ctx, updateID) } func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( @@ -5569,6 +5754,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( identity string, priority *commonpb.Priority, timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, ) (*historypb.HistoryEvent, error) { if err := ms.checkMutability(tag.WorkflowActionWorkflowOptionsUpdated); err != nil { return nil, err @@ -5582,6 +5768,7 @@ func (ms *MutableStateImpl) AddWorkflowExecutionOptionsUpdatedEvent( identity, priority, timeSkippingConfig, + workflowUpdateOptions, ) prevEffectiveVersioningBehavior := ms.GetEffectiveVersioningBehavior() prevEffectiveDeployment := ms.GetEffectiveDeployment() @@ -5634,6 +5821,33 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionOptionsUpdatedEvent(event *his return err } + // Add update callbacks + for _, updateOptions := range attributes.GetWorkflowUpdateOptions() { + updateID := updateOptions.GetUpdateId() + requestID := updateOptions.GetAttachedRequestId() + if requestID != "" { + ms.AttachRequestID(requestID, event.EventType, event.EventId) + } + if err := ms.addUpdateCallbacks( + event, + updateID, + requestID, + updateOptions.GetAttachedCompletionCallbacks(), + ); err != nil { + return err + } + // If the update is already completed, fire 
the callbacks immediately. + if ms.ChasmEnabled() { + if ui, ok := ms.executionInfo.UpdateInfos[updateID]; ok { + if _, isCompleted := ui.Value.(*persistencespb.UpdateInfo_Completion); isCompleted { + if err := ms.processUpdateCallbacks(updateID); err != nil { + return err + } + } + } + } + } + // Update priority. if attributes.GetPriority() != nil { if !proto.Equal(ms.executionInfo.Priority, attributes.GetPriority()) { @@ -6034,7 +6248,9 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionContinuedAsNewEvent( ms.executionInfo.CloseTime = continueAsNewEvent.GetEventTime() ms.ClearStickyTaskQueue() ms.writeEventToCache(continueAsNewEvent) - return nil + // Workflow-level callbacks are inherited by the new run, but update callbacks + // must fire now because the update was aborted on the old run. + return ms.processUpdateCloseCallbacks() } func (ms *MutableStateImpl) AddStartChildWorkflowExecutionInitiatedEvent( @@ -6851,6 +7067,35 @@ func (ms *MutableStateImpl) AddExternalPayloadCount(count int64) { ms.executionInfo.ExecutionStats.ExternalPayloadCount += count } +// processUpdateCloseCallbacks triggers only update-level callbacks, leaving workflow-level +// callbacks untouched. This is used when the workflow is continuing to a new run +// (ContinueAsNew, retry, cron): workflow-level callbacks are inherited by the new run, +// but update callbacks must fire now because the update was aborted on the old run. +// +// Note: unlike processCloseCallbacks, this does not need a WorkflowWasReset guard. +// Reset always terminates the old run (via terminateWorkflow), which goes through +// processCloseCallbacks — not through the retry/CAN paths that call this method. +func (ms *MutableStateImpl) processUpdateCloseCallbacks() error { + if !ms.ChasmEnabled() { + // Update callbacks are only supported in CHASM mode. 
+ return nil + } + + wf, _, err := ms.ChasmWorkflowComponentReadOnly(context.Background()) + if err != nil { + return err + } + if len(wf.Updates) == 0 { + return nil + } + + wf, ctx, err := ms.ChasmWorkflowComponent(context.Background()) + if err != nil { + return err + } + return wf.ProcessAllUpdateCloseCallbacks(ctx) +} + // processCloseCallbacks triggers "WorkflowClosed" callbacks, applying the state machine transition that schedules // callback tasks. func (ms *MutableStateImpl) processCloseCallbacks() error { @@ -6903,7 +7148,7 @@ func (ms *MutableStateImpl) processCloseCallbacksChasm() error { } // Return early if there are no chasm callbacks to process. - if len(wf.Callbacks) == 0 { + if len(wf.Callbacks) == 0 && len(wf.Updates) == 0 { return nil } @@ -6913,7 +7158,7 @@ func (ms *MutableStateImpl) processCloseCallbacksChasm() error { return err } - return callback.ScheduleStandbyCallbacks(ctx, wf.Callbacks) + return wf.ProcessCloseCallbacks(ctx) } func (ms *MutableStateImpl) AddTasks( diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index df62e74f7d2..8c65db2017b 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -1158,7 +1158,7 @@ func (s *mutableStateSuite) TestOverride_UnpinnedBase_SetPinnedAndUnsetWithEmpty s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) // set pinned override - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions2.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions2.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment2, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1173,7 +1173,7 @@ func (s *mutableStateSuite) TestOverride_UnpinnedBase_SetPinnedAndUnsetWithEmpty // 
unset pinned override with boolean id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1195,7 +1195,7 @@ func (s *mutableStateSuite) TestOverride_PinnedBase_SetUnpinnedAndUnsetWithEmpty s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) // set unpinned override - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(unpinnedOptions.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(unpinnedOptions.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1210,7 +1210,7 @@ func (s *mutableStateSuite) TestOverride_PinnedBase_SetUnpinnedAndUnsetWithEmpty // unset pinned override with empty id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment1, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1231,7 +1231,7 @@ func (s *mutableStateSuite) TestOverride_RedirectFails() { id := uuid.NewString() s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) 
s.verifyEffectiveDeployment(deployment3, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1258,7 +1258,7 @@ func (s *mutableStateSuite) TestOverride_BaseDeploymentUpdatedOnCompletion() { id := uuid.NewString() s.createMutableStateWithVersioningBehavior(baseBehavior, deployment1, tq) - event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil) + event, err := s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(pinnedOptions3.GetVersioningOverride(), false, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment3, overrideBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( @@ -1312,7 +1312,7 @@ func (s *mutableStateSuite) TestOverride_BaseDeploymentUpdatedOnCompletion() { // now we unset the override and check that the base deployment/behavior is in effect id = uuid.NewString() - event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil) + event, err = s.mutableState.AddWorkflowExecutionOptionsUpdatedEvent(nil, true, "", nil, nil, id, nil, nil, nil) s.NoError(err) s.verifyEffectiveDeployment(deployment2, baseBehavior) s.verifyWorkflowOptionsUpdatedEventAttr( diff --git a/service/history/workflow/update/abort_reason.go b/service/history/workflow/update/abort_reason.go index 2b3737125f9..a7594a8a35b 100644 --- a/service/history/workflow/update/abort_reason.go +++ b/service/history/workflow/update/abort_reason.go @@ -58,10 +58,10 @@ var reasonStateMatrix = map[reasonState]failureError{ // There can be different types of Update failures coming from worker and a client must handle them anyway. // It is easier and less error-prone for a client to handle only Update failures instead of both failures and // not obvious NotFound errors in case if the Workflow completes before the Update completes. 
- reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompleted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompletedAfterAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompleted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyCompletedAfterAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, // Completed Updates can't be aborted. reasonState{r: AbortReasonWorkflowCompleted, st: stateCompleted}: {f: nil, err: nil}, reasonState{r: AbortReasonWorkflowCompleted, st: stateProvisionallyAborted}: {f: nil, err: nil}, @@ -74,10 +74,10 @@ var reasonStateMatrix = map[reasonState]failureError{ reasonState{r: AbortReasonWorkflowContinuing, st: stateAdmitted}: {f: nil, err: consts.ErrWorkflowClosing}, reasonState{r: AbortReasonWorkflowContinuing, st: stateSent}: {f: nil, err: consts.ErrWorkflowClosing}, // Accepted Update can't be applied to the new run, and must be failed same way as if Workflow is completed. 
- reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompleted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, - reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompletedAfterAccepted}: {f: acceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompleted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, + reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyCompletedAfterAccepted}: {f: AcceptedUpdateCompletedWorkflowFailure, err: nil}, // Completed Updates can't be aborted. 
reasonState{r: AbortReasonWorkflowContinuing, st: stateCompleted}: {f: nil, err: nil}, reasonState{r: AbortReasonWorkflowContinuing, st: stateProvisionallyAborted}: {f: nil, err: nil}, @@ -121,6 +121,8 @@ func (r AbortReason) String() string { return "WorkflowCompleted" case AbortReasonWorkflowContinuing: return "WorkflowContinuing" + case AbortReasonWorkflowTaskFailed: + return "WorkflowTaskFailed" case lastAbortReason: return fmt.Sprintf("invalid reason %d", r) } diff --git a/service/history/workflow/update/errors_failures.go b/service/history/workflow/update/errors_failures.go index 6e14883018e..c20c57aaf41 100644 --- a/service/history/workflow/update/errors_failures.go +++ b/service/history/workflow/update/errors_failures.go @@ -24,7 +24,7 @@ var ( }}, } - acceptedUpdateCompletedWorkflowFailure = &failurepb.Failure{ + AcceptedUpdateCompletedWorkflowFailure = &failurepb.Failure{ Message: "Workflow Update failed because the Workflow completed before the Update completed.", Source: "Server", FailureInfo: &failurepb.Failure_ApplicationFailureInfo{ApplicationFailureInfo: &failurepb.ApplicationFailureInfo{ diff --git a/service/history/workflow/update/export_test.go b/service/history/workflow/update/export_test.go index f9a78e89d72..fc56d39c748 100644 --- a/service/history/workflow/update/export_test.go +++ b/service/history/workflow/update/export_test.go @@ -8,9 +8,10 @@ var ( // while we *could* write the unit test code to walk an Update through a // series of message deliveries to get to the right state, it's much faster // just to instantiate directly into the desired state. 
+ NewAdmitted = newAdmitted NewAccepted = newAccepted NewCompleted = newCompleted - AbortFailure = acceptedUpdateCompletedWorkflowFailure + AbortFailure = AcceptedUpdateCompletedWorkflowFailure ) // ObserveCompletion exports withOnComplete to unit tests diff --git a/service/history/workflow/update/store.go b/service/history/workflow/update/store.go index c5520714f62..b0fc93fa7c7 100644 --- a/service/history/workflow/update/store.go +++ b/service/history/workflow/update/store.go @@ -3,8 +3,11 @@ package update import ( "context" + commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common/effect" ) @@ -43,7 +46,34 @@ type ( resp *updatepb.Response, ) (*historypb.HistoryEvent, error) + // AddWorkflowExecutionOptionsUpdatedEvent writes a workflow execution + // options updated event. This is used to attach completion callbacks, + // request IDs, links, and per-update callback options to the workflow. + // The data may not be durable when this function returns. + AddWorkflowExecutionOptionsUpdatedEvent( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) + // CanAddEvent returns true if an event can be added to the EventStore. CanAddEvent() bool + + // RejectWorkflowExecutionUpdate notifies the store that an update was + // rejected by the worker's validator. 
The store uses this to fire any + // completion callbacks that were registered at admission time and to + // clean up the update's mutable-state entry. + RejectWorkflowExecutionUpdate(updateID string, rejectionFailure *failurepb.Failure) error + + // HasRequestID checks whether the given requestID has already been + // recorded for this workflow execution. Used by AttachCallbacks to deduplicate + // callback attachment when the same request is retried. + HasRequestID(requestID string) bool } ) diff --git a/service/history/workflow/update/store_mock_test.go b/service/history/workflow/update/store_mock_test.go index 2e5469af539..8fb08d5c69c 100644 --- a/service/history/workflow/update/store_mock_test.go +++ b/service/history/workflow/update/store_mock_test.go @@ -3,9 +3,12 @@ package update_test import ( "context" + commonpb "go.temporal.io/api/common/v1" + failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common/effect" "go.temporal.io/server/service/history/workflow/update" @@ -72,7 +75,37 @@ type mockEventStore struct { resp *updatepb.Response, ) (*historypb.HistoryEvent, error) - CanAddEventFunc func() bool + AddWorkflowExecutionOptionsUpdatedEventFunc func( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) + + CanAddEventFunc func() bool + HasRequestIDFunc func(requestID string) bool +} + +func (m mockEventStore) 
AddWorkflowExecutionOptionsUpdatedEvent( + versioningOverride *workflowpb.VersioningOverride, + unsetVersioningOverride bool, + attachRequestID string, + attachCompletionCallbacks []*commonpb.Callback, + links []*commonpb.Link, + identity string, + priority *commonpb.Priority, + timeSkippingConfig *workflowpb.TimeSkippingConfig, + workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, +) (*historypb.HistoryEvent, error) { + if m.AddWorkflowExecutionOptionsUpdatedEventFunc != nil { + return m.AddWorkflowExecutionOptionsUpdatedEventFunc(versioningOverride, unsetVersioningOverride, attachRequestID, attachCompletionCallbacks, links, identity, priority, timeSkippingConfig, workflowUpdateOptions) + } + return &historypb.HistoryEvent{}, nil } func (m mockEventStore) AddWorkflowExecutionUpdateAcceptedEvent( @@ -103,3 +136,14 @@ func (m mockEventStore) CanAddEvent() bool { } return true } + +func (m mockEventStore) RejectWorkflowExecutionUpdate(_ string, _ *failurepb.Failure) error { + return nil +} + +func (m mockEventStore) HasRequestID(requestID string) bool { + if m.HasRequestIDFunc != nil { + return m.HasRequestIDFunc(requestID) + } + return false +} diff --git a/service/history/workflow/update/update.go b/service/history/workflow/update/update.go index 3d552f5ab46..f84b8e112ae 100644 --- a/service/history/workflow/update/update.go +++ b/service/history/workflow/update/update.go @@ -5,8 +5,10 @@ import ( "errors" "time" + commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" + historypb "go.temporal.io/api/history/v1" protocolpb "go.temporal.io/api/protocol/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" @@ -18,6 +20,14 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) +// pendingCallback holds a AttachCallbacks request that arrived while the Update +// was in stateSent. 
These are flushed to the event store on acceptance +// in onAcceptanceMsg. In-memory only; lost on registry clear/lock release. +type pendingCallback struct { + requestID string + completionCallbacks []*commonpb.Callback +} + type ( // Update docs are at /docs/architecture/workflow-update.md. Update struct { @@ -42,6 +52,10 @@ type ( checkLimits func(*updatepb.Request) error instrumentation *instrumentation admittedTime time.Time + // pendingCallbacks buffers AttachCallbacks requests that arrive while + // the Update is in stateSent. Flushed to the event store in onAcceptanceMsg. + // Cleared on rejection, abort, or rollback. In-memory only; lost on lock release. + pendingCallbacks []pendingCallback // These fields might be accessed while not holding the workflow lock. accepted future.Future[*failurepb.Failure] @@ -251,6 +265,11 @@ func (u *Update) abort( return } + // Clear any buffered AttachCallbacks callbacks defensively. Abort is called during + // cleanup (e.g., registry clear, workflow close) where a hard error would be + // worse than silently clearing. + u.pendingCallbacks = nil + u.instrumentation.countAborted(u.id, reason) prevState := u.setState(stateProvisionallyAborted) @@ -351,6 +370,134 @@ func (u *Update) Admit( return nil } +// AttachCallbacks attaches completion callbacks from a second caller to an update +// that has already progressed past admission. If the update is accepted, it writes +// a WorkflowExecutionOptionsUpdatedEvent with the caller's callbacks and request ID. +// If the update is in stateSent (sent to worker, not yet accepted), callbacks are +// buffered in memory and flushed when the update is accepted. If the update is +// already completed, returns true without attaching callbacks since the caller +// receives the result synchronously. 
+// +// Returns (true, nil) if the caller should proceed (callbacks attached or update already completed), +// (false, nil) if the update is in an early state where attachment does not apply, +// or (false, error) if the update is in a transient state where the caller should retry. +func (u *Update) AttachCallbacks( + req *updatepb.Request, + eventStore EventStore, +) (isCallbackAttached bool, err error) { + // Only attach callbacks if the request actually has something to attach. + // This preserves existing behavior for callers that don't set callbacks. + if len(req.GetCompletionCallbacks()) == 0 { + return false, nil + } + if req.GetRequestId() == "" { + return false, serviceerror.NewInvalidArgumentf("invalid %T: request_id is required when completion_callbacks are set", req) + } + + switch u.state { + case stateProvisionallyAccepted, + stateProvisionallyCompleted, + stateProvisionallyCompletedAfterAccepted, + stateProvisionallyAborted: + // Provisional states are transient — they exist only between an event write + // and its OnAfterCommit callback within a single workflow task completion + // transaction. In practice, AttachCallbacks should never see these states because + // a new UpdateWorkflowExecution API call must acquire the workflow lock, + // which means the previous transaction has already committed and provisional + // states have resolved. This guard is kept defensively in case future code + // paths call AttachCallbacks within the same transaction. + return false, serviceerror.NewResourceExhausted(enumspb.RESOURCE_EXHAUSTED_CAUSE_BUSY_WORKFLOW, "workflow update is not yet accepted, please retry") + + case stateSent: + // stateSent: the update has been sent to the worker but not yet accepted. + // Buffer the callbacks in memory; they will be flushed to the event store + // when the update is accepted in onAcceptanceMsg. 
+ // Returning (true, nil) is safe because: + // - The caller already holds the workflow lock + // - A workflow task already exists (the update was sent via one) + // - No new workflow task is needed — just buffer until acceptance + // - The event will be written atomically with acceptance + // If the Update struct is lost (registry cleared), the abort mechanism fires + // registryClearedErr on the caller's future, prompting an immediate retry. + if req.GetRequestId() != "" { + for _, pc := range u.pendingCallbacks { + if pc.requestID == req.GetRequestId() { + return true, nil + } + } + } + u.pendingCallbacks = append(u.pendingCallbacks, pendingCallback{ + requestID: req.GetRequestId(), + completionCallbacks: req.GetCompletionCallbacks(), + }) + return true, nil + + case stateAccepted: + // Persist immediately as its own event, u.persistCallback(...) will dedup against requestIDs + // that are already recorded on the workflow. + return u.persistCallback(eventStore, req.GetRequestId(), req.GetCompletionCallbacks()) + + case stateCompleted: + // If the update is already completed, the result is returned synchronously + // in the UpdateWorkflowExecution response — no callback needed. + return true, nil + + default: + // All other states are too early or not applicable for callback attachment. + return false, nil + } +} + +// persistPendingCallbacks writes one WorkflowExecutionOptionsUpdatedEvent per +// buffered AttachCallbacks callback, skipping any whose requestID is already persisted. +// Called from onAcceptanceMsg after the acceptance event has been written. +// +// NOTE: Each pending callback requires its own event because the API proto's +// WorkflowUpdateOptionsUpdate carries a singular AttachedRequestId, and the +// WorkflowUpdateOptions map is keyed by update ID (all entries here share u.id, +// so only one map entry is possible per event). 
Each requestID must be durably +// recorded in the event so that ApplyWorkflowExecutionOptionsUpdatedEvent can +// call AttachRequestID during replay for correct deduplication. +// +// In practice, the number of buffered callbacks is very small (1-2): it requires +// multiple concurrent callers to call AttachCallbacks while the update is in +// stateSent. The per-update callback limit (MaxCallbacksPerUpdateID) bounds the +// worst case. +func (u *Update) persistPendingCallbacks(eventStore EventStore) error { + for _, pc := range u.pendingCallbacks { + if _, err := u.persistCallback(eventStore, pc.requestID, pc.completionCallbacks); err != nil { + return err + } + } + u.pendingCallbacks = nil + return nil +} + +// persistCallback writes a single WorkflowExecutionOptionsUpdatedEvent to attach the +// given requestID and completion callbacks, and deduplicate on requestID if already attached. +func (u *Update) persistCallback( + eventStore EventStore, + requestID string, + completionCallbacks []*commonpb.Callback, +) (isCallbackAttached bool, err error) { + // Callback is already attached, no need to update. + if requestID != "" && eventStore.HasRequestID(requestID) { + return true, nil + } + _, err = eventStore.AddWorkflowExecutionOptionsUpdatedEvent( + nil, false, "", nil, nil, "", nil, nil, + []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate{{ + UpdateId: u.id, + AttachedRequestId: requestID, + AttachedCompletionCallbacks: completionCallbacks, + }}, + ) + if err != nil { + return false, err + } + return true, nil +} + // OnProtocolMessage delivers a message to the Update state machine. The Body field of // *protocolpb.Message parameter is expected to be one of *updatepb.Response, // *updatepb.Rejection, *updatepb.Acceptance. 
Writes to the EventStore @@ -503,6 +650,12 @@ func (u *Update) onAcceptanceMsg( } u.acceptedEventID = event.EventId + // Persist any callbacks that were buffered by AttachCallbacks while in stateSent or stateAdmitted. + // See persistPendingCallbacks for why this writes one event per pending entry. + if err := u.persistPendingCallbacks(eventStore); err != nil { + return err + } + prevState := u.setState(stateProvisionallyAccepted) eventStore.OnAfterCommit(func(context.Context) { if !u.state.Matches(stateSet(stateProvisionallyAccepted | stateProvisionallyCompleted | stateProvisionallyAborted)) { @@ -545,6 +698,7 @@ func (u *Update) onAcceptanceMsg( return } u.acceptedEventID = common.EmptyEventID + u.pendingCallbacks = nil u.setState(prevState) }) return nil @@ -556,7 +710,7 @@ func (u *Update) onAcceptanceMsg( // are both completed with the failurepb.Failure value from the updatepb.Rejection input message. func (u *Update) onRejectionMsg( rej *updatepb.Rejection, - effects effect.Controller, + eventStore EventStore, ) error { // See comment in onAcceptanceMsg about stateAdmitted. if err := u.checkStateSet(rej, stateSet(stateSent|stateAdmitted)); err != nil { @@ -566,7 +720,15 @@ func (u *Update) onRejectionMsg( return err } u.instrumentation.countRejectionMsg() - return u.reject(rej.Failure, effects) + // Notify the event store so it can fire any completion callbacks that were + // registered at admission time (e.g., after reset/reapply) and clean up + // the update's mutable-state entry. + if err := eventStore.RejectWorkflowExecutionUpdate(u.id, rej.Failure); err != nil { + return err + } + // Clear any buffered AttachCallbacks callbacks — they cannot be delivered for a rejected update. + u.pendingCallbacks = nil + return u.reject(rej.Failure, eventStore) } // rejects an Update with provided failure. 
@@ -574,6 +736,14 @@ func (u *Update) reject( rejectionFailure *failurepb.Failure, effects effect.Controller, ) error { + if len(u.pendingCallbacks) > 0 { + // Invariant: buffer must be cleared before reject. If we reach here, + // there is a bug in the caller (onRejectionMsg should clear the buffer). + return serviceerror.NewInternalf( + "update %s: reject called with %d pending AttachCallbacks callbacks", + u.id, len(u.pendingCallbacks), + ) + } prevState := u.setState(stateProvisionallyCompleted) effects.OnAfterCommit(func(context.Context) { if u.state != stateProvisionallyCompleted { @@ -675,3 +845,7 @@ func (u *Update) GetSize() int { } return size } + +func (u *Update) AcceptedEventID() int64 { + return u.acceptedEventID +} diff --git a/service/history/workflow/update/update_test.go b/service/history/workflow/update/update_test.go index 89c441f1a2b..1a05b98aeca 100644 --- a/service/history/workflow/update/update_test.go +++ b/service/history/workflow/update/update_test.go @@ -6,12 +6,14 @@ import ( "time" "github.com/stretchr/testify/require" + commonpb "go.temporal.io/api/common/v1" . 
"go.temporal.io/api/enums/v1" failurepb "go.temporal.io/api/failure/v1" historypb "go.temporal.io/api/history/v1" protocolpb "go.temporal.io/api/protocol/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" + workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/server/common/effect" "go.temporal.io/server/common/future" "go.temporal.io/server/common/payloads" @@ -1188,3 +1190,410 @@ func assertAborted(t *testing.T, upd *update.Update, expectedErr error) { } } } + +func TestAttachCallbacks(t *testing.T) { + tv := testvars.New(t) + testCallbacks := []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: "http://localhost:1234/callback", + }, + }, + }, + } + testRequest := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: tv.RequestID(), + CompletionCallbacks: testCallbacks, + } + + capturingStore := func(effects *effect.Buffer) (mockEventStore, *[]*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate) { + var captured []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, workflowUpdateOptions []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + captured = workflowUpdateOptions + return &historypb.HistoryEvent{}, nil + }, + } + return store, &captured + } + + trackingStore := func(effects *effect.Buffer) (mockEventStore, *bool) { + eventCreated := false + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ 
[]*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + eventCreated = true + return &historypb.HistoryEvent{}, nil + }, + } + return store, &eventCreated + } + + countingOptionsStore := func(effects *effect.Buffer) (mockEventStore, *int) { + count := 0 + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + count++ + return &historypb.HistoryEvent{}, nil + }, + } + return store, &count + } + + t.Run("on stateAccepted fires callbacks and returns true", func(t *testing.T) { + effects := &effect.Buffer{} + store, capturedOptions := capturingStore(effects) + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Len(t, *capturedOptions, 1) + require.Equal(t, tv.UpdateID(), (*capturedOptions)[0].UpdateId) + require.Equal(t, tv.RequestID(), (*capturedOptions)[0].AttachedRequestId) + require.Equal(t, testCallbacks, (*capturedOptions)[0].AttachedCompletionCallbacks) + }) + + t.Run("on stateCompleted returns true without attaching callbacks", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewCompleted(tv.UpdateID(), future.NewReadyFuture[*updatepb.Outcome](successOutcome, nil)) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.False(t, *eventCreated, "should not attach callbacks when update is 
already completed") + }) + + t.Run("on stateCreated returns false without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.New(tv.UpdateID()) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.False(t, fired) + require.False(t, *eventCreated) + }) + + t.Run("on stateAdmitted returns false without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewAdmitted(tv.UpdateID(), nil) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.False(t, fired) + require.False(t, *eventCreated) + }) + + t.Run("on stateSent buffers callbacks and returns true", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + msg := send(t, upd, skipAlreadySent) + require.NotNil(t, msg) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Accept the update — this should flush the buffered callbacks. + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, "should flush one buffered callback on acceptance") + }) + + t.Run("on stateSent dedup by requestID buffers only once", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + // Call AttachCallbacks twice with the same requestID. 
+ fired1, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired1) + fired2, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, "duplicate requestID should be deduped, only one event written") + }) + + t.Run("on stateSent multiple different requestIDs", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + req1 := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: "request-1", + CompletionCallbacks: testCallbacks, + } + req2 := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + RequestId: "request-2", + CompletionCallbacks: testCallbacks, + } + fired1, err := upd.AttachCallbacks(req1, store) + require.NoError(t, err) + require.True(t, fired1) + fired2, err := upd.AttachCallbacks(req2, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 2, *optionsEventCount, "two different requestIDs should produce two events") + }) + + t.Run("on stateSent flush skips already-persisted requestID", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return requestID == tv.RequestID() + } + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + 
require.True(t, fired) + + require.NoError(t, accept(t, store, upd)) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, "already-persisted requestID should be skipped during flush") + }) + + t.Run("on stateSent flush error fails acceptance", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + return nil, serviceerror.NewInternal("flush error") + }, + } + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + err = accept(t, store, upd) + require.Error(t, err) + require.ErrorContains(t, err, "flush error") + }) + + t.Run("provisional states still return ResourceExhausted", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{Controller: effects} + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + // Accept but do NOT apply effects — update is in stateProvisionallyAccepted. 
+ require.NoError(t, accept(t, store, upd)) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.False(t, fired) + require.Error(t, err) + var resourceExhaustedErr *serviceerror.ResourceExhausted + require.ErrorAs(t, err, &resourceExhaustedErr) + }) + + t.Run("on stateSent rejection clears buffer", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + err = reject(t, store, upd) + require.NoError(t, err) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, "rejected update should not flush buffered callbacks") + }) + + t.Run("buffered callbacks lost when Update struct is recreated", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Simulate Update struct being lost — create a new one from mutable state. 
+ upd2 := update.NewAdmitted(tv.UpdateID(), nil) + require.NoError(t, accept(t, store, upd2)) + effects.Apply(context.Background()) + + require.Equal(t, 0, *optionsEventCount, + "callbacks buffered on the lost Update struct should NOT be flushed on the new struct's acceptance") + }) + + t.Run("same requestID can be re-buffered on new Update struct after loss", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + upd := update.New(tv.UpdateID()) + mustAdmit(t, store, upd) + effects.Apply(context.Background()) + _ = send(t, upd, skipAlreadySent) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + + // Simulate loss — new struct from mutable state. + upd2 := update.NewAdmitted(tv.UpdateID(), nil) + _ = send(t, upd2, skipAlreadySent) + + // Same requestID can buffer again on new struct. + fired2, err := upd2.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired2) + + require.NoError(t, accept(t, store, upd2)) + effects.Apply(context.Background()) + + require.Equal(t, 1, *optionsEventCount, + "re-buffered callbacks on new struct should be flushed on acceptance") + }) + + t.Run("re-buffered requestID deduped against persisted state after loss", func(t *testing.T) { + effects := &effect.Buffer{} + store, optionsEventCount := countingOptionsStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return requestID == tv.RequestID() + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Equal(t, 0, *optionsEventCount, + "already-persisted requestID should not write another event") + }) + + t.Run("with EventStore error returns error", func(t *testing.T) { + effects := &effect.Buffer{} + store := mockEventStore{ + Controller: effects, + AddWorkflowExecutionOptionsUpdatedEventFunc: 
func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + return nil, serviceerror.NewInternal("store error") + }, + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.False(t, fired) + require.Error(t, err) + require.ErrorContains(t, err, "store error") + }) + + t.Run("skips event when request has no callbacks and no request ID", func(t *testing.T) { + effects := &effect.Buffer{} + store, eventCreated := trackingStore(effects) + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + emptyRequest := &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: tv.UpdateID()}, + Input: &updatepb.Input{Name: "not_empty"}, + } + + fired, err := upd.AttachCallbacks(emptyRequest, store) + require.NoError(t, err) + require.False(t, fired, "should return false when no callbacks to attach — preserves existing caller behavior") + require.False(t, *eventCreated, "should not create event when no callbacks and no request ID") + }) + + t.Run("dedup by requestID on stateAccepted returns true without creating event", func(t *testing.T) { + effects := &effect.Buffer{} + eventCreated := false + store := mockEventStore{ + Controller: effects, + HasRequestIDFunc: func(requestID string) bool { + return requestID == tv.RequestID() + }, + AddWorkflowExecutionOptionsUpdatedEventFunc: func( + _ *workflowpb.VersioningOverride, _ bool, _ string, _ []*commonpb.Callback, _ []*commonpb.Link, _ string, _ *commonpb.Priority, + _ *workflowpb.TimeSkippingConfig, _ []*historypb.WorkflowExecutionOptionsUpdatedEventAttributes_WorkflowUpdateOptionsUpdate, + ) (*historypb.HistoryEvent, error) { + eventCreated = true + return &historypb.HistoryEvent{}, nil + }, + } + upd := 
update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired, "should return true so caller can wait on existing update") + require.False(t, eventCreated, "should not create event for duplicate requestID") + }) + + t.Run("different requestID on stateAccepted creates event normally", func(t *testing.T) { + effects := &effect.Buffer{} + store, capturedOptions := capturingStore(effects) + store.HasRequestIDFunc = func(requestID string) bool { + return false // different requestID, not seen before + } + upd := update.NewAccepted(tv.UpdateID(), testAcceptedEventID) + + fired, err := upd.AttachCallbacks(testRequest, store) + require.NoError(t, err) + require.True(t, fired) + require.Len(t, *capturedOptions, 1) + require.Equal(t, tv.UpdateID(), (*capturedOptions)[0].UpdateId) + require.Equal(t, tv.RequestID(), (*capturedOptions)[0].AttachedRequestId) + }) +} diff --git a/service/history/workflow/update/validation.go b/service/history/workflow/update/validation.go index 0308e00e318..a59d4953b1d 100644 --- a/service/history/workflow/update/validation.go +++ b/service/history/workflow/update/validation.go @@ -44,6 +44,15 @@ func validateRequestMsg(updateID string, msg *updatepb.Request) error { return validateRequestMsgPrefix(updateID, "", msg) } +func callbacksRequireRequestID(msg *updatepb.Request) func() error { + return func() error { + if len(msg.GetCompletionCallbacks()) > 0 && msg.GetRequestId() == "" { + return serviceerror.NewInvalidArgumentf("invalid %T: request_id is required when completion_callbacks are set", msg) + } + return nil + } +} + func validateRequestMsgPrefix( updateID string, prefix string, @@ -56,6 +65,7 @@ func validateRequestMsgPrefix( eq(msg.GetMeta().GetUpdateId(), prefix+"meta.update_id", updateID, updateID, msg), notZero(msg.GetInput(), prefix+"input", msg), notZero(msg.GetInput().GetName(), prefix+"input.name", msg), + 
callbacksRequireRequestID(msg), ) } diff --git a/tests/nexus_workflow_update_test.go b/tests/nexus_workflow_update_test.go new file mode 100644 index 00000000000..6b95c4132c0 --- /dev/null +++ b/tests/nexus_workflow_update_test.go @@ -0,0 +1,1359 @@ +package tests + +import ( + "context" + "encoding/json" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/google/uuid" + "github.com/nexus-rpc/sdk-go/nexus" + "github.com/stretchr/testify/require" + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + updatepb "go.temporal.io/api/update/v1" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/client" + "go.temporal.io/sdk/temporal" + "go.temporal.io/sdk/worker" + "go.temporal.io/sdk/workflow" + "go.temporal.io/server/common" + "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/nexus/nexustest" + "go.temporal.io/server/common/testing/await" + "go.temporal.io/server/common/testing/parallelsuite" + "go.temporal.io/server/tests/testcore" +) + +type NexusWorkflowUpdateTestSuite struct { + parallelsuite.Suite[*NexusWorkflowUpdateTestSuite] +} + +func TestNexusWorkflowUpdateTestSuite(t *testing.T) { + parallelsuite.Run(t, &NexusWorkflowUpdateTestSuite{}) +} + +// updateNexusTestConfig holds configuration for workflow update + nexus integration tests. +type updateNexusTestConfig struct { + taskQueue string + childWfID string + updateID string +} + +// newUpdateNexusTestConfig creates a config with randomized names to avoid collisions. +func newUpdateNexusTestConfig(t *testing.T) updateNexusTestConfig { + return updateNexusTestConfig{ + taskQueue: testcore.RandomizeStr(t.Name()), + childWfID: testcore.RandomizeStr("child-workflow-id"), + updateID: "update-id", + } +} + +// makeUpdateWithCallbackHandler creates a nexus handler that sends a workflow update with +// completion callbacks to the specified child workflow. onStart is an optional callback +// invoked at the start of each operation (e.g. 
for counting invocations). +// If the update is already completed (e.g., the workflow has finished), the handler returns +// the result synchronously instead of starting an async operation with callbacks. +func makeUpdateWithCallbackHandler( + env *NexusTestEnv, + t *testing.T, + cfg updateNexusTestConfig, + onStart func(), +) nexustest.Handler { + return nexustest.Handler{ + OnStartOperation: func( + ctx context.Context, + service, operation string, + input *nexus.LazyValue, + options nexus.StartOperationOptions, + ) (nexus.HandlerStartOperationResult[any], error) { + if onStart != nil { + onStart() + } + resp, err := env.FrontendClient().UpdateWorkflowExecution( + ctx, + &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + }, + WaitPolicy: &updatepb.WaitPolicy{ + LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + }, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{ + UpdateId: cfg.updateID, + }, + Input: &updatepb.Input{ + Name: "update", + Args: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{testcore.MustToPayload(t, "test")}, + }, + }, + RequestId: uuid.NewString(), + CompletionCallbacks: []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: options.CallbackURL, + Header: options.CallbackHeader, + }, + }, + }, + }, + }, + }, + ) + if err != nil { + return nil, nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "update call failed: %v", err) + } + // Verify the response contains a link. + link := resp.GetLink() + require.NotNil(t, link, "update response should contain a link") + if workflowEvent := link.GetWorkflowEvent(); workflowEvent != nil { + // Accepted/completed update: link points to the accepted event. 
+ require.Equal(t, cfg.childWfID, workflowEvent.GetWorkflowId()) + require.Equal(t, enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED, workflowEvent.GetRequestIdRef().GetEventType()) + } else if wfLink := link.GetWorkflow(); wfLink != nil { + // Rejected update: link points to the workflow with a reason. + require.Equal(t, cfg.childWfID, wfLink.GetWorkflowId()) + require.Equal(t, "Update rejected", wfLink.GetReason()) + } else { + require.Fail(t, "link should be a workflow event or workflow link") + } + // If the update is already completed, return the result synchronously. + if outcome := resp.GetOutcome(); outcome != nil { + if failure := outcome.GetFailure(); failure != nil { + return nil, &nexus.OperationError{ + State: nexus.OperationStateFailed, + Message: failure.GetMessage(), + } + } + if success := outcome.GetSuccess(); success != nil && len(success.GetPayloads()) > 0 { + var result string + if jsonErr := json.Unmarshal(success.GetPayloads()[0].GetData(), &result); jsonErr == nil { + return &nexus.HandlerStartOperationResultSync[any]{Value: result}, nil + } + } + } + return &nexus.HandlerStartOperationResultAsync{ + OperationToken: "test", + }, nil + }, + } +} + +func enableUpdateCallbacksOpts() []testcore.TestOption { + return []testcore.TestOption{ + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMCallbacks, true), + testcore.WithDynamicConfig(dynamicconfig.EnableWorkflowUpdateCallbacks, true), + } +} + +// newUpdateChildWorkflow returns a child workflow function that registers an "update" +// handler and waits for a "stop" signal. If blockOnSignal is true, the update handler +// blocks on a "complete-update" signal before returning, which is useful for ensuring +// the update goes through the async path. 
+func newUpdateChildWorkflow(blockOnSignal bool) func(workflow.Context, string) (string, error) {
+	return func(wfCtx workflow.Context, initial string) (string, error) {
+		regErr := workflow.SetUpdateHandler(wfCtx, "update", func(updCtx workflow.Context, arg string) (string, error) {
+			if blockOnSignal {
+				workflow.GetSignalChannel(updCtx, "complete-update").Receive(updCtx, nil)
+			}
+			return "updated: " + arg, nil
+		})
+		if regErr != nil {
+			return "", regErr
+		}
+		workflow.GetSignalChannel(wfCtx, "stop").Receive(wfCtx, nil)
+		return "done: " + initial, nil
+	}
+}
+
+// getFirstWFTaskCompleteEventID reads the history of the given run front to back and
+// returns the ID of the first WorkflowTaskCompleted event; it fails the test if none exists.
+func (s *NexusWorkflowUpdateTestSuite) getFirstWFTaskCompleteEventID(ctx context.Context, env *NexusTestEnv, workflowID, runID string) int64 {
+	iter := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT)
+	for iter.HasNext() {
+		ev, nextErr := iter.Next()
+		s.NoError(nextErr)
+		if ev.EventType != enumspb.EVENT_TYPE_WORKFLOW_TASK_COMPLETED {
+			continue
+		}
+		return ev.EventId
+	}
+	s.FailNow("couldn't find a WorkflowTaskCompleted event", "workflowID=%s runID=%s", workflowID, runID)
+	return 0
+}
+
+// newSimpleCallerWF returns a caller workflow that executes a nexus operation targeting
+// childWfID and returns the string result.
+func (s *NexusWorkflowUpdateTestSuite) newSimpleCallerWF(endpointName, childWfID string) func(workflow.Context) (string, error) {
+	return func(wfCtx workflow.Context) (string, error) {
+		var out string
+		opFuture := workflow.NewNexusClient(endpointName, "test").
+			ExecuteOperation(wfCtx, "operation", childWfID, workflow.NexusOperationOptions{})
+		getErr := opFuture.Get(wfCtx, &out)
+		return out, getErr
+	}
+}
+
+// awaitUpdateAccepted re-reads the run's history until it contains a WorkflowExecutionUpdateAccepted
+// event, failing the test if none is seen within the 10-second window given to await.Require.
+func (s *NexusWorkflowUpdateTestSuite) awaitUpdateAccepted(ctx context.Context, env *NexusTestEnv, workflowID, runID string) {
+	await.Require(env.Context(), s.T(), func(t *await.T) {
+		iter := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT)
+		for iter.HasNext() {
+			ev, nextErr := iter.Next()
+			require.NoError(t, nextErr)
+			if ev.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED {
+				return
+			}
+		}
+		require.Fail(t, "update not yet accepted")
+	}, 10*time.Second, 500*time.Millisecond)
+}
+
+// startWorker builds a worker for taskQueue, registers every workflow in wfs, starts it,
+// and registers the worker's Stop as a test cleanup.
+func (s *NexusWorkflowUpdateTestSuite) startWorker(env *NexusTestEnv, taskQueue string, wfs ...any) {
+	wrk := worker.New(env.SdkClient(), taskQueue, worker.Options{})
+	for i := range wfs {
+		wrk.RegisterWorkflow(wfs[i])
+	}
+	s.NoError(wrk.Start())
+	s.T().Cleanup(wrk.Stop)
+}
+
+// requireNexusOperationError asserts that err is a WorkflowExecutionError with an inner NexusOperationError,
+// and returns the inner NexusOperationError.
+func (s *NexusWorkflowUpdateTestSuite) requireNexusOperationError(err error) *temporal.NexusOperationError { + var wee *temporal.WorkflowExecutionError + s.ErrorAs(err, &wee) + var noe *temporal.NexusOperationError + s.ErrorAs(wee, &noe) + return noe +} + +// assertAcceptedUpdateCompletedWorkflowError asserts the full error chain: +// WorkflowExecutionError -> NexusOperationError -> ApplicationError{Type: "AcceptedUpdateCompletedWorkflow"}. +// Used to assert the correct error for completion callbacks that failed because the update didn't complete +// before the workflow finishes. +func (s *NexusWorkflowUpdateTestSuite) assertAcceptedUpdateCompletedWorkflowError(err error) { + noe := s.requireNexusOperationError(err) + var appErr *temporal.ApplicationError + s.ErrorAs(noe, &appErr) + s.Equal("AcceptedUpdateCompletedWorkflow", appErr.Type()) +} + +// assertReappliedUpdateInNewRun verifies that updateID appears as an UpdateAdmitted event +// in runID's history with completion callbacks preserved. +func (s *NexusWorkflowUpdateTestSuite) assertReappliedUpdateInNewRun(ctx context.Context, env *NexusTestEnv, workflowID, runID, updateID string) { + hist := env.SdkClient().GetWorkflowHistory(ctx, workflowID, runID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + found := false + for hist.HasNext() { + event, err := hist.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ADMITTED { + attrs := event.GetWorkflowExecutionUpdateAdmittedEventAttributes() + if attrs.GetRequest().GetMeta().GetUpdateId() == updateID { + found = true + s.NotEmpty(attrs.GetRequest().GetCompletionCallbacks(), "reapplied update should preserve completion callbacks") + } + } + } + s.True(found, "expected reapplied UpdateAdmitted event in new run") +} + +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateAsyncNexusOperation() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(false) + + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + fut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result string + err := fut.Get(ctx, &result) + return result, err + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test", result) + + // Verify the child workflow's history contains the update accepted event with callbacks. 
+ childHistory := env.SdkClient().GetWorkflowHistory(ctx, cfg.childWfID, "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + foundUpdateAccepted := false + for childHistory.HasNext() { + event, err := childHistory.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED { + foundUpdateAccepted = true + attrs := event.GetWorkflowExecutionUpdateAcceptedEventAttributes() + s.NotNil(attrs) + s.Equal(cfg.updateID, attrs.GetAcceptedRequest().GetMeta().GetUpdateId()) + s.NotEmpty(attrs.GetAcceptedRequest().GetCompletionCallbacks()) + break + } + } + s.True(foundUpdateAccepted, "expected to find WorkflowExecutionUpdateAccepted event in child workflow history") +} + +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateAsyncAttachedNexusOperation() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(true) + + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + fut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var exec workflow.NexusOperationExecution + if err := fut.GetNexusOperationExecution().Get(ctx, &exec); err != nil { + return "", err + } + // Send a second update to verify attaching after starting works. 
+ afut := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var aexec workflow.NexusOperationExecution + if err := afut.GetNexusOperationExecution().Get(ctx, &aexec); err != nil { + return "", err + } + // Signal the child to complete the update now that both operations are attached. + if err := workflow.SignalExternalWorkflow(ctx, childWE.ID, "", "complete-update", nil).Get(ctx, nil); err != nil { + return "", err + } + var aresult string + if err := afut.Get(ctx, &aresult); err != nil { + return "", err + } + + var result string + err := fut.Get(ctx, &result) + return result, err + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 10 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test", result) +} + +// TestWorkflowUpdateNoCallbackAttachedOnAlreadyCompletedUpdate verifies that when a second caller +// sends an update request with the same update ID after the update has already completed, +// the second request returns the result synchronously without attaching a new callback. +// The child workflow should only have one update callback (from the first request). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateNoCallbackAttachedOnAlreadyCompletedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "already-completed-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + childWF := newUpdateChildWorkflow(false) + + // Caller workflow sends two nexus operations targeting the same update.
+ // The first one triggers the update, the second one arrives after it completes + // and should still get the result via AttachCallbacks. + callerWF := func(ctx workflow.Context) (string, error) { + cwf := workflow.ExecuteChildWorkflow( + workflow.WithWorkflowID(ctx, cfg.childWfID), + childWF, + "initial input", + ) + var childWE workflow.Execution + if err := cwf.GetChildWorkflowExecution().Get(ctx, &childWE); err != nil { + return "", err + } + nexusClient := workflow.NewNexusClient(endpointName, "test") + + // First nexus operation: triggers the update. + fut1 := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result1 string + if err := fut1.Get(ctx, &result1); err != nil { + return "", err + } + + // Second nexus operation: targets the same already-completed update. + fut2 := nexusClient.ExecuteOperation(ctx, "operation", childWE.ID, workflow.NexusOperationOptions{}) + var result2 string + if err := fut2.Get(ctx, &result2); err != nil { + return "", err + } + + return result1 + " | " + result2, nil + } + + s.startWorker(env, cfg.taskQueue, callerWF, childWF) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result string + s.NoError(run.Get(ctx, &result)) + s.Equal("updated: test | updated: test", result) + s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started") + + // Verify the child workflow has exactly one update callback (from the first request). + // The second request returns synchronously because the update is already completed, + // so no additional callback is attached. 
+ descResp, err := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + Execution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + }, + }) + s.NoError(err) + updateCallbackCount := 0 + for _, cb := range descResp.GetCallbacks() { + if cb.GetTrigger().GetUpdateWorkflowExecutionCompleted() != nil { + updateCallbackCount++ + } + } + s.Equal(1, updateCallbackCount, "expected exactly one update callback on the child workflow") + + // Verify the child workflow has the correct request ID infos. + // Each nexus operation generates a unique request ID. If the second operation + // (targeting the already-completed update) had attached its request ID, we would + // see 3 entries instead of 2, or an OPTIONS_UPDATED entry. The count of 2 with + // only STARTED and UPDATE_ACCEPTED types proves the second request ID was not attached. + sdkDescResp, err := env.SdkClient().DescribeWorkflowExecution(ctx, cfg.childWfID, "") + s.NoError(err) + requestIDInfos := sdkDescResp.GetWorkflowExtendedInfo().GetRequestIdInfos() + s.NotNil(requestIDInfos) + s.Len(requestIDInfos, 2, "expected exactly 2 request ID infos: second operation should not attach") + cntStarted := 0 + cntAccepted := 0 + for _, info := range requestIDInfos { + s.False(info.Buffered) + s.GreaterOrEqual(info.EventId, common.FirstEventID) + s.NotEqual( + enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_OPTIONS_UPDATED, + info.EventType, + "second operation targeting completed update should not create an OPTIONS_UPDATED request ID", + ) + switch info.EventType { + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED: + cntStarted++ + case enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED: + cntAccepted++ + default: + s.Failf("unexpected event type in request ID info", "got %v", info.EventType) + } + } + s.Equal(1, cntStarted, "expected one STARTED request ID info") + s.Equal(1, cntAccepted, "expected one UPDATE_ACCEPTED request ID 
info from first update acceptance") +} + +// TestDescribeWorkflowShowsUpdateCallbacks verifies that DescribeWorkflowExecution +// returns update-level callbacks after an update with callbacks is sent. +func (s *NexusWorkflowUpdateTestSuite) TestDescribeWorkflowShowsUpdateCallbacks() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + taskQueue := testcore.RandomizeStr(s.T().Name()) + updateID := "describe-callback-update-id" + callbackURL := "http://localhost:9999/callback" + + wf := func(ctx workflow.Context) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + // Wait for a signal so update stays in-progress while we describe. + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done", nil + } + + s.startWorker(env, taskQueue, wf) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: taskQueue, + }, wf) + s.NoError(err) + + // Send update with completion callbacks (don't wait for completion). 
+ testPayload := testcore.MustToPayload(s.T(), "test") + updateDone := make(chan struct{}) + go func() { + defer close(updateDone) + _, _ = env.FrontendClient().UpdateWorkflowExecution(ctx, &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: run.GetID(), + RunId: run.GetRunID(), + }, + WaitPolicy: &updatepb.WaitPolicy{ + LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + }, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{ + UpdateId: updateID, + }, + Input: &updatepb.Input{ + Name: "update", + Args: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{testPayload}, + }, + }, + RequestId: uuid.NewString(), + CompletionCallbacks: []*commonpb.Callback{ + { + Variant: &commonpb.Callback_Nexus_{ + Nexus: &commonpb.Callback_Nexus{ + Url: callbackURL, + }, + }, + }, + }, + }, + }) + }() + + // Wait until the update is accepted by checking DescribeWorkflowExecution. + await.Require(env.Context(), s.T(), func(t *await.T) { + desc, err := env.SdkClient().DescribeWorkflowExecution(ctx, run.GetID(), run.GetRunID()) + require.NoError(t, err) + require.NotNil(t, desc.GetCallbacks(), "callbacks should be present") + found := false + for _, cb := range desc.GetCallbacks() { + if cb.GetCallback().GetNexus().GetUrl() == callbackURL { + found = true + // Verify the trigger references the update. + trigger := cb.GetTrigger() + require.NotNil(t, trigger) + updateTrigger := trigger.GetUpdateWorkflowExecutionCompleted() + if updateTrigger != nil { + require.Equal(t, updateID, updateTrigger.GetUpdateId()) + } + } + } + require.True(t, found, "expected to find callback with URL %s", callbackURL) + }, 10*time.Second, 500*time.Millisecond) + + // Complete the update and stop the workflow. 
+ s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "complete-update", nil)) + <-updateDone + s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetInflightUpdate verifies that when a workflow is +// reset while an update with completion callbacks is in-flight (accepted but not completed), +// the update is reapplied in the new run and the callback fires when the update completes. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetInflightUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler blocks on "complete-update" signal so the update + // stays in-flight while we perform the reset. + targetWF := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + // Start target workflow independently (not as child) to avoid parent-child + // complications during reset. + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation that triggers the update with callbacks. 
+ callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target workflow. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, targetRun.GetRunID()) + + // Reset the target workflow to the first WFT completed event (before the update). + resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with inflight update", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // Verify the update was reapplied in the new run's history. + s.assertReappliedUpdateInNewRun(ctx, env, cfg.childWfID, resetResp.RunId, cfg.updateID) + + // Signal the new run to complete the update, which should trigger the callback. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "complete-update", nil)) + + // The callback fires -> nexus operation completes -> caller gets the result. + var result string + s.NoError(callerRun.Get(ctx, &result)) + s.Equal("updated: test", result) + + // Clean up: stop the new run of the target workflow. 
+ s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetRejectedUpdate verifies that when a workflow is +// reset while an update with completion callbacks is in-flight (accepted but not completed), +// and the new run's workflow code rejects the reapplied update via a validator, the +// completion callback fires with a failure and the caller's nexus operation fails. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetRejectedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Use a shared flag to switch behavior between runs. In the first run the + // update is accepted (and blocks); after we flip the flag the validator + // rejects every update. + var shouldReject atomic.Bool + + // Single workflow function used for both runs. 
+ targetWF := func(ctx workflow.Context, input string) (string, error) { + err := workflow.SetUpdateHandlerWithOptions(ctx, "update", + func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }, + workflow.UpdateHandlerOptions{ + Validator: func(ctx workflow.Context, input string) error { + if shouldReject.Load() { + return errors.New("update rejected after reset") + } + return nil + }, + }, + ) + if err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation that triggers the update with callbacks. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target workflow. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, targetRun.GetRunID()) + + // Flip the flag so the validator rejects updates in the new run. + shouldReject.Store(true) + + // Reset the target workflow to the first WFT completed event (before the update). 
+ resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with inflight update expecting rejection", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // Verify the update was reapplied in the new run's history. + s.assertReappliedUpdateInNewRun(ctx, env, cfg.childWfID, resetResp.RunId, cfg.updateID) + + // The reapplied update is rejected by the validator -> callback fires with failure -> + // nexus operation fails -> caller workflow fails. + var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the reapplied update was rejected") + + // Verify it's a NexusOperationError wrapping the rejection failure. + _ = s.requireNexusOperationError(err) + + // Clean up: stop the new run of the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateCallbackAfterResetCompletedUpdate verifies that when a workflow is +// reset after an update with callbacks has already completed, the update is reapplied in +// the new run, completes again, and a new nexus operation targeting the same update ID +// receives the result via the AttachCallbacks path. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackAfterResetCompletedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "reset-completed-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler completes immediately. + targetWF := newUpdateChildWorkflow(false) + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a single nexus operation. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + // First caller: triggers the update, it completes, callback fires. + run1, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result1 string + s.NoError(run1.Get(ctx, &result1)) + s.Equal("updated: test", result1) + + // Reset the target workflow to before the update. + resetResp, err := env.FrontendClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: cfg.childWfID, + RunId: targetRun.GetRunID(), + }, + Reason: "test reset with completed update", + RequestId: uuid.NewString(), + WorkflowTaskFinishEventId: s.getFirstWFTaskCompleteEventID(ctx, env, cfg.childWfID, targetRun.GetRunID()), + }) + s.NoError(err) + + // The update is reapplied and completes again in the new run. + // Wait for the update to complete in the new run before sending the second operation. 
+ await.Require(env.Context(), s.T(), func(t *await.T) { + hist := env.SdkClient().GetWorkflowHistory(ctx, cfg.childWfID, resetResp.RunId, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + for hist.HasNext() { + event, err := hist.Next() + require.NoError(t, err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_COMPLETED { + return + } + } + require.Fail(t, "update not yet completed in new run") + }, 10*time.Second, 500*time.Millisecond) + + // Second caller: sends a new nexus operation targeting the same update ID. + // Since the update is already completed in the new run, AttachCallbacks fires the callback. + run2, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result2 string + s.NoError(run2.Get(ctx, &result2)) + s.Equal("updated: test", result2) + + s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started") + + // Clean up: stop the new run of the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, resetResp.RunId, "stop", nil)) +} + +// TestWorkflowUpdateSyncReturnForCompletedWorkflow verifies that when a second nexus +// operation targets the same update ID on a workflow that has already completed, the +// handler detects the update is already completed and returns the result synchronously +// (instead of starting an async operation with callbacks). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateSyncReturnForCompletedWorkflow() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) 
+ ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "sync-return-completed-wf-update-id" + + var operationCount atomic.Int32 + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, func() { operationCount.Add(1) }) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler completes immediately. + targetWF := newUpdateChildWorkflow(false) + + s.startWorker(env, targetTaskQueue, targetWF) + + targetRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a single nexus operation. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + // First caller: triggers the update, it completes, callback fires. + run1, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result1 string + s.NoError(run1.Get(ctx, &result1)) + s.Equal("updated: test", result1) + + // Complete the target workflow by sending the "stop" signal. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, targetRun.GetRunID(), "stop", nil)) + + // Wait for the target workflow to complete. + var targetResult string + s.NoError(targetRun.Get(ctx, &targetResult)) + + // Second caller: sends a new nexus operation targeting the same update ID. + // Since the workflow is completed and the update was already completed, + // UpdateWorkflowExecution returns the outcome directly -> handler returns sync. 
+ run2, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + var result2 string + s.NoError(run2.Get(ctx, &result2)) + s.Equal("updated: test", result2) + + s.Equal(int32(2), operationCount.Load(), "expected two nexus operations to be started") +} + +// TestWorkflowUpdateCallbackOnFailedUpdate verifies that when an update handler returns +// an error (update completes with a failure outcome), the completion callback fires and +// the caller's nexus operation completes with a failure. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnFailedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "failed-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler returns an error after acceptance. + targetWF := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + return "", temporal.NewApplicationError("update handler failed", "UpdateFailed", nil) + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. 
+ callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // The update is accepted but the handler returns an error -> update completes with + // failure -> callback fires -> nexus operation fails -> caller workflow fails. + var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the update failed") + + // Verify it's a NexusOperationError wrapping the update failure. + _ = s.requireNexusOperationError(err) + + // Clean up: stop the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil)) +} + +// TestWorkflowUpdateCallbackOnWorkflowTerminate verifies that when a workflow is +// terminated while an update with completion callbacks is in-flight (accepted, handler +// blocking), the ProcessCloseCallbacks mechanism fires the callback and the caller's +// nexus operation completes. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowTerminate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "terminate-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler blocks on a signal so it stays in-flight. 
+ targetWF := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "") + + // Terminate the target workflow while the update is in-flight. + // ProcessCloseCallbacks should fire the update-level callbacks. + s.NoError(env.SdkClient().TerminateWorkflow(ctx, cfg.childWfID, "", "testing terminate with inflight update callback")) + + // The callback fires -> nexus operation completes -> caller workflow finishes. + // The caller should get an error (the nexus operation failed because the + // target was terminated). 
+ var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the target was terminated") + s.assertAcceptedUpdateCompletedWorkflowError(err) +} + +// TestWorkflowUpdateCallbackOnWorkflowComplete verifies that when a workflow completes +// normally while an update with completion callbacks is in-flight (accepted, handler +// blocking), the ProcessCloseCallbacks mechanism fires the callback and the caller's +// nexus operation completes with a failure (the run closes without completing the update). +// This exercises mutable_state_impl.go processCloseCallbacksChasm -> wf.ProcessCloseCallbacks. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowComplete() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "complete-wf-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Update handler blocks on "complete-update" signal so the update stays in-flight + // while the workflow itself completes via the "stop" signal. + targetWF := newUpdateChildWorkflow(true) + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target. 
+ s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "") + + // Complete the target workflow normally while the update is still in-flight. + // processCloseCallbacksChasm fires the update-level callbacks on workflow close. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil)) + + // The callback fires -> nexus operation completes with failure -> caller workflow fails. + var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the target completed while update was in-flight") + s.assertAcceptedUpdateCompletedWorkflowError(err) +} + +// TestWorkflowUpdateCallbackOnWorkflowContinueAsNew verifies that when a workflow +// continues-as-new while an update with completion callbacks is in-flight (accepted, +// handler blocking), the update callbacks are fired and the caller's nexus operation +// completes with a failure (the old run is closed). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowContinueAsNew() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "continue-as-new-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler blocks on a signal so it stays in-flight. + // When "continue-as-new" signal is received, the workflow continues as new. 
+ var targetWF func(ctx workflow.Context, input string) (string, error) + targetWF = func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "continue-as-new") + signalCh.Receive(ctx, nil) + return "", workflow.NewContinueAsNewError(ctx, targetWF, "continued") + } + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "") + + // Signal the target workflow to continue-as-new while the update is in-flight. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "continue-as-new", nil)) + + // The callback fires -> nexus operation completes -> caller workflow finishes. + // The caller should get an error (the nexus operation failed because the + // target continued as new and the update was aborted). 
+ var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the target continued as new") + s.assertAcceptedUpdateCompletedWorkflowError(err) +} + +// TestWorkflowUpdateCallbackOnWorkflowFailedWithRetry verifies that when a workflow +// fails with a retry policy (RetryState=IN_PROGRESS) while an update with completion +// callbacks is in-flight (accepted, handler blocking), the update callbacks are fired +// and the caller's nexus operation completes with a failure (the old run is closed). +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnWorkflowFailedWithRetry() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "failed-retry-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: update handler blocks on a signal so it stays in-flight. + // When "fail" signal is received, the workflow returns an error (which will + // be retried due to the retry policy). 
+ targetWF := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "fail") + signalCh.Receive(ctx, nil) + return "", errors.New("intentional failure for retry test") + } + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + RetryPolicy: &temporal.RetryPolicy{ + InitialInterval: 1 * time.Second, + MaximumAttempts: 3, + BackoffCoefficient: 1, + }, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. + callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // Wait for the update to be accepted on the target. + s.awaitUpdateAccepted(ctx, env, cfg.childWfID, "") + + // Signal the target workflow to fail while the update is in-flight. + // The retry policy will cause a new run to be created. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "fail", nil)) + + // The callback fires -> nexus operation completes -> caller workflow finishes. + // The caller should get an error (the nexus operation failed because the + // target failed and the update was aborted). 
+ var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the target workflow failed with retry") + s.assertAcceptedUpdateCompletedWorkflowError(err) +} + +// TestWorkflowUpdateCallbackOnRejectedUpdate verifies that when an update is rejected +// by the workflow's validator, the nexus handler detects the rejection (which is returned +// as a completed update with a failure outcome) and returns a synchronous failure to the +// caller. This tests the proper handling of rejection in the callback flow. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateCallbackOnRejectedUpdate() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + cfg := newUpdateNexusTestConfig(s.T()) + cfg.updateID = "rejected-update-id" + + h := makeUpdateWithCallbackHandler(env, s.T(), cfg, nil) + endpointName := env.createRandomExternalNexusServer(ctx, s.T(), h) + + targetTaskQueue := testcore.RandomizeStr("target-" + s.T().Name()) + + // Target workflow: validator rejects all updates. + targetWF := func(ctx workflow.Context, input string) (string, error) { + err := workflow.SetUpdateHandlerWithOptions(ctx, "update", + func(ctx workflow.Context, input string) (string, error) { + return "updated: " + input, nil + }, + workflow.UpdateHandlerOptions{ + Validator: func(ctx workflow.Context, input string) error { + return errors.New("update rejected by validator") + }, + }, + ) + if err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + s.startWorker(env, targetTaskQueue, targetWF) + + _, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + ID: cfg.childWfID, + TaskQueue: targetTaskQueue, + }, targetWF, "initial input") + s.NoError(err) + + // Caller workflow sends a nexus operation targeting the child. 
+ callerWF := s.newSimpleCallerWF(endpointName, cfg.childWfID) + + s.startWorker(env, cfg.taskQueue, callerWF) + + callerRun, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: cfg.taskQueue, + WorkflowExecutionTimeout: 30 * time.Second, + }, callerWF) + s.NoError(err) + + // The update is rejected by the validator -> nexus handler detects rejection and + // returns sync failure -> nexus operation fails -> caller workflow fails. + var result string + err = callerRun.Get(ctx, &result) + s.Error(err, "expected caller workflow to fail because the update was rejected") + + // Verify it's a NexusOperationError containing the rejection message. + noe := s.requireNexusOperationError(err) + s.Contains(noe.Error(), "update rejected by validator") + + // Clean up: stop the target workflow. + s.NoError(env.SdkClient().SignalWorkflow(ctx, cfg.childWfID, "", "stop", nil)) +} + +// TestWorkflowUpdateRequestIDInAcceptedEvent verifies that when an update request includes +// a RequestId, it is preserved in the WorkflowExecutionUpdateAccepted event's AcceptedRequest. +func (s *NexusWorkflowUpdateTestSuite) TestWorkflowUpdateRequestIDInAcceptedEvent() { + env := newNexusTestEnv(s.T(), true, enableUpdateCallbacksOpts()...) + ctx := testcore.NewContext() + taskQueue := testcore.RandomizeStr(s.T().Name()) + updateID := "request-id-accepted-test" + requestID := uuid.NewString() + + wf := newUpdateChildWorkflow(false) + s.startWorker(env, taskQueue, wf) + + run, err := env.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: taskQueue, + }, wf, "initial input") + s.NoError(err) + + // Send an update with a specific RequestId and wait for completion. 
+ _, err = env.FrontendClient().UpdateWorkflowExecution(ctx, &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{ + WorkflowId: run.GetID(), + RunId: run.GetRunID(), + }, + WaitPolicy: &updatepb.WaitPolicy{ + LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + }, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{ + UpdateId: updateID, + }, + Input: &updatepb.Input{ + Name: "update", + Args: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{testcore.MustToPayload(s.T(), "test")}, + }, + }, + RequestId: requestID, + }, + }) + s.NoError(err) + + // Verify the accepted event contains the request ID in the AcceptedRequest. + hist := env.SdkClient().GetWorkflowHistory(ctx, run.GetID(), run.GetRunID(), false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + foundAccepted := false + for hist.HasNext() { + event, err := hist.Next() + s.NoError(err) + if event.EventType == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_UPDATE_ACCEPTED { + foundAccepted = true + attrs := event.GetWorkflowExecutionUpdateAcceptedEventAttributes() + s.NotNil(attrs) + s.Equal(updateID, attrs.GetAcceptedRequest().GetMeta().GetUpdateId()) + s.Equal(requestID, attrs.GetAcceptedRequest().GetRequestId()) + break + } + } + s.True(foundAccepted, "expected to find WorkflowExecutionUpdateAccepted event") + + // Clean up. 
+ s.NoError(env.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil)) +} diff --git a/tests/update_workflow_sdk_test.go b/tests/update_workflow_sdk_test.go index 48ce8124797..8dc4902469d 100644 --- a/tests/update_workflow_sdk_test.go +++ b/tests/update_workflow_sdk_test.go @@ -6,14 +6,18 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/stretchr/testify/suite" + commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" updatepb "go.temporal.io/api/update/v1" "go.temporal.io/api/workflowservice/v1" sdkclient "go.temporal.io/sdk/client" "go.temporal.io/sdk/temporal" + "go.temporal.io/sdk/worker" "go.temporal.io/sdk/workflow" + "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/namespace" "go.temporal.io/server/common/testing/testvars" "go.temporal.io/server/tests/testcore" @@ -398,3 +402,95 @@ func (s *UpdateWorkflowSdkSuite) pollUpdate(ctx context.Context, tv *testvars.Te WaitPolicy: waitPolicy, }) } + +// TestUpdateSameRequestIDDeduplicatesCallbacks verifies requestID-based +// deduplication in AttachCallbacks. The update blocks (stays in stateAccepted), then: +// - A second request with the same requestID is deduped (no new callback). +// - A third request with a different requestID creates an additional callback. +// +// The workflow should end up with exactly 2 update callbacks (from requestID1 and requestID2). 
+func (s *UpdateWorkflowSdkSuite) TestUpdateSameRequestIDDeduplicatesCallbacks() { + s.OverrideDynamicConfig(dynamicconfig.EnableChasm, true) + s.OverrideDynamicConfig(dynamicconfig.EnableCHASMCallbacks, true) + s.OverrideDynamicConfig(dynamicconfig.EnableWorkflowUpdateCallbacks, true) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + taskQueue := testcore.RandomizeStr(s.T().Name()) + updateID := "dedup-callbacks-test" + requestID1 := uuid.NewString() + requestID2 := uuid.NewString() + + // Workflow where the update handler blocks until signaled. + wf := func(ctx workflow.Context, input string) (string, error) { + if err := workflow.SetUpdateHandler(ctx, "update", func(ctx workflow.Context, input string) (string, error) { + signalCh := workflow.GetSignalChannel(ctx, "complete-update") + signalCh.Receive(ctx, nil) + return "updated: " + input, nil + }); err != nil { + return "", err + } + signalCh := workflow.GetSignalChannel(ctx, "stop") + signalCh.Receive(ctx, nil) + return "done: " + input, nil + } + + w := worker.New(s.SdkClient(), taskQueue, worker.Options{}) + w.RegisterWorkflow(wf) + s.NoError(w.Start()) + s.T().Cleanup(w.Stop) + + run, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{ + ID: testcore.RandomizeStr("wf"), + TaskQueue: taskQueue, + }, wf, "input") + s.NoError(err) + + makeRequest := func(reqID string) *workflowservice.UpdateWorkflowExecutionRequest { + return &workflowservice.UpdateWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowExecution: &commonpb.WorkflowExecution{WorkflowId: run.GetID()}, + WaitPolicy: &updatepb.WaitPolicy{LifecycleStage: enumspb.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED}, + Request: &updatepb.Request{ + Meta: &updatepb.Meta{UpdateId: updateID}, + Input: &updatepb.Input{Name: "update", Args: &commonpb.Payloads{Payloads: []*commonpb.Payload{testcore.MustToPayload(s.T(), "test")}}}, + RequestId: reqID, + CompletionCallbacks: 
[]*commonpb.Callback{{ + Variant: &commonpb.Callback_Nexus_{Nexus: &commonpb.Callback_Nexus{Url: "http://localhost:9999/callback"}}, + }}, + }, + } + } + + // First request: triggers the update, waits for acceptance (update blocks in handler). + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID1)) + s.NoError(err) + + // Second request: same requestID → should be deduped by AttachCallbacks (no new callback). + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID1)) + s.NoError(err) + + // Third request: different requestID → should create a new callback via AttachCallbacks. + _, err = s.FrontendClient().UpdateWorkflowExecution(ctx, makeRequest(requestID2)) + s.NoError(err) + + // Verify exactly 2 update callbacks: one from requestID1 (first request), + // one from requestID2 (third request). The second request was deduped. + descResp, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: run.GetID()}, + }) + s.NoError(err) + updateCallbackCount := 0 + for _, cb := range descResp.GetCallbacks() { + if cb.GetTrigger().GetUpdateWorkflowExecutionCompleted() != nil { + updateCallbackCount++ + } + } + s.Equal(2, updateCallbackCount, "expected 2 callbacks: requestID1 (original) + requestID2 (new), with duplicate requestID1 deduped") + + // Clean up. + s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "complete-update", nil)) + s.NoError(s.SdkClient().SignalWorkflow(ctx, run.GetID(), run.GetRunID(), "stop", nil)) +} From af1a2fbe2dfa8d3df274d8ad86310d16e814e44c Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Tue, 19 May 2026 18:00:44 -0700 Subject: [PATCH 63/73] Make temporal worker optional in tests (#10302) ## What changed? The Temporal worker service is now opt-in for `testcore.NewEnv`. 
Tests that need scheduler / batcher / worker-deployment system workflows opt in explicitly: ```go env := testcore.NewEnv(t, testcore.WithWorkerService("V1 scheduler")) ``` `FunctionalTestBase`-based tests are unchanged. ## Why? Profiling a 5-suite parallelsuite selection showed the system worker service was responsible for ~55% of live memory and ~4s of test wall time across all three persistence backends. Measured impact on the selection (sqlite, locally): | Metric | Before | After | |---|---|---| | Test wall (sqlite) | 33.0s | 29.1s | | Live memory (inuse_space) | 200 MB | 89 MB | --- tests/activity_api_batch_reset_test.go | 1 + tests/activity_api_batch_unpause_test.go | 6 +- .../activity_api_batch_update_options_test.go | 10 ++- tests/schedule_migration_test.go | 49 +++++++----- tests/schedule_test.go | 80 ++++++++++--------- tests/task_queue_stats_test.go | 1 + tests/testcore/functional_test_base.go | 11 ++- tests/testcore/test_cluster_pool.go | 3 +- tests/testcore/test_env.go | 10 +++ 9 files changed, 108 insertions(+), 63 deletions(-) diff --git a/tests/activity_api_batch_reset_test.go b/tests/activity_api_batch_reset_test.go index 43e17d157e9..332e945bf5a 100644 --- a/tests/activity_api_batch_reset_test.go +++ b/tests/activity_api_batch_reset_test.go @@ -32,6 +32,7 @@ func TestActivityAPIBatchResetClientTestSuite(t *testing.T) { func newBatchResetEnv(t *testing.T) *testcore.TestEnv { return testcore.NewEnv( t, + testcore.WithWorkerService("batch operations"), // These tests intentionally start multiple batch operations in the same namespace. // The default per-namespace limit is 1, so raise it to the functional test limit. 
testcore.WithDynamicConfig(dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace, testcore.ClientSuiteLimit), diff --git a/tests/activity_api_batch_unpause_test.go b/tests/activity_api_batch_unpause_test.go index 1c3be332f63..f8261ea06cb 100644 --- a/tests/activity_api_batch_unpause_test.go +++ b/tests/activity_api_batch_unpause_test.go @@ -92,7 +92,7 @@ func (s *ActivityApiBatchUnpauseClientTestSuite) createWorkflow(env *testcore.Te } func (s *ActivityApiBatchUnpauseClientTestSuite) TestActivityBatchUnpause_Success() { - env := testcore.NewEnv(s.T()) + env := testcore.NewEnv(s.T(), testcore.WithWorkerService("batch operations")) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() @@ -199,7 +199,7 @@ func (s *ActivityApiBatchUnpauseClientTestSuite) TestActivityBatchUnpause_Succes } func (s *ActivityApiBatchUnpauseClientTestSuite) TestActivityBatchUnpause_Failed() { - env := testcore.NewEnv(s.T()) + env := testcore.NewEnv(s.T(), testcore.WithWorkerService("batch operations")) // neither activity type not "match all" is provided _, err := env.SdkClient().WorkflowService().StartBatchOperation(context.Background(), &workflowservice.StartBatchOperationRequest{ @@ -237,7 +237,7 @@ func (s *ActivityApiBatchUnpauseClientTestSuite) TestActivityBatchUnpause_Failed // This is an end-to-end complement to the unit-level checkNamespace tests: it // exercises the full path from StartBatchOperation through the batcher worker. 
func (s *ActivityApiBatchUnpauseClientTestSuite) TestBatchTerminate_NamespaceIsolation() { - env := testcore.NewEnv(s.T()) + env := testcore.NewEnv(s.T(), testcore.WithWorkerService("batch operations")) ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() diff --git a/tests/activity_api_batch_update_options_test.go b/tests/activity_api_batch_update_options_test.go index e095d62fce9..5c34ca96de6 100644 --- a/tests/activity_api_batch_update_options_test.go +++ b/tests/activity_api_batch_update_options_test.go @@ -45,7 +45,10 @@ func (s *ActivityAPIBatchUpdateOptionsSuite) createBatchUpdateOptionsWorkflow(en } func (s *ActivityAPIBatchUpdateOptionsSuite) TestActivityBatchUpdateOptionsSuccess() { - env := testcore.NewEnv(s.T(), testcore.WithDynamicConfig(dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace, testcore.ClientSuiteLimit)) + env := testcore.NewEnv(s.T(), + testcore.WithWorkerService("batch operations"), + testcore.WithDynamicConfig(dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace, testcore.ClientSuiteLimit), + ) ctx := env.Context() @@ -190,7 +193,10 @@ func (s *ActivityAPIBatchUpdateOptionsSuite) TestActivityBatchUpdateOptionsSucce } func (s *ActivityAPIBatchUpdateOptionsSuite) TestActivityBatchUpdateOptionsFailed() { - env := testcore.NewEnv(s.T(), testcore.WithDynamicConfig(dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace, testcore.ClientSuiteLimit)) + env := testcore.NewEnv(s.T(), + testcore.WithWorkerService("batch operations"), + testcore.WithDynamicConfig(dynamicconfig.FrontendMaxConcurrentBatchOperationPerNamespace, testcore.ClientSuiteLimit), + ) // neither activity type nor "match all" is provided _, err := env.SdkClient().WorkflowService().StartBatchOperation(env.Context(), &workflowservice.StartBatchOperationRequest{ diff --git a/tests/schedule_migration_test.go b/tests/schedule_migration_test.go index 966d00b937c..1c54980070a 100644 --- a/tests/schedule_migration_test.go +++ 
b/tests/schedule_migration_test.go @@ -49,6 +49,7 @@ func TestScheduleMigrationTestSuite(t *testing.T) { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2AlreadyExists() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), ) @@ -87,7 +88,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2AlreadyExists() { ScheduleId: sid, Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -144,7 +145,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2AlreadyExists() { TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, Input: inputPayloads, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, } @@ -175,7 +176,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2AlreadyExists() { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_CHASM, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -210,6 +211,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2AlreadyExists() { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationDynamicConfig() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerMigration, true), ) @@ -259,7 +261,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationDynamicConfig() { TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, Input: inputPayloads, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), 
WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, } @@ -319,6 +321,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationDynamicConfig() { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), ) @@ -367,7 +370,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2() { TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, Input: inputPayloads, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, } @@ -401,7 +404,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2() { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_CHASM, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -437,6 +440,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2() { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, false), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, false), @@ -484,7 +488,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1() { ScheduleId: sid, Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -508,7 +512,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1() { ScheduleId: sid, Target: 
adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_WORKFLOW, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -608,6 +612,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1() { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1Idempotent() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, false), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, false), @@ -648,7 +653,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1Idempotent() { ScheduleId: sid, Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -660,7 +665,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1Idempotent() { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_WORKFLOW, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -670,7 +675,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1Idempotent() { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_WORKFLOW, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) } @@ -678,6 +683,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1Idempotent() { func (s *ScheduleMigrationTestSuite) TestCHASMScheduleDescribeAfterDisablingCreationAndMigration() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerMigration, 
true), @@ -779,6 +785,7 @@ func (s *ScheduleMigrationTestSuite) TestCHASMScheduleDescribeAfterDisablingCrea func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1RoutingFallback() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), @@ -819,7 +826,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1RoutingFallback( ScheduleId: sid, Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -831,7 +838,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1RoutingFallback( ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_WORKFLOW, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -914,6 +921,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV2ToV1RoutingFallback( func (s *ScheduleMigrationTestSuite) TestScheduleUpdateAfterDelete() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), @@ -955,7 +963,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleUpdateAfterDelete() { ScheduleId: sid, Schedule: schedule, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -1024,6 +1032,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleUpdateAfterDelete() { func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2WithClosedV2() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler 
operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), ) @@ -1062,7 +1071,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2WithClosedV2() { ScheduleId: sid, Schedule: sched, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }, }, ) @@ -1101,7 +1110,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2WithClosedV2() { TaskQueue: &taskqueuepb.TaskQueue{Name: primitives.PerNSWorkerTaskQueue}, Input: inputPayloads, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, } @@ -1137,7 +1146,7 @@ func (s *ScheduleMigrationTestSuite) TestScheduleMigrationV1ToV2WithClosedV2() { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_CHASM, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) s.NoError(err) @@ -1179,6 +1188,7 @@ func TestScheduleMigrationV1ToV2NoDuplicateRecentActions(t *testing.T) { // a CHASM sentinel (which would block the migration activity). 
env := testcore.NewEnv( t, + testcore.WithWorkerService("V1 scheduler"), testcore.WithSdkWorker(), ) @@ -1259,7 +1269,7 @@ func TestScheduleMigrationV1ToV2NoDuplicateRecentActions(t *testing.T) { ScheduleId: sid, Target: adminservice.MigrateScheduleRequest_SCHEDULER_TARGET_CHASM, Identity: "test", - RequestId: testcore.RandomizeStr("request-id"), + RequestId: uuid.NewString(), }) require.NoError(t, err) @@ -1325,6 +1335,7 @@ func TestScheduleMigrationV1ToV2NoDuplicateRecentActions(t *testing.T) { func (s *ScheduleMigrationTestSuite) TestDeleteScheduleContextMetadata() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, true), @@ -1532,6 +1543,7 @@ func (s *ScheduleMigrationTestSuite) TestDeleteScheduleContextMetadata() { func (s *ScheduleMigrationTestSuite) TestPatchScheduleContextMetadata() { env := testcore.NewEnv( s.T(), + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerRouting, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, true), @@ -1698,6 +1710,7 @@ func (s *ScheduleMigrationTestSuite) TestPatchScheduleContextMetadata() { func TestScheduleMigration_StaleRunningDoesNotSkipPending(t *testing.T) { env := testcore.NewEnv( t, + testcore.WithWorkerService("scheduler operations"), testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), ) diff --git a/tests/schedule_test.go b/tests/schedule_test.go index b124e72810d..2222cca10fc 100644 --- a/tests/schedule_test.go +++ b/tests/schedule_test.go @@ -58,13 +58,17 @@ var ( } ) -func scheduleCommonOpts() []testcore.TestOption { - return []testcore.TestOption{ - testcore.WithSdkWorker(), +func scheduleCommonOpts(t 
*testing.T) []testcore.TestOption { + opts := []testcore.TestOption{ testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, true), testcore.WithDynamicConfig(dynamicconfig.FrontendAllowedExperiments, []string{"*"}), } + if strings.HasPrefix(t.Name(), "TestScheduleV1") { + // only v1 needs the worker service + opts = append(opts, testcore.WithWorkerService("V1 scheduler")) + } + return opts } func TestScheduleCHASM(t *testing.T) { @@ -128,7 +132,7 @@ func runSharedScheduleTests(t *testing.T, newContext contextFactory) { // BUFFER_ONE keeps exactly one start in the buffer while the first workflow // is still running. func testBufferSizeReportedWhenBuffered(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := testcore.RandomizeStr("sched-buffer-size") wid := testcore.RandomizeStr("sched-buffer-size-wf") @@ -189,7 +193,7 @@ func testBufferSizeReportedWhenBuffered(t *testing.T, newContext contextFactory) } func testDeletedScheduleOperations(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-deleted-ops" wid := "sched-test-deleted-ops-wf" @@ -245,7 +249,7 @@ func testDeletedScheduleOperations(t *testing.T, newContext contextFactory) { } func testBasics(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-basics" wid := "sched-test-basics-wf" @@ -675,7 +679,7 @@ func testBasics(t *testing.T, newContext contextFactory) { } func testInput(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) 
sid := "sched-test-input" wid := "sched-test-input-wf" @@ -739,7 +743,7 @@ func testInput(t *testing.T, newContext contextFactory) { } func testLastCompletionAndError(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-last" wid := "sched-test-last-wf" @@ -814,7 +818,7 @@ func testLastCompletionAndError(t *testing.T, newContext contextFactory) { } func testListSchedulesReturnsWorkflowStatus(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-list-running" wid := "sched-test-list-running-wf" @@ -909,7 +913,7 @@ func testListSchedulesReturnsWorkflowStatus(t *testing.T, newContext contextFact } func testUpdateIntervalTakesEffect(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-update-interval" wid := "sched-test-update-interval-wf" @@ -974,7 +978,7 @@ func testUpdateIntervalTakesEffect(t *testing.T, newContext contextFactory) { } func testListScheduleMatchingTimes(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-list-matching-times" @@ -1023,7 +1027,7 @@ func testListScheduleMatchingTimes(t *testing.T, newContext contextFactory) { } func testLimitMemoSpecSize(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) expectedLimit := scheduler.CurrentTweakablePolicies.SpecFieldLengthLimit @@ -1094,7 +1098,7 @@ func testLimitMemoSpecSize(t *testing.T, newContext contextFactory) { } func testCountSchedules(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) 
+ s := testcore.NewEnv(t, scheduleCommonOpts(t)...) // Create multiple schedules with different paused states sidPrefix := "sched-test-count-" @@ -1164,7 +1168,7 @@ func testCountSchedules(t *testing.T, newContext contextFactory) { } func testListSchedulesPagination(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) const numSchedules = 4 sidPrefix := "sched-test-pagination-" @@ -1235,7 +1239,7 @@ func testListSchedulesPagination(t *testing.T, newContext contextFactory) { } func testListSchedulesFilterAndEntryFields(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-list-fields" wt := "sched-test-list-fields-wt" @@ -1328,7 +1332,7 @@ func testListSchedulesFilterAndEntryFields(t *testing.T, newContext contextFacto } func testListSchedulesFilterByScheduleID(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid1 := "sched-filter-by-id-alpha" sid2 := "sched-filter-by-id-beta" @@ -1459,7 +1463,7 @@ func testListSchedulesFilterByScheduleID(t *testing.T, newContext contextFactory } func testScheduleInternalTaskQueue(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) errorMessageKeyword := "internal per-namespace task queue" // Test CreateSchedule with internal task queue @@ -1551,7 +1555,7 @@ func testScheduleInternalTaskQueue(t *testing.T, newContext contextFactory) { } func testScheduledWorkflowDoubleReset(t *testing.T, newContext contextFactory, enableCHASMCallbacks bool) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) 
s.OverrideDynamicConfig(dynamicconfig.EnableCHASMCallbacks, enableCHASMCallbacks) sid := "sched-test-double-reset" @@ -1708,7 +1712,7 @@ func testScheduledWorkflowDoubleReset(t *testing.T, newContext contextFactory, e } func testResetWithAdditionalCallback(t *testing.T, newContext contextFactory, enableCHASMCallbacks bool) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) s.OverrideDynamicConfig(dynamicconfig.EnableCHASMCallbacks, enableCHASMCallbacks) s.OverrideDynamicConfig( callback.AllowedAddresses, @@ -1891,7 +1895,7 @@ func testResetWithAdditionalCallback(t *testing.T, newContext contextFactory, en // testCreatesWorkflowSentinel tests that creating a CHASM schedule also starts a // dummy workflow to reserve the schedule ID in the V1 workflow ID-space. func testCreatesWorkflowSentinel(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := testcore.RandomizeStr("sid") wid := testcore.RandomizeStr("wid") @@ -1956,7 +1960,7 @@ func testCreatesWorkflowSentinel(t *testing.T, newContext contextFactory) { // testCreatesCHASMSentinel tests that creating a V1 schedule also creates a // CHASM sentinel to reserve the schedule ID in the CHASM execution space. func testCreatesCHASMSentinel(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := testcore.RandomizeStr("sid") wid := testcore.RandomizeStr("wid") @@ -2041,7 +2045,7 @@ func testCreatesCHASMSentinel(t *testing.T, newContext contextFactory) { // testSkipsWorkflowSentinelWhenDisabled asserts that a CHASM CreateSchedule // does not start the dummy V1 workflow when EnableCHASMSchedulerSentinels is off. 
func testSkipsWorkflowSentinelWhenDisabled(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, append(scheduleCommonOpts(), + s := testcore.NewEnv(t, append(scheduleCommonOpts(t), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, false), )...) @@ -2090,7 +2094,7 @@ func testSkipsWorkflowSentinelWhenDisabled(t *testing.T, newContext contextFacto // testSkipsCHASMSentinelWhenDisabled asserts that a V1 CreateSchedule does not // create a CHASM sentinel when EnableCHASMSchedulerSentinels is off. func testSkipsCHASMSentinelWhenDisabled(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, append(scheduleCommonOpts(), + s := testcore.NewEnv(t, append(scheduleCommonOpts(t), testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerSentinels, false), )...) @@ -2144,7 +2148,7 @@ func testSkipsCHASMSentinelWhenDisabled(t *testing.T, newContext contextFactory) } func testCreateScheduleAlreadyExists(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-already-exists" @@ -2191,7 +2195,7 @@ func testCreateScheduleAlreadyExists(t *testing.T, newContext contextFactory) { // temporal.ErrScheduleAlreadyRunning. This tests the SDK's behavior E2E against // the handler. A similar test exists in the features repository. func testCreateScheduleDuplicateSdkError(t *testing.T, useCHASM bool) { - opts := scheduleCommonOpts() + opts := scheduleCommonOpts(t) if useCHASM { opts = append(opts, testcore.WithDynamicConfig(dynamicconfig.EnableCHASMSchedulerCreation, true)) } @@ -2219,7 +2223,7 @@ func testCreateScheduleDuplicateSdkError(t *testing.T, useCHASM bool) { } func testPatchRejectsExcessBackfillers(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) 
sid := "sched-test-too-many-backfillers" wt := "sched-test-too-many-backfillers-wt" @@ -2297,7 +2301,7 @@ func testPatchRejectsExcessBackfillers(t *testing.T, newContext contextFactory) } func testMigrationCallbackAttach(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := testcore.RandomizeStr("sid") wid := testcore.RandomizeStr("wid") @@ -2427,7 +2431,7 @@ func testMigrationCallbackAttach(t *testing.T, newContext contextFactory) { // testCHASMCanListV1Schedules tests that a schedule created in the V1 stack // will also be visible in the V2 stack. func testCHASMCanListV1Schedules(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "schedule-created-on-v1" schedule := &schedulepb.Schedule{ @@ -2498,7 +2502,7 @@ func testCHASMCanListV1Schedules(t *testing.T, newContext contextFactory) { // testRefresh applies to V1 scheduler only; V2 does not support/need manual refresh. func testRefresh(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-refresh" wid := "sched-test-refresh-wf" @@ -2604,7 +2608,7 @@ func testRefresh(t *testing.T, newContext contextFactory) { // testListBeforeRun only applies to V1, as V2 scheduler does not involve the // per-NS worker or workflow. func testListBeforeRun(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, append(scheduleCommonOpts(), + s := testcore.NewEnv(t, append(scheduleCommonOpts(t), testcore.WithDynamicConfig(dynamicconfig.WorkerPerNamespaceWorkerCount, 0), )...) @@ -2652,7 +2656,7 @@ func testListBeforeRun(t *testing.T, newContext contextFactory) { // testRateLimit applies only to V1, as V2 scheduler does not impose its own rate limiting. 
func testRateLimit(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, append(scheduleCommonOpts(), + s := testcore.NewEnv(t, append(scheduleCommonOpts(t), testcore.WithDynamicConfig(dynamicconfig.SchedulerNamespaceStartWorkflowRPS, 1.0), )...) @@ -2707,7 +2711,7 @@ func testRateLimit(t *testing.T, newContext contextFactory) { // testNextTimeCache only applies to V1. func testNextTimeCache(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-next-time-cache" wid := "sched-test-next-time-cache-wf" @@ -2864,7 +2868,7 @@ func assertRecentActionsNoDuplicateRunIDs(t *testing.T, actions []*schedulepb.Sc } } func testUpdateScheduleMemo(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-update-memo" wid := "sched-test-update-memo-wf" @@ -2988,7 +2992,7 @@ func testUpdateScheduleMemo(t *testing.T, newContext contextFactory) { } func testUpdateScheduleMemoRejected(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-update-memo-rejected" wid := "sched-test-update-memo-rejected-wf" @@ -3052,7 +3056,7 @@ func testUpdateScheduleMemoOnly(t *testing.T, newContext contextFactory) { // the schedule when the field is nil, similar to how memo and search_attributes are handled. t.Skip("memo-only updates not yet supported: omitting the schedule field unsets the schedule") - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) 
sid := "sched-test-update-memo-only" wid := "sched-test-update-memo-only-wf" @@ -3121,7 +3125,7 @@ func testUpdateScheduleMemoOnly(t *testing.T, newContext contextFactory) { } func testCHASMUnpauseResumesProcessing(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-unpause-resumes" wid := "sched-test-unpause-resumes-wf" @@ -3210,7 +3214,7 @@ func testCHASMUnpauseResumesProcessing(t *testing.T, newContext contextFactory) ) } func testUpdateScheduleRequestIDTooLong(t *testing.T, newContext contextFactory) { - s := testcore.NewEnv(t, scheduleCommonOpts()...) + s := testcore.NewEnv(t, scheduleCommonOpts(t)...) sid := "sched-test-update-reqid-too-long" wid := "sched-test-update-reqid-too-long-wf" @@ -3263,7 +3267,7 @@ func testUpdateScheduleRequestIDTooLong(t *testing.T, newContext contextFactory) func testUpdateScheduleBlobSizeLimit(t *testing.T, newContext contextFactory) { s := testcore.NewEnv(t, - append(scheduleCommonOpts(), + append(scheduleCommonOpts(t), testcore.WithDynamicConfig(dynamicconfig.BlobSizeLimitError, 1000), testcore.WithDynamicConfig(dynamicconfig.BlobSizeLimitWarn, 500), )..., diff --git a/tests/task_queue_stats_test.go b/tests/task_queue_stats_test.go index 4c01a90b408..adb502e7880 100644 --- a/tests/task_queue_stats_test.go +++ b/tests/task_queue_stats_test.go @@ -737,6 +737,7 @@ func newTaskQueueStatsContext( extraOpts ...testcore.TestOption, ) *taskQueueStatsContext { opts := []testcore.TestOption{ + testcore.WithWorkerService("worker-deployment versioning"), testcore.WithDynamicConfig(dynamicconfig.EnableDeploymentVersions, true), testcore.WithDynamicConfig(dynamicconfig.FrontendEnableWorkerVersioningWorkflowAPIs, true), testcore.WithDynamicConfig(dynamicconfig.MatchingUseNewMatcher, usePriMatcher), diff --git a/tests/testcore/functional_test_base.go b/tests/testcore/functional_test_base.go index cde853344f6..0bd41249375 100644 --- 
a/tests/testcore/functional_test_base.go +++ b/tests/testcore/functional_test_base.go @@ -99,6 +99,7 @@ type ( DynamicConfigOverrides map[dynamicconfig.Key]any ArchivalEnabled bool EnableMTLS bool + EnableWorkerService bool FaultInjectionConfig *config.FaultInjection NumHistoryShards int32 SharedCluster bool @@ -160,6 +161,12 @@ func WithMTLS() TestClusterOption { } } +func withWorkerService(enabled bool) TestClusterOption { + return func(params *TestClusterParams) { + params.EnableWorkerService = enabled + } +} + func WithFaultInjectionConfig(cfg *config.FaultInjection) TestClusterOption { return func(params *TestClusterParams) { params.FaultInjectionConfig = cfg @@ -299,6 +306,7 @@ func (s *FunctionalTestBase) setupCluster(options ...TestClusterOption) { EnableMTLS: params.EnableMTLS, CustomHistoryArchiverFactory: params.CustomHistoryArchiverFactory, CustomVisibilityArchiverFactory: params.CustomVisibilityArchiverFactory, + WorkerConfig: WorkerConfig{DisableWorker: !params.EnableWorkerService}, } // Apply configuration for shared clusters. @@ -376,7 +384,8 @@ func (s *FunctionalTestBase) checkTestShard() { func ApplyTestClusterOptions(options []TestClusterOption) TestClusterParams { params := TestClusterParams{ - ServiceOptions: make(map[primitives.ServiceName][]fx.Option), + ServiceOptions: make(map[primitives.ServiceName][]fx.Option), + EnableWorkerService: true, } for _, opt := range options { opt(¶ms) diff --git a/tests/testcore/test_cluster_pool.go b/tests/testcore/test_cluster_pool.go index 89dc6ae0f37..623e5556b22 100644 --- a/tests/testcore/test_cluster_pool.go +++ b/tests/testcore/test_cluster_pool.go @@ -175,7 +175,8 @@ func (p *clusterPool) createCluster(t *testing.T, dynamicConfig map[dynamicconfi tbase := &FunctionalTestBase{} tbase.SetT(t) - var opts []TestClusterOption + // Keep the worker service off unless explicitly enabled via WithWorkerService. 
+ opts := []TestClusterOption{withWorkerService(false)} if shared { opts = append(opts, WithSharedCluster()) } diff --git a/tests/testcore/test_env.go b/tests/testcore/test_env.go index 90104fe0b5a..681340dbae5 100644 --- a/tests/testcore/test_env.go +++ b/tests/testcore/test_env.go @@ -124,6 +124,16 @@ func WithFxOptions(serviceName primitives.ServiceName, opts ...fx.Option) TestOp } } +// WithWorkerService enables the system worker service. The service is off by +// default to avoid the worker overhead. This implies a dedicated cluster. +func WithWorkerService(reason string) TestOption { + return func(o *testOptions) { + o.dedicatedCluster = true + o.clusterOptions = append(o.clusterOptions, withWorkerService(true)) + o.dedicatedReason = "worker service required: " + reason + } +} + // WithDynamicConfig overrides a dynamic config setting for the test. // For settings that can be namespace-scoped, a namespace constraint is applied. // For all others that require a dedicated cluster, this implies `WithDedicatedCluster`. From ba96a44854991f780ae430b09c87db784bcaf3e0 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Tue, 19 May 2026 18:13:56 -0700 Subject: [PATCH 64/73] Drop retry for Nexus endpoint not found (#10295) ## What changed? Removed retries in test when Nexus endpoint wasn't found. Follow-up to https://github.com/temporalio/temporal/pull/10208 ## Why? Nexus lookups are now strongly consistent. 
--- tests/nexus_api_test.go | 36 +++++++----------------------- tests/nexus_api_validation_test.go | 23 +++++-------------- 2 files changed, 13 insertions(+), 46 deletions(-) diff --git a/tests/nexus_api_test.go b/tests/nexus_api_test.go index 7217e3c715d..08f9486bf6f 100644 --- a/tests/nexus_api_test.go +++ b/tests/nexus_api_test.go @@ -289,26 +289,17 @@ func (s *NexusApiTestSuite) TestNexusStartOperation_Outcomes(useTemporalFailures pollerErrCh := env.nexusTaskPoller(env.Context(), s.T(), endpoint.Spec.Target.GetWorker().TaskQueue, tc.handler) - eventuallyTick := 500 * time.Millisecond header := nexus.Header{"key": "value", "temporal-nexus-failure-support": "true"} if tc.timeout > 0 { - eventuallyTick = tc.timeout + (100 * time.Millisecond) header[nexus.HeaderRequestTimeout] = tc.timeout.String() } - var result *nexusrpc.ClientStartOperationResponse[string] - - // Wait until the endpoint is loaded into the registry. - s.Eventually(func() bool { - result, err = nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{ - CallbackURL: "http://localhost/callback", - RequestID: "request-id", - Header: header, - Links: []nexus.Link{callerNexusLink}, - }) - var handlerErr *nexus.HandlerError - return err == nil || !(errors.As(err, &handlerErr) && handlerErr.Type == nexus.HandlerErrorTypeNotFound) - }, 10*time.Second, eventuallyTick) + result, err := nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{ + CallbackURL: "http://localhost/callback", + RequestID: "request-id", + Header: header, + Links: []nexus.Link{callerNexusLink}, + }) tc.assertion(s, result, err, headerCapture.lastHeaders) s.NoError(<-pollerErrCh) @@ -562,19 +553,12 @@ func (s *NexusApiTestSuite) TestNexusCancelOperation_Outcomes(useTemporalFailure handle, err := client.NewOperationHandle("operation", "token") s.NoError(err) - eventuallyTick := 500 * time.Millisecond header := nexus.Header{"key": "value"} if tc.timeout > 0 { - 
eventuallyTick = tc.timeout + (100 * time.Millisecond) header[nexus.HeaderRequestTimeout] = tc.timeout.String() } - // Wait until the endpoint is loaded into the registry. - s.Eventually(func() bool { - err = handle.Cancel(env.Context(), nexus.CancelOperationOptions{Header: header}) - var handlerErr *nexus.HandlerError - return err == nil || !(errors.As(err, &handlerErr) && handlerErr.Type == nexus.HandlerErrorTypeNotFound) - }, 10*time.Second, eventuallyTick) + err = handle.Cancel(env.Context(), nexus.CancelOperationOptions{Header: header}) tc.assertion(s, err, headerCapture.lastHeaders) s.NoError(<-pollerErrCh) @@ -708,11 +692,7 @@ func (s *NexusApiTestSuite) TestNexusClientNameMetricPropagation(useTemporalFail }) s.NoError(err) - s.Eventually(func() bool { - _, err = nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{}) - var handlerErr *nexus.HandlerError - return err == nil || (!errors.As(err, &handlerErr) || handlerErr.Type != nexus.HandlerErrorTypeNotFound) - }, 10*time.Second, 500*time.Millisecond) + _, err = nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{}) s.NoError(err) s.NoError(<-pollerErrCh) diff --git a/tests/nexus_api_validation_test.go b/tests/nexus_api_validation_test.go index 42d337ed8a1..bf94d29fc4e 100644 --- a/tests/nexus_api_validation_test.go +++ b/tests/nexus_api_validation_test.go @@ -6,7 +6,6 @@ import ( "fmt" "strings" "testing" - "time" "github.com/google/uuid" "github.com/nexus-rpc/sdk-go/nexus" @@ -198,12 +197,7 @@ func (s *NexusAPIValidationTestSuite) TestNexusStartOperation_Forbidden() { capture := env.StartNamespaceMetricCapture() - // Wait until the endpoint is loaded into the registry. 
- s.Eventually(func() bool { - _, err = nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{}) - var handlerErr *nexus.HandlerError - return err == nil || (!errors.As(err, &handlerErr) || handlerErr.Type != nexus.HandlerErrorTypeNotFound) - }, 10*time.Second, 1*time.Second) + _, err = nexusrpc.StartOperation(env.Context(), client, op, "input", nexus.StartOperationOptions{}) var handlerErr *nexus.HandlerError s.ErrorAs(err, &handlerErr) @@ -242,17 +236,10 @@ func (s *NexusAPIValidationTestSuite) TestNexusStartOperation_PayloadSizeLimit() client, err := nexusrpc.NewHTTPClient(nexusrpc.HTTPClientOptions{BaseURL: dispatchURL, Service: "test-service"}) s.NoError(err) - var result *nexusrpc.ClientStartOperationResponse[string] - - // Wait until the endpoint is loaded into the registry. - s.Eventually(func() bool { - result, err = nexusrpc.StartOperation(env.Context(), client, op, input, nexus.StartOperationOptions{ - CallbackURL: "http://localhost/callback", - RequestID: "request-id", - }) - var handlerErr *nexus.HandlerError - return err == nil || (!errors.As(err, &handlerErr) || handlerErr.Type != nexus.HandlerErrorTypeNotFound) - }, 10*time.Second, 500*time.Millisecond) + result, err := nexusrpc.StartOperation(env.Context(), client, op, input, nexus.StartOperationOptions{ + CallbackURL: "http://localhost/callback", + RequestID: "request-id", + }) s.Nil(result) var handlerErr *nexus.HandlerError From 98e1bfc69f89a042cd8b10d56505a792eb86a495 Mon Sep 17 00:00:00 2001 From: Yu Xia Date: Tue, 19 May 2026 19:28:24 -0700 Subject: [PATCH 65/73] handle chasm not found cleanup (#10182) ## What changed? Handle non workflow chasm clean up logic in replication ## Why? Support clean up logic for both workflow and chasm ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- .../executable_sync_versioned_transition_task.go | 15 ++++----------- service/history/replication/executable_task.go | 5 +++++ .../history/replication/executable_task_mock.go | 14 ++++++++++++++ ...xecutable_verify_versioned_transition_task.go | 16 ++++------------ 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/service/history/replication/executable_sync_versioned_transition_task.go b/service/history/replication/executable_sync_versioned_transition_task.go index ff2b9688b98..c7f40cae41a 100644 --- a/service/history/replication/executable_sync_versioned_transition_task.go +++ b/service/history/replication/executable_sync_versioned_transition_task.go @@ -228,17 +228,10 @@ func (e *ExecutableSyncVersionedTransitionTask) HandleErr(err error) error { tag.WorkflowID(e.WorkflowID), tag.WorkflowRunID(e.RunID), ) - // workflow is not found in source cluster, cleanup workflow in target cluster - ctx, cancel := newTaskContext(e.NamespaceName(), e.Config.ReplicationTaskApplyTimeout(), callerInfo) - defer cancel() - return e.DeleteWorkflow( - ctx, - definition.NewWorkflowKey( - e.NamespaceID, - e.WorkflowID, - e.RunID, - ), - ) + // Workflow is not found in source cluster, cleanup workflow in target cluster. + // This handles workflow deletion from source cluster and this is optional as deletion operation will replicate to target clusters. 
+ deletionTask := NewExecutableDeleteExecutionTask(e.ProcessToolBox, e.TaskID(), e.TaskCreationTime(), e.SourceClusterName(), e.SourceShardKey(), e.ReplicationTask()) + return deletionTask.Execute() default: return err } diff --git a/service/history/replication/executable_task.go b/service/history/replication/executable_task.go index 726ac615daa..48a66b18011 100644 --- a/service/history/replication/executable_task.go +++ b/service/history/replication/executable_task.go @@ -56,6 +56,7 @@ type ( TaskID() int64 TaskCreationTime() time.Time SourceClusterName() string + SourceShardKey() ClusterShardKey Ack() Nack(err error) Abort() @@ -164,6 +165,10 @@ func (e *ExecutableTaskImpl) SourceClusterName() string { return e.sourceClusterName } +func (e *ExecutableTaskImpl) SourceShardKey() ClusterShardKey { + return e.sourceShardKey +} + func (e *ExecutableTaskImpl) ReplicationTask() *replicationspb.ReplicationTask { return e.replicationTask } diff --git a/service/history/replication/executable_task_mock.go b/service/history/replication/executable_task_mock.go index e3cfc75ca67..d01f27038b9 100644 --- a/service/history/replication/executable_task_mock.go +++ b/service/history/replication/executable_task_mock.go @@ -302,6 +302,20 @@ func (mr *MockExecutableTaskMockRecorder) SourceClusterName() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SourceClusterName", reflect.TypeOf((*MockExecutableTask)(nil).SourceClusterName)) } +// SourceShardKey mocks base method. +func (m *MockExecutableTask) SourceShardKey() ClusterShardKey { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SourceShardKey") + ret0, _ := ret[0].(ClusterShardKey) + return ret0 +} + +// SourceShardKey indicates an expected call of SourceShardKey. 
+func (mr *MockExecutableTaskMockRecorder) SourceShardKey() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SourceShardKey", reflect.TypeOf((*MockExecutableTask)(nil).SourceShardKey)) +} + // State mocks base method. func (m *MockExecutableTask) State() tasks.State { m.ctrl.T.Helper() diff --git a/service/history/replication/executable_verify_versioned_transition_task.go b/service/history/replication/executable_verify_versioned_transition_task.go index 15628c7d1a3..1290f517d85 100644 --- a/service/history/replication/executable_verify_versioned_transition_task.go +++ b/service/history/replication/executable_verify_versioned_transition_task.go @@ -307,18 +307,10 @@ func (e *ExecutableVerifyVersionedTransitionTask) HandleErr(err error) error { tag.WorkflowID(e.WorkflowID), tag.WorkflowRunID(e.RunID), ) - callerInfo := getReplicaitonCallerInfo(e.GetPriority()) - // workflow is not found in source cluster, cleanup workflow in target cluster - ctx, cancel := newTaskContext(e.NamespaceName(), e.Config.ReplicationTaskApplyTimeout(), callerInfo) - defer cancel() - return e.DeleteWorkflow( - ctx, - definition.NewWorkflowKey( - e.NamespaceID, - e.WorkflowID, - e.RunID, - ), - ) + // workflow is not found in source cluster, cleanup workflow in target cluster. + // This handles workflow deletion from source cluster and this is optional as deletion operation will replicate to target clusters. + deletionTask := NewExecutableDeleteExecutionTask(e.ProcessToolBox, e.TaskID(), e.TaskCreationTime(), e.SourceClusterName(), e.SourceShardKey(), e.ReplicationTask()) + return deletionTask.Execute() default: return err } From ffd1f24a800c350a0806c0a77af4e32c8fd7e253 Mon Sep 17 00:00:00 2001 From: Prathyush PV Date: Wed, 20 May 2026 06:58:08 -0700 Subject: [PATCH 66/73] Clamp ClockedRateLimiter burst to a minimum of 1 (#10327) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changed? 
`ClockedRateLimiter.SetBurstAt` now clamps `newBurst` to a minimum of 1 when the limiter's rate is positive. `rate=0, burst=0` is still accepted so callers can fully pause the limiter. ## Why? A zero or negative burst on an actively rate-limiting limiter stalls it — `Allow`/`Wait` can never succeed and waiters block indefinitely. The conditional clamp prevents that while preserving the existing pause semantic (rate=0, burst=0) used by the matching task queue rate limiter. ## How did you test it? - [x] built - [x] covered by existing tests --- common/quotas/clocked_rate_limiter.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common/quotas/clocked_rate_limiter.go b/common/quotas/clocked_rate_limiter.go index e96076ab546..a1041d85d8c 100644 --- a/common/quotas/clocked_rate_limiter.go +++ b/common/quotas/clocked_rate_limiter.go @@ -146,6 +146,10 @@ func (l ClockedRateLimiter) SetLimitAt(t time.Time, newLimit rate.Limit) { } func (l ClockedRateLimiter) SetBurstAt(t time.Time, newBurst int) { + // Clamp burst to >=1 when rate is positive; burst=0 with rate=0 is allowed for pause. + if newBurst < 1 && l.rateLimiter.Limit() > 0 { + newBurst = 1 + } l.rateLimiter.SetBurstAt(t, newBurst) } From 021dba1f3b8c6ef808bc39bd6f54d93ed7d4aefd Mon Sep 17 00:00:00 2001 From: Sean Kane Date: Wed, 20 May 2026 10:36:22 -0600 Subject: [PATCH 67/73] tests: migrate callbacks_test.go to TestEnv (#10330) ## What changed? WISOTT ## Why? Part of our migration process to testcore.TestEnv for reliability and speed purposes. ## How did you test it? 
- [ ] built - [ ] run locally and tested manually - [X] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks None, test file changes only --- tests/callbacks_test.go | 212 ++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 127 deletions(-) diff --git a/tests/callbacks_test.go b/tests/callbacks_test.go index 89b13bb5797..d3f4d87a6ab 100644 --- a/tests/callbacks_test.go +++ b/tests/callbacks_test.go @@ -9,24 +9,21 @@ import ( "github.com/google/uuid" "github.com/nexus-rpc/sdk-go/nexus" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" taskqueuepb "go.temporal.io/api/taskqueue/v1" workflowpb "go.temporal.io/api/workflow/v1" "go.temporal.io/api/workflowservice/v1" - "go.temporal.io/sdk/client" - "go.temporal.io/sdk/worker" "go.temporal.io/sdk/workflow" "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/nexus/nexusrpc" + "go.temporal.io/server/common/testing/await" + "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/testing/protoassert" "go.temporal.io/server/common/testing/protorequire" - "go.temporal.io/server/common/testing/testvars" "go.temporal.io/server/tests/testcore" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/durationpb" @@ -43,28 +40,18 @@ func (h *completionHandler) CompleteOperation(ctx context.Context, request *nexu } type CallbacksSuite struct { - testcore.FunctionalTestBase - - chasmEnabled bool + parallelsuite.Suite[*CallbacksSuite] } func TestCallbacksSuiteHSM(t *testing.T) { - t.Parallel() - suite.Run(t, new(CallbacksSuite)) + parallelsuite.Run(t, &CallbacksSuite{}, []testcore.TestOption{}) } func TestCallbacksSuiteCHASM(t *testing.T) { - t.Parallel() - suite.Run(t, 
&CallbacksSuite{chasmEnabled: true}) -} - -func (s *CallbacksSuite) SetupSuite() { - s.SetupSuiteWithCluster( - testcore.WithDynamicConfigOverrides(map[dynamicconfig.Key]any{ - dynamicconfig.EnableChasm.Key(): s.chasmEnabled, - dynamicconfig.EnableCHASMCallbacks.Key(): s.chasmEnabled, - }), - ) + parallelsuite.Run(t, &CallbacksSuite{}, []testcore.TestOption{ + testcore.WithDynamicConfig(dynamicconfig.EnableChasm, true), + testcore.WithDynamicConfig(dynamicconfig.EnableCHASMCallbacks, true), + }) } func (s *CallbacksSuite) runNexusCompletionHTTPServer(t *testing.T, h *completionHandler) string { @@ -76,9 +63,16 @@ func (s *CallbacksSuite) runNexusCompletionHTTPServer(t *testing.T, h *completio return srv.URL } -func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { - ctx := testcore.NewContext() - taskQueue := testcore.RandomizeStr(s.T().Name()) +func (s *CallbacksSuite) newTestEnv(opts ...testcore.TestOption) *testcore.TestEnv { + env := testcore.NewEnv(s.T(), opts...) + env.OverrideDynamicConfig( + callback.AllowedAddresses, + []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, + ) + return env +} + +func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument(opts []testcore.TestOption) { workflowType := "test" cases := []struct { @@ -120,17 +114,20 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { }, } - s.OverrideDynamicConfig(dynamicconfig.FrontendCallbackURLMaxLength, 50) - s.OverrideDynamicConfig(dynamicconfig.FrontendCallbackHeaderMaxSize, 6) - s.OverrideDynamicConfig(dynamicconfig.MaxCallbacksPerWorkflow, 2) - s.OverrideDynamicConfig(callback.MaxPerExecution, 2) - s.OverrideDynamicConfig( - callback.AllowedAddresses, - []any{map[string]any{"Pattern": "some-ignored-address", "AllowInsecure": true}, map[string]any{"Pattern": "some-secure-address", "AllowInsecure": false}}, - ) - for _, tc := range cases { - s.Run(tc.name, func() { + s.Run(tc.name, func(s *CallbacksSuite) { + env := testcore.NewEnv(s.T(), opts...) 
+ env.OverrideDynamicConfig(dynamicconfig.FrontendCallbackURLMaxLength, 50) + env.OverrideDynamicConfig(dynamicconfig.FrontendCallbackHeaderMaxSize, 6) + env.OverrideDynamicConfig(dynamicconfig.MaxCallbacksPerWorkflow, 2) + env.OverrideDynamicConfig(callback.MaxPerExecution, 2) + env.OverrideDynamicConfig( + callback.AllowedAddresses, + []any{map[string]any{"Pattern": "some-ignored-address", "AllowInsecure": true}, map[string]any{"Pattern": "some-secure-address", "AllowInsecure": false}}, + ) + + taskQueue := testcore.RandomizeStr(s.T().Name()) + cbs := make([]*commonpb.Callback, 0, len(tc.urls)) for _, url := range tc.urls { cbs = append(cbs, &commonpb.Callback{ @@ -144,7 +141,7 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { } request := &workflowservice.StartWorkflowExecutionRequest{ RequestId: uuid.NewString(), - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: testcore.RandomizeStr(s.T().Name()), WorkflowType: &commonpb.WorkflowType{Name: workflowType}, TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, @@ -154,7 +151,7 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { CompletionCallbacks: cbs, } - _, err := s.FrontendClient().StartWorkflowExecution(ctx, request) + _, err := env.FrontendClient().StartWorkflowExecution(s.Context(), request) var invalidArgument *serviceerror.InvalidArgument s.ErrorAs(err, &invalidArgument) s.Equal(tc.message, err.Error()) @@ -162,12 +159,7 @@ func (s *CallbacksSuite) TestWorkflowCallbacks_InvalidArgument() { } } -func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { - s.OverrideDynamicConfig( - callback.AllowedAddresses, - []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, - ) - +func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver(opts []testcore.TestOption) { cases := []struct { name string wf func(workflow.Context) (int, error) @@ -192,7 +184,6 @@ func (s *CallbacksSuite) 
TestWorkflowNexusCallbacks_CarriedOver() { workflow.GetSignalChannel(ctx, "continue").Receive(ctx, nil) return 0, workflow.Sleep(ctx, 10*time.Second) } - s.Greater(info.Attempt, int32(1)) return 666, nil }, runTimeout: 500 * time.Millisecond, @@ -205,7 +196,6 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { workflow.GetSignalChannel(ctx, "continue").Receive(ctx, nil) return 0, errors.New("intentional workflow failure") } - s.Greater(info.Attempt, int32(1)) return 666, nil }, runTimeout: 100 * time.Second, @@ -213,18 +203,14 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { } for _, tc := range cases { - s.Run(tc.name, func() { - tv := testvars.New(s.T()) - ctx := testcore.NewContext() - sdkClient, err := client.Dial(client.Options{ - HostPort: s.FrontendGRPCAddress(), - Namespace: s.Namespace().String(), - }) - s.NoError(err) + s.Run(tc.name, func(s *CallbacksSuite) { + env := s.newTestEnv(opts...) + + ctx := s.Context() + sdkClient := env.SdkClient() - taskQueue := testcore.RandomizeStr(s.T().Name()) workflowType := "test" - workflowID := tv.WorkflowID() + workflowID := env.Tv().WorkflowID() ch := &completionHandler{ requestCh: make(chan *nexusrpc.CompletionRequest, 2), @@ -236,16 +222,13 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { }() callbackAddress := s.runNexusCompletionHTTPServer(s.T(), ch) - w := worker.New(sdkClient, taskQueue, worker.Options{}) - w.RegisterWorkflowWithOptions(tc.wf, workflow.RegisterOptions{Name: workflowType}) - s.NoError(w.Start()) - defer w.Stop() + env.SdkWorker().RegisterWorkflowWithOptions(tc.wf, workflow.RegisterOptions{Name: workflowType}) links := []*commonpb.Link{ { Variant: &commonpb.Link_WorkflowEvent_{ WorkflowEvent: &commonpb.Link_WorkflowEvent{ - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: "some-caller-wfid-1", RunId: "some-caller-runid-1", }, @@ -254,7 +237,7 @@ func (s *CallbacksSuite) 
TestWorkflowNexusCallbacks_CarriedOver() { { Variant: &commonpb.Link_WorkflowEvent_{ WorkflowEvent: &commonpb.Link_WorkflowEvent{ - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: "some-caller-wfid-2", RunId: "some-caller-runid-2", }, @@ -283,10 +266,10 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { request := &workflowservice.StartWorkflowExecutionRequest{ RequestId: uuid.NewString(), - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: workflowID, WorkflowType: &commonpb.WorkflowType{Name: workflowType}, - TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + TaskQueue: &taskqueuepb.TaskQueue{Name: env.WorkerTaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, Input: nil, WorkflowRunTimeout: durationpb.New(tc.runTimeout), Identity: s.T().Name(), @@ -299,7 +282,7 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { Links: []*commonpb.Link{links[0]}, } - response1, err := s.FrontendClient().StartWorkflowExecution(ctx, request) + response1, err := env.FrontendClient().StartWorkflowExecution(ctx, request) s.NoError(err) workflowExecution := &commonpb.WorkflowExecution{ @@ -318,15 +301,15 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { request2.CompletionCallbacks = []*commonpb.Callback{cbs[1]} request2.Links = []*commonpb.Link{links[1]} - response2, err := s.FrontendClient().StartWorkflowExecution(ctx, request2) + response2, err := env.FrontendClient().StartWorkflowExecution(ctx, request2) s.NoError(err) s.False(response2.Started) s.Equal(workflowExecution.RunId, response2.RunId) - _, err = s.FrontendClient().SignalWorkflowExecution( + _, err = env.FrontendClient().SignalWorkflowExecution( ctx, &workflowservice.SignalWorkflowExecutionRequest{ - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowExecution: workflowExecution, SignalName: "continue", }, @@ -356,10 
+339,10 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { ch.requestCompleteCh <- err } - getHistoryResponse, err := s.FrontendClient().GetWorkflowExecutionHistory( + getHistoryResponse, err := env.FrontendClient().GetWorkflowExecutionHistory( ctx, &workflowservice.GetWorkflowExecutionHistoryRequest{ - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), Execution: &commonpb.WorkflowExecution{ WorkflowId: workflowID, }, @@ -376,7 +359,7 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { // Start event contains all callbacks attached to the first workflow. s.ProtoElementsMatch(cbs, startEventAttr.CompletionCallbacks) - s.EventuallyWithT(func(col *assert.CollectT) { + await.Require(s.Context(), s.T(), func(col *await.T) { description, err := sdkClient.DescribeWorkflowExecution(ctx, workflowID, "") require.NoError(col, err) require.Len(col, description.Callbacks, len(cbs)) @@ -418,22 +401,14 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() { } } -func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { - s.OverrideDynamicConfig( - callback.AllowedAddresses, - []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, - ) +func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback(opts []testcore.TestOption) { + env := s.newTestEnv(opts...) 
- tv := testvars.New(s.T()) - ctx := testcore.NewContext() - sdkClient, err := client.Dial(client.Options{ - HostPort: s.FrontendGRPCAddress(), - Namespace: s.Namespace().String(), - }) - s.NoError(err) + ctx := s.Context() + sdkClient := env.SdkClient() - taskQueue := tv.TaskQueue() - workflowID := tv.WorkflowID() + taskQueue := &taskqueuepb.TaskQueue{Name: env.WorkerTaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL} + workflowID := env.Tv().WorkflowID() ch := &completionHandler{ requestCh: make(chan *nexusrpc.CompletionRequest, 2), @@ -445,8 +420,6 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { }() callbackAddress := s.runNexusCompletionHTTPServer(s.T(), ch) - w := worker.New(sdkClient, taskQueue.GetName(), worker.Options{}) - // A workflow that completes once it has been reset. longRunningWorkflow := func(ctx workflow.Context) error { return workflow.Await(ctx, func() bool { @@ -456,11 +429,9 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { }) } - w.RegisterWorkflowWithOptions(longRunningWorkflow, workflow.RegisterOptions{ + env.SdkWorker().RegisterWorkflowWithOptions(longRunningWorkflow, workflow.RegisterOptions{ Name: "longRunningWorkflow", }) - s.NoError(w.Start()) - defer w.Stop() cbs := []*commonpb.Callback{ { @@ -481,7 +452,7 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { request1 := &workflowservice.StartWorkflowExecutionRequest{ RequestId: uuid.NewString(), - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: workflowID, WorkflowType: &commonpb.WorkflowType{Name: "longRunningWorkflow"}, TaskQueue: taskQueue, @@ -490,7 +461,7 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { CompletionCallbacks: []*commonpb.Callback{cbs[0]}, } - startResponse1, err := s.FrontendClient().StartWorkflowExecution(ctx, request1) + startResponse1, err := env.FrontendClient().StartWorkflowExecution(ctx, request1) s.NoError(err) // Get history, iterate to ensure workflow 
task completed event exists. @@ -503,7 +474,7 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { 2 WorkflowTaskScheduled 3 WorkflowTaskStarted 4 WorkflowTaskCompleted`, - s.GetHistoryFunc(s.Namespace().String(), workflowExecution), + env.GetHistoryFunc(env.Namespace().String(), workflowExecution), 5*time.Second, 10*time.Millisecond) @@ -517,7 +488,7 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { } request2.CompletionCallbacks = []*commonpb.Callback{cbs[1]} - startResponse2, err := s.FrontendClient().StartWorkflowExecution(ctx, request2) + startResponse2, err := env.FrontendClient().StartWorkflowExecution(ctx, request2) s.NoError(err) s.False(startResponse2.Started) s.Equal(workflowExecution.RunId, startResponse2.RunId) @@ -529,13 +500,13 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { 3 WorkflowTaskStarted 4 WorkflowTaskCompleted 5 WorkflowExecutionOptionsUpdated`, - s.GetHistoryFunc(s.Namespace().String(), workflowExecution), + env.GetHistoryFunc(env.Namespace().String(), workflowExecution), 5*time.Second, 10*time.Millisecond) // Reset workflow must copy all callbacks even after the reset point. resetWfResponse, err := sdkClient.ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowExecution: workflowExecution, Reason: "TestNexusResetWorkflowWithCallback", @@ -577,8 +548,8 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() { } } - s.EventuallyWithT( - func(t *assert.CollectT) { + await.Require(s.Context(), s.T(), + func(t *await.T) { // Get the description of the run post-reset and ensure its callbacks are in SUCCEEDED // state. 
description, err = sdkClient.DescribeWorkflowExecution(ctx, resetWorkflowRun.GetID(), "") @@ -608,11 +579,8 @@ func blockingWorkflow(ctx workflow.Context) error { }) } -func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() { - s.OverrideDynamicConfig( - callback.AllowedAddresses, - []any{map[string]any{"Pattern": "*", "AllowInsecure": true}}, - ) +func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun(opts []testcore.TestOption) { + env := s.newTestEnv(opts...) /* * 1. Start WF w/ no callbacks and immediately terminate @@ -621,16 +589,10 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() * 4. Verify callback is called */ - tv := testvars.New(s.T()) - ctx := testcore.NewContext() - sdkClient, err := client.Dial(client.Options{ - HostPort: s.FrontendGRPCAddress(), - Namespace: s.Namespace().String(), - }) - s.NoError(err) + ctx := s.Context() - taskQueue := tv.TaskQueue() - workflowID := tv.WorkflowID() + taskQueue := &taskqueuepb.TaskQueue{Name: env.WorkerTaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL} + workflowID := env.Tv().WorkflowID() ch := &completionHandler{ requestCh: make(chan *nexusrpc.CompletionRequest, 1), @@ -642,16 +604,12 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() }() callbackAddress := s.runNexusCompletionHTTPServer(s.T(), ch) - w := worker.New(sdkClient, taskQueue.GetName(), worker.Options{}) - - w.RegisterWorkflow(blockingWorkflow) - s.NoError(w.Start()) - defer w.Stop() + env.SdkWorker().RegisterWorkflow(blockingWorkflow) // 1. 
Start WF w/ no callbacks and immediately terminate request1 := &workflowservice.StartWorkflowExecutionRequest{ RequestId: uuid.NewString(), - Namespace: s.Namespace().String(), + Namespace: env.Namespace().String(), WorkflowId: workflowID, WorkflowType: &commonpb.WorkflowType{Name: "blockingWorkflow"}, TaskQueue: taskQueue, @@ -660,7 +618,7 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() Identity: s.T().Name(), } - startResponse1, err := s.FrontendClient().StartWorkflowExecution(ctx, request1) + startResponse1, err := env.FrontendClient().StartWorkflowExecution(ctx, request1) s.NoError(err) // Validate the workflow started, then terminate it @@ -673,15 +631,15 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() 2 WorkflowTaskScheduled 3 WorkflowTaskStarted 4 WorkflowTaskCompleted`, - s.GetHistoryFunc(s.Namespace().String(), workflowExecution), + env.GetHistoryFunc(env.Namespace().String(), workflowExecution), 5*time.Second, 10*time.Millisecond) - _, err = s.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), WorkflowExecution: workflowExecution, Reason: s.T().Name(), - Identity: tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) s.NoError(err) @@ -694,12 +652,12 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() request2.RequestId = uuid.NewString() request2.CompletionCallbacks = cbs - _, err = s.FrontendClient().StartWorkflowExecution(ctx, request2) + _, err = env.FrontendClient().StartWorkflowExecution(ctx, request2) s.NoError(err) // 3. 
Reset workflow back to the first (terminated) run as base; must copy callbacks - _, err = sdkClient.ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.SdkClient().ResetWorkflowExecution(ctx, &workflowservice.ResetWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), WorkflowExecution: workflowExecution, // base = first (terminated) run Reason: s.T().Name(), WorkflowTaskFinishEventId: 4, @@ -717,13 +675,13 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback_ResetToNotBaseRun() } // Ensure the original workflow runs to completion to avoid leaving dangling runs - _, err = s.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ - Namespace: s.Namespace().String(), + _, err = env.FrontendClient().TerminateWorkflowExecution(ctx, &workflowservice.TerminateWorkflowExecutionRequest{ + Namespace: env.Namespace().String(), WorkflowExecution: &commonpb.WorkflowExecution{ WorkflowId: workflowID, }, Reason: s.T().Name(), - Identity: tv.WorkerIdentity(), + Identity: env.Tv().WorkerIdentity(), }) s.NoError(err) } From cc1b161cfdc7cad23c0bdad7be3dcf976812e8d2 Mon Sep 17 00:00:00 2001 From: Rodrigo Zhou Date: Wed, 20 May 2026 10:51:32 -0700 Subject: [PATCH 68/73] Fix calls to sadefs.DecodeValue (#10334) --- service/history/workflow/mutable_state_impl.go | 4 ++-- service/history/workflow/mutable_state_impl_test.go | 4 ++-- tests/versioning_3_test.go | 4 ++-- tests/versioning_test.go | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index 2bfad286d93..8eca6df50df 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -3730,7 +3730,7 @@ func (ms *MutableStateImpl) loadBuildIds() ([]string, error) { if !found { return []string{}, nil } - decoded, err := 
sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + decoded, err := sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) if err != nil { return nil, err } @@ -3776,7 +3776,7 @@ func (ms *MutableStateImpl) loadUsedDeploymentVersions() ([]string, error) { if !found { return []string{}, nil } - decoded, err := sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + decoded, err := sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) if err != nil { return nil, err } diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index 8c65db2017b..cc158759448 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -3942,7 +3942,7 @@ func (s *mutableStateSuite) getBuildIdsFromMutableState() []string { if !found { return []string{} } - decoded, err := sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + decoded, err := sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) s.NoError(err) buildIDs, ok := decoded.([]string) s.True(ok) @@ -3954,7 +3954,7 @@ func (s *mutableStateSuite) getUsedDeploymentVersionsFromMutableState() []string if !found { return []string{} } - decoded, err := sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + decoded, err := sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) s.NoError(err) usedDeploymentVersions, ok := decoded.([]string) s.True(ok) diff --git a/tests/versioning_3_test.go b/tests/versioning_3_test.go index d17d53877df..b3738677f16 100644 --- a/tests/versioning_3_test.go +++ b/tests/versioning_3_test.go @@ -4541,7 +4541,7 @@ func (s *Versioning3Suite) verifyVersioningSAs( if behavior == vbPinned { payload, ok := w.GetSearchAttributes().GetIndexedFields()["BuildIds"] a.True(ok) - searchAttrAny, err := 
sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + searchAttrAny, err := sadefs.DecodeValue(payload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) a.NoError(err) var searchAttr []string if searchAttrAny != nil { @@ -4556,7 +4556,7 @@ func (s *Versioning3Suite) verifyVersioningSAs( // Validate TemporalUsedWorkerDeploymentVersions search attribute versionPayload, ok := w.GetSearchAttributes().GetIndexedFields()["TemporalUsedWorkerDeploymentVersions"] a.True(ok) - versionAttrAny, err := sadefs.DecodeValue(versionPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + versionAttrAny, err := sadefs.DecodeValue(versionPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) a.NoError(err) var versionAttr []string if versionAttrAny != nil { diff --git a/tests/versioning_test.go b/tests/versioning_test.go index 2cf371a45e9..6a05de023b2 100644 --- a/tests/versioning_test.go +++ b/tests/versioning_test.go @@ -4995,7 +4995,7 @@ func (s *VersioningIntegSuite) validateWorkflowBuildIds( dw, err := s.SdkClient().DescribeWorkflowExecution(ctx, wfId, runId) s.NoError(err) saPayload := dw.GetWorkflowExecutionInfo().GetSearchAttributes().GetIndexedFields()["BuildIds"] - searchAttrAny, err := sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, true) + searchAttrAny, err := sadefs.DecodeValue(saPayload, enumspb.INDEXED_VALUE_TYPE_KEYWORD_LIST, false) var searchAttr []string if searchAttrAny != nil { searchAttr = searchAttrAny.([]string) From 092b56d3ef9e273251c66dac31113d58317eefe2 Mon Sep 17 00:00:00 2001 From: Rodrigo Zhou Date: Wed, 20 May 2026 12:12:53 -0700 Subject: [PATCH 69/73] Replace calls to payload.Encode with sadefs.EncodeValue (#10267) ## What changed? Replace calls to `payload.Encode` with `sadefs.EncodeValue`/`sadefs.MustEncodeValue`. ## Why? `payload.Encode` doesn't set the metadata type, which might be useful when decoding. Besides, encoding search attributes value should've used `sadefs.EncodeValue` anyway. 
## How did you test it? - [x] built - [ ] run locally and tested manually - [x] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) ## Potential risks --- chasm/lib/scheduler/scheduler.go | 19 +++++++++---------- common/searchattribute/stringify.go | 9 +-------- .../history/visibility_queue_task_executor.go | 4 ++-- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/chasm/lib/scheduler/scheduler.go b/chasm/lib/scheduler/scheduler.go index 621e273b97c..672311675b6 100644 --- a/chasm/lib/scheduler/scheduler.go +++ b/chasm/lib/scheduler/scheduler.go @@ -20,7 +20,6 @@ import ( "go.temporal.io/server/common/contextutil" "go.temporal.io/server/common/payload" "go.temporal.io/server/common/primitives/timestamp" - "go.temporal.io/server/common/searchattribute/sadefs" "go.temporal.io/server/common/util" "go.temporal.io/server/service/worker/scheduler" "google.golang.org/protobuf/proto" @@ -938,15 +937,15 @@ func (s *Scheduler) ListInfo( func (s *Scheduler) startWorkflowSearchAttributes( nominal time.Time, ) *commonpb.SearchAttributes { - attributes := s.Schedule.GetAction().GetStartWorkflow().GetSearchAttributes() - - fields := util.CloneMapNonNil(attributes.GetIndexedFields()) - if p, err := payload.Encode(nominal); err == nil { - fields[sadefs.TemporalScheduledStartTime] = p - } - if p, err := payload.Encode(s.ScheduleId); err == nil { - fields[sadefs.TemporalScheduledById] = p - } + scheduledStartTime := chasm.SearchAttributeTemporalScheduledStartTime.Value(nominal) + scheduledByID := chasm.SearchAttributeTemporalScheduledByID.Value(s.ScheduleId) + fields := payload.MergeMapOfPayload( + s.Schedule.GetAction().GetStartWorkflow().GetSearchAttributes().GetIndexedFields(), + map[string]*commonpb.Payload{ + scheduledStartTime.Field: scheduledStartTime.Value.MustEncode(), + scheduledByID.Field: scheduledByID.Value.MustEncode(), + }, + ) return &commonpb.SearchAttributes{ IndexedFields: fields, } diff --git 
a/common/searchattribute/stringify.go b/common/searchattribute/stringify.go index 32a92063c5f..01424f75cf6 100644 --- a/common/searchattribute/stringify.go +++ b/common/searchattribute/stringify.go @@ -10,7 +10,6 @@ import ( commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" - "go.temporal.io/server/common/payload" "go.temporal.io/server/common/searchattribute/sadefs" ) @@ -121,13 +120,7 @@ func parseValueOrArray(valStr string, t enumspb.IndexedValueType) (*commonpb.Pay } } - valPayload, err := payload.Encode(val) - if err != nil { - return nil, err - } - - sadefs.SetMetadataType(valPayload, t) - return valPayload, nil + return sadefs.EncodeValue(val, t) } func parseValueTyped(valStr string, t enumspb.IndexedValueType) (any, error) { diff --git a/service/history/visibility_queue_task_executor.go b/service/history/visibility_queue_task_executor.go index 7401965a2c4..1c7ca08703c 100644 --- a/service/history/visibility_queue_task_executor.go +++ b/service/history/visibility_queue_task_executor.go @@ -576,8 +576,8 @@ func (t *visibilityQueueTaskExecutor) getClosedVisibilityRequest( externalPayloadCount := executionInfo.GetExecutionStats().GetExternalPayloadCount() externalPayloadSizeBytes := executionInfo.GetExecutionStats().GetExternalPayloadSize() if externalPayloadCount > 0 { - externalPayloadCountPayload, _ := payload.Encode(externalPayloadCount) - externalPayloadSizeBytesPayload, _ := payload.Encode(externalPayloadSizeBytes) + externalPayloadCountPayload := sadefs.MustEncodeValue(externalPayloadCount, enumspb.INDEXED_VALUE_TYPE_INT) + externalPayloadSizeBytesPayload := sadefs.MustEncodeValue(externalPayloadSizeBytes, enumspb.INDEXED_VALUE_TYPE_INT) base.SearchAttributes.IndexedFields[sadefs.TemporalExternalPayloadCount] = externalPayloadCountPayload base.SearchAttributes.IndexedFields[sadefs.TemporalExternalPayloadSizeBytes] = externalPayloadSizeBytesPayload } From 01aa279c462fd9e7efc8e0ba6bbc4554b51557dd Mon Sep 17 00:00:00 2001 From: 
Sean Kane Date: Wed, 20 May 2026 15:49:35 -0600 Subject: [PATCH 70/73] Implement SignalWithStart as a system nexus endpoint (#9833) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changed? This PR adds `SignalWithStartWorkflowExecution` as a synchronous Nexus operation exposed via the `__temporal_system` endpoint, allowing workflows to signal-with-start other workflows through the CHASM Nexus operation framework. Key changes: - `chasm/lib/workflow/nexus_service.go`: `workflowServiceNexusHandler` implements the `SignalWithStartWorkflowExecution` Nexus sync operation by resolving the namespace and delegating to the History service. A `SignalWithStartOperationProcessor` handles input enrichment (namespace, request ID, links) and routing via CHASM's `NexusOperationProcessorResult`. - `chasm/lib/workflow/library.go` — library now holds the `workflowServiceNexusHandler`, the workflow `Config`, the SA mapper provider, and the SA validator. `newLibrary` (used by fx) takes those dependencies, while the public `NewLibrary` keeps its old signature for external callers. Adds `NexusServices()` so the library registers its Nexus service via CHASM. - `chasm/lib/workflow/validator.go`: `RequestValidator` consolidates the `SignalWithStartWorkflowExecution` validation logic (previously inlined in `WorkflowHandler`) into a reusable, injectable struct. This same validator is used by both the frontend handler and the new CHASM processor. - `common/dynamicconfig/constants.go` — adds `EnableSignalWithStartFromWorkflow` (namespace-scoped, default false). - `service/frontend/workflow_handler.go`: Removed the `SignalWithStart` validation block. - `service/history/fx.go`: Provides a `HistoryServiceServerProvider` so the CHASM workflow library can call the history handler directly. - `temporal/fx.go`: Removes the now-redundant `ChasmLibraryOptions` grouping; each service module registers its own CHASM libraries.
- `components/nexusoperations/workflow/commands.go`: `NotFound` and `InvalidArgument` errors during Nexus command handling are now surfaced as workflow task failures instead of being treated as transient handler errors. - `common/payloads`: Adds `EncodeSingle`, `MustEncodeSingle`, and `MustEncode` helpers used in tests. - `cmd/tools/getproto`: Adds support for nexus-proto-annotations proto imports. - `tests/signal_with_start_from_workflow_test.go`: Functional test suite covering the happy path, duplicate detection, conflict policies, and validation rejection for the new Nexus operation. ## Why? This functionality is one of our most requested GitHub issues. ## How did you test it? - [X] built - [X] run locally and tested manually - [ ] covered by existing tests - [X] added new unit test(s) - [X] added new functional test(s) ## Potential risks The history service now directly exposes a `HistoryServiceServer` interface via `fx` for injection into the CHASM workflow library. This tight coupling between the CHASM workflow library and the history handler could complicate future layering — callers outside the history service should not adopt this pattern. The feature is gated by `history.enableSignalWithStartFromWorkflow` for rollout. 
--------- Co-authored-by: Roey Berman --- chasm/lib/workflow/config.go | 24 + chasm/lib/workflow/fx.go | 9 + chasm/lib/workflow/library.go | 47 +- chasm/lib/workflow/nexus_service.go | 140 +++ chasm/lib/workflow/validator.go | 259 +++++ chasm/lib/workflow/validator_test.go | 156 +++ chasm/nexus_operation_processor.go | 6 +- cmd/tools/getproto/files.go | 4 + cmd/tools/getproto/main.go | 17 +- common/dynamicconfig/constants.go | 6 + common/payloads/payloads.go | 27 + .../nexusoperations/workflow/commands.go | 16 +- go.mod | 2 +- service/frontend/admin_handler.go | 7 +- service/frontend/errors.go | 47 +- service/frontend/fx.go | 11 + service/frontend/workflow_handler.go | 223 +---- service/frontend/workflow_handler_test.go | 35 +- .../signal_with_start_workflow.go | 5 +- service/history/fx.go | 12 + service/history/handler.go | 3 +- service/worker/fx.go | 4 + temporal/fx.go | 10 +- tests/signal_with_start_from_workflow_test.go | 918 ++++++++++++++++++ tests/testcore/onebox.go | 17 +- 25 files changed, 1734 insertions(+), 271 deletions(-) create mode 100644 chasm/lib/workflow/config.go create mode 100644 chasm/lib/workflow/nexus_service.go create mode 100644 chasm/lib/workflow/validator.go create mode 100644 chasm/lib/workflow/validator_test.go create mode 100644 tests/signal_with_start_from_workflow_test.go diff --git a/chasm/lib/workflow/config.go b/chasm/lib/workflow/config.go new file mode 100644 index 00000000000..7c91c1430d3 --- /dev/null +++ b/chasm/lib/workflow/config.go @@ -0,0 +1,24 @@ +package workflow + +import ( + "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/retrypolicy" +) + +type Config struct { + maxIDLengthLimit dynamicconfig.IntPropertyFn + defaultWorkflowRetrySettings dynamicconfig.TypedPropertyFnWithNamespaceFilter[retrypolicy.DefaultRetrySettings] + maxLinksPerRequest dynamicconfig.IntPropertyFnWithNamespaceFilter + linkMaxSize dynamicconfig.IntPropertyFnWithNamespaceFilter + enableSignalWithStartFromWorkflow 
dynamicconfig.BoolPropertyFnWithNamespaceFilter +} + +func NewConfig(dc *dynamicconfig.Collection) Config { + return Config{ + maxIDLengthLimit: dynamicconfig.MaxIDLengthLimit.Get(dc), + defaultWorkflowRetrySettings: dynamicconfig.DefaultWorkflowRetryPolicy.Get(dc), + maxLinksPerRequest: dynamicconfig.FrontendMaxLinksPerRequest.Get(dc), + linkMaxSize: dynamicconfig.FrontendLinkMaxSize.Get(dc), + enableSignalWithStartFromWorkflow: dynamicconfig.EnableSignalWithStartFromWorkflow.Get(dc), + } +} diff --git a/chasm/lib/workflow/fx.go b/chasm/lib/workflow/fx.go index 310ff1b2117..52730794faa 100644 --- a/chasm/lib/workflow/fx.go +++ b/chasm/lib/workflow/fx.go @@ -1,6 +1,7 @@ package workflow import ( + "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/nexusoperation" "go.uber.org/fx" @@ -8,6 +9,7 @@ import ( var Module = fx.Module( "chasm.lib.workflow", + fx.Provide(NewConfig), fx.Provide(NewRegistry), fx.Provide(newLibrary), fx.Invoke(func( @@ -23,3 +25,10 @@ var Module = fx.Module( return chasmRegistry.Register(library) }), ) + +// HistoryHandlerModule wires the workflow library's Nexus handler to the +// history service. Only include this in services that provide +// historyservice.HistoryServiceServer (the history service). 
+var HistoryHandlerModule = fx.Invoke(func(library *library, historyHandler historyservice.HistoryServiceServer) { + library.workflowServiceNexusHandler.setHistoryHandler(historyHandler) +}) diff --git a/chasm/lib/workflow/library.go b/chasm/lib/workflow/library.go index 88be1d5b864..811f975d280 100644 --- a/chasm/lib/workflow/library.go +++ b/chasm/lib/workflow/library.go @@ -1,24 +1,45 @@ package workflow import ( + "github.com/nexus-rpc/sdk-go/nexus" "go.temporal.io/server/chasm" + "go.temporal.io/server/common/namespace" + "go.temporal.io/server/common/searchattribute" ) type library struct { chasm.UnimplementedLibrary - registry *Registry + registry *Registry + workflowServiceNexusHandler *workflowServiceNexusHandler + config Config + saMapperProvider searchattribute.MapperProvider + saValidator *searchattribute.Validator } -func newLibrary(registry *Registry) *library { +func newLibrary( + registry *Registry, + namespaceRegistry namespace.Registry, + config Config, + saMapperProvider searchattribute.MapperProvider, + saValidator *searchattribute.Validator, +) *library { return &library{ - registry: registry, + registry: registry, + config: config, + saMapperProvider: saMapperProvider, + saValidator: saValidator, + workflowServiceNexusHandler: &workflowServiceNexusHandler{ + config: config, + namespaceRegistry: namespaceRegistry, + }, } } // NewLibrary creates a new CHASM library for the workflow package. +// Use newLibrary (via fx) for the full setup including Nexus services. 
func NewLibrary(registry *Registry) chasm.Library { - return newLibrary(registry) + return &library{registry: registry} } func (l *library) Name() string { @@ -55,3 +76,21 @@ func (l *library) Components() []*chasm.RegistrableComponent { func SetEventRegistryOnContext[C chasm.Context](ctx C, registry *Registry) C { return chasm.ContextWithValue(ctx, ctxKeyWorkflowContext, &workflowContext{registry: registry}) } + +func (l *library) NexusServices() []*nexus.Service { + if l.workflowServiceNexusHandler == nil { + return nil + } + return []*nexus.Service{ + mustNewWorkflowServiceNexusHandler(l.workflowServiceNexusHandler), + } +} + +func (l *library) NexusServiceProcessors() []*chasm.NexusServiceProcessor { + if l.workflowServiceNexusHandler == nil { + return nil + } + return []*chasm.NexusServiceProcessor{ + NewWorkflowServiceNexusServiceProcessor(l.config, l.saMapperProvider, l.saValidator), + } +} diff --git a/chasm/lib/workflow/nexus_service.go b/chasm/lib/workflow/nexus_service.go new file mode 100644 index 00000000000..0b9709960ff --- /dev/null +++ b/chasm/lib/workflow/nexus_service.go @@ -0,0 +1,140 @@ +package workflow + +import ( + "context" + + "github.com/nexus-rpc/sdk-go/nexus" + commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/api/serviceerror" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/api/workflowservice/v1/workflowservicenexus" + "go.temporal.io/server/api/historyservice/v1" + "go.temporal.io/server/chasm" + "go.temporal.io/server/common/namespace" + commonnexus "go.temporal.io/server/common/nexus" + "go.temporal.io/server/common/searchattribute" +) + +var ErrSignalWithStartOperationDisabled = serviceerror.NewUnimplemented("SignalWithStart operation is disabled") + +type workflowServiceNexusHandler struct { + config Config + namespaceRegistry namespace.Registry + historyHandler historyservice.HistoryServiceServer +} + +// signalWithStartWorkflowExecution implements the SignalWithStartWorkflowExecution Nexus operation. 
+func (h *workflowServiceNexusHandler) signalWithStartWorkflowExecution( + ctx context.Context, + req *workflowservice.SignalWithStartWorkflowExecutionRequest, + options nexus.StartOperationOptions, +) (*workflowservice.SignalWithStartWorkflowExecutionResponse, error) { + if !h.config.enableSignalWithStartFromWorkflow(req.GetNamespace()) { + return nil, ErrSignalWithStartOperationDisabled + } + nsID, err := h.namespaceRegistry.GetNamespaceID(namespace.Name(req.GetNamespace())) + if err != nil { + return nil, serviceerror.NewInvalidArgumentf("Invalid namespace %q: %v", req.GetNamespace(), err) + } + res, err := h.historyHandler.SignalWithStartWorkflowExecution(ctx, &historyservice.SignalWithStartWorkflowExecutionRequest{ + NamespaceId: nsID.String(), + SignalWithStartRequest: req, + }) + if err != nil { + return nil, err + } + link := commonnexus.ConvertLinkWorkflowEventToNexusLink(&commonpb.Link_WorkflowEvent{ + Namespace: req.GetNamespace(), + WorkflowId: req.GetWorkflowId(), + RunId: res.GetRunId(), + Reference: &commonpb.Link_WorkflowEvent_RequestIdRef{ + RequestIdRef: &commonpb.Link_WorkflowEvent_RequestIdReference{ + RequestId: req.GetRequestId(), + }, + }, + }) + nexus.AddHandlerLinks(ctx, link) + return &workflowservice.SignalWithStartWorkflowExecutionResponse{ + RunId: res.GetRunId(), + Started: res.GetStarted(), + }, nil +} + +func mustNewWorkflowServiceNexusHandler( + handler *workflowServiceNexusHandler, +) *nexus.Service { + svc := nexus.NewService(workflowservicenexus.WorkflowService.ServiceName) + svc.MustRegister(nexus.NewSyncOperation( + workflowservicenexus.WorkflowService.SignalWithStartWorkflowExecution.Name(), + handler.signalWithStartWorkflowExecution, + )) + return svc +} + +func (h *workflowServiceNexusHandler) setHistoryHandler(handler historyservice.HistoryServiceServer) { + h.historyHandler = handler +} + +type SignalWithStartOperationProcessor struct { + validator *RequestValidator +} + +func (o SignalWithStartOperationProcessor) 
ProcessInput(ctx chasm.NexusOperationProcessorContext, request *workflowservice.SignalWithStartWorkflowExecutionRequest) (*chasm.NexusOperationProcessorResult, error) { + if !o.validator.config.enableSignalWithStartFromWorkflow(ctx.Namespace.Name().String()) { + return nil, ErrSignalWithStartOperationDisabled + } + if request == nil { + return nil, serviceerror.NewInvalidArgument("Request is empty") + } + if request.GetNamespace() == "" { + request.Namespace = ctx.Namespace.Name().String() + } else if request.GetNamespace() != ctx.Namespace.Name().String() { + return nil, serviceerror.NewInvalidArgumentf("Namespace in request %q does not match namespace in context %q", request.GetNamespace(), ctx.Namespace.Name().String()) + } + + if request.GetRequestId() != "" { + return nil, serviceerror.NewInvalidArgument("RequestID should not be set on the request") + } + request.RequestId = ctx.RequestID + + if len(request.GetLinks()) > 0 { + return nil, serviceerror.NewInvalidArgument("Links should not be set on the request") + } + request.Links = make([]*commonpb.Link, len(ctx.Links)) + for i, link := range ctx.Links { + wLink, err := commonnexus.ConvertNexusLinkToLinkWorkflowEvent(link) + if err != nil { + return nil, serviceerror.NewInvalidArgumentf("Cannot convert %v link %v: %v", link.Type, link.URL, err) + } + request.Links[i] = &commonpb.Link{ + Variant: &commonpb.Link_WorkflowEvent_{ + WorkflowEvent: wLink, + }, + } + } + + if err := o.validator.ValidateSignalWithStartRequest(request); err != nil { + return nil, err + } + + return &chasm.NexusOperationProcessorResult{ + RoutingKey: chasm.NexusOperationRoutingKeyExecution{ + NamespaceID: ctx.Namespace.ID().String(), + BusinessID: request.WorkflowId, + }, + }, nil +} + +func NewWorkflowServiceNexusServiceProcessor( + config Config, + saMapperProvider searchattribute.MapperProvider, + saValidator *searchattribute.Validator, +) *chasm.NexusServiceProcessor { + sp := 
chasm.NewNexusServiceProcessor(workflowservicenexus.WorkflowService.ServiceName) + op := SignalWithStartOperationProcessor{validator: NewValidator(config, saMapperProvider, saValidator)} + sp.MustRegisterOperation( + workflowservicenexus.WorkflowService.SignalWithStartWorkflowExecution.Name(), + chasm.NewRegisterableNexusOperationProcessor(op), + ) + return sp +} diff --git a/chasm/lib/workflow/validator.go b/chasm/lib/workflow/validator.go new file mode 100644 index 00000000000..17dc9e978f6 --- /dev/null +++ b/chasm/lib/workflow/validator.go @@ -0,0 +1,259 @@ +package workflow + +import ( + "fmt" + + "github.com/google/uuid" + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + "go.temporal.io/api/serviceerror" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/common/enums" + "go.temporal.io/server/common/primitives/timestamp" + "go.temporal.io/server/common/priorities" + "go.temporal.io/server/common/retrypolicy" + "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/tqid" + "google.golang.org/protobuf/types/known/durationpb" +) + +var ( + ErrWorkflowIDNotSet = serviceerror.NewInvalidArgument("WorkflowId is not set on request.") + errConflictPolicyFailNotSupported = serviceerror.NewInvalidArgument("Invalid WorkflowIDConflictPolicy: WORKFLOW_ID_CONFLICT_POLICY_FAIL is not supported for this operation.") + errIncompatibleIDReusePolicyTerminateIfRunning = serviceerror.NewInvalidArgument("Invalid WorkflowIDReusePolicy: WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING cannot be used together with a WorkflowIDConflictPolicy") + errIncompatibleIDReusePolicyRejectDuplicate = serviceerror.NewInvalidArgument("Invalid WorkflowIDReusePolicy: WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE cannot be used together with WorkflowIdConflictPolicy WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING") + errInvalidWorkflowExecutionTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowExecutionTimeoutSeconds is 
set on request.") + errInvalidWorkflowRunTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowRunTimeoutSeconds is set on request.") + errInvalidWorkflowTaskTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowTaskTimeoutSeconds is set on request.") + ErrCronAndStartDelaySet = serviceerror.NewInvalidArgument("CronSchedule and WorkflowStartDelay may not be used together.") + ErrInvalidWorkflowStartDelaySeconds = serviceerror.NewInvalidArgument("An invalid WorkflowStartDelaySeconds is set on request.") +) + +type RequestValidator struct { + config Config + saMapperProvider searchattribute.MapperProvider + saValidator *searchattribute.Validator +} + +func NewValidator( + config Config, + saMapperProvider searchattribute.MapperProvider, + saValidator *searchattribute.Validator, +) *RequestValidator { + return &RequestValidator{ + config: config, + saMapperProvider: saMapperProvider, + saValidator: saValidator, + } +} + +func (v *RequestValidator) ValidateWorkflowID( + workflowID string, +) error { + if workflowID == "" { + return ErrWorkflowIDNotSet + } + if len(workflowID) > v.config.maxIDLengthLimit() { + return serviceerror.NewInvalidArgumentf("WorkflowId exceeds maximum allowed length (%d/%d)", len(workflowID), v.config.maxIDLengthLimit()) + } + return nil +} + +type StartWorkflowTimeoutLikeRequest interface { + GetWorkflowExecutionTimeout() *durationpb.Duration + GetWorkflowRunTimeout() *durationpb.Duration + GetWorkflowTaskTimeout() *durationpb.Duration +} + +func (v *RequestValidator) ValidateWorkflowTimeouts( + request StartWorkflowTimeoutLikeRequest, +) error { + if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowExecutionTimeout()); err != nil { + return fmt.Errorf("%w cause: %v", errInvalidWorkflowExecutionTimeoutSeconds, err) + } + + if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowRunTimeout()); err != nil { + return fmt.Errorf("%w cause: %v", errInvalidWorkflowRunTimeoutSeconds, err) + } + 
+ if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowTaskTimeout()); err != nil { + return fmt.Errorf("%w cause: %v", errInvalidWorkflowTaskTimeoutSeconds, err) + } + + return nil +} + +func (v *RequestValidator) ValidateRetryPolicy(namespaceName string, retryPolicy *commonpb.RetryPolicy) error { + if retryPolicy == nil { + // By default, if the user does not explicitly set a retry policy for a Workflow, do not perform any retries. + return nil + } + + retrypolicy.EnsureDefaults(retryPolicy, v.config.defaultWorkflowRetrySettings(namespaceName)) + return retrypolicy.Validate(retryPolicy) +} + +func (v *RequestValidator) ValidateWorkflowStartDelay( + cronSchedule string, + startDelay *durationpb.Duration, +) error { + if len(cronSchedule) > 0 && startDelay != nil { + return ErrCronAndStartDelaySet + } + + if err := timestamp.ValidateAndCapProtoDuration(startDelay); err != nil { + return fmt.Errorf("%w cause: %v", ErrInvalidWorkflowStartDelaySeconds, err) + } + + return nil +} +func (v *RequestValidator) ValidateWorkflowIDReusePolicy( + reusePolicy enumspb.WorkflowIdReusePolicy, + conflictPolicy enumspb.WorkflowIdConflictPolicy, +) error { + if conflictPolicy != enumspb.WORKFLOW_ID_CONFLICT_POLICY_UNSPECIFIED && + reusePolicy == enumspb.WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING { //nolint:staticcheck // SA1019: kept for backwards compatibility + return errIncompatibleIDReusePolicyTerminateIfRunning + } + if conflictPolicy == enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING && + reusePolicy == enumspb.WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE { + return errIncompatibleIDReusePolicyRejectDuplicate + } + return nil +} + +func (v *RequestValidator) ValidateLinks( + ns string, + links []*commonpb.Link, +) error { + maxAllowedLinks := v.config.maxLinksPerRequest(ns) + if len(links) > maxAllowedLinks { + return serviceerror.NewInvalidArgumentf("cannot attach more than %d links per request, got %d", maxAllowedLinks, len(links)) + } + + maxSize := 
v.config.linkMaxSize(ns) + for _, l := range links { + if l.Size() > maxSize { + return serviceerror.NewInvalidArgumentf("link exceeds allowed size of %d, got %d", maxSize, l.Size()) + } + switch t := l.Variant.(type) { + case *commonpb.Link_WorkflowEvent_: + if t.WorkflowEvent.GetNamespace() == "" { + return serviceerror.NewInvalidArgument("workflow event link must not have an empty namespace field") + } + if t.WorkflowEvent.GetWorkflowId() == "" { + return serviceerror.NewInvalidArgument("workflow event link must not have an empty workflow ID field") + } + if t.WorkflowEvent.GetRunId() == "" { + return serviceerror.NewInvalidArgument("workflow event link must not have an empty run ID field") + } + if t.WorkflowEvent.GetEventRef().GetEventType() == enumspb.EVENT_TYPE_UNSPECIFIED && t.WorkflowEvent.GetEventRef().GetEventId() != 0 { + return serviceerror.NewInvalidArgument("workflow event link ref cannot have an unspecified event type and a non-zero event ID") + } + case *commonpb.Link_BatchJob_: + if t.BatchJob.GetJobId() == "" { + return serviceerror.NewInvalidArgument("batch job link must not have an empty job ID") + } + default: + return serviceerror.NewInvalidArgument("unsupported link variant") + } + } + return nil +} + +func (v *RequestValidator) UnaliasedSearchAttributesFrom( + attributes *commonpb.SearchAttributes, + namespaceName string, +) (*commonpb.SearchAttributes, error) { + sa, err := searchattribute.UnaliasFields(v.saMapperProvider, attributes, namespaceName) + if err != nil { + return nil, err + } + + if err = v.ValidateSearchAttributes(sa, namespaceName); err != nil { + return nil, err + } + return sa, nil +} + +func (v *RequestValidator) ValidateSearchAttributes(searchAttributes *commonpb.SearchAttributes, namespaceName string) error { + if err := v.saValidator.Validate(searchAttributes, namespaceName); err != nil { + return err + } + return v.saValidator.ValidateSize(searchAttributes, namespaceName) +} + +func (v *RequestValidator) 
ValidateSignalWithStartRequest(request *workflowservice.SignalWithStartWorkflowExecutionRequest) error { + if request == nil { + return serviceerror.NewInvalidArgument("request is empty") + } + + if err := v.ValidateWorkflowID(request.GetWorkflowId()); err != nil { + return err + } + + if request.GetSignalName() == "" { + return serviceerror.NewInvalidArgument("signal not set") + } + + if len(request.GetSignalName()) > v.config.maxIDLengthLimit() { + return serviceerror.NewInvalidArgumentf("signal name exceeds maximum allowed length (%d/%d)", len(request.GetSignalName()), v.config.maxIDLengthLimit()) + } + + if request.GetWorkflowType().GetName() == "" { + return serviceerror.NewInvalidArgument("workflow type not set") + } + + if len(request.GetWorkflowType().GetName()) > v.config.maxIDLengthLimit() { + return serviceerror.NewInvalidArgumentf("workflow type name exceeds maximum allowed length (%d/%d)", len(request.GetWorkflowType().GetName()), v.config.maxIDLengthLimit()) + } + + if err := tqid.NormalizeAndValidateUserDefined(request.TaskQueue, "", "", v.config.maxIDLengthLimit()); err != nil { + return err + } + + if request.RequestId == "" { + // For easy direct API use, we default the request ID here but expect all + // SDKs and other auto-retrying clients to set it + request.RequestId = uuid.NewString() + } else if len(request.RequestId) > v.config.maxIDLengthLimit() { + return serviceerror.NewInvalidArgumentf("Request ID exceeds maximum allowed length (%d/%d)", len(request.RequestId), v.config.maxIDLengthLimit()) + } + + if err := v.ValidateWorkflowTimeouts(request); err != nil { + return err + } + + if err := v.ValidateRetryPolicy(request.GetNamespace(), request.RetryPolicy); err != nil { + return err + } + + if err := v.ValidateWorkflowStartDelay(request.GetCronSchedule(), request.WorkflowStartDelay); err != nil { + return err + } + + if err := v.ValidateWorkflowIDReusePolicy( + request.WorkflowIdReusePolicy, + request.WorkflowIdConflictPolicy, + ); err != 
nil { + return err + } + + if request.WorkflowIdConflictPolicy == enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL { + return errConflictPolicyFailNotSupported + } + + enums.SetDefaultWorkflowIDPolicies(&request.WorkflowIdReusePolicy, &request.WorkflowIdConflictPolicy, enumspb.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING) + + sa, err := v.UnaliasedSearchAttributesFrom(request.GetSearchAttributes(), request.GetNamespace()) + if err != nil { + return err + } + request.SearchAttributes = sa + + if err := priorities.Validate(request.Priority); err != nil { + return err + } + + return v.ValidateLinks(request.GetNamespace(), request.GetLinks()) +} diff --git a/chasm/lib/workflow/validator_test.go b/chasm/lib/workflow/validator_test.go new file mode 100644 index 00000000000..fab2decda85 --- /dev/null +++ b/chasm/lib/workflow/validator_test.go @@ -0,0 +1,156 @@ +package workflow + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + taskqueuepb "go.temporal.io/api/taskqueue/v1" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/retrypolicy" + "go.temporal.io/server/common/searchattribute" + "google.golang.org/protobuf/types/known/durationpb" +) + +const testMaxIDLen = 1000 + +func newTestValidator() *RequestValidator { + saValidator := searchattribute.NewValidator( + searchattribute.NewTestProvider(), + searchattribute.NewTestMapperProvider(nil), + dynamicconfig.GetIntPropertyFnFilteredByNamespace(100), + dynamicconfig.GetIntPropertyFnFilteredByNamespace(1024), + dynamicconfig.GetIntPropertyFnFilteredByNamespace(4096), + nil, // visibility manager not needed when SA is nil + dynamicconfig.GetBoolPropertyFnFilteredByNamespace(true), + dynamicconfig.GetBoolPropertyFnFilteredByNamespace(false), + ) + return NewValidator( + Config{ + maxIDLengthLimit: func() int { return testMaxIDLen }, + 
defaultWorkflowRetrySettings: func(ns string) retrypolicy.DefaultRetrySettings { + return retrypolicy.DefaultDefaultRetrySettings + }, + maxLinksPerRequest: func(ns string) int { return 10 }, + linkMaxSize: func(ns string) int { return 1024 }, + }, + searchattribute.NewTestMapperProvider(nil), + saValidator, + ) +} + +func validSWSRequest() *workflowservice.SignalWithStartWorkflowExecutionRequest { + return &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: "test-workflow-id", + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "test-workflow-type"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-task-queue"}, + } +} + +func TestValidateSignalWithStartRequest(t *testing.T) { + v := newTestValidator() + + t.Run("HappyPath", func(t *testing.T) { + req := validSWSRequest() + err := v.ValidateSignalWithStartRequest(req) + require.NoError(t, err) + }) + + t.Run("NilRequest", func(t *testing.T) { + err := v.ValidateSignalWithStartRequest(nil) + require.ErrorContains(t, err, "request is empty") + }) + + t.Run("EmptyWorkflowID", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowId = "" + err := v.ValidateSignalWithStartRequest(req) + require.ErrorIs(t, err, ErrWorkflowIDNotSet) + }) + + t.Run("WorkflowIDTooLong", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowId = strings.Repeat("a", testMaxIDLen+1) + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "WorkflowId exceeds maximum allowed length") + }) + + t.Run("EmptySignalName", func(t *testing.T) { + req := validSWSRequest() + req.SignalName = "" + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "signal not set") + }) + + t.Run("SignalNameTooLong", func(t *testing.T) { + req := validSWSRequest() + req.SignalName = strings.Repeat("s", testMaxIDLen+1) + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "signal name exceeds maximum allowed length") + }) + + 
t.Run("EmptyWorkflowType", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowType = &commonpb.WorkflowType{Name: ""} + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "workflow type not set") + }) + + t.Run("WorkflowTypeTooLong", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowType = &commonpb.WorkflowType{Name: strings.Repeat("t", testMaxIDLen+1)} + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "workflow type name exceeds maximum allowed length") + }) + + t.Run("AutoGeneratesRequestID", func(t *testing.T) { + req := validSWSRequest() + req.RequestId = "" + err := v.ValidateSignalWithStartRequest(req) + require.NoError(t, err) + require.NotEmpty(t, req.RequestId, "empty RequestId should be auto-populated") + }) + + t.Run("RequestIDTooLong", func(t *testing.T) { + req := validSWSRequest() + req.RequestId = strings.Repeat("r", testMaxIDLen+1) + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "Request ID exceeds maximum allowed length") + }) + + t.Run("ConflictPolicyFailNotSupported", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowIdConflictPolicy = enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "WORKFLOW_ID_CONFLICT_POLICY_FAIL is not supported") + }) + + t.Run("IncompatibleTerminateIfRunningWithConflictPolicy", func(t *testing.T) { + req := validSWSRequest() + req.WorkflowIdReusePolicy = enumspb.WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING //nolint:staticcheck // SA1019: testing backwards-compatible validation path + req.WorkflowIdConflictPolicy = enumspb.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING cannot be used together with a WorkflowIDConflictPolicy") + }) + + t.Run("IncompatibleRejectDuplicateWithTerminateExisting", func(t *testing.T) { + 
req := validSWSRequest() + req.WorkflowIdReusePolicy = enumspb.WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE + req.WorkflowIdConflictPolicy = enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING + err := v.ValidateSignalWithStartRequest(req) + require.ErrorContains(t, err, "WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE cannot be used together with WorkflowIdConflictPolicy WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING") + }) + + t.Run("CronAndStartDelaySetTogether", func(t *testing.T) { + req := validSWSRequest() + req.CronSchedule = "0 * * * *" + req.WorkflowStartDelay = durationpb.New(5 * time.Second) + err := v.ValidateSignalWithStartRequest(req) + require.ErrorIs(t, err, ErrCronAndStartDelaySet) + }) +} diff --git a/chasm/nexus_operation_processor.go b/chasm/nexus_operation_processor.go index 91610726726..f055f51f529 100644 --- a/chasm/nexus_operation_processor.go +++ b/chasm/nexus_operation_processor.go @@ -8,7 +8,7 @@ import ( commonpb "go.temporal.io/api/common/v1" "go.temporal.io/server/common" "go.temporal.io/server/common/namespace" - "go.temporal.io/server/common/payloads" + sdkconverter "go.temporal.io/server/common/sdk" ) // NexusOperationProcessorContext contains context for processing a Nexus operation's input, including the target @@ -81,7 +81,7 @@ type RegisterableNexusOperationProcessor struct { func nexusOperationProcessorAdapter[I any](processor NexusOperationProcessor[I]) func(ctx NexusOperationProcessorContext, input *commonpb.Payload) (*NexusOperationProcessorResult, error) { return func(ctx NexusOperationProcessorContext, input *commonpb.Payload) (*NexusOperationProcessorResult, error) { var i I - if err := payloads.Decode(&commonpb.Payloads{Payloads: []*commonpb.Payload{input}}, &i); err != nil { + if err := sdkconverter.PreferProtoDataConverter.FromPayloads(&commonpb.Payloads{Payloads: []*commonpb.Payload{input}}, &i); err != nil { return nil, nexus.NewHandlerErrorf(nexus.HandlerErrorTypeBadRequest, "failed to decode input payload: %v", err) } 
result, err := processor.ProcessInput(ctx, i) @@ -89,7 +89,7 @@ func nexusOperationProcessorAdapter[I any](processor NexusOperationProcessor[I]) return nil, err } if ctx.ReserializeInputPayload { - pls, err := payloads.Encode(i) + pls, err := sdkconverter.PreferProtoDataConverter.ToPayloads(i) if err != nil { herr := nexus.NewHandlerErrorf(nexus.HandlerErrorTypeInternal, "failed to re-encode input payload: %v", err) herr.RetryBehavior = nexus.HandlerErrorRetryBehaviorNonRetryable diff --git a/cmd/tools/getproto/files.go b/cmd/tools/getproto/files.go index e333a06f297..e4e3922200e 100644 --- a/cmd/tools/getproto/files.go +++ b/cmd/tools/getproto/files.go @@ -6,6 +6,7 @@ package main import ( "google.golang.org/protobuf/reflect/protoreflect" + nexusannotations "github.com/nexus-rpc/nexus-proto-annotations/go/nexusannotations/v1" activity "go.temporal.io/api/activity/v1" batch "go.temporal.io/api/batch/v1" callback "go.temporal.io/api/callback/v1" @@ -31,6 +32,7 @@ import ( worker "go.temporal.io/api/worker/v1" workflow "go.temporal.io/api/workflow/v1" workflowservice "go.temporal.io/api/workflowservice/v1" + descriptorpb "google.golang.org/protobuf/types/descriptorpb" anypb "google.golang.org/protobuf/types/known/anypb" durationpb "google.golang.org/protobuf/types/known/durationpb" emptypb "google.golang.org/protobuf/types/known/emptypb" @@ -42,11 +44,13 @@ import ( func init() { importMap = make(map[string]protoreflect.FileDescriptor) importMap["google/protobuf/any.proto"] = anypb.File_google_protobuf_any_proto + importMap["google/protobuf/descriptor.proto"] = descriptorpb.File_google_protobuf_descriptor_proto importMap["google/protobuf/duration.proto"] = durationpb.File_google_protobuf_duration_proto importMap["google/protobuf/empty.proto"] = emptypb.File_google_protobuf_empty_proto importMap["google/protobuf/field_mask.proto"] = fieldmaskpb.File_google_protobuf_field_mask_proto importMap["google/protobuf/timestamp.proto"] = 
timestamppb.File_google_protobuf_timestamp_proto importMap["google/protobuf/wrappers.proto"] = wrapperspb.File_google_protobuf_wrappers_proto + importMap["nexusannotations/v1/options.proto"] = nexusannotations.File_nexusannotations_v1_options_proto importMap["temporal/api/activity/v1/message.proto"] = activity.File_temporal_api_activity_v1_message_proto importMap["temporal/api/batch/v1/message.proto"] = batch.File_temporal_api_batch_v1_message_proto importMap["temporal/api/callback/v1/message.proto"] = callback.File_temporal_api_callback_v1_message_proto diff --git a/cmd/tools/getproto/main.go b/cmd/tools/getproto/main.go index f1e7113ca40..f635b7b9fc7 100644 --- a/cmd/tools/getproto/main.go +++ b/cmd/tools/getproto/main.go @@ -45,7 +45,8 @@ func findProtoImports() []string { if match := matchImport.FindStringSubmatch(line); len(match) > 0 { i := match[1] if strings.HasPrefix(i, "temporal/api/") || - strings.HasPrefix(i, "google/") { + strings.HasPrefix(i, "google/") || + strings.HasPrefix(i, "nexusannotations/") { importMap[i] = struct{}{} } } @@ -84,9 +85,19 @@ func genFileList(protoImports []string) { } else if strings.HasPrefix(i, "google/") { base := strings.TrimSuffix(filepath.Base(i), ".proto") + "pb" base = strings.ReplaceAll(base, "field_mask", "fieldmask") - goImport := "google.golang.org/protobuf/types/known/" + base + var goImport string + if base == "descriptorpb" { + goImport = "google.golang.org/protobuf/types/descriptorpb" + } else { + goImport = "google.golang.org/protobuf/types/known/" + base + } goImportsMap[goImport] = base protoToPackage[i] = base + } else if strings.HasPrefix(i, "nexusannotations/") { + goImport := filepath.Dir(strings.Replace(i, "nexusannotations/", "github.com/nexus-rpc/nexus-proto-annotations/go/nexusannotations/", 1)) + importName := "nexusannotations" + goImportsMap[goImport] = importName + protoToPackage[i] = importName } } goImports := expmaps.Keys(goImportsMap) @@ -146,7 +157,7 @@ func checkImports(files
map[string]protoreflect.FileDescriptor) { num := imports.Len() for i := range num { imp := imports.Get(i).Path() - if strings.HasPrefix(imp, "temporal/api/") || strings.HasPrefix(imp, "google/") { + if strings.HasPrefix(imp, "temporal/api/") || strings.HasPrefix(imp, "google/") || strings.HasPrefix(imp, "nexusannotations/") { if _, ok := files[imp]; !ok { missing[imp] = struct{}{} } diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 04557e0d1ae..26794c36f15 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -2972,6 +2972,12 @@ to the CHASM (V2) implementation on active scheduler workflows.`, instead of the previous HSM backed implementation.`, ) + EnableSignalWithStartFromWorkflow = NewNamespaceBoolSetting( + "history.enableSignalWithStartFromWorkflow", + false, + `Controls whether signal with start from workflow is enabled.`, + ) + EnableCHASMSignalBacklinks = NewNamespaceBoolSetting( "history.enableCHASMSignalBacklinks", false, diff --git a/common/payloads/payloads.go b/common/payloads/payloads.go index 75d899f23c1..9f7a5beeadd 100644 --- a/common/payloads/payloads.go +++ b/common/payloads/payloads.go @@ -34,6 +34,33 @@ func Encode(value ...any) (*commonpb.Payloads, error) { return defaultDataConverter.ToPayloads(value...) } +func EncodeSingle(value any) (*commonpb.Payload, error) { + ps, err := defaultDataConverter.ToPayloads(value) + if err != nil { + return nil, err + } + if len(ps.GetPayloads()) < 1 { + return nil, nil + } + return ps.GetPayloads()[0], nil +} + +func MustEncodeSingle(value any) *commonpb.Payload { + p, err := EncodeSingle(value) + if err != nil { + panic(fmt.Sprintf("unable to encode single payload: %v", err)) //nolint:forbidigo // Must-helper: callers opt into panic on encode failure + } + return p +} + +func MustEncode(value ...any) *commonpb.Payloads { + p, err := defaultDataConverter.ToPayloads(value...)
+ if err != nil { + panic(fmt.Sprintf("unable to encode payloads: %v", err)) //nolint:forbidigo // Must-helper: callers opt into panic on encode failure + } + return p +} + func Decode(ps *commonpb.Payloads, valuePtr ...any) error { return defaultDataConverter.FromPayloads(ps, valuePtr...) } diff --git a/components/nexusoperations/workflow/commands.go b/components/nexusoperations/workflow/commands.go index 5d0b5303d9e..ca0b66703df 100644 --- a/components/nexusoperations/workflow/commands.go +++ b/components/nexusoperations/workflow/commands.go @@ -66,8 +66,16 @@ func (ch *commandHandler) HandleScheduleCommand( // Links are not needed for validation. }, attrs.Service, attrs.Operation, attrs.Input) if err != nil { - var handlerErr *nexus.HandlerError - if errors.As(err, &handlerErr) { + _, isNotFound := errors.AsType[*serviceerror.NotFound](err) + _, isInvalidArgument := errors.AsType[*serviceerror.InvalidArgument](err) + if isNotFound || isInvalidArgument { + return chasmworkflow.FailWorkflowTaskError{ + Cause: enumspb.WORKFLOW_TASK_FAILED_CAUSE_BAD_SCHEDULE_NEXUS_OPERATION_ATTRIBUTES, + Message: err.Error(), + } + } + handlerErr, isHandlerError := errors.AsType[*nexus.HandlerError](err) + if isHandlerError { // nolint:exhaustive switch handlerErr.Type { case nexus.HandlerErrorTypeNotFound, nexus.HandlerErrorTypeBadRequest: @@ -83,12 +91,12 @@ func (ch *commandHandler) HandleScheduleCommand( } else { endpoint, err := ch.endpointRegistry.GetByName(ctx, ns.ID(), attrs.Endpoint) if err != nil { - if errors.As(err, new(*serviceerror.NotFound)) { + if _, ok := errors.AsType[*serviceerror.NotFound](err); ok { return chasmworkflow.FailWorkflowTaskError{ Cause: enumspb.WORKFLOW_TASK_FAILED_CAUSE_BAD_SCHEDULE_NEXUS_OPERATION_ATTRIBUTES, Message: fmt.Sprintf("endpoint %q not found", attrs.Endpoint), } - } else if errors.As(err, new(*serviceerror.PermissionDenied)) { + } else if _, ok := errors.AsType[*serviceerror.PermissionDenied](err); ok { return 
chasmworkflow.FailWorkflowTaskError{ Cause: enumspb.WORKFLOW_TASK_FAILED_CAUSE_BAD_SCHEDULE_NEXUS_OPERATION_ATTRIBUTES, Message: fmt.Sprintf("caller namespace %q unauthorized for %q", ns.Name(), attrs.Endpoint), diff --git a/go.mod b/go.mod index 2192ce7eaa9..bcfa8b7f664 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( github.com/lib/pq v1.12.3 github.com/maruel/panicparse/v2 v2.5.0 github.com/mitchellh/mapstructure v1.5.0 + github.com/nexus-rpc/nexus-proto-annotations v0.1.0 github.com/nexus-rpc/sdk-go v0.6.0 github.com/olekukonko/tablewriter v0.0.5 github.com/olivere/elastic/v7 v7.0.32 @@ -99,7 +100,6 @@ require ( github.com/go-openapi/swag/typeutils v0.26.0 // indirect github.com/go-openapi/swag/yamlutils v0.26.0 // indirect github.com/hashicorp/go-version v1.9.0 // indirect - github.com/nexus-rpc/nexus-proto-annotations v0.1.0 // indirect go.opentelemetry.io/collector/featuregate v1.56.0 // indirect ) diff --git a/service/frontend/admin_handler.go b/service/frontend/admin_handler.go index 26a65194143..6d0c73860ed 100644 --- a/service/frontend/admin_handler.go +++ b/service/frontend/admin_handler.go @@ -34,6 +34,7 @@ import ( replicationspb "go.temporal.io/server/api/replication/v1" "go.temporal.io/server/chasm" schedulerpb "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" + "go.temporal.io/server/chasm/lib/workflow" serverClient "go.temporal.io/server/client" "go.temporal.io/server/client/admin" "go.temporal.io/server/client/frontend" @@ -987,7 +988,7 @@ func (adh *AdminHandler) validateGetWorkflowExecutionRawHistoryV2Request( execution := request.Execution if execution.GetWorkflowId() == "" { - return errWorkflowIDNotSet + return workflow.ErrWorkflowIDNotSet } // TODO currently, this API is only going to be used by re-send history events // to remote cluster if kafka is lossy again, in the future, this API can be used @@ -1375,10 +1376,10 @@ func (adh *AdminHandler) ReapplyEvents(ctx context.Context, request *adminservic return nil, 
errExecutionNotSet } if request.GetWorkflowExecution().GetWorkflowId() == "" { - return nil, errWorkflowIDNotSet + return nil, workflow.ErrWorkflowIDNotSet } if request.GetEvents() == nil { - return nil, errWorkflowIDNotSet + return nil, workflow.ErrWorkflowIDNotSet } namespaceEntry, err := adh.namespaceRegistry.GetNamespaceByID(namespace.ID(request.GetNamespaceId())) if err != nil { diff --git a/service/frontend/errors.go b/service/frontend/errors.go index c6bbbb6c118..15935a4b5d1 100644 --- a/service/frontend/errors.go +++ b/service/frontend/errors.go @@ -7,23 +7,17 @@ import ( var ( errInvalidTaskToken = serviceerror.NewInvalidArgument("Invalid TaskToken.") errDeserializingToken = serviceerror.NewInvalidArgument("Error deserializing task token.") - errTaskQueueNotSet = serviceerror.NewInvalidArgument("TaskQueue is not set on request.") - errExecutionNotSet = serviceerror.NewInvalidArgument("Execution is not set on request.") errWorkflowIDNotSet = serviceerror.NewInvalidArgument("WorkflowId is not set on request.") + errExecutionNotSet = serviceerror.NewInvalidArgument("Execution is not set on request.") errActivityIDNotSet = serviceerror.NewInvalidArgument("ActivityId is not set on request.") errActivityIDOrTypeNotSet = serviceerror.NewInvalidArgument("Either Activity.Id or Activity.Type should be set on request.") errSignalNameNotSet = serviceerror.NewInvalidArgument("SignalName is not set on request.") errInvalidRunID = serviceerror.NewInvalidArgument("Invalid RunId.") - errInvalidNextPageToken = serviceerror.NewInvalidArgument("Invalid NextPageToken.") // DEPRECATED - errNextPageTokenRunIDMismatch = serviceerror.NewInvalidArgument("RunId in the request does not match the NextPageToken.") // DEPRECATED errQueryNotSet = serviceerror.NewInvalidArgument("WorkflowQuery is not set on request.") errQueryTypeNotSet = serviceerror.NewInvalidArgument("QueryType is not set on request.") errRequestNotSet = serviceerror.NewInvalidArgument("Request is nil.") 
errRequestIDNotSet = serviceerror.NewInvalidArgument("RequestId is not set on request.") errWorkflowTypeNotSet = serviceerror.NewInvalidArgument("WorkflowType is not set on request.") - errInvalidWorkflowExecutionTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowExecutionTimeoutSeconds is set on request.") - errInvalidWorkflowRunTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowRunTimeoutSeconds is set on request.") - errInvalidWorkflowTaskTimeoutSeconds = serviceerror.NewInvalidArgument("An invalid WorkflowTaskTimeoutSeconds is set on request.") errQueryDisallowedForNamespace = serviceerror.NewInvalidArgument("Namespace is not allowed to query, please contact temporal team to re-enable queries.") errClusterNameNotSet = serviceerror.NewInvalidArgument("Cluster name is not set.") errEmptyReplicationInfo = serviceerror.NewInvalidArgument("Replication task info is not set.") @@ -34,7 +28,6 @@ var ( errWorkflowIDTooLong = serviceerror.NewInvalidArgument("WorkflowId length exceeds limit.") errWorkflowRuleIDTooLong = serviceerror.NewInvalidArgument("Workflow Rule Id length exceeds limit.") errSignalNameTooLong = serviceerror.NewInvalidArgument("SignalName length exceeds limit.") - errTaskQueueTooLong = serviceerror.NewInvalidArgument("TaskQueue length exceeds limit.") errRequestIDTooLong = serviceerror.NewInvalidArgument("RequestId length exceeds limit.") errIdentityTooLong = serviceerror.NewInvalidArgument("Identity length exceeds limit.") errNotesTooLong = serviceerror.NewInvalidArgument("Schedule notes exceeds limit.") @@ -43,11 +36,8 @@ var ( errClusterIsNotConfiguredForReadingArchivalVisibility = serviceerror.NewInvalidArgument("Cluster is not configured for reading archived visibility records.") errNamespaceIsNotConfiguredForVisibilityArchival = serviceerror.NewInvalidArgument("Namespace is not configured for visibility archival.") errSearchAttributesNotSet = serviceerror.NewInvalidArgument("SearchAttributes are not set on 
request.") - errInvalidPageSize = serviceerror.NewInvalidArgument("Invalid PageSize.") // DEPRECATED - errInvalidPaginationToken = serviceerror.NewInvalidArgument("Invalid pagination token.") // DEPRECATED - errInvalidFirstNextEventCombination = serviceerror.NewInvalidArgument("Invalid FirstEventId and NextEventId combination.") // DEPRECATED - errInvalidVersionHistories = serviceerror.NewInvalidArgument("Invalid version histories.") // DEPRECATED - errInvalidEventQueryRange = serviceerror.NewInvalidArgument("Invalid event query range.") // DEPRECATED + errInvalidPageSize = serviceerror.NewInvalidArgument("Invalid PageSize.") // DEPRECATED + errInvalidEventQueryRange = serviceerror.NewInvalidArgument("Invalid event query range.") // DEPRECATED errDLQTypeIsNotSupported = serviceerror.NewInvalidArgument("The DLQ type is not supported.") errFailureMustHaveApplicationFailureInfo = serviceerror.NewInvalidArgument("Failure must have ApplicationFailureInfo.") errStatusFilterMustBeNotRunning = serviceerror.NewInvalidArgument("StatusFilter must be specified and must be not Running.") @@ -59,16 +49,11 @@ var ( errMigrationTargetNotSet = serviceerror.NewInvalidArgument("Target is not set on request.") errNamespaceNotSet = serviceerror.NewInvalidArgument("Namespace is not set on request.") errReasonNotSet = serviceerror.NewInvalidArgument("Reason is not set on request.") - errBatchOperationNotSet = serviceerror.NewInvalidArgument("Batch operation is not set on request.") - errCronAndStartDelaySet = serviceerror.NewInvalidArgument("CronSchedule and WorkflowStartDelay may not be used together.") - errInvalidWorkflowStartDelaySeconds = serviceerror.NewInvalidArgument("An invalid WorkflowStartDelaySeconds is set on request.") errRaceConditionAddingSearchAttributes = serviceerror.NewUnavailable("Generated search attributes mapping unavailable.") errUseVersioningWithoutBuildId = serviceerror.NewInvalidArgument("WorkerVersionStamp must be present if UseVersioning is true.") 
errUseVersioningWithoutNormalName = serviceerror.NewInvalidArgument("NormalName must be set on sticky queue when UseVersioning is true or DeploymentOptions are set.") errBuildIdTooLong = serviceerror.NewInvalidArgument("Build ID exceeds configured limit.workerBuildIdSize, use a shorter build ID.") - errDeploymentOptionsNotSet = serviceerror.NewInvalidArgument("Both deployment name and build id must be set when versioning is enabled.") - errIncompatibleIDReusePolicyTerminateIfRunning = serviceerror.NewInvalidArgument("Invalid WorkflowIDReusePolicy: WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING cannot be used together with a WorkflowIDConflictPolicy") - errIncompatibleIDReusePolicyRejectDuplicate = serviceerror.NewInvalidArgument("Invalid WorkflowIDReusePolicy: WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE cannot be used together with WorkflowIdConflictPolicy WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING") + errDeploymentOptionsNotSet = serviceerror.NewInvalidArgument("Deployment name and build id must be set together.") errUseEnhancedDescribeOnStickyQueue = serviceerror.NewInvalidArgument("Enhanced DescribeTaskQueue is not valid for a sticky queue, use api_mode=UNSPECIFIED or a normal queue.") errUseEnhancedDescribeOnNonRootQueue = serviceerror.NewInvalidArgument("Enhanced DescribeTaskQueue is not valid for non-root queue partitions, use api_mode=UNSPECIFIED or a normal queue root name.") errTaskQueuePartitionInvalid = serviceerror.NewInvalidArgument("Task Queue Partition invalid, use a different Task Queue or Task Queue Type") @@ -82,15 +67,14 @@ var ( errMultiOpNotStartAndUpdate = serviceerror.NewInvalidArgument("Operations have to be exactly [Start, Update].") errMultiOpAborted = serviceerror.NewMultiOperationAborted("Operation was aborted.") - errUpdateMetaNotSet = serviceerror.NewInvalidArgument("Update meta is not set on request.") - errUpdateInputNotSet = serviceerror.NewInvalidArgument("Update input is not set on request.") - errUpdateNameNotSet = 
serviceerror.NewInvalidArgument("Update name is not set on request.") - errUpdateIDTooLong = serviceerror.NewInvalidArgument("UpdateId length exceeds limit.") - errUpdateRefNotSet = serviceerror.NewInvalidArgument("UpdateRef is not set on request.") - errUpdateWaitPolicyNotSet = serviceerror.NewInvalidArgument("WaitPolicy is not set on request.") - errSourceClusterNotSet = serviceerror.NewInvalidArgument("SourceCluster is not set on request.") - errTargetClusterNotSet = serviceerror.NewInvalidArgument("TargetCluster is not set on request.") - errInvalidDLQJobToken = serviceerror.NewInvalidArgument("Invalid DLQ job token.") + errUpdateMetaNotSet = serviceerror.NewInvalidArgument("Update meta is not set on request.") + errUpdateInputNotSet = serviceerror.NewInvalidArgument("Update input is not set on request.") + errUpdateNameNotSet = serviceerror.NewInvalidArgument("Update name is not set on request.") + errUpdateIDTooLong = serviceerror.NewInvalidArgument("UpdateId length exceeds limit.") + errUpdateRefNotSet = serviceerror.NewInvalidArgument("UpdateRef is not set on request.") + errSourceClusterNotSet = serviceerror.NewInvalidArgument("SourceCluster is not set on request.") + errTargetClusterNotSet = serviceerror.NewInvalidArgument("TargetCluster is not set on request.") + errInvalidDLQJobToken = serviceerror.NewInvalidArgument("Invalid DLQ job token.") errPageSizeTooBigMessage = "PageSize is larger than allowed %d." @@ -110,18 +94,13 @@ var ( errUnableToGetNamespaceInfoMessage = "Unable to get namespace %v info with error: %v" errUnableToCreateFrontendClientMessage = "Unable to create frontend client with error: %v." errTooManySearchAttributesMessage = "Unable to create search attributes: cannot have more than %d search attribute of type %s." - errUnsupportedIDConflictPolicy = "Invalid WorkflowIDConflictPolicy: %v is not supported for this operation." 
errListNotAllowed = serviceerror.NewPermissionDenied("List is disabled on this namespace.", "") errSchedulesNotAllowed = serviceerror.NewPermissionDenied("Schedules are disabled on this namespace.", "") - errDeploymentsNotAllowed = serviceerror.NewPermissionDenied("Deployments (deprecated) are disabled on this namespace.", "") errDeploymentVersionsNotAllowed = serviceerror.NewPermissionDenied("Worker Deployment Versions are disabled on this namespace.", "") - errBatchAPINotAllowed = serviceerror.NewPermissionDenied("Batch operation feature are disabled on this namespace.", "") - errBatchOpsWorkflowFilterNotSet = serviceerror.NewInvalidArgument("Workflow executions and visibility filter are not set on request.") - errBatchOpsWorkflowFiltersNotAllowed = serviceerror.NewInvalidArgument("Workflow executions and visibility filter are both set on request. Only one of them is allowed.") - errBatchOpsMaxWorkflowExecutionCount = serviceerror.NewInvalidArgument("Workflow executions count exceeded.") + errBatchAPINotAllowed = serviceerror.NewPermissionDenied("Batch operation feature are disabled on this namespace.", "") errUpdateWorkflowExecutionAPINotAllowed = serviceerror.NewPermissionDenied("UpdateWorkflowExecution operation is disabled on this namespace.", "") errUpdateWorkflowExecutionAsyncAcceptedNotAllowed = serviceerror.NewPermissionDenied("UpdateWorkflowExecution issued asynchronously and waiting on update accepted is disabled on this namespace.", "") diff --git a/service/frontend/fx.go b/service/frontend/fx.go index 53320dbaddd..1ab40a77985 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -11,6 +11,7 @@ import ( "go.temporal.io/server/chasm/lib/callback" chasmnexus "go.temporal.io/server/chasm/lib/nexusoperation" nexusoperationpb "go.temporal.io/server/chasm/lib/nexusoperation/gen/nexusoperationpb/v1" + chasmscheduler "go.temporal.io/server/chasm/lib/scheduler" "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" chasmworkflow 
"go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/client" @@ -127,7 +128,9 @@ var Module = fx.Options( fx.Provide(schedulerpb.NewSchedulerServiceLayeredClient), fx.Provide(chasmnexus.NewFrontendHandler), chasmnexus.Module, + chasmscheduler.Module, chasmworkflow.Module, + callback.Module, activity.FrontendModule, fx.Provide(visibility.ChasmVisibilityManagerProvider), fx.Provide(chasm.ChasmVisibilityInterceptorProvider), @@ -853,6 +856,7 @@ func callbackValidatorProvider(dc *dynamicconfig.Collection) callback.Validator } func HandlerProvider( + dc *dynamicconfig.Collection, cfg *config.Config, serviceName primitives.ServiceName, dcRedirectionPolicy config.DCRedirectionPolicy, @@ -878,6 +882,7 @@ func HandlerProvider( namespaceRegistry namespace.Registry, saMapperProvider searchattribute.MapperProvider, saProvider searchattribute.Provider, + saValidator *searchattribute.Validator, clusterMetadata cluster.Metadata, archivalMetadata archiver.ArchivalMetadata, healthServer *health.Server, @@ -915,6 +920,7 @@ func HandlerProvider( namespaceRegistry, saMapperProvider, saProvider, + saValidator, clusterMetadata, archivalMetadata, healthServer, @@ -927,6 +933,11 @@ func HandlerProvider( nexusOperationHandler, registry, workerDeploymentReadRateLimiter, + chasmworkflow.NewValidator( + chasmworkflow.NewConfig(dc), + saMapperProvider, + saValidator, + ), ) return wfHandler } diff --git a/service/frontend/workflow_handler.go b/service/frontend/workflow_handler.go index e85aad1a354..638ceba6d2e 100644 --- a/service/frontend/workflow_handler.go +++ b/service/frontend/workflow_handler.go @@ -39,7 +39,8 @@ import ( "go.temporal.io/server/chasm/lib/callback" chasmnexus "go.temporal.io/server/chasm/lib/nexusoperation" chasmscheduler "go.temporal.io/server/chasm/lib/scheduler" - "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" + schedulerpb "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" + "go.temporal.io/server/chasm/lib/workflow" 
"go.temporal.io/server/client/frontend" matchingclient "go.temporal.io/server/client/matching" "go.temporal.io/server/common" @@ -64,7 +65,6 @@ import ( "go.temporal.io/server/common/payloads" "go.temporal.io/server/common/persistence" "go.temporal.io/server/common/persistence/serialization" - "go.temporal.io/server/common/persistence/visibility" "go.temporal.io/server/common/persistence/visibility/manager" "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/primitives/timestamp" @@ -89,7 +89,6 @@ import ( healthpb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/protobuf/encoding/protowire" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/fieldmaskpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -123,6 +122,8 @@ type ( ActivityHandler NexusOperationHandler + validator *workflow.RequestValidator + status int32 callbackValidator callback.Validator @@ -322,6 +323,7 @@ func NewWorkflowHandler( namespaceRegistry namespace.Registry, saMapperProvider searchattribute.MapperProvider, saProvider searchattribute.Provider, + saValidator *searchattribute.Validator, clusterMetadata cluster.Metadata, archivalMetadata archiver.ArchivalMetadata, healthServer *health.Server, @@ -334,6 +336,7 @@ func NewWorkflowHandler( nexusOperationHandler chasmnexus.FrontendHandler, registry *chasm.Registry, workerDeploymentReadRateLimiter quotas.RequestRateLimiter, + validator *workflow.RequestValidator, ) *WorkflowHandler { handler := &WorkflowHandler{ ActivityHandler: activityHandler, @@ -370,19 +373,7 @@ func NewWorkflowHandler( namespaceRegistry: namespaceRegistry, saProvider: saProvider, saMapperProvider: saMapperProvider, - saValidator: searchattribute.NewValidator( - saProvider, - saMapperProvider, - config.SearchAttributesNumberOfKeysLimit, - config.SearchAttributesSizeOfValueLimit, - config.SearchAttributesTotalSizeLimit, - visibilityMgr, - 
visibility.AllowListForValidation( - visibilityMgr.GetStoreNames(), - config.VisibilityAllowList, - ), - config.SuppressErrorSetSystemSearchAttribute, - ), + saValidator: saValidator, archivalMetadata: archivalMetadata, healthServer: healthServer, overrides: NewOverrides(), @@ -393,6 +384,7 @@ func NewWorkflowHandler( httpEnabled: httpEnabled, registry: registry, workerDeploymentReadRateLimiter: workerDeploymentReadRateLimiter, + validator: validator, } return handler @@ -620,16 +612,16 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, ) - if err := wh.validateWorkflowID(request.GetWorkflowId()); err != nil { + if err := wh.validator.ValidateWorkflowID(request.GetWorkflowId()); err != nil { return nil, err } namespaceName := namespace.Name(request.GetNamespace()) - if err := wh.validateRetryPolicy(namespaceName, request.RetryPolicy); err != nil { + if err := wh.validator.ValidateRetryPolicy(request.GetNamespace(), request.RetryPolicy); err != nil { return nil, err } - if err := wh.validateWorkflowStartDelay(request.GetCronSchedule(), request.WorkflowStartDelay); err != nil { + if err := wh.validator.ValidateWorkflowStartDelay(request.GetCronSchedule(), request.WorkflowStartDelay); err != nil { return nil, err } @@ -649,7 +641,7 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( return nil, err } - if err := wh.validateStartWorkflowTimeouts(request); err != nil { + if err := wh.validator.ValidateWorkflowTimeouts(request); err != nil { return nil, err } @@ -657,8 +649,7 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( return nil, err } - if err := wh.validateWorkflowIdReusePolicy( - namespaceName, + if err := wh.validator.ValidateWorkflowIDReusePolicy( request.WorkflowIdReusePolicy, request.WorkflowIdConflictPolicy); err != nil { return nil, err @@ -668,7 +659,7 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( return nil, err } - sa, err := 
wh.unaliasedSearchAttributesFrom(request.GetSearchAttributes(), namespaceName) + sa, err := wh.validator.UnaliasedSearchAttributesFrom(request.GetSearchAttributes(), request.GetNamespace()) if err != nil { return nil, err } @@ -696,7 +687,7 @@ func (wh *WorkflowHandler) prepareStartWorkflowRequest( for _, cb := range request.GetCompletionCallbacks() { allLinks = append(allLinks, cb.GetLinks()...) } - if err := wh.validateLinks(namespaceName, allLinks); err != nil { + if err := wh.validator.ValidateLinks(namespaceName.String(), allLinks); err != nil { return nil, err } @@ -734,7 +725,7 @@ func (wh *WorkflowHandler) unaliasedSearchAttributesFrom( return nil, err } - if err = wh.validateSearchAttributes(sa, namespaceName); err != nil { + if err = wh.validator.ValidateSearchAttributes(sa, namespaceName.String()); err != nil { return nil, err } return sa, nil @@ -2231,7 +2222,7 @@ func (wh *WorkflowHandler) RequestCancelWorkflowExecution(ctx context.Context, r return nil, err } - if err := wh.validateLinks(namespace.Name(request.GetNamespace()), request.GetLinks()); err != nil { + if err := wh.validator.ValidateLinks(request.GetNamespace(), request.GetLinks()); err != nil { return nil, err } @@ -2276,7 +2267,7 @@ func (wh *WorkflowHandler) SignalWorkflowExecution(ctx context.Context, request return nil, errRequestIDTooLong } - if err := wh.validateLinks(namespace.Name(request.GetNamespace()), request.GetLinks()); err != nil { + if err := wh.validator.ValidateLinks(request.GetNamespace(), request.GetLinks()); err != nil { return nil, err } @@ -2326,90 +2317,25 @@ func (wh *WorkflowHandler) SignalWithStartWorkflowExecution(ctx context.Context, return nil, errRequestNotSet } - // Apply defaults before validation; must be first for idempotency on internal retries. 
- enums.SetDefaultWorkflowIDPolicies( - &request.WorkflowIdReusePolicy, - &request.WorkflowIdConflictPolicy, - enumspb.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING, - ) - - if err := wh.validateWorkflowID(request.GetWorkflowId()); err != nil { - return nil, err - } - - if request.GetSignalName() == "" { - return nil, errSignalNameNotSet - } - - if len(request.GetSignalName()) > wh.config.MaxIDLengthLimit() { - return nil, errSignalNameTooLong - } - - if request.WorkflowType == nil || request.WorkflowType.GetName() == "" { - return nil, errWorkflowTypeNotSet - } - - if len(request.WorkflowType.GetName()) > wh.config.MaxIDLengthLimit() { - return nil, errWorkflowTypeTooLong - } - - namespaceName := namespace.Name(request.GetNamespace()) - if err := tqid.NormalizeAndValidateUserDefined(request.TaskQueue, "", "", wh.config.MaxIDLengthLimit()); err != nil { - return nil, err - } - - if err := validateRequestId(&request.RequestId, wh.config.MaxIDLengthLimit()); err != nil { - return nil, err - } - - if err := wh.validateSignalWithStartWorkflowTimeouts(request); err != nil { - return nil, err - } - - if err := wh.validateRetryPolicy(namespaceName, request.RetryPolicy); err != nil { - return nil, err - } - - if err := wh.validateWorkflowStartDelay(request.GetCronSchedule(), request.WorkflowStartDelay); err != nil { - return nil, err - } - - if err := wh.validateWorkflowIdReusePolicy( - namespaceName, - request.WorkflowIdReusePolicy, - request.WorkflowIdConflictPolicy, - ); err != nil { - return nil, err - } - - if request.WorkflowIdConflictPolicy == enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL { - // Signal-with-*Required*-Start is not supported - name := enumspb.WorkflowIdConflictPolicy_name[int32(request.WorkflowIdConflictPolicy.Number())] - return nil, serviceerror.NewInvalidArgumentf(errUnsupportedIDConflictPolicy, name) - } - if err := backoff.ValidateSchedule(request.GetCronSchedule()); err != nil { return nil, err } - sa, err := 
wh.unaliasedSearchAttributesFrom(request.GetSearchAttributes(), namespaceName) - if err != nil { - return nil, err - } - if sa != request.GetSearchAttributes() { - // cloning here so in case of retry the field is set to the current search attributes + // The validator will modify the request proto if there are any search attributes to validate. + // To avoid modifying the caller's object, which may be reused on retry, we clone the request here if needed. + if len(request.GetSearchAttributes().GetIndexedFields()) > 0 { request = common.CloneProto(request) - request.SearchAttributes = sa } - if err := priorities.Validate(request.Priority); err != nil { + if err := wh.validator.ValidateSignalWithStartRequest(request); err != nil { return nil, err } - if err := wh.validateLinks(namespaceName, request.GetLinks()); err != nil { + if err := wh.validator.ValidateLinks(request.GetNamespace(), request.GetLinks()); err != nil { return nil, err } + namespaceName := namespace.Name(request.GetNamespace()) if err := wh.validateTimeSkippingConfig(request.GetTimeSkippingConfig(), namespaceName); err != nil { return nil, err } @@ -2499,7 +2425,7 @@ func (wh *WorkflowHandler) TerminateWorkflowExecution(ctx context.Context, reque return nil, err } - if err := wh.validateLinks(namespace.Name(request.GetNamespace()), request.GetLinks()); err != nil { + if err := wh.validator.ValidateLinks(request.GetNamespace(), request.GetLinks()); err != nil { return nil, err } @@ -3535,7 +3461,7 @@ func (wh *WorkflowHandler) createScheduleCHASM( // Search attribute validation happens as part of unaliasing on the V1 codepath, // must be done explicitly here (even though we aren't using the unaliased // attributes). 
- if _, err = wh.unaliasedSearchAttributesFrom(request.GetSearchAttributes(), namespaceName); err != nil { + if _, err = wh.validator.UnaliasedSearchAttributesFrom(request.GetSearchAttributes(), request.Namespace); err != nil { return nil, err } @@ -3629,7 +3555,7 @@ func (wh *WorkflowHandler) createScheduleWorkflow( // Add namespace division before unaliasing search attributes. searchattribute.AddSearchAttribute(&request.SearchAttributes, sadefs.TemporalNamespaceDivision, payload.EncodeString(scheduler.NamespaceDivision)) - sa, err := wh.unaliasedSearchAttributesFrom(request.GetSearchAttributes(), namespaceName) + sa, err := wh.validator.UnaliasedSearchAttributesFrom(request.GetSearchAttributes(), request.Namespace) if err != nil { return nil, err } @@ -3808,7 +3734,7 @@ func (wh *WorkflowHandler) CreateSchedule( // We apply this validation to both V1 and V2 schedules, even though CHASM // schedules don't need the workflow ID prefix, so that we can roll back to V1 and // not overrun the limit. 
- if err := wh.validateWorkflowID(workflowID); err != nil { + if err := wh.validator.ValidateWorkflowID(workflowID); err != nil { return nil, err } @@ -3900,7 +3826,7 @@ func (wh *WorkflowHandler) validateStartWorkflowArgsForSchedule( return nil } - if err := wh.validateWorkflowID(startWorkflow.WorkflowId + scheduler.AppendedTimestampForValidation); err != nil { + if err := wh.validator.ValidateWorkflowID(startWorkflow.WorkflowId + scheduler.AppendedTimestampForValidation); err != nil { return err } @@ -3917,7 +3843,7 @@ func (wh *WorkflowHandler) validateStartWorkflowArgsForSchedule( return err } - if err := wh.validateStartWorkflowTimeouts(&workflowservice.StartWorkflowExecutionRequest{ + if err := wh.validator.ValidateWorkflowTimeouts(&workflowservice.StartWorkflowExecutionRequest{ WorkflowExecutionTimeout: startWorkflow.WorkflowExecutionTimeout, WorkflowRunTimeout: startWorkflow.WorkflowRunTimeout, WorkflowTaskTimeout: startWorkflow.WorkflowTaskTimeout, @@ -3941,7 +3867,7 @@ func (wh *WorkflowHandler) validateStartWorkflowArgsForSchedule( if err != nil { return err } - return wh.validateSearchAttributes(unaliasedStartWorkflowSas, namespaceName) + return wh.validator.ValidateSearchAttributes(unaliasedStartWorkflowSas, namespaceName.String()) } // [cleanup-wv-pre-release] @@ -4630,7 +4556,7 @@ func (wh *WorkflowHandler) UpdateSchedule( // Both V1 and V2 use unaliasedSearchAttributesFrom for validation, without using // the result. V1 uses UpsertSearchAttributes which expects aliased names, and V2 // lets CHASM handle all visibility aliasing. 
- if _, err = wh.unaliasedSearchAttributesFrom(request.GetSearchAttributes(), namespaceName); err != nil { + if _, err = wh.validator.UnaliasedSearchAttributesFrom(request.GetSearchAttributes(), request.GetNamespace()); err != nil { return nil, err } @@ -6288,13 +6214,6 @@ func (wh *WorkflowHandler) RespondNexusTaskFailed(ctx context.Context, request * return &workflowservice.RespondNexusTaskFailedResponse{}, nil } -func (wh *WorkflowHandler) validateSearchAttributes(searchAttributes *commonpb.SearchAttributes, namespaceName namespace.Name) error { - if err := wh.saValidator.Validate(searchAttributes, namespaceName.String()); err != nil { - return err - } - return wh.saValidator.ValidateSize(searchAttributes, namespaceName.String()) -} - func (wh *WorkflowHandler) validateVersionRuleBuildId(request *workflowservice.UpdateWorkerVersioningRulesRequest) error { validateBuildId := func(bid string) error { if len(bid) > 255 { @@ -6324,22 +6243,6 @@ func (wh *WorkflowHandler) validateVersionRuleBuildId(request *workflowservice.U return nil } -func (wh *WorkflowHandler) validateWorkflowIdReusePolicy( - namespaceName namespace.Name, - reusePolicy enumspb.WorkflowIdReusePolicy, - conflictPolicy enumspb.WorkflowIdConflictPolicy, -) error { - if conflictPolicy != enumspb.WORKFLOW_ID_CONFLICT_POLICY_UNSPECIFIED && - reusePolicy == enumspb.WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING { - return errIncompatibleIDReusePolicyTerminateIfRunning - } - if conflictPolicy == enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING && - reusePolicy == enumspb.WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE { - return errIncompatibleIDReusePolicyRejectDuplicate - } - return nil -} - func (wh *WorkflowHandler) validateOnConflictOptions(opts *workflowpb.OnConflictOptions) error { if opts == nil { return nil @@ -6679,17 +6582,6 @@ func (wh *WorkflowHandler) checkBadBinary(namespaceEntry *namespace.Namespace, b return nil } -func (wh *WorkflowHandler) validateRetryPolicy(namespaceName 
namespace.Name, retryPolicy *commonpb.RetryPolicy) error { - if retryPolicy == nil { - // By default, if the user does not explicitly set a retry policy for a Workflow, do not perform any retries. - return nil - } - - defaultWorkflowRetrySettings := wh.getDefaultWorkflowRetrySettings(namespaceName.String()) - retrypolicy.EnsureDefaults(retryPolicy, defaultWorkflowRetrySettings) - return retrypolicy.Validate(retryPolicy) -} - func validateRequestId(requestID *string, lenLimit int) error { if requestID == nil { // should never happen, but just in case. @@ -6708,57 +6600,6 @@ func validateRequestId(requestID *string, lenLimit int) error { return nil } -func (wh *WorkflowHandler) validateStartWorkflowTimeouts( - request *workflowservice.StartWorkflowExecutionRequest, -) error { - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowExecutionTimeout()); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowExecutionTimeoutSeconds, err) - } - - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowRunTimeout()); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowRunTimeoutSeconds, err) - } - - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowTaskTimeout()); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowTaskTimeoutSeconds, err) - } - - return nil -} - -func (wh *WorkflowHandler) validateSignalWithStartWorkflowTimeouts( - request *workflowservice.SignalWithStartWorkflowExecutionRequest, -) error { - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowExecutionTimeout()); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowExecutionTimeoutSeconds, err) - } - - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowRunTimeout()); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowRunTimeoutSeconds, err) - } - - if err := timestamp.ValidateAndCapProtoDuration(request.GetWorkflowTaskTimeout()); err != nil { - return 
fmt.Errorf("%w cause: %v", errInvalidWorkflowTaskTimeoutSeconds, err) - } - - return nil -} - -func (wh *WorkflowHandler) validateWorkflowStartDelay( - cronSchedule string, - startDelay *durationpb.Duration, -) error { - if len(cronSchedule) > 0 && startDelay != nil { - return errCronAndStartDelaySet - } - - if err := timestamp.ValidateAndCapProtoDuration(startDelay); err != nil { - return fmt.Errorf("%w cause: %v", errInvalidWorkflowStartDelaySeconds, err) - } - - return nil -} - func (wh *WorkflowHandler) metricsScope(ctx context.Context) metrics.Handler { return interceptor.GetMetricsHandlerFromContext(ctx, wh.logger) } diff --git a/service/frontend/workflow_handler_test.go b/service/frontend/workflow_handler_test.go index 4fd23f7de92..42261872ee3 100644 --- a/service/frontend/workflow_handler_test.go +++ b/service/frontend/workflow_handler_test.go @@ -39,6 +39,7 @@ import ( taskqueuespb "go.temporal.io/server/api/taskqueue/v1" "go.temporal.io/server/chasm/lib/callback" "go.temporal.io/server/chasm/lib/nexusoperation" + "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" "go.temporal.io/server/common/archiver" "go.temporal.io/server/common/archiver/provider" @@ -51,6 +52,7 @@ import ( "go.temporal.io/server/common/payload" "go.temporal.io/server/common/payloads" "go.temporal.io/server/common/persistence" + "go.temporal.io/server/common/persistence/visibility" "go.temporal.io/server/common/persistence/visibility/manager" "go.temporal.io/server/common/persistence/visibility/store/elasticsearch" "go.temporal.io/server/common/primitives" @@ -181,6 +183,19 @@ func (s *WorkflowHandlerSuite) getWorkflowHandler(config *Config) *WorkflowHandl } }, ) + saValidator := searchattribute.NewValidator( + s.mockResource.GetSearchAttributesProvider(), + s.mockResource.GetSearchAttributesMapperProvider(), + config.SearchAttributesNumberOfKeysLimit, + config.SearchAttributesSizeOfValueLimit, + config.SearchAttributesTotalSizeLimit, + 
s.mockResource.GetVisibilityManager(), + visibility.AllowListForValidation( + s.mockResource.GetVisibilityManager().GetStoreNames(), + config.VisibilityAllowList, + ), + config.SuppressErrorSetSystemSearchAttribute, + ) return NewWorkflowHandler( cbValidator, config, @@ -200,6 +215,7 @@ func (s *WorkflowHandlerSuite) getWorkflowHandler(config *Config) *WorkflowHandl s.mockResource.GetNamespaceRegistry(), s.mockResource.GetSearchAttributesMapperProvider(), s.mockResource.GetSearchAttributesProvider(), + saValidator, s.mockResource.GetClusterMetadata(), s.mockResource.GetArchivalMetadata(), health.NewServer(), @@ -220,6 +236,11 @@ func (s *WorkflowHandlerSuite) getWorkflowHandler(config *Config) *WorkflowHandl ), nil, // Not testing CHASM registry here quotas.NoopRequestRateLimiter, + workflow.NewValidator( + workflow.NewConfig(dc.NewNoopCollection()), + s.mockSearchAttributesMapperProvider, + saValidator, + ), ) } @@ -383,7 +404,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_WorkflowIdNotSe } _, err := wh.StartWorkflowExecution(context.Background(), startWorkflowExecutionRequest) s.Error(err) - s.Equal(errWorkflowIDNotSet, err) + s.Equal(workflow.ErrWorkflowIDNotSet, err) } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_WorkflowTypeNotSet() { @@ -470,7 +491,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidExecutio _, err := wh.StartWorkflowExecution(context.Background(), startWorkflowExecutionRequest) var invalidArg *serviceerror.InvalidArgument s.ErrorAs(err, &invalidArg) - s.ErrorContains(err, errInvalidWorkflowExecutionTimeoutSeconds.Error()) + s.ErrorContains(err, "An invalid WorkflowExecutionTimeoutSeconds is set on request") } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidRunTimeout() { @@ -500,7 +521,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidRunTimeo _, err := wh.StartWorkflowExecution(context.Background(), 
startWorkflowExecutionRequest) var invalidArg *serviceerror.InvalidArgument s.ErrorAs(err, &invalidArg) - s.ErrorContains(err, errInvalidWorkflowRunTimeoutSeconds.Error()) + s.ErrorContains(err, "An invalid WorkflowRunTimeoutSeconds is set on request") } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_EnsureNonNilRetryPolicyInitialized() { @@ -582,7 +603,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidTaskTime _, err := wh.StartWorkflowExecution(context.Background(), startWorkflowExecutionRequest) var invalidArg *serviceerror.InvalidArgument s.ErrorAs(err, &invalidArg) - s.ErrorContains(err, errInvalidWorkflowTaskTimeoutSeconds.Error()) + s.ErrorContains(err, "An invalid WorkflowTaskTimeoutSeconds is set on request") } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_CronAndStartDelaySet() { @@ -613,7 +634,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_CronAndStartDel WorkflowStartDelay: durationpb.New(10 * time.Second), } _, err := wh.StartWorkflowExecution(context.Background(), startWorkflowExecutionRequest) - s.ErrorIs(err, errCronAndStartDelaySet) + s.ErrorIs(err, workflow.ErrCronAndStartDelaySet) } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidStartDelay() { @@ -646,7 +667,7 @@ func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_Failed_InvalidStartDel _, err := wh.StartWorkflowExecution(context.Background(), startWorkflowExecutionRequest) var invalidArg *serviceerror.InvalidArgument s.ErrorAs(err, &invalidArg) - s.ErrorContains(err, errInvalidWorkflowStartDelaySeconds.Error()) + s.ErrorContains(err, workflow.ErrInvalidWorkflowStartDelaySeconds.Error()) } func (s *WorkflowHandlerSuite) TestStartWorkflowExecution_InvalidWorkflowIdReusePolicy_TerminateIfRunning() { @@ -4121,7 +4142,7 @@ func (s *WorkflowHandlerSuite) TestExecuteMultiOperation() { }) s.Nil(resp) - assertMultiOpsErr([]error{errWorkflowIDNotSet, errMultiOpAborted}, err) + 
assertMultiOpsErr([]error{workflow.ErrWorkflowIDNotSet, errMultiOpAborted}, err) }) // unique to MultiOperation: diff --git a/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go b/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go index c6a27ea92a3..ec6d66004c0 100644 --- a/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go +++ b/service/history/api/signalwithstartworkflow/signal_with_start_workflow.go @@ -27,10 +27,11 @@ func SignalWithStartWorkflow( startRequest *historyservice.StartWorkflowExecutionRequest, signalWithStartRequest *workflowservice.SignalWithStartWorkflowExecutionRequest, ) (string, bool, error) { - // workflow is running and restart was not requested + // workflow is running and restart was not requested, and conflict policy is to use existing if currentWorkflowLease != nil && currentWorkflowLease.GetMutableState().IsWorkflowExecutionRunning() && - signalWithStartRequest.WorkflowIdConflictPolicy != enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING { + signalWithStartRequest.WorkflowIdConflictPolicy != enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING && + signalWithStartRequest.WorkflowIdConflictPolicy != enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL { // current workflow exists & running if err := signalWorkflow( diff --git a/service/history/fx.go b/service/history/fx.go index 313f29fd45d..58d454ba821 100644 --- a/service/history/fx.go +++ b/service/history/fx.go @@ -8,7 +8,9 @@ import ( "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/chasm" "go.temporal.io/server/chasm/lib/activity" + "go.temporal.io/server/chasm/lib/callback" chasmnexus "go.temporal.io/server/chasm/lib/nexusoperation" + "go.temporal.io/server/chasm/lib/scheduler" chasmworkflow "go.temporal.io/server/chasm/lib/workflow" "go.temporal.io/server/common" commoncache "go.temporal.io/server/common/cache" @@ -57,8 +59,10 @@ import ( var Module = fx.Options( resource.Module, + 
fx.Provide(resource.SearchAttributeValidatorProvider), fx.Provide(hsm.NewRegistry), workflow.Module, + shard.Module, events.Module, cache.Module, @@ -89,6 +93,7 @@ var Module = fx.Options( fx.Provide(EventNotifierProvider), fx.Provide(HistoryEngineFactoryProvider), fx.Provide(HandlerProvider), + fx.Provide(HistoryServiceServerProvider), fx.Provide(ServerProvider), fx.Provide(NewService), fx.Provide(ReplicationProgressCacheProvider), @@ -101,14 +106,21 @@ var Module = fx.Options( hsmnexusoperations.Module, fx.Invoke(hsmnexusworkflow.RegisterCommandHandlers), activity.HistoryModule, + scheduler.Module, + callback.Module, chasmnexus.Module, chasmworkflow.Module, + chasmworkflow.HistoryHandlerModule, ) func ServerProvider(grpcServerOptions []grpc.ServerOption) *grpc.Server { return grpc.NewServer(grpcServerOptions...) } +func HistoryServiceServerProvider(handler *Handler) historyservice.HistoryServiceServer { + return handler +} + func ServiceResolverProvider( membershipMonitor membership.Monitor, ) (membership.ServiceResolver, error) { diff --git a/service/history/handler.go b/service/history/handler.go index 41c36f1e2a3..4bd2c154d55 100644 --- a/service/history/handler.go +++ b/service/history/handler.go @@ -48,6 +48,7 @@ import ( "go.temporal.io/server/common/persistence/visibility/manager" "go.temporal.io/server/common/primitives/timestamp" "go.temporal.io/server/common/rpc/interceptor" + sdkconverter "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" serviceerrors "go.temporal.io/server/common/serviceerror" "go.temporal.io/server/common/tasktoken" @@ -2507,7 +2508,7 @@ func (h *Handler) StartNexusOperation( response := &nexuspb.StartOperationResponse{} switch r := result.(type) { case interface{ ValueAsAny() any }: - ps, err := payloads.Encode(r.ValueAsAny()) + ps, err := sdkconverter.PreferProtoDataConverter.ToPayloads(r.ValueAsAny()) if err != nil { h.logger.Error("failed to encode payload", tag.Error(err), 
tag.RequestID(requestID)) return nil, serviceerror.NewInternal("internal error (request ID: " + requestID + ")") diff --git a/service/worker/fx.go b/service/worker/fx.go index d1e735e4b9c..167803abc9b 100644 --- a/service/worker/fx.go +++ b/service/worker/fx.go @@ -7,6 +7,8 @@ import ( wcicomponent "go.temporal.io/auto-scaled-workers/wci/workercomponent" "go.temporal.io/server/api/adminservice/v1" "go.temporal.io/server/chasm" + "go.temporal.io/server/chasm/lib/callback" + chasmscheduler "go.temporal.io/server/chasm/lib/scheduler" "go.temporal.io/server/chasm/lib/scheduler/gen/schedulerpb/v1" "go.temporal.io/server/client" "go.temporal.io/server/common" @@ -45,6 +47,8 @@ var Module = fx.Options( migration.Module, resource.Module, deletenamespace.Module, + chasmscheduler.Module, + callback.Module, scheduler.Module, batcher.Module, workerdeployment.Module, diff --git a/temporal/fx.go b/temporal/fx.go index 8905c1b5567..5378cd57302 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -21,8 +21,6 @@ import ( "go.temporal.io/api/serviceerror" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/chasm" - chasmcallback "go.temporal.io/server/chasm/lib/callback" - chasmscheduler "go.temporal.io/server/chasm/lib/scheduler" "go.temporal.io/server/client" "go.temporal.io/server/common/archiver" "go.temporal.io/server/common/archiver/provider" @@ -151,12 +149,6 @@ var ( FxLogAdapter, fx.Invoke(ServerLifetimeHooks), ) - - ChasmLibraryOptions = fx.Options( - chasm.Module, - chasmscheduler.Module, - chasmcallback.Module, - ) ) func NewServerFx(topLevelModule fx.Option, opts ...ServerOption) (*ServerFx, error) { @@ -456,7 +448,7 @@ func (params ServiceProviderParamsCommon) GetCommonServiceOptions(serviceName pr resource.DefaultOptions, membershipModule, FxLogAdapter, - ChasmLibraryOptions, + chasm.Module, ) } diff --git a/tests/signal_with_start_from_workflow_test.go b/tests/signal_with_start_from_workflow_test.go new file mode 100644 index 
00000000000..bc5120d2c91 --- /dev/null +++ b/tests/signal_with_start_from_workflow_test.go @@ -0,0 +1,918 @@ +package tests + +import ( + "context" + "maps" + "slices" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + commandpb "go.temporal.io/api/command/v1" + commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + failurepb "go.temporal.io/api/failure/v1" + historypb "go.temporal.io/api/history/v1" + taskqueuepb "go.temporal.io/api/taskqueue/v1" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/api/workflowservice/v1/workflowservicenexus" + "go.temporal.io/sdk/client" + sdkworker "go.temporal.io/sdk/worker" + "go.temporal.io/sdk/workflow" + "go.temporal.io/server/common/dynamicconfig" + commonnexus "go.temporal.io/server/common/nexus" + "go.temporal.io/server/common/payloads" + sdkconverter "go.temporal.io/server/common/sdk" + "go.temporal.io/server/common/testing/await" + "go.temporal.io/server/tests/testcore" + "google.golang.org/protobuf/types/known/durationpb" +) + +// systemNexusSWSWorkflow is an SDK workflow that calls SignalWithStartWorkflowExecution +// via the __temporal_system Nexus endpoint and returns the RunID of the started/signaled +// target workflow. It is used by TestBothWorkflowsVisibleAfterSWSFromWorkflow to verify +// end-to-end SDK serialization against the real server. 
+func systemNexusSWSWorkflow(ctx workflow.Context, req *workflowservice.SignalWithStartWorkflowExecutionRequest) (string, error) { + nc := workflow.NewNexusClient(commonnexus.SystemEndpoint, workflowservicenexus.WorkflowService.ServiceName) + fut := nc.ExecuteOperation(ctx, workflowservicenexus.WorkflowService.SignalWithStartWorkflowExecution, + req, + workflow.NexusOperationOptions{}) + var result workflowservice.SignalWithStartWorkflowExecutionResponse + if err := fut.Get(ctx, &result); err != nil { + return "", err + } + return result.RunId, nil +} + +// sysNexusSWSTargetWorkflow is the workflow started by TestBothWorkflowsVisibleAfterSWSFromWorkflow +// as the SWS target. It waits for "test-signal" and returns the received value. Completing the +// workflow (rather than leaving it running) ensures the Nexus SWS operation's async callback fires +// so that fut.Get() in systemNexusSWSWorkflow can resolve. +func sysNexusSWSTargetWorkflow(ctx workflow.Context) (string, error) { + var received string + workflow.GetSignalChannel(ctx, "test-signal").Receive(ctx, &received) + return received, nil +} + +type SignalWithStartFromWorkflowTestSuite struct { + testcore.FunctionalTestBase // nolint:forbidigo // Will migrate to test env at a later date +} + +func TestSignalWithStartFromWorkflowTestSuite(t *testing.T) { + t.Parallel() + suite.Run(t, new(SignalWithStartFromWorkflowTestSuite)) +} + +func (s *SignalWithStartFromWorkflowTestSuite) SetupSuite() { + s.SetupSuiteWithCluster( + testcore.WithDynamicConfigOverrides(map[dynamicconfig.Key]any{ + dynamicconfig.EnableChasm.Key(): true, + dynamicconfig.EnableSignalWithStartFromWorkflow.Key(): true, + }), + ) +} + +// scheduleAndGetSWSResult dispatches a SignalWithStartWorkflowExecution Nexus operation +// from within a fresh caller workflow via the __temporal_system endpoint, waits for the +// operation to complete or fail, and returns the result. +// +// The caller workflow is terminated before this function returns. 
+// swsReq must NOT set Namespace, RequestId, or Links — the processor populates those from +// the Nexus operation context. +func (s *SignalWithStartFromWorkflowTestSuite) scheduleAndGetSWSResult( + ctx context.Context, + callerTaskQueue string, + swsReq *workflowservice.SignalWithStartWorkflowExecutionRequest, +) (*workflowservice.SignalWithStartWorkflowExecutionResponse, *failurepb.Failure) { + callerRun, err := s.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: callerTaskQueue, + }, "caller-workflow") + s.NoError(err) + defer func() { + _ = s.SdkClient().TerminateWorkflow(ctx, callerRun.GetID(), callerRun.GetRunID(), "test cleanup") + }() + + // First poll: schedule the SWS Nexus operation. + pollResp, err := s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: callerTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{ + { + CommandType: enumspb.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION, + Attributes: &commandpb.Command_ScheduleNexusOperationCommandAttributes{ + ScheduleNexusOperationCommandAttributes: &commandpb.ScheduleNexusOperationCommandAttributes{ + Endpoint: commonnexus.SystemEndpoint, + Service: "WorkflowService", + Operation: "SignalWithStartWorkflowExecution", + Input: payloads.MustEncodeSingle(swsReq), + }, + }, + }, + }, + }) + s.NoError(err) + + // Second poll: wait for the NexusOperationCompleted or NexusOperationFailed event. 
+ pollResp, err = s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: callerTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + + for _, event := range pollResp.History.Events { + if attrs := event.GetNexusOperationCompletedEventAttributes(); attrs != nil { + var resp workflowservice.SignalWithStartWorkflowExecutionResponse + s.NoError(sdkconverter.PreferProtoDataConverter.FromPayloads( + &commonpb.Payloads{Payloads: []*commonpb.Payload{attrs.Result}}, + &resp, + )) + return &resp, nil + } + if attrs := event.GetNexusOperationFailedEventAttributes(); attrs != nil { + return nil, attrs.Failure + } + } + s.Fail("expected NexusOperationCompleted or NexusOperationFailed event in workflow history") + return nil, nil +} + +// startAndCompleteWorkflow starts a workflow and immediately completes it by responding to +// its first workflow task. Returns the run ID of the completed execution. 
+func (s *SignalWithStartFromWorkflowTestSuite) startAndCompleteWorkflow( + ctx context.Context, + workflowID, taskQueue string, +) string { + _, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: workflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.NoError(err) + + pollResp, err := s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: taskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + runID := pollResp.WorkflowExecution.RunId + + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{{ + CommandType: enumspb.COMMAND_TYPE_COMPLETE_WORKFLOW_EXECUTION, + Attributes: &commandpb.Command_CompleteWorkflowExecutionCommandAttributes{ + CompleteWorkflowExecutionCommandAttributes: &commandpb.CompleteWorkflowExecutionCommandAttributes{}, + }, + }}, + }) + s.NoError(err) + return runID +} + +// NOTE: This test cannot use the SDK workflow package because there is a restriction that prevents setting the +// __temporal_system endpoint. 
+func (s *SignalWithStartFromWorkflowTestSuite) TestHappyPath() { + ctx := testcore.NewContext() + taskQueue := testcore.RandomizeStr(s.T().Name()) + + run, err := s.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: taskQueue, + }, "workflow") + s.NoError(err) + + workflowID := testcore.RandomizeStr(s.T().Name()) + + pollResp, err := s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + }, + Identity: "test", + }) + s.NoError(err) + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{ + { + CommandType: enumspb.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION, + Attributes: &commandpb.Command_ScheduleNexusOperationCommandAttributes{ + ScheduleNexusOperationCommandAttributes: &commandpb.ScheduleNexusOperationCommandAttributes{ + Endpoint: commonnexus.SystemEndpoint, + Service: "WorkflowService", + Operation: "SignalWithStartWorkflowExecution", + Input: payloads.MustEncodeSingle(&workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: workflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{ + Name: "workflow", + }, + TaskQueue: &taskqueuepb.TaskQueue{ + Name: s.T().Name(), + }, + }), + }, + }, + }, + }, + }) + s.NoError(err) + + // Poll for the completion + pollResp, err = s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{ + Name: taskQueue, + Kind: enumspb.TASK_QUEUE_KIND_NORMAL, + }, + Identity: "test", + }) + s.NoError(err) + + // Find the NexusOperationCompleted event + completedEventIdx := slices.IndexFunc(pollResp.History.Events, func(e *historypb.HistoryEvent) bool { + return 
e.GetNexusOperationCompletedEventAttributes() != nil + }) + s.Positive(completedEventIdx, "Should have a NexusOperationCompleted event") + + // Verify the result contains the echoed request ID + completedEvent := pollResp.History.Events[completedEventIdx] + result := completedEvent.GetNexusOperationCompletedEventAttributes().Result + s.NotNil(result) + + // Complete the workflow + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{ + { + CommandType: enumspb.COMMAND_TYPE_COMPLETE_WORKFLOW_EXECUTION, + Attributes: &commandpb.Command_CompleteWorkflowExecutionCommandAttributes{ + CompleteWorkflowExecutionCommandAttributes: &commandpb.CompleteWorkflowExecutionCommandAttributes{ + Result: &commonpb.Payloads{ + Payloads: []*commonpb.Payload{result}, + }, + }, + }, + }, + }, + }) + s.NoError(err) + var response workflowservice.SignalWithStartWorkflowExecutionResponse + s.NoError(run.Get(ctx, &response)) + s.True(response.Started) + + // Verify the linkage from the handler workflow in the caller's history. 
+ it := s.SdkClient().GetWorkflowHistory(ctx, run.GetID(), run.GetRunID(), false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + var opScheduledEvent *historypb.HistoryEvent + var opCompletedEvent *historypb.HistoryEvent + for it.HasNext() { + ev, err := it.Next() + s.NoError(err) + if ev.GetNexusOperationScheduledEventAttributes() != nil { + opScheduledEvent = ev + } + if ev.GetNexusOperationCompletedEventAttributes() != nil { + opCompletedEvent = ev + break + } + } + s.NotNil(opScheduledEvent, "Should have found NexusOperationScheduled event in history") + s.NotNil(opCompletedEvent, "Should have found NexusOperationCompleted event in history") + s.Len(opCompletedEvent.Links, 1) + link := opCompletedEvent.Links[0] + s.Equal(workflowID, link.GetWorkflowEvent().GetWorkflowId()) + // s.Equal(response.RunID, link.GetWorkflowEvent().GetRunId()) + s.Equal(opScheduledEvent.GetNexusOperationScheduledEventAttributes().GetRequestId(), link.GetWorkflowEvent().GetRequestIdRef().GetRequestId()) + + // Verify the linkage from the caller workflow in the handler's history. + // it = s.SdkClient().GetWorkflowHistory(ctx, workflowID, response.RunID, false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + it = s.SdkClient().GetWorkflowHistory(ctx, workflowID, "", false, enumspb.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT) + var wfStartedEvent *historypb.HistoryEvent + for it.HasNext() { + ev, err := it.Next() + s.NoError(err) + if ev.GetWorkflowExecutionStartedEventAttributes() != nil { + wfStartedEvent = ev + break + } + } + s.NotNil(wfStartedEvent, "Should have found WorkflowExecutionStarted event in history") + s.Len(wfStartedEvent.Links, 1) + link = wfStartedEvent.Links[0] + s.Equal(run.GetID(), link.GetWorkflowEvent().GetWorkflowId()) + s.Equal(run.GetRunID(), link.GetWorkflowEvent().GetRunId()) + s.Equal(opScheduledEvent.GetEventId(), link.GetWorkflowEvent().GetEventRef().EventId) + + // Verify the request ID info is recorded correctly in the handler workflow's description. 
+ desc, err := s.SdkClient().DescribeWorkflowExecution(ctx, workflowID, response.GetRunId()) + s.NoError(err) + requestIDInfos := desc.GetWorkflowExtendedInfo().GetRequestIdInfos() + requestID := slices.Collect(maps.Keys(requestIDInfos))[0] + s.Equal(opScheduledEvent.GetNexusOperationScheduledEventAttributes().GetRequestId(), requestID) +} + +// TestSignalExistingWorkflow verifies that SWS called from a workflow signals an already-running +// target workflow without starting a new one (Started=false, RunId unchanged). +func (s *SignalWithStartFromWorkflowTestSuite) TestSignalExistingWorkflow() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + // Start the target workflow and leave it running. + startResp, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: targetWorkflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, startResp.RunId, "test cleanup") + }) + s.NoError(err) + originalRunID := startResp.RunId + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, + }) + + s.Nil(failure) + s.False(resp.Started, "expected Started=false when signaling an existing workflow") + s.Equal(originalRunID, resp.RunId) +} + +// TestStartNewWorkflow 
verifies that SWS called from a workflow starts a new execution when no +// workflow with the given ID exists (Started=true). +func (s *SignalWithStartFromWorkflowTestSuite) TestStartNewWorkflow() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + + s.Nil(failure) + s.True(resp.Started, "expected Started=true when starting a new workflow") + s.NotEmpty(resp.RunId) +} + +// TestSignalTerminatedWorkflow verifies that SWS starts a fresh run when the target workflow +// has been terminated (Started=true, new RunId). +func (s *SignalWithStartFromWorkflowTestSuite) TestSignalTerminatedWorkflow() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + // Start and terminate the target workflow. 
+ startResp, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: targetWorkflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.NoError(err) + originalRunID := startResp.RunId + + err = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, originalRunID, "setup") + s.NoError(err) + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + }) + + s.Nil(failure) + s.True(resp.Started, "expected Started=true when target was terminated") + s.NotEqual(originalRunID, resp.RunId, "expected a new RunId after termination") +} + +// TestIDReusePolicy_RejectDuplicate verifies that SWS fails with WorkflowExecutionAlreadyStarted +// when the target workflow has completed and the reuse policy is REJECT_DUPLICATE. 
+func (s *SignalWithStartFromWorkflowTestSuite) TestIDReusePolicy_RejectDuplicate() { + s.OverrideDynamicConfig(dynamicconfig.WorkflowIdReuseMinimalInterval, 0) + + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + s.startAndCompleteWorkflow(ctx, targetWorkflowID, targetTaskQueue) + + _, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_REJECT_DUPLICATE, + }) + + s.NotNil(failure, "expected the Nexus operation to fail") + s.Contains(failure.GetCause().GetMessage()+failure.GetMessage(), "duplicate") +} + +// TestIDReusePolicy_AllowDuplicate verifies that SWS starts a new run when the target has +// completed and the reuse policy is ALLOW_DUPLICATE (Started=true). 
+func (s *SignalWithStartFromWorkflowTestSuite) TestIDReusePolicy_AllowDuplicate() { + s.OverrideDynamicConfig(dynamicconfig.WorkflowIdReuseMinimalInterval, 0) + + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + s.startAndCompleteWorkflow(ctx, targetWorkflowID, targetTaskQueue) + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE, + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + + s.Nil(failure) + s.True(resp.Started, "expected Started=true with ALLOW_DUPLICATE after completion") + s.NotEmpty(resp.RunId) +} + +// TestIDReusePolicy_AllowDuplicateFailedOnly covers two sub-cases for ALLOW_DUPLICATE_FAILED_ONLY: +// 1. Target completed successfully → SWS fails (already started error). +// 2. Target was terminated → SWS starts a new run (Started=true). +func (s *SignalWithStartFromWorkflowTestSuite) TestIDReusePolicy_AllowDuplicateFailedOnly() { + s.OverrideDynamicConfig(dynamicconfig.WorkflowIdReuseMinimalInterval, 0) + + ctx := testcore.NewContext() + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + // Sub-case 1: target completed successfully → should fail. 
+ s.startAndCompleteWorkflow(ctx, targetWorkflowID, targetTaskQueue) + + _, failure := s.scheduleAndGetSWSResult( + ctx, + testcore.RandomizeStr(s.T().Name()), + &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE_FAILED_ONLY, + }, + ) + s.NotNil(failure, "expected failure when completed workflow + ALLOW_DUPLICATE_FAILED_ONLY") + + // Sub-case 2: target terminated → should start a new run. + startResp, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: targetWorkflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.NoError(err) + err = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, startResp.RunId, "setup") + s.NoError(err) + + resp, failure := s.scheduleAndGetSWSResult( + ctx, + testcore.RandomizeStr(s.T().Name()), + &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdReusePolicy: enumspb.WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE_FAILED_ONLY, + }, + ) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + s.Nil(failure) + s.True(resp.Started, "expected Started=true after terminated workflow + ALLOW_DUPLICATE_FAILED_ONLY") +} + +// TestIDConflictPolicy_TerminateExisting verifies that SWS terminates a running workflow and +// starts a new one when the conflict policy is 
TERMINATE_EXISTING (Started=true, new RunId, +// original run terminated). +func (s *SignalWithStartFromWorkflowTestSuite) TestIDConflictPolicy_TerminateExisting() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + startResp, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: targetWorkflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.NoError(err) + originalRunID := startResp.RunId + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_TERMINATE_EXISTING, + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + + s.Nil(failure) + s.True(resp.Started, "expected Started=true with TERMINATE_EXISTING") + s.NotEqual(originalRunID, resp.RunId, "expected a new RunId") + + // Verify the original run was terminated. 
+ desc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: targetWorkflowID, RunId: originalRunID}, + }) + s.NoError(err) + s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_TERMINATED, desc.WorkflowExecutionInfo.Status) +} + +// TestIDConflictPolicy_UseExisting verifies that SWS signals an existing running workflow and +// returns its RunId without starting a new one (Started=false) when the conflict policy is +// USE_EXISTING. +func (s *SignalWithStartFromWorkflowTestSuite) TestIDConflictPolicy_UseExisting() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + startResp, err := s.FrontendClient().StartWorkflowExecution(ctx, &workflowservice.StartWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + WorkflowId: targetWorkflowID, + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + RequestId: uuid.NewString(), + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, startResp.RunId, "test cleanup") + }) + s.NoError(err) + originalRunID := startResp.RunId + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING, + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + s.Nil(failure) + s.False(resp.Started, "expected 
Started=false with USE_EXISTING") + s.Equal(originalRunID, resp.RunId) +} + +// TestIDConflictPolicy_Fail verifies that SWS from a workflow rejects +// WORKFLOW_ID_CONFLICT_POLICY_FAIL with the same validation error as the frontend +// SignalWithStartWorkflowExecution API outside a workflow context: signal-with-required-start +// is not a supported operation. The validation error surfaces here as a workflow task failure +// on the ScheduleNexusOperation command (BadScheduleNexusOperationAttributes). +func (s *SignalWithStartFromWorkflowTestSuite) TestIDConflictPolicy_Fail() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + callerRun, err := s.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: callerTaskQueue, + }, "caller-workflow") + s.NoError(err) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, callerRun.GetID(), callerRun.GetRunID(), "test cleanup") + }) + + pollResp, err := s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: callerTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + + swsReq := &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowIdConflictPolicy: enumspb.WORKFLOW_ID_CONFLICT_POLICY_FAIL, + } + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{ + { + CommandType: enumspb.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION, + Attributes: 
&commandpb.Command_ScheduleNexusOperationCommandAttributes{ + ScheduleNexusOperationCommandAttributes: &commandpb.ScheduleNexusOperationCommandAttributes{ + Endpoint: commonnexus.SystemEndpoint, + Service: "WorkflowService", + Operation: "SignalWithStartWorkflowExecution", + Input: payloads.MustEncodeSingle(swsReq), + }, + }, + }, + }, + }) + s.Error(err, "expected ScheduleNexusOperation to be rejected with CONFLICT_POLICY_FAIL") + s.Contains(err.Error(), "WORKFLOW_ID_CONFLICT_POLICY_FAIL is not supported") +} + +// TestBothWorkflowsVisibleAfterSWSFromWorkflow verifies that when SignalWithStart is invoked +// from a real SDK workflow via the __temporal_system Nexus endpoint: +// 1. A new target workflow is started (the caller workflow returns its RunID). +// 2. Both the caller (completed) and target (completed after receiving the signal) are visible. +// 3. The memo passed in the SWS request appears on the target workflow. +// 4. The signal arrives in the target with the correct name and input payload. +// +// Unlike the other tests in this file, this test exercises the SDK's payload-serialization +// path (the system-nexus payload converter) end-to-end against the real embedded server, +// complementing the injector-based SDK unit test in sdk-go#2293. +func (s *SignalWithStartFromWorkflowTestSuite) TestBothWorkflowsVisibleAfterSWSFromWorkflow() { + // go.temporal.io/sdk@v1.41.1 (and earlier) panics in workflow.NewNexusClient when the + // endpoint name starts with the reserved "__temporal_" prefix. This test exercises the + // __temporal_system endpoint via the SDK Nexus client and cannot pass until an SDK + // release lifts that check. The proto-binary variant + // (TestBothWorkflowsVisibleAfterSWSFromWorkflowProtoBinary) covers the same scenario by + // driving the workflow task manually, so coverage is not lost in the meantime. 
+ s.T().Skip("requires SDK release that lifts the __temporal_ endpoint prefix check") + + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) + s.T().Cleanup(cancel) + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + // Stand up dedicated SDK workers for the caller and target workflows. + callerWorker := sdkworker.New(s.SdkClient(), callerTaskQueue, sdkworker.Options{}) + callerWorker.RegisterWorkflow(systemNexusSWSWorkflow) + s.NoError(callerWorker.Start()) + s.T().Cleanup(func() { callerWorker.Stop() }) + + targetWorker := sdkworker.New(s.SdkClient(), targetTaskQueue, sdkworker.Options{}) + targetWorker.RegisterWorkflow(sysNexusSWSTargetWorkflow) + s.NoError(targetWorker.Start()) + s.T().Cleanup(func() { targetWorker.Stop() }) + + // Execute the caller workflow. It calls SWS via the system Nexus endpoint and returns + // the RunID of the newly-started target workflow. 
+ callerRun, err := s.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: callerTaskQueue, + }, systemNexusSWSWorkflow, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "sysNexusSWSTargetWorkflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + Input: &commonpb.Payloads{Payloads: []*commonpb.Payload{{Data: []byte("workflow-input")}}}, + SignalInput: &commonpb.Payloads{Payloads: []*commonpb.Payload{{Data: []byte("signal-input")}}}, + Memo: &commonpb.Memo{Fields: map[string]*commonpb.Payload{"memo-key": {Data: []byte("memo-value")}}}, + }) + s.NoError(err) + s.NotEmpty(callerRun.GetID()) + s.NotEmpty(callerRun.GetRunID()) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, "", "test cleanup") + }) + + // --- Assertion 1: Caller workflow completes and returns the target's RunID. --- + // callerRun.Get blocks until the caller workflow finishes (or the context times out), + // implicitly asserting it reaches COMPLETED status. + var targetRunID string + s.NoError(callerRun.Get(ctx, &targetRunID)) + s.NotEmpty(targetRunID) + + // Confirm COMPLETED via Describe now that we know the caller has finished. + callerDesc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: callerRun.GetID(), RunId: callerRun.GetRunID()}, + }) + s.NoError(err) + s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, callerDesc.WorkflowExecutionInfo.Status) + + // --- Assertion 2: Target workflow completes and returns the signal input value. --- + // GetWorkflow(...).Get blocks until the target workflow finishes, implicitly asserting + // it reaches COMPLETED status. The target returns whatever signal payload it received. 
+ var targetResult string + s.NoError(s.SdkClient().GetWorkflow(ctx, targetWorkflowID, targetRunID).Get(ctx, &targetResult)) + s.Equal("signal-input", targetResult) + + // Confirm COMPLETED via Describe now that we know the target has finished. + targetDesc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: targetWorkflowID, RunId: targetRunID}, + }) + s.NoError(err) + s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED, targetDesc.WorkflowExecutionInfo.Status) + + // --- Assertion 3: Target carries the memo passed in the SWS request. --- + s.Require().NotNil(targetDesc.WorkflowExecutionInfo.Memo) + s.Contains(targetDesc.WorkflowExecutionInfo.Memo.Fields, "memo-key") + + // --- Assertion 4: Signal was delivered with the correct name and input. --- + // Since the target has already completed, its full history is available without polling. + histResp, err := s.FrontendClient().GetWorkflowExecutionHistory(ctx, &workflowservice.GetWorkflowExecutionHistoryRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: targetWorkflowID, RunId: targetRunID}, + }) + s.NoError(err) + var signalEvent *historypb.HistoryEvent + for _, event := range histResp.History.Events { + if event.GetWorkflowExecutionSignaledEventAttributes() != nil { + signalEvent = event + break + } + } + s.Require().NotNil(signalEvent, "expected WorkflowExecutionSignaled event in target history") + s.Equal("test-signal", signalEvent.GetWorkflowExecutionSignaledEventAttributes().SignalName) + var signalInputVal string + s.NoError(payloads.Decode(signalEvent.GetWorkflowExecutionSignaledEventAttributes().Input, &signalInputVal)) + s.Equal("signal-input", signalInputVal) +} + +// TestBothWorkflowsVisibleAfterSWSFromWorkflowProtoBinary is identical to +// TestBothWorkflowsVisibleAfterSWSFromWorkflow but sends the SWS request as 
a proto binary +// (binary/protobuf) payload instead of relying on the SDK's default JSON-proto encoding. +// This exercises the binary/protobuf decode path in nexusOperationProcessorAdapter and +// verifies that the server accepts and correctly processes such requests — matching what +// the Python SDK (and other SDKs that prefer proto binary) sends. +func (s *SignalWithStartFromWorkflowTestSuite) TestBothWorkflowsVisibleAfterSWSFromWorkflowProtoBinary() { + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) + s.T().Cleanup(cancel) + + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + // Start a caller workflow to obtain an initial workflow task. + callerRun, err := s.SdkClient().ExecuteWorkflow(ctx, client.StartWorkflowOptions{ + TaskQueue: callerTaskQueue, + }, "caller-workflow") + s.NoError(err) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, callerRun.GetID(), callerRun.GetRunID(), "test cleanup") + }) + + // Encode the SWS request as binary/protobuf. PreferProtoDataConverter places + // ProtoPayloadConverter first, so proto messages are marshalled to binary/protobuf + // rather than the JSON proto encoding that the SDK uses by default. 
+ swsReq := &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + Memo: &commonpb.Memo{Fields: map[string]*commonpb.Payload{"memo-key": {Data: []byte("memo-value")}}}, + } + pls, err := sdkconverter.PreferProtoDataConverter.ToPayloads(swsReq) + s.NoError(err) + s.Require().Len(pls.Payloads, 1) + protoBinaryPayload := pls.Payloads[0] + s.Equal("binary/protobuf", string(protoBinaryPayload.Metadata["encoding"])) + + // First poll: respond with a ScheduleNexusOperation command carrying the proto binary input. + pollResp, err := s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: callerTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + _, err = s.FrontendClient().RespondWorkflowTaskCompleted(ctx, &workflowservice.RespondWorkflowTaskCompletedRequest{ + Identity: "test", + TaskToken: pollResp.TaskToken, + Commands: []*commandpb.Command{ + { + CommandType: enumspb.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION, + Attributes: &commandpb.Command_ScheduleNexusOperationCommandAttributes{ + ScheduleNexusOperationCommandAttributes: &commandpb.ScheduleNexusOperationCommandAttributes{ + Endpoint: commonnexus.SystemEndpoint, + Service: "WorkflowService", + Operation: "SignalWithStartWorkflowExecution", + Input: protoBinaryPayload, + }, + }, + }, + }, + }) + s.NoError(err) + + // Second poll: wait for NexusOperationCompleted or NexusOperationFailed. 
+ pollResp, err = s.FrontendClient().PollWorkflowTaskQueue(ctx, &workflowservice.PollWorkflowTaskQueueRequest{ + Namespace: s.Namespace().String(), + TaskQueue: &taskqueuepb.TaskQueue{Name: callerTaskQueue, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + Identity: "test", + }) + s.NoError(err) + + var sswResp workflowservice.SignalWithStartWorkflowExecutionResponse + for _, event := range pollResp.History.Events { + if attrs := event.GetNexusOperationCompletedEventAttributes(); attrs != nil { + s.NoError(sdkconverter.PreferProtoDataConverter.FromPayloads( + &commonpb.Payloads{Payloads: []*commonpb.Payload{attrs.Result}}, + &sswResp, + )) + } + if attrs := event.GetNexusOperationFailedEventAttributes(); attrs != nil { + s.Fail("expected NexusOperationCompleted but got NexusOperationFailed: " + attrs.Failure.GetMessage()) + } + } + + // The operation must have started a new workflow. + s.True(sswResp.Started, "expected Started=true for proto binary encoded SWS request") + s.NotEmpty(sswResp.RunId) + + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, sswResp.RunId, "test cleanup") + }) + + // Both workflows must be visible. 
+ callerDesc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: callerRun.GetID(), RunId: callerRun.GetRunID()}, + }) + s.NoError(err) + s.Equal(enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, callerDesc.WorkflowExecutionInfo.Status) + + targetDesc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: targetWorkflowID, RunId: sswResp.RunId}, + }) + s.NoError(err) + s.Require().NotNil(targetDesc.WorkflowExecutionInfo.Memo) + s.Contains(targetDesc.WorkflowExecutionInfo.Memo.Fields, "memo-key") +} + +// TestStartDelay verifies that SWS with WorkflowStartDelay completes successfully from a +// workflow (Started=true) and that the target workflow eventually becomes running. +func (s *SignalWithStartFromWorkflowTestSuite) TestStartDelay() { + ctx := testcore.NewContext() + callerTaskQueue := testcore.RandomizeStr(s.T().Name()) + targetTaskQueue := testcore.RandomizeStr(s.T().Name() + "-target") + targetWorkflowID := testcore.RandomizeStr(s.T().Name()) + + startDelay := 2 * time.Second + + resp, failure := s.scheduleAndGetSWSResult(ctx, callerTaskQueue, &workflowservice.SignalWithStartWorkflowExecutionRequest{ + WorkflowId: targetWorkflowID, + SignalName: "test-signal", + WorkflowType: &commonpb.WorkflowType{Name: "target-workflow"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: targetTaskQueue}, + WorkflowStartDelay: durationpb.New(startDelay), + }) + s.T().Cleanup(func() { + _ = s.SdkClient().TerminateWorkflow(ctx, targetWorkflowID, resp.RunId, "test cleanup") + }) + s.Nil(failure) + s.True(resp.Started, "expected Started=true with WorkflowStartDelay") + s.NotEmpty(resp.RunId) + + // Verify the workflow eventually becomes running after the delay. 
+ await.Require(s.Context(), s.T(), func(t *await.T) { + desc, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: s.Namespace().String(), + Execution: &commonpb.WorkflowExecution{WorkflowId: targetWorkflowID, RunId: resp.RunId}, + }) + require.NoError(t, err) + require.Equal(t, enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, desc.WorkflowExecutionInfo.Status) + }, startDelay+5*time.Second, 200*time.Millisecond) +} diff --git a/tests/testcore/onebox.go b/tests/testcore/onebox.go index 3c591db3374..8b21acf732f 100644 --- a/tests/testcore/onebox.go +++ b/tests/testcore/onebox.go @@ -189,11 +189,6 @@ type ( const NamespaceCacheRefreshInterval = time.Second -var chasmFxOptions = fx.Options( - temporal.ChasmLibraryOptions, - chasmtests.Module, -) - // newTemporal returns an instance that hosts full temporal in one process func newTemporal(t *testing.T, params *TemporalParams) *TemporalImpl { impl := &TemporalImpl{ @@ -432,7 +427,8 @@ func (c *TemporalImpl) startFrontend() { fx.Populate(&namespaceRegistry, &rpcFactory, &historyRawClient, &matchingRawClient, &schedulerClient, &grpcResolver), temporal.FxLogAdapter, c.getFxOptionsForService(primitives.FrontendService), - chasmFxOptions, + chasm.Module, + chasmtests.Module, ) err := app.Err() if err != nil { @@ -529,7 +525,8 @@ func (c *TemporalImpl) startHistory() { replication.Module, temporal.FxLogAdapter, c.getFxOptionsForService(primitives.HistoryService), - chasmFxOptions, + chasm.Module, + chasmtests.Module, fx.Populate(&namespaceRegistry), fx.Populate(&c.chasmEngine), fx.Populate(&c.chasmVisibilityMgr), @@ -589,7 +586,8 @@ func (c *TemporalImpl) startMatching() { matching.Module, temporal.FxLogAdapter, c.getFxOptionsForService(primitives.MatchingService), - chasmFxOptions, + chasm.Module, + chasmtests.Module, fx.Populate(&namespaceRegistry), ) err := app.Err() @@ -656,7 +654,8 @@ func (c *TemporalImpl) startWorker() { worker.Module, 
temporal.FxLogAdapter, c.getFxOptionsForService(primitives.WorkerService), - chasmFxOptions, + chasm.Module, + chasmtests.Module, fx.Populate(&namespaceRegistry), ) err := app.Err() From 9416eda9c946231ad1e431ba22286cbed1e0b071 Mon Sep 17 00:00:00 2001 From: Prathyush PV Date: Wed, 20 May 2026 15:51:26 -0700 Subject: [PATCH 71/73] Remove client cache in RPCFactory (#9277) ## What changed? Remove client cache from RPCFactory. ## Why? This cache was added to avoid creating multiple connections to the same host for different services. But this cache does not have a way to identify when to close a connection and remove it. So it accumulates connections. This defeats the purpose of having this cache to reduce the number of connections. It just keeps accumulating stale connections instead. All clients that use RPCFactory have their own cache. They can listen to membership changes and clean up stale connections. ## How did you test it? - [x] built - [ ] run locally and tested manually - [ ] covered by existing tests - [ ] added new unit test(s) - [ ] added new functional test(s) --- common/rpc/rpc.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/common/rpc/rpc.go b/common/rpc/rpc.go index 663f5464781..6cd3bf69905 100644 --- a/common/rpc/rpc.go +++ b/common/rpc/rpc.go @@ -13,7 +13,6 @@ import ( "go.temporal.io/api/serviceerror" "go.temporal.io/server/common" - "go.temporal.io/server/common/cache" "go.temporal.io/server/common/config" "go.temporal.io/server/common/convert" "go.temporal.io/server/common/log" @@ -48,8 +47,7 @@ type RPCFactory struct { perServiceDialOptions map[primitives.ServiceName][]grpc.DialOption monitor membership.Monitor // A OnceValues wrapper for createLocalFrontendHTTPClient.
- localFrontendClient func() (*common.FrontendHTTPClient, error) - interNodeGrpcConnections cache.Cache + localFrontendClient func() (*common.FrontendHTTPClient, error) // TODO: Remove these flags once the keepalive settings are rolled out EnableInternodeServerKeepalive bool @@ -88,7 +86,6 @@ func NewFactory( } f.grpcListener = sync.OnceValue(f.createGRPCListener) f.localFrontendClient = sync.OnceValues(f.createLocalFrontendHTTPClient) - f.interNodeGrpcConnections = cache.NewSimple(nil) return f } @@ -231,9 +228,6 @@ func (d *RPCFactory) CreateLocalFrontendGRPCConnection() *grpc.ClientConn { // createInternodeGRPCConnection creates connection for gRPC calls func (d *RPCFactory) createInternodeGRPCConnection(hostName string, serviceName primitives.ServiceName) *grpc.ClientConn { - if c, ok := d.interNodeGrpcConnections.Get(hostName).(*grpc.ClientConn); ok { - return c - } var tlsClientConfig *tls.Config var err error if d.tlsFactory != nil { @@ -244,9 +238,7 @@ func (d *RPCFactory) createInternodeGRPCConnection(hostName string, serviceName } } additionalDialOptions := append([]grpc.DialOption{}, d.perServiceDialOptions[serviceName]...) - c := d.dial(hostName, tlsClientConfig, append(additionalDialOptions, d.getClientKeepAliveConfig(serviceName))...) - d.interNodeGrpcConnections.Put(hostName, c) - return c + return d.dial(hostName, tlsClientConfig, append(additionalDialOptions, d.getClientKeepAliveConfig(serviceName))...) } func (d *RPCFactory) CreateHistoryGRPCConnection(rpcAddress string) *grpc.ClientConn { From 5299df2ca76de8c4a6c0caa800edbc55428caf61 Mon Sep 17 00:00:00 2001 From: Stephan Behnke Date: Wed, 20 May 2026 16:38:10 -0700 Subject: [PATCH 72/73] Remove one WithFxOptionsForService from tests/dlq_test.go (#10338) ## What changed Replaced one `WithFxOptionsForService` with an await loop. 
## Why We want to eliminate `WithFxOptionsForService` as it is blocking us from migrating away from the `onebox.go` approach (which duplicates the fx setup) since we don't want to expose an equivalent method in `temporal/fx.go`. --- tests/dlq_test.go | 67 ++++++++++------------------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/tests/dlq_test.go b/tests/dlq_test.go index 79a9a2867e4..334c6b549b6 100644 --- a/tests/dlq_test.go +++ b/tests/dlq_test.go @@ -6,7 +6,6 @@ import ( "encoding/base64" "encoding/json" "errors" - "fmt" "io" "os" "strconv" @@ -16,6 +15,7 @@ import ( "time" "github.com/google/uuid" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/urfave/cli/v2" enumspb "go.temporal.io/api/enums/v1" @@ -31,6 +31,7 @@ import ( "go.temporal.io/server/common/persistence/serialization" "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/sdk" + "go.temporal.io/server/common/testing/await" "go.temporal.io/server/service/history/queues" "go.temporal.io/server/service/history/tasks" "go.temporal.io/server/tests/testcore" @@ -47,7 +48,6 @@ type ( testcore.FunctionalTestBase dlq persistence.HistoryTaskQueueManager - dlqTasks chan tasks.Task writer bytes.Buffer sdkClientFactory sdk.ClientFactory tdbgApp *cli.App @@ -73,10 +73,6 @@ type ( base queues.Executor suite *DLQSuite } - testDLQWriter struct { - suite *DLQSuite - queues.QueueWriter - } testTaskQueueManager struct { suite *DLQSuite persistence.HistoryTaskQueueManager @@ -93,7 +89,6 @@ func TestDLQSuite(t *testing.T) { } func (s *DLQSuite) SetupSuite() { - s.dlqTasks = make(chan tasks.Task) testPrefix := "dlq-test-terminal-wfts-" s.failingWorkflowIDPrefix.Store(&testPrefix) s.FunctionalTestBase.SetupSuiteWithCluster( @@ -106,14 +101,6 @@ func (s *DLQSuite) SetupSuite() { } }, ), - fx.Decorate( - func(writer queues.QueueWriter) queues.QueueWriter { - return &testDLQWriter{ - QueueWriter: writer, - suite: s, - } - }, - ), 
fx.Decorate( func(m persistence.HistoryTaskQueueManager) persistence.HistoryTaskQueueManager { return &testTaskQueueManager{ @@ -436,31 +423,19 @@ func (s *DLQSuite) executeDoomedWorkflow(ctx context.Context) (sdkclient.Workflo run := s.executeWorkflow(ctx, *s.failingWorkflowIDPrefix.Load()+uuid.NewString()) // Wait for the workflow task to be added to the DLQ. - select { - case <-ctx.Done(): - s.FailNow("timed out waiting for workflow to task to be DLQ'd") - case task := <-s.dlqTasks: - s.Equal(run.GetRunID(), task.GetRunID()) - } - - // Verify that the workflow task is in the DLQ. - task := s.verifyRunIsInDLQ(ctx, run) - dlqMessageID := task.MessageID - return run, dlqMessageID -} - -func (s *DLQSuite) verifyRunIsInDLQ( - ctx context.Context, - run sdkclient.WorkflowRun, -) tdbgtest.DLQMessage[*persistencespb.TransferTaskInfo] { - dlqTasks := s.readDLQTasks(ctx) - for _, task := range dlqTasks { - if task.Payload.RunId == run.GetRunID() { - return task + var found *tdbgtest.DLQMessage[*persistencespb.TransferTaskInfo] + await.Require(ctx, s.T(), func(t *await.T) { + dlqTasks := s.readDLQTasks(t.Context()) + for _, task := range dlqTasks { + if task.Payload.RunId == run.GetRunID() { + found = &task + return + } } - } - s.Fail("workflow task not found in DLQ", run.GetRunID()) - panic("unreachable") + require.Failf(t, "workflow task not found in DLQ", "run ID: %s", run.GetRunID()) + }, dlqTestTimeout, 100*time.Millisecond) + + return run, found.MessageID } // executeWorkflow just executes a simple no-op workflow that returns "hello" and returns the sdk workflow run. @@ -644,20 +619,6 @@ func (s *DLQSuite) readTransferTasks(file *os.File) []tdbgtest.DLQMessage[*persi return dlqTasks } -// EnqueueTask is used to intercept writes to the DLQ, so that we can unblock the test upon completion. 
-func (t *testDLQWriter) EnqueueTask( - ctx context.Context, - request *persistence.EnqueueTaskRequest, -) (*persistence.EnqueueTaskResponse, error) { - res, err := t.QueueWriter.EnqueueTask(ctx, request) - select { - case t.suite.dlqTasks <- request.Task: - case <-ctx.Done(): - return res, fmt.Errorf("interrupted while trying to observe DLQ write: %w", ctx.Err()) - } - return res, err -} - // Wrap is used to wrap the executor with our own faulty one. func (t testExecutorWrapper) Wrap(delegate queues.Executor) queues.Executor { return &testExecutor{ From 7b46553cfe4c401a7177298a1eace72ec75881ea Mon Sep 17 00:00:00 2001 From: Kannan Rajah Date: Wed, 20 May 2026 20:03:03 -0700 Subject: [PATCH 73/73] Fix lint: import ordering and replace require.Eventually with await.RequireTrue Co-Authored-By: Claude Opus 4.6 --- service/history/worker_commands_task_dispatcher.go | 2 +- tests/standalone_activity_test.go | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/service/history/worker_commands_task_dispatcher.go b/service/history/worker_commands_task_dispatcher.go index 126901fe3a5..4079e3ac04e 100644 --- a/service/history/worker_commands_task_dispatcher.go +++ b/service/history/worker_commands_task_dispatcher.go @@ -13,11 +13,11 @@ import ( workerservicepb "go.temporal.io/api/nexusservices/workerservice/v1" taskqueuepb "go.temporal.io/api/taskqueue/v1" "go.temporal.io/server/api/matchingservice/v1" - commonnexus "go.temporal.io/server/common/nexus" "go.temporal.io/server/common/debug" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" + commonnexus "go.temporal.io/server/common/nexus" "go.temporal.io/server/common/resource" "go.temporal.io/server/service/history/configs" "go.temporal.io/server/service/history/tasks" diff --git a/tests/standalone_activity_test.go b/tests/standalone_activity_test.go index e296c4845ef..7166e54d349 100644 --- a/tests/standalone_activity_test.go +++ 
b/tests/standalone_activity_test.go @@ -31,6 +31,7 @@ import ( "go.temporal.io/server/common/payload" "go.temporal.io/server/common/payloads" "go.temporal.io/server/common/tasktoken" + "go.temporal.io/server/common/testing/await" "go.temporal.io/server/common/testing/parallelsuite" "go.temporal.io/server/common/testing/protorequire" "go.temporal.io/server/tests/testcore" @@ -6417,10 +6418,10 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { require.NoError(t, err) var executeReq *workerservicepb.ExecuteCommandsRequest - require.Eventually(t, func() bool { + await.RequireTrue(t, func() bool { executeReq = pollNexusControlQueue() return executeReq != nil - }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue") + }, 15*time.Second, 100*time.Millisecond) require.Len(t, executeReq.Commands, 1) cancelCmd := executeReq.Commands[0].GetCancelActivity() @@ -6460,10 +6461,10 @@ func (s *standaloneActivityTestSuite) TestDispatchCancelCommandToWorker() { require.NoError(t, err) var executeReq *workerservicepb.ExecuteCommandsRequest - require.Eventually(t, func() bool { + await.RequireTrue(t, func() bool { executeReq = pollNexusControlQueue() return executeReq != nil - }, 15*time.Second, 100*time.Millisecond, "cancel command not received on control queue after terminate") + }, 15*time.Second, 100*time.Millisecond) require.Len(t, executeReq.Commands, 1) cancelCmd := executeReq.Commands[0].GetCancelActivity()