Skip to content

Commit 9bd5b80

Browse files
committed
update plans
1 parent e31c3bc commit 9bd5b80

3 files changed

Lines changed: 52 additions & 91 deletions

File tree

native/core/src/execution/merge_as_partial.rs

Lines changed: 29 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! MergeAsPartial wrapper for implementing Spark's PartialMerge aggregate mode.
18+
//! Accumulator wrappers for implementing Spark's PartialMerge aggregate mode.
1919
//!
2020
//! Spark's PartialMerge mode merges intermediate state buffers and outputs intermediate
21-
//! state (not final values). DataFusion has no equivalent mode — `Partial` calls
22-
//! `update_batch` and outputs state, while `Final` calls `merge_batch` and outputs
23-
//! evaluated results.
21+
//! state (not final values). DataFusion's `PartialReduce` mode has the same semantics.
2422
//!
25-
//! This wrapper bridges the gap: it operates under DataFusion's `Partial` mode (which
26-
//! outputs state) but redirects `update_batch` calls to `merge_batch`, giving merge
27-
//! semantics with state output.
23+
//! For mixed-mode aggregates (some expressions PartialMerge, some Partial in the same
24+
//! operator), we use `PartialReduce` mode for the whole operator and wrap Partial
25+
//! expressions with `UpdateAsReduceUDF` to redirect `merge_batch → update_batch`.
2826
2927
use std::any::Any;
3028
use std::fmt::Debug;
@@ -42,51 +40,38 @@ use datafusion::logical_expr::{
4240
use datafusion::physical_expr::aggregate::AggregateFunctionExpr;
4341
use datafusion::scalar::ScalarValue;
4442

45-
/// An AggregateUDF wrapper that gives merge semantics in Partial mode.
43+
/// Wraps a Partial-mode aggregate to work inside a PartialReduce-mode AggregateExec.
4644
///
47-
/// When DataFusion runs an AggregateExec in Partial mode, it calls `update_batch`
48-
/// on each accumulator and outputs `state()`. This wrapper intercepts `update_batch`
49-
/// and redirects it to `merge_batch` on the inner accumulator, effectively
50-
/// implementing PartialMerge: merge inputs, output state.
51-
///
52-
/// We store the inner AggregateUDF (not the AggregateFunctionExpr) to avoid keeping
53-
/// references to UnboundColumn expressions that would panic if evaluated.
45+
/// PartialReduce calls `merge_batch` on all accumulators. Partial expressions need
46+
/// `update_batch` semantics, so this wrapper redirects `merge_batch → update_batch`.
5447
#[derive(Debug)]
55-
pub struct MergeAsPartialUDF {
56-
/// The inner aggregate UDF, cloned from the original expression.
48+
pub struct UpdateAsReduceUDF {
5749
inner_udf: AggregateUDF,
58-
/// Pre-computed return type from the original expression.
5950
return_type: DataType,
60-
/// Pre-computed state fields from the original expression.
6151
cached_state_fields: Vec<FieldRef>,
62-
/// Cached signature that accepts state field types.
6352
signature: Signature,
64-
/// Name for this wrapper.
6553
name: String,
6654
}
6755

68-
impl PartialEq for MergeAsPartialUDF {
56+
impl PartialEq for UpdateAsReduceUDF {
6957
fn eq(&self, other: &Self) -> bool {
7058
self.name == other.name
7159
}
7260
}
7361

74-
impl Eq for MergeAsPartialUDF {}
62+
impl Eq for UpdateAsReduceUDF {}
7563

76-
impl Hash for MergeAsPartialUDF {
64+
impl Hash for UpdateAsReduceUDF {
7765
fn hash<H: Hasher>(&self, state: &mut H) {
7866
self.name.hash(state);
7967
}
8068
}
8169

82-
impl MergeAsPartialUDF {
70+
impl UpdateAsReduceUDF {
8371
pub fn new(inner_expr: &AggregateFunctionExpr) -> Result<Self> {
84-
let name = format!("merge_as_partial_{}", inner_expr.name());
72+
let name = format!("update_as_reduce_{}", inner_expr.name());
8573
let return_type = inner_expr.field().data_type().clone();
8674
let cached_state_fields = inner_expr.state_fields()?;
87-
88-
// Use a permissive signature since we accept state field types which
89-
// vary per aggregate function.
9075
let signature = Signature::variadic_any(Volatility::Immutable);
9176

9277
Ok(Self {
@@ -99,7 +84,7 @@ impl MergeAsPartialUDF {
9984
}
10085
}
10186

102-
impl AggregateUDFImpl for MergeAsPartialUDF {
87+
impl AggregateUDFImpl for UpdateAsReduceUDF {
10388
fn as_any(&self) -> &dyn Any {
10489
self
10590
}
@@ -113,23 +98,16 @@ impl AggregateUDFImpl for MergeAsPartialUDF {
11398
}
11499

115100
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
116-
// In Partial mode, return_type isn't used for output schema (state_fields is).
117-
// Return the inner function's return type for consistency.
118101
Ok(self.return_type.clone())
119102
}
120103

121104
fn state_fields(&self, _args: StateFieldsArgs) -> Result<Vec<FieldRef>> {
122-
// State fields must match the inner aggregate's state fields so that
123-
// the output of this PartialMerge stage is compatible with subsequent
124-
// Final or PartialMerge stages.
125105
Ok(self.cached_state_fields.clone())
126106
}
127107

128108
fn accumulator(&self, args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
129-
// Create the inner accumulator using the provided args (which have the
130-
// correct Column refs, not UnboundColumns).
131109
let inner_acc = self.inner_udf.accumulator(args)?;
132-
Ok(Box::new(MergeAsPartialAccumulator { inner: inner_acc }))
110+
Ok(Box::new(UpdateAsReduceAccumulator { inner: inner_acc }))
133111
}
134112

135113
fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
@@ -141,7 +119,7 @@ impl AggregateUDFImpl for MergeAsPartialUDF {
141119
args: AccumulatorArgs,
142120
) -> Result<Box<dyn GroupsAccumulator>> {
143121
let inner_acc = self.inner_udf.create_groups_accumulator(args)?;
144-
Ok(Box::new(MergeAsPartialGroupsAccumulator {
122+
Ok(Box::new(UpdateAsReduceGroupsAccumulator {
145123
inner: inner_acc,
146124
}))
147125
}
@@ -159,25 +137,23 @@ impl AggregateUDFImpl for MergeAsPartialUDF {
159137
}
160138
}
161139

162-
/// Accumulator wrapper that redirects update_batch to merge_batch.
163-
struct MergeAsPartialAccumulator {
140+
struct UpdateAsReduceAccumulator {
164141
inner: Box<dyn Accumulator>,
165142
}
166143

167-
impl Debug for MergeAsPartialAccumulator {
144+
impl Debug for UpdateAsReduceAccumulator {
168145
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
169-
f.debug_struct("MergeAsPartialAccumulator").finish()
146+
f.debug_struct("UpdateAsReduceAccumulator").finish()
170147
}
171148
}
172149

173-
impl Accumulator for MergeAsPartialAccumulator {
150+
impl Accumulator for UpdateAsReduceAccumulator {
174151
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
175-
// Redirect update to merge — this is the key trick.
176-
self.inner.merge_batch(values)
152+
self.inner.update_batch(values)
177153
}
178154

179155
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
180-
self.inner.merge_batch(states)
156+
self.inner.update_batch(states)
181157
}
182158

183159
fn evaluate(&mut self) -> Result<ScalarValue> {
@@ -193,28 +169,26 @@ impl Accumulator for MergeAsPartialAccumulator {
193169
}
194170
}
195171

196-
/// GroupsAccumulator wrapper that redirects update_batch to merge_batch.
197-
struct MergeAsPartialGroupsAccumulator {
172+
struct UpdateAsReduceGroupsAccumulator {
198173
inner: Box<dyn GroupsAccumulator>,
199174
}
200175

201-
impl Debug for MergeAsPartialGroupsAccumulator {
176+
impl Debug for UpdateAsReduceGroupsAccumulator {
202177
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203-
f.debug_struct("MergeAsPartialGroupsAccumulator").finish()
178+
f.debug_struct("UpdateAsReduceGroupsAccumulator").finish()
204179
}
205180
}
206181

207-
impl GroupsAccumulator for MergeAsPartialGroupsAccumulator {
182+
impl GroupsAccumulator for UpdateAsReduceGroupsAccumulator {
208183
fn update_batch(
209184
&mut self,
210185
values: &[ArrayRef],
211186
group_indices: &[usize],
212187
opt_filter: Option<&BooleanArray>,
213188
total_num_groups: usize,
214189
) -> Result<()> {
215-
// Redirect update to merge — this is the key trick.
216190
self.inner
217-
.merge_batch(values, group_indices, opt_filter, total_num_groups)
191+
.update_batch(values, group_indices, opt_filter, total_num_groups)
218192
}
219193

220194
fn merge_batch(
@@ -225,7 +199,7 @@ impl GroupsAccumulator for MergeAsPartialGroupsAccumulator {
225199
total_num_groups: usize,
226200
) -> Result<()> {
227201
self.inner
228-
.merge_batch(values, group_indices, opt_filter, total_num_groups)
202+
.update_batch(values, group_indices, opt_filter, total_num_groups)
229203
}
230204

231205
fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {

native/core/src/execution/planner.rs

Lines changed: 23 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -974,16 +974,15 @@ impl PhysicalPlanner {
974974
let mode = match agg.mode {
975975
0 => {
976976
if has_mixed_partial_merge {
977-
// Mixed {Partial, PartialMerge}: use Partial mode, wrap
978-
// PartialMerge expressions with MergeAsPartial.
979-
DFAggregateMode::Partial
977+
// Mixed {Partial, PartialMerge}: use PartialReduce so
978+
// PartialMerge expressions use native merge semantics.
979+
// Partial expressions are wrapped to redirect merge→update.
980+
DFAggregateMode::PartialReduce
980981
} else {
981982
DFAggregateMode::Partial
982983
}
983984
}
984985
1 => DFAggregateMode::Final,
985-
// Uniform PartialMerge maps directly to DataFusion's PartialReduce
986-
// which has merge input + state output semantics.
987986
2 => DFAggregateMode::PartialReduce,
988987
other => {
989988
return Err(ExecutionError::GeneralError(format!(
@@ -1003,8 +1002,10 @@ impl PhysicalPlanner {
10031002
.collect();
10041003

10051004
let aggr_expr: Vec<Arc<AggregateFunctionExpr>> = if has_partial_merge {
1006-
// Wrap PartialMerge expressions with MergeAsPartial.
1007-
// State fields in the child's output start at initial_input_buffer_offset.
1005+
// Mixed {Partial, PartialMerge} mode uses PartialReduce so
1006+
// PartialMerge expressions get native merge semantics.
1007+
// Partial expressions need UpdateAsReduce wrappers to redirect
1008+
// merge_batch → update_batch since PartialReduce calls merge_batch.
10081009
let mut state_offset = agg.initial_input_buffer_offset as usize;
10091010
let per_expr_modes: Vec<i32> = if !agg.expr_modes.is_empty() {
10101011
agg.expr_modes.clone()
@@ -1017,35 +1018,31 @@ impl PhysicalPlanner {
10171018
.enumerate()
10181019
.map(|(idx, expr)| {
10191020
if per_expr_modes[idx] == 2 {
1020-
// PartialMerge: wrap with MergeAsPartial
1021-
let state_fields = expr
1021+
// PartialMerge: advance state_offset past this
1022+
// expression's state fields (PartialReduce handles
1023+
// merge natively via merge_expressions column refs).
1024+
let num_state_fields = expr
10221025
.state_fields()
1023-
.map_err(|e| ExecutionError::GeneralError(e.to_string()))?;
1024-
let num_state_fields = state_fields.len();
1025-
1026-
let state_cols: Vec<Arc<dyn PhysicalExpr>> = (0..num_state_fields)
1027-
.map(|i| {
1028-
let col_idx = state_offset + i;
1029-
let field = schema.field(col_idx);
1030-
Arc::new(Column::new(field.name(), col_idx))
1031-
as Arc<dyn PhysicalExpr>
1032-
})
1033-
.collect();
1026+
.map_err(|e| ExecutionError::GeneralError(e.to_string()))?
1027+
.len();
10341028
state_offset += num_state_fields;
1035-
1036-
let merge_udf =
1037-
crate::execution::merge_as_partial::MergeAsPartialUDF::new(
1029+
Ok(Arc::new(expr))
1030+
} else {
1031+
// Partial: wrap with UpdateAsReduce so merge_batch
1032+
// (called by PartialReduce) redirects to update_batch.
1033+
let update_udf =
1034+
crate::execution::merge_as_partial::UpdateAsReduceUDF::new(
10381035
&expr,
10391036
)
10401037
.map_err(|e| ExecutionError::DataFusionError(e.to_string()))?;
1041-
let merge_udf_arc = Arc::new(
1038+
let update_udf_arc = Arc::new(
10421039
datafusion::logical_expr::AggregateUDF::new_from_impl(
1043-
merge_udf,
1040+
update_udf,
10441041
),
10451042
);
10461043

10471044
let merge_expr =
1048-
AggregateExprBuilder::new(merge_udf_arc, state_cols)
1045+
AggregateExprBuilder::new(update_udf_arc, expr.expressions())
10491046
.schema(Arc::clone(&schema))
10501047
.alias(format!("col_{idx}"))
10511048
.with_ignore_nulls(expr.ignore_nulls())
@@ -1056,8 +1053,6 @@ impl PhysicalPlanner {
10561053
})?;
10571054

10581055
Ok(Arc::new(merge_expr))
1059-
} else {
1060-
Ok(Arc::new(expr))
10611056
}
10621057
})
10631058
.collect::<Result<Vec<_>, ExecutionError>>()?

spark/src/main/scala/org/apache/spark/sql/comet/operators.scala

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1432,20 +1432,12 @@ trait CometBaseAggregate {
14321432
hashAggBuilder.addAllResultExprs(resultExprs.map(_.get).asJava)
14331433
Some(builder.setHashAgg(hashAggBuilder).build())
14341434
} else {
1435-
// Validate mode combinations. We support:
1436-
// - All Partial
1437-
// - All Final
1438-
// - All PartialMerge
1439-
// - Mixed {Partial, PartialMerge} (for distinct aggregate plans)
14401435
val isMixedPartialMerge = modeSet == Set(Partial, PartialMerge)
14411436
if (modes.size > 1 && !isMixedPartialMerge) {
14421437
withInfo(aggregate, s"Unsupported mixed aggregation modes: ${modes.mkString(", ")}")
14431438
return None
14441439
}
14451440

1446-
// Determine the proto mode. For uniform modes, use that mode directly.
1447-
// For mixed {Partial, PartialMerge}, use Partial as the base mode since
1448-
// PartialMerge expressions are wrapped with MergeAsPartial on the native side.
14491441
val mode = if (isMixedPartialMerge) {
14501442
CometAggregateMode.Partial
14511443
} else {

0 commit comments

Comments (0)