@@ -32,7 +32,7 @@ import org.apache.spark.sql.execution._
3232import org .apache .spark .sql .internal .SQLConf
3333
3434import org .apache .comet .CometConf ._
35- import org .apache .comet .rules .{CometExecRule , CometReuseSubquery , CometScanRule , EliminateRedundantTransitions }
35+ import org .apache .comet .rules .{CometExecRule , CometPlanAdaptiveDynamicPruningFilters , CometReuseSubquery , CometScanRule , CometSpark34AqeDppFallbackRule , EliminateRedundantTransitions }
3636import org .apache .comet .shims .ShimCometSparkSessionExtensions
3737
3838/**
@@ -43,34 +43,44 @@ import org.apache.comet.shims.ShimCometSparkSessionExtensions
4343 *
4444 * Non-AQE (QueryExecution.preparations):
4545 * {{{
46- * 1. PlanDynamicPruningFilters -- Spark creates DPP filters
46+ * 1. PlanDynamicPruningFilters -- Spark creates non-AQE DPP (SubqueryBroadcastExec)
4747 * 2. PlanSubqueries -- Spark creates SubqueryExec for scalar subqueries
4848 * 3. EnsureRequirements -- Spark inserts shuffles/sorts
4949 * 4. ApplyColumnarRulesAndInsertTransitions:
50- * a. preColumnarTransitions: CometScanRule, CometExecRule (replace Spark -> Comet nodes)
50+ * a. preColumnarTransitions: CometScanRule, CometExecRule
51+ * - CometExecRule.convertSubqueryBroadcasts converts SubqueryBroadcastExec to
52+ * CometSubqueryBroadcastExec for exchange reuse with Comet broadcasts
5153 * b. insertTransitions: ColumnarToRow/RowToColumnar added
5254 * c. postColumnarTransitions: EliminateRedundantTransitions
5355 * 5. ReuseExchangeAndSubquery -- Spark deduplicates subqueries (sees Comet nodes)
5456 * }}}
5557 *
56- * AQE (AdaptiveSparkPlanExec):
58+ * AQE (AdaptiveSparkPlanExec, Spark 3.5+ ):
5759 * {{{
5860 * Initial plan:
59- * queryStagePreparationRules: CometScanRule, CometExecRule (replace Spark -> Comet nodes)
61+ * PlanAdaptiveSubqueries: creates SubqueryAdaptiveBroadcastExec (SAB) for AQE DPP
62+ * queryStagePreparationRules: CometScanRule, CometExecRule
63+ * - CometExecRule.convertSubqueryBroadcasts wraps SABs in
64+ * CometSubqueryAdaptiveBroadcastExec to prevent Spark's
65+ * PlanAdaptiveDynamicPruningFilters from replacing DPP with Literal.TrueLiteral
6066 *
6167 * Per stage (optimizeQueryStage + postStageCreationRules):
62- * 1. queryStageOptimizerRules: ReuseAdaptiveSubquery, CometReuseSubquery
68+ * 1. queryStageOptimizerRules:
69+ * a. PlanAdaptiveDynamicPruningFilters (Spark) -- skips wrapped SABs
70+ * b. ReuseAdaptiveSubquery (Spark)
71+ * c. CometPlanAdaptiveDynamicPruningFilters -- converts wrapped SABs to
72+ * CometSubqueryBroadcastExec with BroadcastQueryStageExec for broadcast reuse
73+ * d. CometReuseSubquery -- deduplicates converted subqueries
6374 * 2. postStageCreationRules -> ApplyColumnarRulesAndInsertTransitions:
6475 * a. preColumnarTransitions: CometScanRule, CometExecRule (no-ops, already converted)
6576 * b. insertTransitions
6677 * c. postColumnarTransitions: EliminateRedundantTransitions
6778 * }}}
6879 *
69- * CometReuseSubquery is needed in AQE because Spark's ReuseAdaptiveSubquery may run before
70- * Comet's node replacements in the initial plan construction, and the replacements can disrupt
71- * subquery reuse that was already applied. The shim-based registration
72- * (injectQueryStageOptimizerRuleShim) handles API availability: Spark 3.5+ has
73- * injectQueryStageOptimizerRule, Spark 3.4 does not (no-op).
80+ * On Spark 3.4, injectQueryStageOptimizerRule is unavailable: CometExecRule does not wrap SABs,
81+ * and CometPlanAdaptiveDynamicPruningFilters/CometReuseSubquery are not registered. Instead,
82+ * CometSpark34AqeDppFallbackRule tags AQE DPP regions so that they stay Spark-native, and
83+ * Spark's PlanAdaptiveDynamicPruningFilters handles them natively (with DPP).
7484 */
7585class CometSparkSessionExtensions
7686 extends (SparkSessionExtensions => Unit )
@@ -79,8 +89,13 @@ class CometSparkSessionExtensions
7989 override def apply (extensions : SparkSessionExtensions ): Unit = {
  // Registers Comet's rules on the given SparkSessionExtensions: two columnar rules
  // (CometScanColumnar, CometExecColumnar), the query-stage preparation rules, and the
  // query-stage optimizer rules. The registration order below is intentional; keep it as is.
8090 extensions.injectColumnar { session => CometScanColumnar (session) }
8191 extensions.injectColumnar { session => CometExecColumnar (session) }
92+ // Pre-3.5 only: tag AQE DPP regions so the conversion rules below leave them Spark-native.
93+ // Registered before CometScanRule/CometExecRule so tags are in place when conversion runs.
94+ // No-op on Spark 3.5+; see CometSpark34AqeDppFallbackRule's class docstring.
95+ injectPreSpark35QueryStagePrepRuleShim(extensions, CometSpark34AqeDppFallbackRule )
8296 extensions.injectQueryStagePrepRule { session => CometScanRule (session) }
8397 extensions.injectQueryStagePrepRule { session => CometExecRule (session) }
  // Query-stage optimizer rules go through a shim; per the class-level docs they are not
  // registered on Spark 3.4, where injectQueryStageOptimizerRule is unavailable.
  // CometPlanAdaptiveDynamicPruningFilters is registered ahead of CometReuseSubquery, matching
  // the documented per-stage order (convert wrapped SABs, then deduplicate the converted
  // subqueries). NOTE(review): presumably rules run in registration order -- confirm.
98+ injectQueryStageOptimizerRuleShim(extensions, CometPlanAdaptiveDynamicPruningFilters )
8499 injectQueryStageOptimizerRuleShim(extensions, CometReuseSubquery )
85100 }
86101
0 commit comments