diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_cte.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_cte.rs index 06f5a6f1e2208..97b166ea2b4df 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_cte.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_cte.rs @@ -171,6 +171,7 @@ impl Binder { output_columns, def: s_expr, column_mapping, + stat_info: None, }, ))); Ok((s_expr, new_bind_context)) diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index e3bc2cb1b9441..4354444d1185f 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -35,6 +35,7 @@ use crate::optimizer::optimizers::CascadesOptimizer; use crate::optimizer::optimizers::CommonSubexpressionOptimizer; use crate::optimizer::optimizers::DPhpyOptimizer; use crate::optimizer::optimizers::EliminateSelfJoinOptimizer; +use crate::optimizer::optimizers::SyncMaterializedCTERefOptimizer; use crate::optimizer::optimizers::distributed::BroadcastToShuffleOptimizer; use crate::optimizer::optimizers::operator::CleanupUnusedCTEOptimizer; use crate::optimizer::optimizers::operator::DeduplicateJoinConditionOptimizer; @@ -256,6 +257,12 @@ pub async fn optimize_query(opt_ctx: Arc, s_expr: SExpr) -> Re .add(RuleNormalizeAggregateOptimizer::new()) // Pull up and infer filter. .add(PullUpFilterOptimizer::new(opt_ctx.clone())) + // Common subexpression elimination optimization + // TODO(Sky): Currently uses heuristic approach, will be integrated into Cascades optimizer in the future. + .add_if( + settings.get_enable_cse_optimizer()?, + CommonSubexpressionOptimizer::new(opt_ctx.clone()), + ) // Run default rewrite rules .add(RecursiveRuleOptimizer::new( opt_ctx.clone(), @@ -263,6 +270,8 @@ pub async fn optimize_query(opt_ctx: Arc, s_expr: SExpr) -> Re )) // CTE filter pushdown optimization .add(CTEFilterPushdownOptimizer::new(opt_ctx.clone())) + // Sync CTE consumer statistics with the latest producer estimates after pushdown rewrites. + .add(SyncMaterializedCTERefOptimizer::new()) // Run post rewrite rules .add(RecursiveRuleOptimizer::new(opt_ctx.clone(), &[ RuleID::SplitAggregate, @@ -284,12 +293,6 @@ pub async fn optimize_query(opt_ctx: Arc, s_expr: SExpr) -> Re settings.get_force_eager_aggregate()?, RuleEagerAggregation::new(opt_ctx.get_metadata()), ) - // Common subexpression elimination optimization - // TODO(Sky): Currently uses heuristic approach, will be integrated into Cascades optimizer in the future. - .add_if( - settings.get_enable_cse_optimizer()?, - CommonSubexpressionOptimizer::new(opt_ctx.clone()), - ) // Cascades optimizer may fail due to timeout, fallback to heuristic optimizer in this case. .add(CascadesOptimizer::new(opt_ctx.clone())?) // Eliminate unnecessary scalar calculations to clean up the final plan diff --git a/src/query/sql/src/planner/optimizer/optimizers/cse/analyze.rs b/src/query/sql/src/planner/optimizer/optimizers/cse/analyze.rs index cb6d6d33202a9..7df0d50991f61 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/cse/analyze.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/cse/analyze.rs @@ -35,10 +35,21 @@ pub fn analyze_common_subexpression( } let signature_to_exprs = collect_table_signatures(s_expr, metadata); + let mut expr_groups = signature_to_exprs.into_values().collect::>(); + // Keep CSE materialization order deterministic by following the first + // occurrence of each candidate group in the plan tree. + expr_groups.sort_by(|lhs, rhs| lhs[0].0.cmp(&rhs[0].0)); let mut replacements = vec![]; let mut materialized_ctes = vec![]; - for exprs in signature_to_exprs.values() { - process_candidate_expressions(exprs, metadata, &mut replacements, &mut materialized_ctes)?; + let mut selected_paths = vec![]; + for exprs in &expr_groups { + process_candidate_expressions( + exprs, + metadata, + &mut replacements, + &mut materialized_ctes, + &mut selected_paths, + )?; } Ok((replacements, materialized_ctes)) } @@ -48,17 +59,22 @@ fn process_candidate_expressions( metadata: &mut Metadata, replacements: &mut Vec, materialized_ctes: &mut Vec, + selected_paths: &mut Vec>, ) -> Result<()> { + let candidates = candidates + .iter() + .filter(|(path, _)| { + !selected_paths + .iter() + .any(|selected| paths_overlap(path, selected)) + }) + .cloned() + .collect::>(); if candidates.len() < 2 { return Ok(()); } - let mut cte_def = candidates[0].1.clone(); - if let RelOperator::Scan(scan) = cte_def.plan.as_ref() { - let mut scan = scan.clone(); - scan.scan_id = metadata.next_scan_id(); - cte_def = SExpr::create_leaf(Arc::new(RelOperator::Scan(scan))); - } + let cte_def = refresh_scan_ids(&candidates[0].1, metadata)?; let cte_def = Arc::new(cte_def); let cte_def_columns = cte_def.derive_relational_prop()?.output_columns.clone(); @@ -83,6 +99,7 @@ fn process_candidate_expressions( output_columns: cte_ref_columns.iter().copied().collect(), def: expr.clone(), column_mapping, + stat_info: None, }; let cte_ref_expr = Arc::new(SExpr::create_leaf(Arc::new( RelOperator::MaterializedCTERef(cte_ref), @@ -91,10 +108,41 @@ fn process_candidate_expressions( path: path.clone(), new_expr: cte_ref_expr.clone(), }); + selected_paths.push(path); } Ok(()) } +#[recursive::recursive] +fn refresh_scan_ids(s_expr: &SExpr, metadata: &mut Metadata) -> Result { + let new_children = s_expr + .children() + .map(|child| refresh_scan_ids(child, metadata)) + .collect::>>()?; + + let mut result = if new_children + .iter() + .zip(s_expr.children()) + .any(|(new, old)| !new.eq(old)) + { + s_expr.replace_children(new_children.into_iter().map(Arc::new)) + } else { + s_expr.clone() + }; + + if let RelOperator::Scan(scan) = result.plan.as_ref() { + let mut scan = scan.clone(); + scan.scan_id = metadata.next_scan_id(); + result = result.replace_plan(Arc::new(RelOperator::Scan(scan))); + } + + Ok(result) +} + +fn paths_overlap(lhs: &[usize], rhs: &[usize]) -> bool { + lhs.starts_with(rhs) || rhs.starts_with(lhs) +} + fn contains_recursive_cte(expr: &SExpr) -> bool { if matches!(expr.plan(), RelOperator::RecursiveCteScan(_)) { return true; @@ -102,3 +150,167 @@ fn contains_recursive_cte(expr: &SExpr) -> bool { expr.children().any(contains_recursive_cte) } + +#[cfg(test)] +mod tests { + use std::any::Any; + + use databend_common_catalog::table::Table; + use databend_common_expression::TableDataType; + use databend_common_expression::TableField; + use databend_common_expression::TableSchema; + use databend_common_expression::types::NumberDataType; + use databend_common_meta_app::schema::CatalogInfo; + use databend_common_meta_app::schema::DatabaseType; + use databend_common_meta_app::schema::TableIdent; + use databend_common_meta_app::schema::TableInfo; + use databend_common_meta_app::schema::TableMeta; + + use super::*; + use crate::planner::metadata::Metadata; + use crate::plans::Join; + use crate::plans::JoinType; + use crate::plans::RelOperator; + use crate::plans::Scan; + + #[derive(Debug)] + struct FakeTable { + table_info: TableInfo, + } + + #[async_trait::async_trait] + impl Table for FakeTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn get_table_info(&self) -> &TableInfo { + &self.table_info + } + + fn support_column_projection(&self) -> bool { + true + } + } + + fn fake_fuse_table(table_id: u64, table_name: &str) -> Arc { + Arc::new(FakeTable { + table_info: TableInfo { + ident: TableIdent::new(table_id, 0), + desc: format!("'default'.'{table_name}'"), + name: table_name.to_string(), + meta: TableMeta { + schema: Arc::new(TableSchema::new(vec![TableField::new( + "a", + TableDataType::Number(NumberDataType::UInt64), + )])), + engine: "FUSE".to_string(), + ..Default::default() + }, + catalog_info: Arc::new(CatalogInfo::default()), + db_type: DatabaseType::NormalDB, + }, + }) + } + + fn add_table(metadata: &mut Metadata, table: Arc) -> usize { + metadata.add_table( + "default".to_string(), + "default".to_string(), + table, + None, + None, + false, + false, + false, + None, + ) + } + + fn scan_expr(metadata: &Metadata, table_index: usize) -> SExpr { + let columns = metadata + .columns_by_table_index(table_index) + .into_iter() + .map(|column| column.index()) + .collect(); + SExpr::create_leaf(Arc::new(RelOperator::Scan(Scan { + table_index, + columns, + ..Default::default() + }))) + } + + fn cross_join_expr(left: SExpr, right: SExpr) -> SExpr { + SExpr::create_binary( + Arc::new(RelOperator::Join(Join { + join_type: JoinType::Cross, + ..Default::default() + })), + Arc::new(left), + Arc::new(right), + ) + } + + #[test] + fn test_analyze_common_subexpression_prefers_cross_join_subtree() { + let mut metadata = Metadata::default(); + let t1 = fake_fuse_table(1, "t1"); + let t2 = fake_fuse_table(2, "t2"); + + let t1_left = add_table(&mut metadata, t1.clone()); + let t2_left = add_table(&mut metadata, t2.clone()); + let t1_right = add_table(&mut metadata, t1); + let t2_right = add_table(&mut metadata, t2); + + let left = cross_join_expr(scan_expr(&metadata, t1_left), scan_expr(&metadata, t2_left)); + let right = cross_join_expr( + scan_expr(&metadata, t1_right), + scan_expr(&metadata, t2_right), + ); + let root = cross_join_expr(left, right); + + let (replacements, materialized_ctes) = + analyze_common_subexpression(&root, &mut metadata).unwrap(); + + assert_eq!(replacements.len(), 2); + assert_eq!(materialized_ctes.len(), 1); + + let cte_def = materialized_ctes[0].child(0).unwrap(); + let RelOperator::Join(join) = cte_def.plan() else { + panic!( + "expected cross join materialized cte, got {:?}", + cte_def.plan() + ); + }; + assert_eq!(join.join_type, JoinType::Cross); + } + + #[test] + fn test_analyze_common_subexpression_keeps_cross_join_operand_order() { + let mut metadata = Metadata::default(); + let t1 = fake_fuse_table(1, "t1"); + let t2 = fake_fuse_table(2, "t2"); + + let t1_left = add_table(&mut metadata, t1.clone()); + let t2_left = add_table(&mut metadata, t2.clone()); + let t1_right = add_table(&mut metadata, t1); + let t2_right = add_table(&mut metadata, t2); + + let left = cross_join_expr(scan_expr(&metadata, t1_left), scan_expr(&metadata, t2_left)); + let right = cross_join_expr( + scan_expr(&metadata, t2_right), + scan_expr(&metadata, t1_right), + ); + let root = cross_join_expr(left, right); + + let (_replacements, materialized_ctes) = + analyze_common_subexpression(&root, &mut metadata).unwrap(); + + assert_eq!(materialized_ctes.len(), 2); + assert!( + materialized_ctes + .iter() + .all(|cte| matches!(cte.child(0).unwrap().plan(), RelOperator::Scan(_))) + ); + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/cse/table_signature.rs b/src/query/sql/src/planner/optimizer/optimizers/cse/table_signature.rs index c7a20619d0caa..6552d5399243c 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/cse/table_signature.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/cse/table_signature.rs @@ -12,18 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeSet; use std::collections::HashMap; use crate::ColumnEntry; use crate::IndexType; use crate::optimizer::ir::SExpr; use crate::planner::metadata::Metadata; +use crate::plans::Join; +use crate::plans::JoinType; use crate::plans::RelOperator; +use crate::plans::Scan; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TableSignature { - pub tables: BTreeSet, + pub tables: Vec, } pub fn collect_table_signatures( @@ -41,43 +43,89 @@ fn collect_table_signatures_rec( path: &mut Vec, metadata: &Metadata, signature_to_exprs: &mut HashMap, SExpr)>>, -) { +) -> Option> { + let mut child_tables = Vec::with_capacity(expr.arity()); for (child_index, child) in expr.children().enumerate() { path.push(child_index); - collect_table_signatures_rec(child, path, metadata, signature_to_exprs); + child_tables.push(collect_table_signatures_rec( + child, + path, + metadata, + signature_to_exprs, + )); path.pop(); } - if let RelOperator::Scan(scan) = expr.plan.as_ref() { - let has_internal_column = scan.columns.iter().any(|column_index| { - let column = metadata.column(*column_index); - matches!(column, ColumnEntry::InternalColumn(_)) - }); - if has_internal_column - || scan.prewhere.is_some() - || scan.agg_index.is_some() - || scan.change_type.is_some() - || scan.update_stream_columns - || scan.inverted_index.is_some() - || scan.vector_index.is_some() - || scan.is_lazy_table - || scan.sample.is_some() - || scan.secure_predicates.is_some() + match expr.plan.as_ref() { + RelOperator::Scan(scan) => { + let table_id = scan_signature(scan, metadata)?; + let tables = vec![table_id]; + signature_to_exprs + .entry(TableSignature { + tables: tables.clone(), + }) + .or_default() + .push((path.clone(), expr.clone())); + Some(tables) + } + RelOperator::Join(join) + if is_supported_cross_join(join) + && child_tables.len() == 2 + && child_tables[0].is_some() + && child_tables[1].is_some() => { - return; + let mut tables = child_tables[0].clone().unwrap(); + tables.extend(child_tables[1].clone().unwrap()); + // Preserve operand order so side-swapped cross joins do not share a + // signature and get remapped positionally later. + signature_to_exprs + .entry(TableSignature { + tables: tables.clone(), + }) + .or_default() + .push((path.clone(), expr.clone())); + Some(tables) } + _ => None, + } +} - let table_entry = metadata.table(scan.table_index); - let table = table_entry.table(); - if table.engine() != "FUSE" { - return; - } +fn scan_signature(scan: &Scan, metadata: &Metadata) -> Option { + let has_internal_column = scan.columns.iter().any(|column_index| { + let column = metadata.column(*column_index); + matches!(column, ColumnEntry::InternalColumn(_)) + }); + if has_internal_column + || scan.prewhere.is_some() + || scan.agg_index.is_some() + || scan.change_type.is_some() + || scan.update_stream_columns + || scan.inverted_index.is_some() + || scan.vector_index.is_some() + || scan.is_lazy_table + || scan.sample.is_some() + || scan.secure_predicates.is_some() + { + return None; + } - let mut tables = BTreeSet::new(); - tables.insert(table.get_id() as IndexType); - signature_to_exprs - .entry(TableSignature { tables }) - .or_default() - .push((path.clone(), expr.clone())); + let table_entry = metadata.table(scan.table_index); + let table = table_entry.table(); + if table.engine() != "FUSE" { + return None; } + + Some(table.get_id() as IndexType) +} + +fn is_supported_cross_join(join: &Join) -> bool { + join.join_type == JoinType::Cross + && join.equi_conditions.is_empty() + && join.non_equi_conditions.is_empty() + && join.marker_index.is_none() + && !join.from_correlated_subquery + && !join.need_hold_hash_table + && !join.is_lateral + && join.single_to_inner.is_none() + && join.build_side_cache_info.is_none() } diff --git a/src/query/sql/src/planner/optimizer/optimizers/cte_filter_pushdown.rs b/src/query/sql/src/planner/optimizer/optimizers/cte_filter_pushdown.rs index 7904326298975..905528b1216c7 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/cte_filter_pushdown.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/cte_filter_pushdown.rs @@ -23,7 +23,6 @@ use crate::Symbol; use crate::optimizer::Optimizer; use crate::optimizer::OptimizerContext; use crate::optimizer::ir::SExpr; -use crate::optimizer::optimizers::operator::PullUpFilterOptimizer; use crate::optimizer::optimizers::recursive::RecursiveRuleOptimizer; use crate::optimizer::optimizers::rule::DEFAULT_REWRITE_RULES; use crate::plans::BoundColumnRef; @@ -35,7 +34,6 @@ use crate::plans::VisitorMut; pub struct CTEFilterPushdownOptimizer { cte_filters: HashMap>>, - pull_up_filter_optimizer: PullUpFilterOptimizer, rule_optimizer: RecursiveRuleOptimizer, } @@ -43,6 +41,145 @@ struct ColumnMappingRewriter { mapping: HashMap, } +#[derive(Default)] +struct PredicateDedupNormalizer; + +impl VisitorMut<'_> for PredicateDedupNormalizer { + fn visit_bound_column_ref(&mut self, col: &mut BoundColumnRef) -> Result<()> { + col.column.table_index = None; + Ok(()) + } +} + +fn dedup_normalized_predicate(predicate: &ScalarExpr) -> Result { + let mut normalized = predicate.clone(); + let mut normalizer = PredicateDedupNormalizer; + normalizer.visit(&mut normalized)?; + Ok(normalized) +} + +fn dedup_append_predicate(predicates: &mut Vec, predicate: ScalarExpr) -> Result<()> { + let normalized = dedup_normalized_predicate(&predicate)?; + let mut exists = false; + for current in predicates.iter() { + if dedup_normalized_predicate(current)? == normalized { + exists = true; + break; + } + } + + if !exists { + predicates.push(predicate); + } + + Ok(()) +} + +fn flatten_conjuncts(predicate: &ScalarExpr) -> Vec { + match predicate { + ScalarExpr::FunctionCall(func) + if matches!(func.func_name.as_str(), "and" | "and_filters") => + { + func.arguments.iter().flat_map(flatten_conjuncts).collect() + } + _ => vec![predicate.clone()], + } +} + +fn build_conjunction(predicates: Vec) -> Option { + predicates.into_iter().reduce(|acc, predicate| { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "and".to_string(), + params: vec![], + arguments: vec![acc, predicate], + }) + }) +} + +fn build_disjunction(predicates: Vec) -> Option { + predicates.into_iter().reduce(|acc, predicate| { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "or".to_string(), + params: vec![], + arguments: vec![acc, predicate], + }) + }) +} + +fn extract_common_conjuncts(predicates: &[ScalarExpr]) -> Result> { + if predicates.is_empty() { + return Ok(vec![]); + } + + let predicate_conjuncts = predicates.iter().map(flatten_conjuncts).collect::>(); + let normalized_conjuncts = predicate_conjuncts + .iter() + .map(|conjuncts| { + conjuncts + .iter() + .map(dedup_normalized_predicate) + .collect::>>() + }) + .collect::>>()?; + + let mut common_predicates = Vec::new(); + let mut common_normalized = Vec::new(); + for (predicate, normalized) in predicate_conjuncts[0] + .iter() + .zip(normalized_conjuncts[0].iter()) + { + if common_normalized + .iter() + .any(|current| current == normalized) + { + continue; + } + + if normalized_conjuncts + .iter() + .skip(1) + .all(|conjuncts| conjuncts.iter().any(|current| current == normalized)) + { + common_predicates.push(predicate.clone()); + common_normalized.push(normalized.clone()); + } + } + + let mut residual_predicates = Vec::new(); + let mut has_empty_residual = false; + for (conjuncts, normalized) in predicate_conjuncts.iter().zip(normalized_conjuncts.iter()) { + let residual_conjuncts = conjuncts + .iter() + .zip(normalized.iter()) + .filter_map(|(predicate, normalized)| { + (!common_normalized + .iter() + .any(|current| current == normalized)) + .then_some(predicate.clone()) + }) + .collect::>(); + + if residual_conjuncts.is_empty() { + has_empty_residual = true; + continue; + } + + if let Some(residual_predicate) = build_conjunction(residual_conjuncts) { + dedup_append_predicate(&mut residual_predicates, residual_predicate)?; + } + } + + if !has_empty_residual { + if let Some(residual_predicate) = build_disjunction(residual_predicates) { + common_predicates.push(residual_predicate); + } + } + + Ok(common_predicates) +} + impl VisitorMut<'_> for ColumnMappingRewriter { fn visit_bound_column_ref(&mut self, col: &mut BoundColumnRef) -> Result<()> { if let Some(&new_index) = self.mapping.get(&col.column.index) { @@ -54,11 +191,9 @@ impl VisitorMut<'_> for ColumnMappingRewriter { impl CTEFilterPushdownOptimizer { pub fn new(ctx: Arc) -> Self { - let pull_up_filter_optimizer = PullUpFilterOptimizer::new(ctx.clone()); let inner_optimizer = RecursiveRuleOptimizer::new(ctx.clone(), &DEFAULT_REWRITE_RULES); Self { cte_filters: HashMap::new(), - pull_up_filter_optimizer, rule_optimizer: inner_optimizer, } } @@ -91,7 +226,7 @@ impl CTEFilterPushdownOptimizer { match self.cte_filters.get_mut(&cte.cte_name) { Some(Some(predicates)) => { - predicates.push(and_predicate); + dedup_append_predicate(predicates, and_predicate)?; } Some(None) => { // Already marked as None, do nothing @@ -136,26 +271,11 @@ impl CTEFilterPushdownOptimizer { if let RelOperator::MaterializedCTE(cte) = s_expr.plan() { if let Some(Some(predicates)) = self.cte_filters.get(&cte.cte_name) { - if !predicates.is_empty() { + let pushdown_predicates = extract_common_conjuncts(predicates)?; + if !pushdown_predicates.is_empty() { log::info!("Pushing predicates to CTE {}", cte.cte_name); - let or_predicate = if predicates.len() == 1 { - predicates[0].clone() - } else { - predicates - .iter() - .skip(1) - .fold(predicates[0].clone(), |acc, pred| { - ScalarExpr::FunctionCall(FunctionCall { - span: Span::None, - func_name: "or".to_string(), - params: vec![], - arguments: vec![acc, pred.clone()], - }) - }) - }; - let filter = Filter { - predicates: vec![or_predicate], + predicates: pushdown_predicates, }; let filter_expr = SExpr::create_unary( @@ -185,17 +305,171 @@ impl Optimizer for CTEFilterPushdownOptimizer { let expr_with_filters = self.add_filters_to_ctes(s_expr)?; - let expr_with_pulled_up_filters = self - .pull_up_filter_optimizer - .optimize(&expr_with_filters) - .await?; - - self.rule_optimizer - .optimize(&expr_with_pulled_up_filters) - .await + self.rule_optimizer.optimize(&expr_with_filters).await } fn name(&self) -> String { "CTEFilterPushdownOptimizer".to_string() } } + +#[cfg(test)] +mod tests { + use databend_common_expression::Scalar; + use databend_common_expression::types::DataType; + + use super::*; + use crate::Symbol; + use crate::Visibility; + use crate::binder::ColumnBindingBuilder; + use crate::plans::BoundColumnRef; + use crate::plans::ConstantExpr; + + fn bound_column(index: usize, table_index: usize, name: &str) -> ScalarExpr { + BoundColumnRef { + span: None, + column: ColumnBindingBuilder::new( + name.to_string(), + Symbol::new(index), + Box::new(DataType::String), + Visibility::Visible, + ) + .table_index(Some(table_index)) + .build(), + } + .into() + } + + fn eq_string(index: usize, table_index: usize, name: &str, value: &str) -> ScalarExpr { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "eq".to_string(), + params: vec![], + arguments: vec![ + bound_column(index, table_index, name), + ConstantExpr { + span: None, + value: Scalar::String(value.to_string()), + } + .into(), + ], + }) + } + + fn eq_columns( + left_index: usize, + left_table_index: usize, + left_name: &str, + right_index: usize, + right_table_index: usize, + right_name: &str, + ) -> ScalarExpr { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "eq".to_string(), + params: vec![], + arguments: vec![ + bound_column(left_index, left_table_index, left_name), + bound_column(right_index, right_table_index, right_name), + ], + }) + } + + fn and(arguments: Vec) -> ScalarExpr { + arguments + .into_iter() + .reduce(|acc, arg| { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "and".to_string(), + params: vec![], + arguments: vec![acc, arg], + }) + }) + .unwrap() + } + + fn or(arguments: Vec) -> ScalarExpr { + arguments + .into_iter() + .reduce(|acc, arg| { + ScalarExpr::FunctionCall(FunctionCall { + span: Span::None, + func_name: "or".to_string(), + params: vec![], + arguments: vec![acc, arg], + }) + }) + .unwrap() + } + + #[test] + fn test_dedup_append_predicate_preserves_first_occurrence() { + let predicate = eq_string(10, 1, "s_store_name", "ese"); + let mut predicates = vec![predicate.clone()]; + + dedup_append_predicate(&mut predicates, predicate.clone()).unwrap(); + dedup_append_predicate(&mut predicates, eq_string(11, 1, "s_city", "beijing")).unwrap(); + dedup_append_predicate(&mut predicates, predicate).unwrap(); + + assert_eq!(predicates.len(), 2); + assert_eq!(predicates[0], eq_string(10, 1, "s_store_name", "ese")); + assert_eq!(predicates[1], eq_string(11, 1, "s_city", "beijing")); + } + + #[test] + fn test_dedup_append_predicate_ignores_table_index_noise() { + let mut predicates = vec![eq_string(10, 1, "s_store_name", "ese")]; + + dedup_append_predicate(&mut predicates, eq_string(10, 2, "s_store_name", "ese")).unwrap(); + + assert_eq!(predicates.len(), 1); + } + + #[test] + fn test_extract_common_conjuncts_keeps_shared_predicates_outside_or() { + let join_predicate = eq_columns(0, 1, "ss_store_sk", 1, 2, "s_store_sk"); + let store_predicate = eq_string(2, 2, "s_store_name", "ese"); + let first_window = eq_string(3, 3, "t_hour", "8"); + let second_window = eq_string(3, 3, "t_hour", "9"); + + let result = extract_common_conjuncts(&[ + and(vec![ + join_predicate.clone(), + store_predicate.clone(), + first_window.clone(), + ]), + and(vec![ + join_predicate.clone(), + store_predicate.clone(), + second_window.clone(), + ]), + ]) + .unwrap(); + + assert_eq!(result, vec![ + join_predicate, + store_predicate, + or(vec![first_window, second_window]), + ]); + } + + #[test] + fn test_extract_common_conjuncts_drops_residual_or_when_branch_is_subset() { + let join_predicate = eq_columns(0, 1, "ss_store_sk", 1, 2, "s_store_sk"); + let store_predicate = eq_string(2, 2, "s_store_name", "ese"); + let window_predicate = eq_string(3, 3, "t_hour", "8"); + + let result = extract_common_conjuncts(&[ + and(vec![join_predicate.clone(), store_predicate.clone()]), + and(vec![ + join_predicate.clone(), + store_predicate.clone(), + window_predicate, + ]), + ]) + .unwrap(); + + assert_eq!(result, vec![join_predicate, store_predicate]); + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/mod.rs index 05620b7610a8e..720d1e3b3bfdb 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/mod.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/mod.rs @@ -21,9 +21,11 @@ mod hyper_dp; pub mod operator; pub mod recursive; pub mod rule; +mod sync_materialized_cte_ref; pub use cascades::CascadesOptimizer; pub use cse::CommonSubexpressionOptimizer; pub use cte_filter_pushdown::CTEFilterPushdownOptimizer; pub use eliminate_self_join::EliminateSelfJoinOptimizer; pub use hyper_dp::DPhpyOptimizer; pub use operator::CleanupUnusedCTEOptimizer; +pub use sync_materialized_cte_ref::SyncMaterializedCTERefOptimizer; diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_grouping_sets_to_union.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_grouping_sets_to_union.rs index 330d3a9101879..097cdcf7c43ab 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_grouping_sets_to_union.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_grouping_sets_to_union.rs @@ -153,6 +153,7 @@ impl Rule for RuleGroupingSetsToUnion { output_columns: agg_input_columns.clone(), def: agg_input.clone(), column_mapping, + stat_info: None, }); let mask = (1 << grouping_sets.dup_group_items.len()) - 1; diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_hierarchical_grouping_sets.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_hierarchical_grouping_sets.rs index a28616f20d37a..6087062f56fa7 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_hierarchical_grouping_sets.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_hierarchical_grouping_sets.rs @@ -464,6 +464,7 @@ impl RuleHierarchicalGroupingSetsToUnion { output_columns: agg_input_columns.to_vec(), // Will be populated based on original input def: agg_input.clone(), column_mapping: agg_input_columns.iter().map(|col| (*col, *col)).collect(), // Identity mapping + stat_info: None, } .into(), )); @@ -670,6 +671,7 @@ impl RuleHierarchicalGroupingSetsToUnion { output_columns: parent_output_columns, def: parent_cte.cte.child(0)?.clone(), column_mapping, + stat_info: None, } .into(), )); @@ -750,6 +752,7 @@ impl RuleHierarchicalGroupingSetsToUnion { output_columns: source_output_columns.to_vec(), def: source_cte.cte.child(0)?.clone(), column_mapping, + stat_info: None, }); // Apply grouping sets NULL semantics in EvalScalar diff --git a/src/query/sql/src/planner/optimizer/optimizers/sync_materialized_cte_ref.rs b/src/query/sql/src/planner/optimizer/optimizers/sync_materialized_cte_ref.rs new file mode 100644 index 0000000000000..9241475c12390 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/optimizers/sync_materialized_cte_ref.rs @@ -0,0 +1,230 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::optimizer::Optimizer; +use crate::optimizer::ir::RelExpr; +use crate::optimizer::ir::SExpr; +use crate::optimizer::ir::StatInfo; +use crate::optimizer::ir::Statistics; +use crate::plans::MaterializedCTERef; +use crate::plans::RelOperator; + +pub struct SyncMaterializedCTERefOptimizer { + cte_stats: HashMap>, +} + +impl Default for SyncMaterializedCTERefOptimizer { + fn default() -> Self { + Self::new() + } +} + +impl SyncMaterializedCTERefOptimizer { + pub fn new() -> Self { + Self { + cte_stats: HashMap::new(), + } + } + + pub fn optimize_sync(&mut self, s_expr: &SExpr) -> Result { + self.cte_stats.clear(); + self.collect_cte_stats(s_expr)?; + let (s_expr, _) = self.sync_cte_ref_stats(s_expr)?; + Ok(s_expr) + } + + #[recursive::recursive] + fn collect_cte_stats(&mut self, s_expr: &SExpr) -> Result<()> { + if let RelOperator::MaterializedCTE(cte) = s_expr.plan() { + let stat_info = RelExpr::with_s_expr(s_expr.child(0)?).derive_cardinality()?; + self.cte_stats.insert(cte.cte_name.clone(), stat_info); + } + + for child in s_expr.children() { + self.collect_cte_stats(child)?; + } + + Ok(()) + } + + fn remap_stat_info( + cte_ref: &MaterializedCTERef, + producer_stat_info: &Arc, + ) -> Arc { + let producer_to_ref = cte_ref + .column_mapping + .iter() + .map(|(ref_col, producer_col)| (*producer_col, *ref_col)) + .collect::>(); + let column_stats = producer_stat_info + .statistics + .column_stats + .iter() + .filter_map(|(producer_col, stat)| { + producer_to_ref + .get(producer_col) + .map(|ref_col| (*ref_col, stat.clone())) + }) + .collect(); + + Arc::new(StatInfo { + cardinality: producer_stat_info.cardinality, + statistics: Statistics { + precise_cardinality: producer_stat_info.statistics.precise_cardinality, + column_stats, + }, + }) + } + + #[recursive::recursive] + fn sync_cte_ref_stats(&self, s_expr: &SExpr) -> Result<(SExpr, bool)> { + let mut changed = false; + let mut new_children = Vec::with_capacity(s_expr.arity()); + for child in s_expr.children() { + let (new_child, child_changed) = self.sync_cte_ref_stats(child)?; + changed |= child_changed; + new_children.push(Arc::new(new_child)); + } + + let mut result = if changed { + s_expr.replace_children(new_children) + } else { + s_expr.clone() + }; + + if let RelOperator::MaterializedCTERef(cte_ref) = result.plan() { + if let Some(producer_stat_info) = self.cte_stats.get(&cte_ref.cte_name) { + let mut new_cte_ref = cte_ref.clone(); + new_cte_ref.stat_info = Some(Self::remap_stat_info(cte_ref, producer_stat_info)); + result = + result.replace_plan(Arc::new(RelOperator::MaterializedCTERef(new_cte_ref))); + changed = true; + } + } + + Ok((result, changed)) + } +} + +#[async_trait::async_trait] +impl Optimizer for SyncMaterializedCTERefOptimizer { + fn name(&self) -> String { + "SyncMaterializedCTERefOptimizer".to_string() + } + + async fn optimize(&mut self, s_expr: &SExpr) -> Result { + self.optimize_sync(s_expr) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::sync::Arc; + + use databend_common_expression::Scalar; + + use super::*; + use crate::plans::ConstantExpr; + use crate::plans::DummyTableScan; + use crate::plans::Filter; + use crate::plans::MaterializedCTE; + use crate::plans::MaterializedCTERef; + use crate::plans::Sequence; + + fn bool_constant(value: bool) -> crate::ScalarExpr { + ConstantExpr { + span: None, + value: Scalar::Boolean(value), + } + .into() + } + + #[test] + fn test_sync_materialized_cte_ref_updates_consumer_stats() { + let old_def = SExpr::create_leaf(DummyTableScan::new()); + let new_def = SExpr::create_unary( + Filter { + predicates: vec![bool_constant(false)], + }, + Arc::new(old_def.clone()), + ); + + let producer = SExpr::create_unary( + MaterializedCTE::new("cte".to_string(), None), + Arc::new(new_def), + ); + let consumer = SExpr::create_leaf(RelOperator::MaterializedCTERef(MaterializedCTERef { + cte_name: "cte".to_string(), + output_columns: vec![], + def: old_def.clone(), + column_mapping: HashMap::new(), + stat_info: None, + })); + let query = SExpr::create_unary( + Filter { + predicates: vec![bool_constant(false)], + }, + Arc::new(consumer), + ); + let root = SExpr::create_binary(Sequence, Arc::new(producer), Arc::new(query)); + + let optimized = SyncMaterializedCTERefOptimizer::new() + .optimize_sync(&root) + .unwrap(); + + let query = optimized.child(1).unwrap(); + let consumer = query.child(0).unwrap(); + let RelOperator::MaterializedCTERef(cte_ref) = consumer.plan() else { + panic!("expected materialized cte ref"); + }; + + assert_eq!(cte_ref.def, old_def); + assert_eq!(cte_ref.stat_info.as_ref().unwrap().cardinality, 0.0); + } + + #[test] + fn test_sync_materialized_cte_ref_keeps_unmatched_consumer_stats() { + let old_def = SExpr::create_leaf(DummyTableScan::new()); + let producer = SExpr::create_unary( + MaterializedCTE::new("cte".to_string(), None), + Arc::new(old_def.clone()), + ); + let consumer = SExpr::create_leaf(RelOperator::MaterializedCTERef(MaterializedCTERef { + cte_name: "other_cte".to_string(), + output_columns: vec![], + def: old_def.clone(), + column_mapping: HashMap::new(), + stat_info: None, + })); + let root = SExpr::create_binary(Sequence, Arc::new(producer), Arc::new(consumer)); + + let optimized = SyncMaterializedCTERefOptimizer::new() + .optimize_sync(&root) + .unwrap(); + + let consumer = optimized.child(1).unwrap(); + let RelOperator::MaterializedCTERef(cte_ref) = consumer.plan() else { + panic!("expected materialized cte ref"); + }; + + assert_eq!(cte_ref.def, old_def); + assert!(cte_ref.stat_info.is_none()); + } +} diff --git a/src/query/sql/src/planner/plans/cte_consumer.rs b/src/query/sql/src/planner/plans/cte_consumer.rs index 30ecdacd4e136..918555867f865 100644 --- a/src/query/sql/src/planner/plans/cte_consumer.rs +++ b/src/query/sql/src/planner/plans/cte_consumer.rs @@ -32,14 +32,26 @@ use crate::optimizer::ir::StatInfo; use crate::plans::Operator; use crate::plans::RelOp; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug)] pub struct MaterializedCTERef { pub cte_name: String, pub output_columns: Vec, pub def: SExpr, pub column_mapping: HashMap, + pub stat_info: Option>, } +impl PartialEq for MaterializedCTERef { + fn eq(&self, other: &Self) -> bool { + self.cte_name == other.cte_name + && self.output_columns == other.output_columns + && self.def == other.def + && self.column_mapping == other.column_mapping + } +} + +impl Eq for MaterializedCTERef {} + impl Hash for MaterializedCTERef { fn hash(&self, state: &mut H) { self.cte_name.hash(state); @@ -58,6 +70,9 @@ impl Operator for MaterializedCTERef { /// Derive statistics information fn derive_stats(&self, _rel_expr: &RelExpr) -> Result> { + if let Some(stat_info) = &self.stat_info { + return Ok(stat_info.clone()); + } RelExpr::with_s_expr(&self.def).derive_cardinality() } diff --git a/tests/sqllogictests/suites/mode/standalone/explain/common_subexpression_optimizer.test b/tests/sqllogictests/suites/mode/standalone/explain/common_subexpression_optimizer.test index e3242cc5ed696..76060b108afcc 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/common_subexpression_optimizer.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/common_subexpression_optimizer.test @@ -68,3 +68,612 @@ on t.b = agg.b; statement ok drop table cse_t; + +statement ok +create or replace table cse_left as +select number as a +from numbers(3); + +statement ok +create or replace table cse_right as +select number as b +from numbers(3); + +query T nosort +explain +SELECT * +FROM + (SELECT count(*) q1 + FROM cse_left, + cse_right + WHERE cse_left.a = cse_right.b + AND cse_left.a < 1) s1, + (SELECT count(*) q2 + FROM cse_left, + cse_right + WHERE cse_left.a = cse_right.b + AND cse_left.a < 2) s2; +---- +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── Filter +│ ├── output columns: [cse_left.a (#0), cse_right.b (#1)] +│ ├── filters: [cse_left.a (#0) < 1 and cse_right.b (#1) < 1 or cse_left.a (#0) < 2 and cse_right.b (#1) < 2] +│ ├── estimated rows: 1.81 +│ └── HashJoin +│ ├── output columns: [cse_left.a (#0), cse_right.b (#1)] +│ ├── join type: INNER +│ ├── build keys: [cse_right.b (#1)] +│ ├── probe keys: [cse_left.a (#0)] +│ ├── keys is null equal: [false] +│ ├── filters: [] +│ ├── build join filters: +│ │ └── filter id:0, build key:cse_right.b (#1), probe targets:[cse_left.a (#0)@scan0], filter type:bloom,inlist,min_max +│ ├── estimated rows: 2.33 +│ ├── Filter(Build) +│ │ ├── output columns: [cse_right.b (#1)] +│ │ ├── filters: [cse_right.b (#1) < 1 or cse_right.b (#1) < 2] +│ │ ├── estimated rows: 2.33 +│ │ └── TableScan +│ │ ├── table: default.default.cse_right +│ │ ├── scan id: 1 +│ │ ├── output columns: [b (#1)] +│ │ ├── read rows: 3 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: >, blocks: >] +│ │ ├── push downs: [filters: [cse_right.b (#1) < 1 or cse_right.b (#1) < 2], limit: NONE] +│ │ └── estimated rows: 3.00 +│ └── Filter(Probe) +│ ├── output columns: [cse_left.a (#0)] +│ ├── filters: [cse_left.a (#0) < 1 or cse_left.a (#0) < 2] +│ ├── estimated rows: 2.33 +│ └── TableScan +│ ├── table: default.default.cse_left +│ ├── scan id: 0 +│ ├── output columns: [a (#0)] +│ ├── read rows: 3 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: >, blocks: >] +│ ├── push downs: [filters: [cse_left.a (#0) < 1 or cse_left.a (#0) < 2], limit: NONE] +│ ├── apply join filters: [#0] +│ └── estimated rows: 3.00 +└── HashJoin + ├── output columns: [COUNT(*) (#2), COUNT(*) (#5)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#5)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(cse_left.a (#3) < 2), is_true(cse_right.b (#4) < 2), is_true(cse_left.a (#3) = cse_right.b (#4))] + │ ├── estimated rows: 0.36 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [a (#3), b (#4)] + │ └── estimated rows: 1.81 + └── AggregateFinal(Probe) + ├── output columns: [COUNT(*) (#2)] + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── AggregatePartial + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── Filter + ├── output columns: [] + ├── filters: [is_true(cse_left.a (#0) < 1), is_true(cse_right.b (#1) < 1), is_true(cse_left.a (#0) = cse_right.b (#1))] + ├── estimated rows: 0.36 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [a (#0), b (#1)] + └── estimated rows: 1.81 + +statement ok +drop table cse_left; + +statement ok +drop table cse_right; + +# Simulate TPC-DS Q88 and verify the common subexpression optimizer plan. +statement ok +create or replace table store_sales ( + ss_sold_time_sk int, + ss_hdemo_sk int, + ss_store_sk int +); + +statement ok +create or replace table household_demographics ( + hd_demo_sk int, + hd_dep_count int, + hd_vehicle_count int +); + +statement ok +create or replace table time_dim ( + t_time_sk int, + t_hour int, + t_minute int +); + +statement ok +create or replace table store ( + s_store_sk int, + s_store_name string +); + +statement ok +insert into household_demographics values + (1, 4, 6), + (2, 2, 4), + (3, 0, 2), + (4, 1, 5); + +statement ok +insert into time_dim values + (1, 8, 30), + (2, 9, 0), + (3, 9, 30), + (4, 10, 0), + (5, 10, 30), + (6, 11, 0), + (7, 11, 30), + (8, 12, 0), + (9, 12, 45); + +statement ok +insert into store values + (1, 'ese'), + (2, 'other'); + +statement ok +insert into store_sales +select + t.t_time_sk as ss_sold_time_sk, + h.hd_demo_sk as ss_hdemo_sk, + 1 as ss_store_sk +from time_dim t +cross join household_demographics h; + +query T nosort +explain +SELECT * +FROM + (SELECT count(*) h8_30_to_9 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 8 + AND time_dim.t_minute >= 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s1, + (SELECT count(*) h9_to_9_30 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute < 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s2, + (SELECT count(*) h9_30_to_10 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute >= 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s3, + (SELECT count(*) h10_to_10_30 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute < 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s4, + (SELECT count(*) h10_30_to_11 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute >= 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s5, + (SELECT count(*) h11_to_11_30 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute < 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s6, + (SELECT count(*) h11_30_to_12 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute >= 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s7, + (SELECT count(*) h12_to_12_30 + FROM store_sales, + household_demographics, + time_dim, + store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 12 + AND time_dim.t_minute < 30 + AND ((household_demographics.hd_dep_count = 4 + AND household_demographics.hd_vehicle_count <= 4 + 2) + OR (household_demographics.hd_dep_count = 2 + AND household_demographics.hd_vehicle_count <= 2 + 2) + OR (household_demographics.hd_dep_count = 0 + AND household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s8; +---- +Sequence +├── MaterializedCTE: cte_cse_0 +│ └── HashJoin +│ ├── output columns: [store_sales.ss_sold_time_sk (#0), store_sales.ss_hdemo_sk (#1), store_sales.ss_store_sk (#2), household_demographics.hd_dep_count (#4), household_demographics.hd_vehicle_count (#5), household_demographics.hd_demo_sk (#3), time_dim.t_hour (#7), time_dim.t_minute (#8), time_dim.t_time_sk (#6), store.s_store_name (#10), store.s_store_sk (#9)] +│ ├── join type: INNER +│ ├── build keys: [store.s_store_sk (#9)] +│ ├── probe keys: [store_sales.ss_store_sk (#2)] +│ ├── keys is null equal: [false] +│ ├── filters: [] +│ ├── build join filters: +│ │ └── filter id:2, build key:store.s_store_sk (#9), probe targets:[store_sales.ss_store_sk (#2)@scan0], filter type:bloom,inlist,min_max +│ ├── estimated rows: 17.32 +│ ├── TableScan(Build) +│ │ ├── table: default.default.store +│ │ ├── scan id: 3 +│ │ ├── output columns: [s_store_sk (#9), s_store_name (#10)] +│ │ ├── read rows: 2 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: >, blocks: , bloom pruning: >] +│ │ ├── push downs: [filters: [is_true(store.s_store_name (#10) = 'ese')], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── HashJoin(Probe) +│ ├── output columns: [store_sales.ss_sold_time_sk (#0), store_sales.ss_hdemo_sk (#1), store_sales.ss_store_sk (#2), household_demographics.hd_dep_count (#4), household_demographics.hd_vehicle_count (#5), household_demographics.hd_demo_sk (#3), time_dim.t_hour (#7), time_dim.t_minute (#8), time_dim.t_time_sk (#6)] +│ ├── join type: INNER +│ ├── build keys: [time_dim.t_time_sk (#6)] +│ ├── probe keys: [store_sales.ss_sold_time_sk (#0)] +│ ├── keys is null equal: [false] +│ ├── filters: [] +│ ├── build join filters: +│ │ └── filter id:1, build key:time_dim.t_time_sk (#6), probe targets:[store_sales.ss_sold_time_sk (#0)@scan0], filter type:bloom,inlist,min_max +│ ├── estimated rows: 17.32 +│ ├── Filter(Build) +│ │ ├── output columns: [time_dim.t_time_sk (#6), time_dim.t_hour (#7), time_dim.t_minute (#8)] +│ │ ├── filters: [time_dim.t_hour (#7) = 8 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 9 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 9 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 10 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 10 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 11 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 11 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 12 and time_dim.t_minute (#8) < 30] +│ │ ├── estimated rows: 7.49 +│ │ └── TableScan +│ │ ├── table: default.default.time_dim +│ │ ├── scan id: 2 +│ │ ├── output columns: [t_time_sk (#6), t_hour (#7), t_minute (#8)] +│ │ ├── read rows: 9 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: >, blocks: , bloom pruning: >] +│ │ ├── push downs: [filters: [time_dim.t_hour (#7) = 8 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 9 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 9 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 10 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 10 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 11 and time_dim.t_minute (#8) < 30 or time_dim.t_hour (#7) = 11 and time_dim.t_minute (#8) >= 30 or time_dim.t_hour (#7) = 12 and time_dim.t_minute (#8) < 30], limit: NONE] +│ │ └── estimated rows: 9.00 +│ └── HashJoin(Probe) +│ ├── output columns: [store_sales.ss_sold_time_sk (#0), store_sales.ss_hdemo_sk (#1), store_sales.ss_store_sk (#2), household_demographics.hd_dep_count (#4), household_demographics.hd_vehicle_count (#5), household_demographics.hd_demo_sk (#3)] +│ ├── join type: INNER +│ ├── build keys: [household_demographics.hd_demo_sk (#3)] +│ ├── probe keys: [store_sales.ss_hdemo_sk (#1)] +│ ├── keys is null equal: [false] +│ ├── filters: [] +│ ├── build join filters: +│ │ └── filter id:0, build key:household_demographics.hd_demo_sk (#3), probe targets:[store_sales.ss_hdemo_sk (#1)@scan0], filter type:bloom,inlist,min_max +│ ├── estimated rows: 20.81 +│ ├── TableScan(Build) +│ │ ├── table: default.default.household_demographics +│ │ ├── scan id: 1 +│ │ ├── output columns: [hd_demo_sk (#3), hd_dep_count (#4), hd_vehicle_count (#5)] +│ │ ├── read rows: 4 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: >, blocks: , bloom pruning: >] +│ │ ├── push downs: [filters: [household_demographics.hd_dep_count (#4) = 4 and household_demographics.hd_vehicle_count (#5) <= 6 or household_demographics.hd_dep_count (#4) = 2 and household_demographics.hd_vehicle_count (#5) <= 4 or household_demographics.hd_dep_count (#4) = 0 and household_demographics.hd_vehicle_count (#5) <= 2], limit: NONE] +│ │ └── estimated rows: 2.31 +│ └── TableScan(Probe) +│ ├── table: default.default.store_sales +│ ├── scan id: 0 +│ ├── output columns: [ss_sold_time_sk (#0), ss_hdemo_sk (#1), ss_store_sk (#2)] +│ ├── read rows: 36 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: >, blocks: >] +│ ├── push downs: [filters: [], limit: NONE] +│ ├── apply join filters: [#2, #1, #0] +│ └── estimated rows: 36.00 +└── HashJoin + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35), COUNT(*) (#47), COUNT(*) (#59), COUNT(*) (#71), COUNT(*) (#83), COUNT(*) (#95)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#95)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#91) = 12), is_true(time_dim.t_minute (#92) < 30), is_true(store_sales.ss_sold_time_sk (#84) = time_dim.t_time_sk (#90)), is_true(store_sales.ss_hdemo_sk (#85) = household_demographics.hd_demo_sk (#87)), is_true(store_sales.ss_store_sk (#86) = store.s_store_sk (#93)), household_demographics.hd_dep_count (#88) = 4 and household_demographics.hd_vehicle_count (#89) <= 6 or household_demographics.hd_dep_count (#88) = 2 and household_demographics.hd_vehicle_count (#89) <= 4 or household_demographics.hd_dep_count (#88) = 0 and household_demographics.hd_vehicle_count (#89) <= 2, is_true(store.s_store_name (#94) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#84), ss_hdemo_sk (#85), ss_store_sk (#86), hd_demo_sk (#87), hd_dep_count (#88), hd_vehicle_count (#89), t_time_sk (#90), t_hour (#91), t_minute (#92), s_store_sk (#93), s_store_name (#94)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35), COUNT(*) (#47), COUNT(*) (#59), COUNT(*) (#71), COUNT(*) (#83)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#83)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#79) = 11), is_true(time_dim.t_minute (#80) >= 30), is_true(store_sales.ss_sold_time_sk (#72) = time_dim.t_time_sk (#78)), is_true(store_sales.ss_hdemo_sk (#73) = household_demographics.hd_demo_sk (#75)), is_true(store_sales.ss_store_sk (#74) = store.s_store_sk (#81)), household_demographics.hd_dep_count (#76) = 4 and household_demographics.hd_vehicle_count (#77) <= 6 or household_demographics.hd_dep_count (#76) = 2 and household_demographics.hd_vehicle_count (#77) <= 4 or household_demographics.hd_dep_count (#76) = 0 and household_demographics.hd_vehicle_count (#77) <= 2, is_true(store.s_store_name (#82) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#72), ss_hdemo_sk (#73), ss_store_sk (#74), hd_demo_sk (#75), hd_dep_count (#76), hd_vehicle_count (#77), t_time_sk (#78), t_hour (#79), t_minute (#80), s_store_sk (#81), s_store_name (#82)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35), COUNT(*) (#47), COUNT(*) (#59), COUNT(*) (#71)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#71)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#67) = 11), is_true(time_dim.t_minute (#68) < 30), is_true(store_sales.ss_sold_time_sk (#60) = time_dim.t_time_sk (#66)), is_true(store_sales.ss_hdemo_sk (#61) = household_demographics.hd_demo_sk (#63)), is_true(store_sales.ss_store_sk (#62) = store.s_store_sk (#69)), household_demographics.hd_dep_count (#64) = 4 and household_demographics.hd_vehicle_count (#65) <= 6 or household_demographics.hd_dep_count (#64) = 2 and household_demographics.hd_vehicle_count (#65) <= 4 or household_demographics.hd_dep_count (#64) = 0 and household_demographics.hd_vehicle_count (#65) <= 2, is_true(store.s_store_name (#70) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#60), ss_hdemo_sk (#61), ss_store_sk (#62), hd_demo_sk (#63), hd_dep_count (#64), hd_vehicle_count (#65), t_time_sk (#66), t_hour (#67), t_minute (#68), s_store_sk (#69), s_store_name (#70)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35), COUNT(*) (#47), COUNT(*) (#59)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#59)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#55) = 10), is_true(time_dim.t_minute (#56) >= 30), is_true(store_sales.ss_sold_time_sk (#48) = time_dim.t_time_sk (#54)), is_true(store_sales.ss_hdemo_sk (#49) = household_demographics.hd_demo_sk (#51)), is_true(store_sales.ss_store_sk (#50) = store.s_store_sk (#57)), household_demographics.hd_dep_count (#52) = 4 and household_demographics.hd_vehicle_count (#53) <= 6 or household_demographics.hd_dep_count (#52) = 2 and household_demographics.hd_vehicle_count (#53) <= 4 or household_demographics.hd_dep_count (#52) = 0 and household_demographics.hd_vehicle_count (#53) <= 2, is_true(store.s_store_name (#58) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#48), ss_hdemo_sk (#49), ss_store_sk (#50), hd_demo_sk (#51), hd_dep_count (#52), hd_vehicle_count (#53), t_time_sk (#54), t_hour (#55), t_minute (#56), s_store_sk (#57), s_store_name (#58)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35), COUNT(*) (#47)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#47)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#43) = 10), is_true(time_dim.t_minute (#44) < 30), is_true(store_sales.ss_sold_time_sk (#36) = time_dim.t_time_sk (#42)), is_true(store_sales.ss_hdemo_sk (#37) = household_demographics.hd_demo_sk (#39)), is_true(store_sales.ss_store_sk (#38) = store.s_store_sk (#45)), household_demographics.hd_dep_count (#40) = 4 and household_demographics.hd_vehicle_count (#41) <= 6 or household_demographics.hd_dep_count (#40) = 2 and household_demographics.hd_vehicle_count (#41) <= 4 or household_demographics.hd_dep_count (#40) = 0 and household_demographics.hd_vehicle_count (#41) <= 2, is_true(store.s_store_name (#46) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#36), ss_hdemo_sk (#37), ss_store_sk (#38), hd_demo_sk (#39), hd_dep_count (#40), hd_vehicle_count (#41), t_time_sk (#42), t_hour (#43), t_minute (#44), s_store_sk (#45), s_store_name (#46)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23), COUNT(*) (#35)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#35)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#31) = 9), is_true(time_dim.t_minute (#32) >= 30), is_true(store_sales.ss_sold_time_sk (#24) = time_dim.t_time_sk (#30)), is_true(store_sales.ss_hdemo_sk (#25) = household_demographics.hd_demo_sk (#27)), is_true(store_sales.ss_store_sk (#26) = store.s_store_sk (#33)), household_demographics.hd_dep_count (#28) = 4 and household_demographics.hd_vehicle_count (#29) <= 6 or household_demographics.hd_dep_count (#28) = 2 and household_demographics.hd_vehicle_count (#29) <= 4 or household_demographics.hd_dep_count (#28) = 0 and household_demographics.hd_vehicle_count (#29) <= 2, is_true(store.s_store_name (#34) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#24), ss_hdemo_sk (#25), ss_store_sk (#26), hd_demo_sk (#27), hd_dep_count (#28), hd_vehicle_count (#29), t_time_sk (#30), t_hour (#31), t_minute (#32), s_store_sk (#33), s_store_name (#34)] + │ └── estimated rows: 17.32 + └── HashJoin(Probe) + ├── output columns: [COUNT(*) (#11), COUNT(*) (#23)] + ├── join type: CROSS + ├── build keys: [] + ├── probe keys: [] + ├── keys is null equal: [] + ├── filters: [] + ├── estimated rows: 1.00 + ├── AggregateFinal(Build) + │ ├── output columns: [COUNT(*) (#23)] + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── AggregatePartial + │ ├── group by: [] + │ ├── aggregate functions: [count()] + │ ├── estimated rows: 1.00 + │ └── Filter + │ ├── output columns: [] + │ ├── filters: [is_true(time_dim.t_hour (#19) = 9), is_true(time_dim.t_minute (#20) < 30), is_true(store_sales.ss_sold_time_sk (#12) = time_dim.t_time_sk (#18)), is_true(store_sales.ss_hdemo_sk (#13) = household_demographics.hd_demo_sk (#15)), is_true(store_sales.ss_store_sk (#14) = store.s_store_sk (#21)), household_demographics.hd_dep_count (#16) = 4 and household_demographics.hd_vehicle_count (#17) <= 6 or household_demographics.hd_dep_count (#16) = 2 and household_demographics.hd_vehicle_count (#17) <= 4 or household_demographics.hd_dep_count (#16) = 0 and household_demographics.hd_vehicle_count (#17) <= 2, is_true(store.s_store_name (#22) = 'ese')] + │ ├── estimated rows: 3.46 + │ └── MaterializeCTERef + │ ├── cte_name: cte_cse_0 + │ ├── cte_schema: [ss_sold_time_sk (#12), ss_hdemo_sk (#13), ss_store_sk (#14), hd_demo_sk (#15), hd_dep_count (#16), hd_vehicle_count (#17), t_time_sk (#18), t_hour (#19), t_minute (#20), s_store_sk (#21), s_store_name (#22)] + │ └── estimated rows: 17.32 + └── AggregateFinal(Probe) + ├── output columns: [COUNT(*) (#11)] + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── AggregatePartial + ├── group by: [] + ├── aggregate functions: [count()] + ├── estimated rows: 1.00 + └── Filter + ├── output columns: [] + ├── filters: [is_true(time_dim.t_hour (#7) = 8), is_true(time_dim.t_minute (#8) >= 30), is_true(store_sales.ss_sold_time_sk (#0) = time_dim.t_time_sk (#6)), is_true(store_sales.ss_hdemo_sk (#1) = household_demographics.hd_demo_sk (#3)), is_true(store_sales.ss_store_sk (#2) = store.s_store_sk (#9)), household_demographics.hd_dep_count (#4) = 4 and household_demographics.hd_vehicle_count (#5) <= 6 or household_demographics.hd_dep_count (#4) = 2 and household_demographics.hd_vehicle_count (#5) <= 4 or household_demographics.hd_dep_count (#4) = 0 and household_demographics.hd_vehicle_count (#5) <= 2, is_true(store.s_store_name (#10) = 'ese')] + ├── estimated rows: 3.46 + └── MaterializeCTERef + ├── cte_name: cte_cse_0 + ├── cte_schema: [ss_sold_time_sk (#0), ss_hdemo_sk (#1), ss_store_sk (#2), hd_demo_sk (#3), hd_dep_count (#4), hd_vehicle_count (#5), t_time_sk (#6), t_hour (#7), t_minute (#8), s_store_sk (#9), s_store_name (#10)] + └── estimated rows: 17.32 + +statement ok +drop table store_sales; + +statement ok +drop table household_demographics; + +statement ok +drop table time_dim; + +statement ok +drop table store; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/cte_prune_columns.test b/tests/sqllogictests/suites/mode/standalone/explain/cte_prune_columns.test index ed10e7fbaeae3..ecb1fff3be551 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/cte_prune_columns.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/cte_prune_columns.test @@ -31,8 +31,8 @@ Sequence │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [cte_prune_t.a (#0) > 0 or cte_prune_t.a (#0) > 0], limit: NONE] -│ └── estimated rows: 2.67 +│ ├── push downs: [filters: [is_true(cte_prune_t.a (#0) > 0)], limit: NONE] +│ └── estimated rows: 2.00 └── HashJoin ├── output columns: [cte_prune_t.a (#4), cte_prune_t.c (#10)] ├── join type: INNER @@ -42,23 +42,23 @@ Sequence ├── filters: [] ├── build join filters: │ └── filter id:0, build key:y.a (#8), probe targets:[x.a (#4)@scan1], filter type:bloom,inlist,min_max - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 ├── Filter(Build) │ ├── output columns: [cte_prune_t.a (#8), cte_prune_t.c (#10)] │ ├── filters: [is_true(y.a (#8) > 0)] - │ ├── estimated rows: 2.00 + │ ├── estimated rows: 1.00 │ └── MaterializeCTERef │ ├── cte_name: cte │ ├── cte_schema: [a (#8), c (#10)] - │ └── estimated rows: 3.00 + │ └── estimated rows: 2.00 └── Filter(Probe) ├── output columns: [cte_prune_t.a (#4)] ├── filters: [is_true(x.a (#4) > 0)] - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 └── MaterializeCTERef ├── cte_name: cte ├── cte_schema: [a (#4), c (#6)] - └── estimated rows: 3.00 + └── estimated rows: 2.00 query T nosort explain