@@ -667,65 +667,31 @@ impl PhysicalPlanner {
667667 ) -> Result < Arc < dyn PhysicalExpr > , ExecutionError > {
668668 let left = self . create_expr ( left, Arc :: clone ( & input_schema) ) ?;
669669 let right = self . create_expr ( right, Arc :: clone ( & input_schema) ) ?;
670- let left_type = left. data_type ( & input_schema) ;
671- let right_type = right. data_type ( & input_schema) ;
672- match ( & op, & left_type, & right_type) {
673- // Handle date arithmetic with Int8/Int16/Int32 by:
674- // 1. Casting Date32 to Int32 (days since epoch)
675- // 2. Performing the arithmetic as Int32 +/- Int32
676- // 3. Casting the result back to Date32 using DataFusion's CastExpr
677- // Arrow's date arithmetic kernel only supports Date32 +/- Interval types
678- // Note: We use DataFusion's CastExpr for the final cast because Spark's Cast
679- // doesn't support Int32 -> Date32 conversion
680- (
681- DataFusionOperator :: Plus | DataFusionOperator :: Minus ,
682- Ok ( DataType :: Date32 ) ,
683- Ok ( DataType :: Int8 ) | Ok ( DataType :: Int16 ) | Ok ( DataType :: Int32 ) ,
684- ) => {
685- // Cast Date32 to Int32 (days since epoch)
686- let left_as_int = Arc :: new ( Cast :: new (
687- left,
688- DataType :: Int32 ,
689- SparkCastOptions :: new_without_timezone ( EvalMode :: Legacy , false ) ,
690- ) ) ;
691- // Cast Int8/Int16 to Int32 if needed
692- let right_as_int: Arc < dyn PhysicalExpr > =
693- if matches ! ( right_type, Ok ( DataType :: Int32 ) ) {
694- right
695- } else {
696- Arc :: new ( Cast :: new (
697- right,
698- DataType :: Int32 ,
699- SparkCastOptions :: new_without_timezone ( EvalMode :: Legacy , false ) ,
700- ) )
701- } ;
702- // Perform the arithmetic as Int32 +/- Int32
703- let result_int = Arc :: new ( BinaryExpr :: new ( left_as_int, op, right_as_int) ) ;
704- // Cast the result back to Date32 using DataFusion's CastExpr
705- // (Spark's Cast doesn't support Int32 -> Date32)
706- Ok ( Arc :: new ( CastExpr :: new ( result_int, DataType :: Date32 , None ) ) )
707- }
670+ match (
671+ & op,
672+ left. data_type ( & input_schema) ,
673+ right. data_type ( & input_schema) ,
674+ ) {
708675 (
709676 DataFusionOperator :: Plus | DataFusionOperator :: Minus | DataFusionOperator :: Multiply ,
710677 Ok ( DataType :: Decimal128 ( p1, s1) ) ,
711678 Ok ( DataType :: Decimal128 ( p2, s2) ) ,
712679 ) if ( ( op == DataFusionOperator :: Plus || op == DataFusionOperator :: Minus )
713- && max ( * s1, * s2) as u8 + max ( * p1 - * s1 as u8 , * p2 - * s2 as u8 )
680+ && max ( s1, s2) as u8 + max ( p1 - s1 as u8 , p2 - s2 as u8 )
714681 >= DECIMAL128_MAX_PRECISION )
715- || ( op == DataFusionOperator :: Multiply
716- && * p1 + * p2 >= DECIMAL128_MAX_PRECISION ) =>
682+ || ( op == DataFusionOperator :: Multiply && p1 + p2 >= DECIMAL128_MAX_PRECISION ) =>
717683 {
718684 let data_type = return_type. map ( to_arrow_datatype) . unwrap ( ) ;
719685 // For some Decimal128 operations, we need wider internal digits.
720686 // Cast left and right to Decimal256 and cast the result back to Decimal128
721687 let left = Arc :: new ( Cast :: new (
722688 left,
723- DataType :: Decimal256 ( * p1, * s1) ,
689+ DataType :: Decimal256 ( p1, s1) ,
724690 SparkCastOptions :: new_without_timezone ( EvalMode :: Legacy , false ) ,
725691 ) ) ;
726692 let right = Arc :: new ( Cast :: new (
727693 right,
728- DataType :: Decimal256 ( * p2, * s2) ,
694+ DataType :: Decimal256 ( p2, s2) ,
729695 SparkCastOptions :: new_without_timezone ( EvalMode :: Legacy , false ) ,
730696 ) ) ;
731697 let child = Arc :: new ( BinaryExpr :: new ( left, op, right) ) ;
@@ -999,6 +965,7 @@ impl PhysicalPlanner {
999965 ) )
1000966 }
1001967 OpStruct :: NativeScan ( scan) => {
968+ dbg ! ( & scan) ;
1002969 let data_schema = convert_spark_types_to_arrow_schema ( scan. data_schema . as_slice ( ) ) ;
1003970 let required_schema: SchemaRef =
1004971 convert_spark_types_to_arrow_schema ( scan. required_schema . as_slice ( ) ) ;
@@ -1146,6 +1113,7 @@ impl PhysicalPlanner {
11461113 ) )
11471114 }
11481115 OpStruct :: Scan ( scan) => {
1116+ dbg ! ( & scan) ;
11491117 let data_types = scan. fields . iter ( ) . map ( to_arrow_datatype) . collect_vec ( ) ;
11501118
11511119 // If it is not test execution context for unit test, we should have at least one
@@ -1172,6 +1140,8 @@ impl PhysicalPlanner {
11721140 scan. arrow_ffi_safe ,
11731141 ) ?;
11741142
1143+ dbg ! ( & scan) ;
1144+
11751145 Ok ( (
11761146 vec ! [ scan. clone( ) ] ,
11771147 Arc :: new ( SparkPlan :: new ( spark_plan. plan_id , Arc :: new ( scan) , vec ! [ ] ) ) ,
@@ -4411,12 +4381,10 @@ mod tests {
44114381 fn test_date_sub_with_int8_cast_error ( ) {
44124382 use arrow:: array:: Date32Array ;
44134383
4414- let session_ctx = SessionContext :: new ( ) ;
4415- let task_ctx = session_ctx. task_ctx ( ) ;
4416- let planner = PhysicalPlanner :: new ( Arc :: from ( session_ctx) , 0 ) ;
4384+ let planner = PhysicalPlanner :: default ( ) ;
4385+ let row_count = 3 ;
44174386
4418- // Create a scan operator with Date32 (DATE) and Int8 (TINYINT) columns
4419- // This simulates the schema from the Scala test where _20 is DATE and _2 is TINYINT
4387+ // Create a Scan operator with Date32 (DATE) and Int8 (TINYINT) columns
44204388 let op_scan = Operator {
44214389 plan_id : 0 ,
44224390 children : vec ! [ ] ,
@@ -4431,7 +4399,7 @@ mod tests {
44314399 type_info: None ,
44324400 } ,
44334401 ] ,
4434- source : "test " . to_string ( ) ,
4402+ source : "" . to_string ( ) ,
44354403 arrow_ffi_safe : false ,
44364404 } ) ) ,
44374405 } ;
@@ -4486,22 +4454,27 @@ mod tests {
44864454 let ( mut scans, datafusion_plan) =
44874455 planner. create_plan ( & projection, & mut vec ! [ ] , 1 ) . unwrap ( ) ;
44884456
4489- // Execute the plan with test data
4457+ // Create test data: Date32 and Int8 columns
4458+ let date_array = Date32Array :: from ( vec ! [ Some ( 19000 ) , Some ( 19001 ) , Some ( 19002 ) ] ) ;
4459+ let int8_array = Int8Array :: from ( vec ! [ Some ( 1i8 ) , Some ( 2i8 ) , Some ( 3i8 ) ] ) ;
4460+
4461+ // Set input batch for the scan
4462+ let input_batch = InputBatch :: Batch ( vec ! [ Arc :: new( date_array) , Arc :: new( int8_array) ] , row_count) ;
4463+ scans[ 0 ] . set_input_batch ( input_batch) ;
4464+
4465+ let session_ctx = SessionContext :: new ( ) ;
4466+ let task_ctx = session_ctx. task_ctx ( ) ;
44904467 let mut stream = datafusion_plan. native_plan . execute ( 0 , task_ctx) . unwrap ( ) ;
44914468
44924469 let runtime = tokio:: runtime:: Runtime :: new ( ) . unwrap ( ) ;
44934470 let ( tx, mut rx) = mpsc:: channel ( 1 ) ;
44944471
4495- // Send test data: Date32 values and Int8 values
4472+ // Separate thread to send the EOF signal once we've processed the only input batch
44964473 runtime. spawn ( async move {
4497- // Create Date32 array (days since epoch)
4498- // 19000 days = approximately 2022-01-01
4474+ // Create test data again for the second batch
44994475 let date_array = Date32Array :: from ( vec ! [ Some ( 19000 ) , Some ( 19001 ) , Some ( 19002 ) ] ) ;
4500- // Create Int8 array
45014476 let int8_array = Int8Array :: from ( vec ! [ Some ( 1i8 ) , Some ( 2i8 ) , Some ( 3i8 ) ] ) ;
4502-
4503- let input_batch1 =
4504- InputBatch :: Batch ( vec ! [ Arc :: new( date_array) , Arc :: new( int8_array) ] , 3 ) ;
4477+ let input_batch1 = InputBatch :: Batch ( vec ! [ Arc :: new( date_array) , Arc :: new( int8_array) ] , row_count) ;
45054478 let input_batch2 = InputBatch :: EOF ;
45064479
45074480 let batches = vec ! [ input_batch1, input_batch2] ;
@@ -4511,7 +4484,6 @@ mod tests {
45114484 }
45124485 } ) ;
45134486
4514- // Execute and expect success - the Int8 should be cast to Int32 for date arithmetic
45154487 runtime. block_on ( async move {
45164488 loop {
45174489 let batch = rx. recv ( ) . await . unwrap ( ) ;
@@ -4524,10 +4496,13 @@ mod tests {
45244496 "Expected success for date - int8 operation but got error: {:?}" ,
45254497 result. unwrap_err( )
45264498 ) ;
4499+
45274500 let batch = result. unwrap ( ) ;
4528- assert_eq ! ( batch. num_rows( ) , 3 ) ;
4501+ assert_eq ! ( batch. num_rows( ) , row_count) ;
4502+
45294503 // The result should be Date32 type
45304504 assert_eq ! ( batch. column( 0 ) . data_type( ) , & DataType :: Date32 ) ;
4505+
45314506 // Verify the values: 19000-1=18999, 19001-2=18999, 19002-3=18999
45324507 let date_array = batch
45334508 . column ( 0 )
@@ -4537,7 +4512,6 @@ mod tests {
45374512 assert_eq ! ( date_array. value( 0 ) , 18999 ) ; // 19000 - 1
45384513 assert_eq ! ( date_array. value( 1 ) , 18999 ) ; // 19001 - 2
45394514 assert_eq ! ( date_array. value( 2 ) , 18999 ) ; // 19002 - 3
4540- break ;
45414515 }
45424516 Poll :: Ready ( None ) => {
45434517 break ;
0 commit comments