@@ -41,9 +41,9 @@ use crate::{cast_whole_num_to_binary, BinaryOutputStyle};
4141use crate :: { EvalMode , SparkError } ;
4242use arrow:: array:: builder:: StringBuilder ;
4343use arrow:: array:: {
44- BinaryBuilder , DictionaryArray , GenericByteArray , ListArray , MapArray , StringArray , StructArray ,
44+ new_null_array, BinaryBuilder , DictionaryArray , GenericByteArray , ListArray , MapArray ,
45+ StringArray , StructArray ,
4546} ;
46- use arrow:: compute:: can_cast_types;
4747use arrow:: datatypes:: { ArrowDictionaryKeyType , ArrowNativeType , DataType , Schema } ;
4848use arrow:: datatypes:: { Field , Fields , GenericBinaryType } ;
4949use arrow:: error:: ArrowError ;
@@ -311,6 +311,9 @@ pub(crate) fn cast_array(
311311 } ;
312312
313313 let cast_result = match ( & from_type, to_type) {
314+ // Null arrays carry no concrete values, so Arrow's native cast can change only the
315+ // logical type while preserving length and nullness.
316+ ( Null , _) => Ok ( cast_with_options ( & array, to_type, & native_cast_options) ?) ,
314317 ( Utf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i32 > ( & array, eval_mode) ,
315318 ( LargeUtf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i64 > ( & array, eval_mode) ,
316319 ( Utf8 , Timestamp ( _, _) ) => cast_string_to_timestamp (
@@ -387,8 +390,25 @@ pub(crate) fn cast_array(
387390 cast_options,
388391 ) ?) ,
389392 ( List ( _) , Utf8 ) => Ok ( cast_array_to_string ( array. as_list ( ) , cast_options) ?) ,
390- ( List ( _) , List ( _) ) if can_cast_types ( & from_type, to_type) => {
391- Ok ( cast_with_options ( & array, to_type, & CAST_OPTIONS ) ?)
393+ ( List ( _) , List ( to) ) => {
394+ // Cast list elements recursively so nested array casts follow Spark semantics
395+ // instead of relying on Arrow's top-level cast support.
396+ let list_array = array. as_list :: < i32 > ( ) ;
397+ let casted_values = match ( list_array. values ( ) . data_type ( ) , to. data_type ( ) ) {
398+ // Spark legacy array casts produce null elements for array<Date> -> array<Int>.
399+ ( Date32 , Int32 ) => new_null_array ( to. data_type ( ) , list_array. values ( ) . len ( ) ) ,
400+ _ => cast_array (
401+ Arc :: clone ( list_array. values ( ) ) ,
402+ to. data_type ( ) ,
403+ cast_options,
404+ ) ?,
405+ } ;
406+ Ok ( Arc :: new ( ListArray :: new (
407+ Arc :: clone ( to) ,
408+ list_array. offsets ( ) . clone ( ) ,
409+ casted_values,
410+ list_array. nulls ( ) . cloned ( ) ,
411+ ) ) as ArrayRef )
392412 }
393413 ( Map ( _, _) , Map ( _, _) ) => Ok ( cast_map_to_map ( & array, & from_type, to_type, cast_options) ?) ,
394414 ( UInt8 | UInt16 | UInt32 | UInt64 , Int8 | Int16 | Int32 | Int64 )
@@ -820,7 +840,8 @@ fn cast_binary_formatter(value: &[u8]) -> String {
820840#[ cfg( test) ]
821841mod tests {
822842 use super :: * ;
823- use arrow:: array:: StringArray ;
843+ use arrow:: array:: { ListArray , NullArray , StringArray } ;
844+ use arrow:: buffer:: OffsetBuffer ;
824845 use arrow:: datatypes:: TimestampMicrosecondType ;
825846 use arrow:: datatypes:: { Field , Fields } ;
826847 #[ test]
@@ -946,8 +967,6 @@ mod tests {
946967
947968 #[ test]
948969 fn test_cast_string_array_to_string ( ) {
949- use arrow:: array:: ListArray ;
950- use arrow:: buffer:: OffsetBuffer ;
951970 let values_array =
952971 StringArray :: from ( vec ! [ Some ( "a" ) , Some ( "b" ) , Some ( "c" ) , Some ( "a" ) , None , None ] ) ;
953972 let offsets_buffer = OffsetBuffer :: < i32 > :: new ( vec ! [ 0 , 3 , 5 , 6 , 6 ] . into ( ) ) ;
@@ -972,8 +991,6 @@ mod tests {
972991
973992 #[ test]
974993 fn test_cast_i32_array_to_string ( ) {
975- use arrow:: array:: ListArray ;
976- use arrow:: buffer:: OffsetBuffer ;
977994 let values_array = Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 3 ) , Some ( 1 ) , None , None ] ) ;
978995 let offsets_buffer = OffsetBuffer :: < i32 > :: new ( vec ! [ 0 , 3 , 5 , 6 , 6 ] . into ( ) ) ;
979996 let item_field = Arc :: new ( Field :: new ( "item" , DataType :: Int32 , true ) ) ;
@@ -994,4 +1011,33 @@ mod tests {
9941011 assert_eq ! ( r#"[null]"# , string_array. value( 2 ) ) ;
9951012 assert_eq ! ( r#"[]"# , string_array. value( 3 ) ) ;
9961013 }
1014+
#[test]
fn test_cast_array_of_nulls_to_array() {
    // Source column: three lists -- [null, null], [null], [] -- whose child array is a
    // `NullArray` of length 3 and whose list-level validity buffer is absent.
    let null_values: ArrayRef = Arc::new(NullArray::new(3));
    let source: ArrayRef = Arc::new(ListArray::new(
        Arc::new(Field::new("item", DataType::Null, true)),
        OffsetBuffer::<i32>::new(vec![0, 2, 3, 3].into()),
        null_values,
        None,
    ));

    // Cast list<null> to list<int32> in legacy eval mode.
    let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
    let options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);
    let casted = cast_array(source, &target_type, &options).unwrap();

    // The list layout (row count and offsets) must be carried over unchanged.
    let lists = casted.as_list::<i32>();
    assert_eq!(3, lists.len());
    assert_eq!(lists.value_offsets(), &[0, 2, 3, 3]);

    // Every child element must come out as a null Int32.
    let child = lists.values().as_primitive::<Int32Type>();
    assert_eq!(3, child.len());
    assert_eq!(3, child.null_count());
    assert!(child.iter().all(|value| value.is_none()));
}
9971043}
0 commit comments