@@ -41,9 +41,9 @@ use crate::{cast_whole_num_to_binary, BinaryOutputStyle};
4141use crate :: { EvalMode , SparkError } ;
4242use arrow:: array:: builder:: StringBuilder ;
4343use arrow:: array:: {
44- BinaryBuilder , DictionaryArray , GenericByteArray , ListArray , MapArray , StringArray , StructArray ,
44+ new_null_array, BinaryBuilder , DictionaryArray , GenericByteArray , ListArray , MapArray ,
45+ StringArray , StructArray ,
4546} ;
46- use arrow:: compute:: can_cast_types;
4747use arrow:: datatypes:: { ArrowDictionaryKeyType , ArrowNativeType , DataType , Schema } ;
4848use arrow:: datatypes:: { Field , Fields , GenericBinaryType } ;
4949use arrow:: error:: ArrowError ;
@@ -310,6 +310,9 @@ pub(crate) fn cast_array(
310310 } ;
311311
312312 let cast_result = match ( & from_type, to_type) {
313+ // Null arrays carry no concrete values, so Arrow's native cast can change only the
314+ // logical type while preserving length and nullness.
315+ ( Null , _) => Ok ( cast_with_options ( & array, to_type, & native_cast_options) ?) ,
313316 ( Utf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i32 > ( & array, eval_mode) ,
314317 ( LargeUtf8 , Boolean ) => spark_cast_utf8_to_boolean :: < i64 > ( & array, eval_mode) ,
315318 ( Utf8 , Timestamp ( _, _) ) => cast_string_to_timestamp (
@@ -386,8 +389,25 @@ pub(crate) fn cast_array(
386389 cast_options,
387390 ) ?) ,
388391 ( List ( _) , Utf8 ) => Ok ( cast_array_to_string ( array. as_list ( ) , cast_options) ?) ,
389- ( List ( _) , List ( _) ) if can_cast_types ( & from_type, to_type) => {
390- Ok ( cast_with_options ( & array, to_type, & CAST_OPTIONS ) ?)
392+ ( List ( _) , List ( to) ) => {
393+ // Cast list elements recursively so nested array casts follow Spark semantics
394+ // instead of relying on Arrow's top-level cast support.
395+ let list_array = array. as_list :: < i32 > ( ) ;
396+ let casted_values = match ( list_array. values ( ) . data_type ( ) , to. data_type ( ) ) {
397+ // Spark legacy array casts produce null elements for array<Date> -> array<Int>.
398+ ( Date32 , Int32 ) => new_null_array ( to. data_type ( ) , list_array. values ( ) . len ( ) ) ,
399+ _ => cast_array (
400+ Arc :: clone ( list_array. values ( ) ) ,
401+ to. data_type ( ) ,
402+ cast_options,
403+ ) ?,
404+ } ;
405+ Ok ( Arc :: new ( ListArray :: new (
406+ Arc :: clone ( to) ,
407+ list_array. offsets ( ) . clone ( ) ,
408+ casted_values,
409+ list_array. nulls ( ) . cloned ( ) ,
410+ ) ) as ArrayRef )
391411 }
392412 ( Map ( _, _) , Map ( _, _) ) => Ok ( cast_map_to_map ( & array, & from_type, to_type, cast_options) ?) ,
393413 ( UInt8 | UInt16 | UInt32 | UInt64 , Int8 | Int16 | Int32 | Int64 )
@@ -819,7 +839,8 @@ fn cast_binary_formatter(value: &[u8]) -> String {
819839#[ cfg( test) ]
820840mod tests {
821841 use super :: * ;
822- use arrow:: array:: StringArray ;
842+ use arrow:: array:: { ListArray , NullArray , StringArray } ;
843+ use arrow:: buffer:: OffsetBuffer ;
823844 use arrow:: datatypes:: TimestampMicrosecondType ;
824845 use arrow:: datatypes:: { Field , Fields } ;
825846 #[ test]
@@ -945,8 +966,6 @@ mod tests {
945966
946967 #[ test]
947968 fn test_cast_string_array_to_string ( ) {
948- use arrow:: array:: ListArray ;
949- use arrow:: buffer:: OffsetBuffer ;
950969 let values_array =
951970 StringArray :: from ( vec ! [ Some ( "a" ) , Some ( "b" ) , Some ( "c" ) , Some ( "a" ) , None , None ] ) ;
952971 let offsets_buffer = OffsetBuffer :: < i32 > :: new ( vec ! [ 0 , 3 , 5 , 6 , 6 ] . into ( ) ) ;
@@ -971,8 +990,6 @@ mod tests {
971990
972991 #[ test]
973992 fn test_cast_i32_array_to_string ( ) {
974- use arrow:: array:: ListArray ;
975- use arrow:: buffer:: OffsetBuffer ;
976993 let values_array = Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 3 ) , Some ( 1 ) , None , None ] ) ;
977994 let offsets_buffer = OffsetBuffer :: < i32 > :: new ( vec ! [ 0 , 3 , 5 , 6 , 6 ] . into ( ) ) ;
978995 let item_field = Arc :: new ( Field :: new ( "item" , DataType :: Int32 , true ) ) ;
@@ -993,4 +1010,33 @@ mod tests {
9931010 assert_eq ! ( r#"[null]"# , string_array. value( 2 ) ) ;
9941011 assert_eq ! ( r#"[]"# , string_array. value( 3 ) ) ;
9951012 }
1013+
#[test]
fn test_cast_array_of_nulls_to_array() {
    // Source: three lists laid out over three untyped null elements,
    // i.e. [null, null], [null], [] (offsets 0, 2, 3, 3), with no list-level null buffer.
    let source_field = Arc::new(Field::new("item", DataType::Null, true));
    let source_offsets = OffsetBuffer::<i32>::new(vec![0, 2, 3, 3].into());
    let source_values: ArrayRef = Arc::new(NullArray::new(3));
    let source: ArrayRef = Arc::new(ListArray::new(
        source_field,
        source_offsets,
        source_values,
        None,
    ));

    // Target: the same list shape, but with nullable Int32 elements.
    let target_field = Arc::new(Field::new("item", DataType::Int32, true));
    let target_type = DataType::List(target_field);
    let options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);

    let casted = cast_array(source, &target_type, &options).unwrap();

    // The list layout must survive the cast unchanged.
    let lists = casted.as_list::<i32>();
    assert_eq!(3, lists.len());
    assert_eq!(lists.value_offsets(), &[0, 2, 3, 3]);

    // Every element must now be a typed Int32 null.
    let elements = lists.values().as_primitive::<Int32Type>();
    assert_eq!(3, elements.len());
    assert_eq!(3, elements.null_count());
    assert!(elements.iter().all(|element| element.is_none()));
}
9961042}
0 commit comments