1616// under the License.
1717
1818use crate :: utils:: array_with_timezone;
19+ use crate :: EvalMode :: Legacy ;
1920use crate :: { timezone, BinaryOutputStyle } ;
2021use crate :: { EvalMode , SparkError , SparkResult } ;
2122use arrow:: array:: builder:: StringBuilder ;
2223use arrow:: array:: {
23- BooleanBuilder , Decimal128Builder , DictionaryArray , GenericByteArray , ListArray ,
24+ BinaryBuilder , BooleanBuilder , Decimal128Builder , DictionaryArray , GenericByteArray , ListArray ,
2425 PrimitiveBuilder , StringArray , StructArray , TimestampMicrosecondBuilder ,
2526} ;
2627use arrow:: compute:: can_cast_types;
@@ -304,29 +305,32 @@ fn can_cast_from_timestamp(to_type: &DataType, _options: &SparkCastOptions) -> b
304305
305306fn can_cast_from_boolean ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
306307 use DataType :: * ;
307- matches ! ( to_type, Int8 | Int16 | Int32 | Int64 | Float32 | Float64 )
308+ matches ! (
309+ to_type,
310+ Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _)
311+ )
308312}
309313
310314fn can_cast_from_byte ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
311315 use DataType :: * ;
312316 matches ! (
313317 to_type,
314- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _)
318+ Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _) | Binary
315319 )
316320}
317321
318322fn can_cast_from_short ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
319323 use DataType :: * ;
320324 matches ! (
321325 to_type,
322- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _)
326+ Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _) | Binary
323327 )
324328}
325329
326330fn can_cast_from_int ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
327331 use DataType :: * ;
328332 match to_type {
329- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Utf8 => true ,
333+ Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Utf8 | Binary => true ,
330334 Decimal128 ( _, _) => {
331335 // incompatible: no overflow check
332336 options. allow_incompat
@@ -338,7 +342,7 @@ fn can_cast_from_int(to_type: &DataType, options: &SparkCastOptions) -> bool {
338342fn can_cast_from_long ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
339343 use DataType :: * ;
340344 match to_type {
341- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true ,
345+ Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Binary => true ,
342346 Decimal128 ( _, _) => {
343347 // incompatible: no overflow check
344348 options. allow_incompat
@@ -501,6 +505,29 @@ macro_rules! cast_float_to_string {
501505 } } ;
502506}
503507
// Converts an integer array into a binary array in which every non-null
// element is the big-endian byte representation of the integer; nulls stay
// null.
//
// Arguments:
//   $array          - expression exposing `as_any()`, downcast to `$primitive_type`
//   $primitive_type - concrete array type to downcast to (e.g. `Int32Array`)
//   $byte_size      - bytes per element, used only to pre-size the value buffer
//
// The expansion evaluates to `Ok(ArrayRef)` and uses `?`, so it must be
// invoked inside a function whose error type can hold a `SparkError`.
// No eval-mode handling is needed here: each integer always has a
// fixed-width byte encoding, so the per-value conversion cannot fail.
macro_rules! cast_whole_num_to_binary {
    ($array:expr, $primitive_type:ty, $byte_size:expr) => {{
        let typed = $array
            .as_any()
            .downcast_ref::<$primitive_type>()
            .ok_or_else(|| SparkError::Internal("Expected numeric array".to_string()))?;

        let row_count = typed.len();
        let mut out = BinaryBuilder::with_capacity(row_count, row_count * $byte_size);

        for maybe_value in typed.iter() {
            match maybe_value {
                Some(v) => out.append_value(v.to_be_bytes()),
                None => out.append_null(),
            }
        }

        Ok(Arc::new(out.finish()) as ArrayRef)
    }};
}
530+
504531macro_rules! cast_int_to_int_macro {
505532 (
506533 $array: expr,
@@ -1101,6 +1128,19 @@ fn cast_array(
11011128 }
11021129 ( Binary , Utf8 ) => Ok ( cast_binary_to_string :: < i32 > ( & array, cast_options) ?) ,
11031130 ( Date32 , Timestamp ( _, tz) ) => Ok ( cast_date_to_timestamp ( & array, cast_options, tz) ?) ,
1131+ ( Int8 , Binary ) if ( eval_mode == Legacy ) => cast_whole_num_to_binary ! ( & array, Int8Array , 1 ) ,
1132+ ( Int16 , Binary ) if ( eval_mode == Legacy ) => {
1133+ cast_whole_num_to_binary ! ( & array, Int16Array , 2 )
1134+ }
1135+ ( Int32 , Binary ) if ( eval_mode == Legacy ) => {
1136+ cast_whole_num_to_binary ! ( & array, Int32Array , 4 )
1137+ }
1138+ ( Int64 , Binary ) if ( eval_mode == Legacy ) => {
1139+ cast_whole_num_to_binary ! ( & array, Int64Array , 8 )
1140+ }
1141+ ( Boolean , Decimal128 ( precision, scale) ) => {
1142+ cast_boolean_to_decimal ( & array, * precision, * scale)
1143+ }
11041144 _ if cast_options. is_adapting_schema
11051145 || is_datafusion_spark_compatible ( from_type, to_type) =>
11061146 {
@@ -1163,6 +1203,16 @@ fn cast_date_to_timestamp(
11631203 ) )
11641204}
11651205
1206+ fn cast_boolean_to_decimal ( array : & ArrayRef , precision : u8 , scale : i8 ) -> SparkResult < ArrayRef > {
1207+ let bool_array = array. as_boolean ( ) ;
1208+ let scaled_val = 10_i128 . pow ( scale as u32 ) ;
1209+ let result: Decimal128Array = bool_array
1210+ . iter ( )
1211+ . map ( |v| v. map ( |b| if b { scaled_val } else { 0 } ) )
1212+ . collect ( ) ;
1213+ Ok ( Arc :: new ( result. with_precision_and_scale ( precision, scale) ?) )
1214+ }
1215+
11661216fn cast_string_to_float (
11671217 array : & ArrayRef ,
11681218 to_type : & DataType ,
0 commit comments