@@ -163,232 +163,6 @@ impl Hash for Cast {
163163 }
164164}
165165
166- /// Determine if Comet supports a cast, taking options such as EvalMode and Timezone into account.
167- pub fn cast_supported (
168- from_type : & DataType ,
169- to_type : & DataType ,
170- options : & SparkCastOptions ,
171- ) -> bool {
172- use DataType :: * ;
173-
174- let from_type = if let Dictionary ( _, dt) = from_type {
175- dt
176- } else {
177- from_type
178- } ;
179-
180- let to_type = if let Dictionary ( _, dt) = to_type {
181- dt
182- } else {
183- to_type
184- } ;
185-
186- if from_type == to_type {
187- return true ;
188- }
189-
190- match ( from_type, to_type) {
191- ( Boolean , _) => can_cast_from_boolean ( to_type, options) ,
192- ( UInt8 | UInt16 | UInt32 | UInt64 , Int8 | Int16 | Int32 | Int64 )
193- if options. allow_cast_unsigned_ints =>
194- {
195- true
196- }
197- ( Int8 , _) => can_cast_from_byte ( to_type, options) ,
198- ( Int16 , _) => can_cast_from_short ( to_type, options) ,
199- ( Int32 , _) => can_cast_from_int ( to_type, options) ,
200- ( Int64 , _) => can_cast_from_long ( to_type, options) ,
201- ( Float32 , _) => can_cast_from_float ( to_type, options) ,
202- ( Float64 , _) => can_cast_from_double ( to_type, options) ,
203- ( Decimal128 ( p, s) , _) => can_cast_from_decimal ( p, s, to_type, options) ,
204- ( Timestamp ( _, None ) , _) => can_cast_from_timestamp_ntz ( to_type, options) ,
205- ( Timestamp ( _, Some ( _) ) , _) => can_cast_from_timestamp ( to_type, options) ,
206- ( Utf8 | LargeUtf8 , _) => can_cast_from_string ( to_type, options) ,
207- ( _, Utf8 | LargeUtf8 ) => can_cast_to_string ( from_type, options) ,
208- ( Struct ( from_fields) , Struct ( to_fields) ) => from_fields
209- . iter ( )
210- . zip ( to_fields. iter ( ) )
211- . all ( |( a, b) | cast_supported ( a. data_type ( ) , b. data_type ( ) , options) ) ,
212- _ => false ,
213- }
214- }
215-
216- fn can_cast_from_string ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
217- use DataType :: * ;
218- match to_type {
219- Boolean | Int8 | Int16 | Int32 | Int64 | Binary => true ,
220- Float32 | Float64 => true ,
221- Decimal128 ( _, _) => {
222- // https://github.com/apache/datafusion-comet/issues/325
223- // Does not support fullwidth digits and null byte handling.
224- options. allow_incompat
225- }
226- Date32 | Date64 => {
227- // https://github.com/apache/datafusion-comet/issues/327
228- // Only supports years between 262143 BC and 262142 AD
229- options. allow_incompat
230- }
231- Timestamp ( _, _) if options. eval_mode == EvalMode :: Ansi => {
232- // ANSI mode not supported
233- false
234- }
235- Timestamp ( _, Some ( tz) ) if tz. as_ref ( ) != "UTC" => {
236- // Cast will use UTC instead of $timeZoneId
237- options. allow_incompat
238- }
239- Timestamp ( _, _) => {
240- // https://github.com/apache/datafusion-comet/issues/328
241- // Not all valid formats are supported
242- options. allow_incompat
243- }
244- _ => false ,
245- }
246- }
247-
248- fn can_cast_to_string ( from_type : & DataType , _options : & SparkCastOptions ) -> bool {
249- use DataType :: * ;
250- match from_type {
251- Boolean | Int8 | Int16 | Int32 | Int64 | Date32 | Date64 | Timestamp ( _, _) => true ,
252- Float32 | Float64 => {
253- // There can be differences in precision.
254- // For example, the input \"1.4E-45\" will produce 1.0E-45 " +
255- // instead of 1.4E-45"))
256- true
257- }
258- Decimal128 ( _, _) => {
259- // https://github.com/apache/datafusion-comet/issues/1068
260- // There can be formatting differences in some case due to Spark using
261- // scientific notation where Comet does not
262- true
263- }
264- Binary => true ,
265- Struct ( fields) => fields
266- . iter ( )
267- . all ( |f| can_cast_to_string ( f. data_type ( ) , _options) ) ,
268- _ => false ,
269- }
270- }
271-
272- fn can_cast_from_timestamp_ntz ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
273- use DataType :: * ;
274- match to_type {
275- Timestamp ( _, _) | Date32 | Date64 | Utf8 => {
276- // incompatible
277- options. allow_incompat
278- }
279- _ => {
280- // unsupported
281- false
282- }
283- }
284- }
285-
286- fn can_cast_from_timestamp ( to_type : & DataType , _options : & SparkCastOptions ) -> bool {
287- use DataType :: * ;
288- match to_type {
289- Boolean | Int8 | Int16 => {
290- // https://github.com/apache/datafusion-comet/issues/352
291- // this seems like an edge case that isn't important for us to support
292- false
293- }
294- Int64 => {
295- // https://github.com/apache/datafusion-comet/issues/352
296- true
297- }
298- Date32 | Date64 | Utf8 | Decimal128 ( _, _) => true ,
299- _ => {
300- // unsupported
301- false
302- }
303- }
304- }
305-
306- fn can_cast_from_boolean ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
307- use DataType :: * ;
308- matches ! (
309- to_type,
310- Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _)
311- )
312- }
313-
314- fn can_cast_from_byte ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
315- use DataType :: * ;
316- matches ! (
317- to_type,
318- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _) | Binary
319- )
320- }
321-
322- fn can_cast_from_short ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
323- use DataType :: * ;
324- matches ! (
325- to_type,
326- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128 ( _, _) | Binary
327- )
328- }
329-
330- fn can_cast_from_int ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
331- use DataType :: * ;
332- match to_type {
333- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Utf8 | Binary => true ,
334- Decimal128 ( _, _) => {
335- // incompatible: no overflow check
336- options. allow_incompat
337- }
338- _ => false ,
339- }
340- }
341-
342- fn can_cast_from_long ( to_type : & DataType , options : & SparkCastOptions ) -> bool {
343- use DataType :: * ;
344- match to_type {
345- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Binary => true ,
346- Decimal128 ( _, _) => {
347- // incompatible: no overflow check
348- options. allow_incompat
349- }
350- _ => false ,
351- }
352- }
353-
354- fn can_cast_from_float ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
355- use DataType :: * ;
356- matches ! (
357- to_type,
358- Boolean | Int8 | Int16 | Int32 | Int64 | Float64 | Decimal128 ( _, _)
359- )
360- }
361-
362- fn can_cast_from_double ( to_type : & DataType , _: & SparkCastOptions ) -> bool {
363- use DataType :: * ;
364- matches ! (
365- to_type,
366- Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Decimal128 ( _, _)
367- )
368- }
369-
370- fn can_cast_from_decimal (
371- p1 : & u8 ,
372- _s1 : & i8 ,
373- to_type : & DataType ,
374- options : & SparkCastOptions ,
375- ) -> bool {
376- use DataType :: * ;
377- match to_type {
378- Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true ,
379- Decimal128 ( p2, _) => {
380- if p2 < p1 {
381- // https://github.com/apache/datafusion/issues/13492
382- // Incompatible(Some("Casting to smaller precision is not supported"))
383- options. allow_incompat
384- } else {
385- true
386- }
387- }
388- _ => false ,
389- }
390- }
391-
392166macro_rules! cast_utf8_to_int {
393167 ( $array: expr, $array_type: ty, $parse_fn: expr) => { {
394168 let len = $array. len( ) ;
0 commit comments