1717
1818//! temporal kernels
1919
20- use chrono:: { DateTime , Datelike , Duration , NaiveDate , Timelike , Utc } ;
20+ use chrono:: {
21+ DateTime , Datelike , Duration , LocalResult , NaiveDate , NaiveDateTime , Offset , TimeZone ,
22+ Timelike , Utc ,
23+ } ;
2124
2225use std:: sync:: Arc ;
2326
@@ -153,10 +156,30 @@ where
153156 Ok ( ( ) )
154157}
155158
156- // Apply the Tz to the Naive Date Time,,convert to UTC, and return as microseconds in Unix epoch
159+ // Apply the Tz to the Naive Date Time, convert to UTC, and return as microseconds in Unix epoch.
160+ // This function re-interprets the local datetime in the timezone to ensure the correct DST offset
161+ // is used for the target date (not the original date's offset). This is important when truncation
162+ // changes the date to a different DST period (e.g., from December/PST to October/PDT).
163+ //
164+ // Note: For far-future dates (approximately beyond year 2100), chrono-tz may not accurately
165+ // calculate DST transitions, which can result in incorrect offsets. See the compatibility
166+ // guide for more information.
157167#[ inline]
158168fn as_micros_from_unix_epoch_utc ( dt : Option < DateTime < Tz > > ) -> i64 {
159- dt. unwrap ( ) . with_timezone ( & Utc ) . timestamp_micros ( )
169+ let dt = dt. unwrap ( ) ;
170+ let naive = dt. naive_local ( ) ;
171+ let tz = dt. timezone ( ) ;
172+
173+ // Re-interpret the local time in the timezone to get the correct DST offset
174+ // for the truncated date. Use noon to avoid DST gaps that occur around midnight.
175+ let noon = naive. date ( ) . and_hms_opt ( 12 , 0 , 0 ) . unwrap_or ( naive) ;
176+
177+ let offset = match tz. offset_from_local_datetime ( & noon) {
178+ LocalResult :: Single ( off) | LocalResult :: Ambiguous ( off, _) => off. fix ( ) ,
179+ LocalResult :: None => return dt. with_timezone ( & Utc ) . timestamp_micros ( ) ,
180+ } ;
181+
182+ ( naive - offset) . and_utc ( ) . timestamp_micros ( )
160183}
161184
162185#[ inline]
@@ -529,6 +552,85 @@ pub(crate) fn timestamp_trunc_dyn(
529552 }
530553}
531554
555+ /// Convert microseconds since epoch to NaiveDateTime
556+ #[ inline]
557+ fn micros_to_naive ( micros : i64 ) -> Option < NaiveDateTime > {
558+ DateTime :: from_timestamp_micros ( micros) . map ( |dt| dt. naive_utc ( ) )
559+ }
560+
561+ /// Convert NaiveDateTime back to microseconds since epoch
562+ #[ inline]
563+ fn naive_to_micros ( dt : NaiveDateTime ) -> i64 {
564+ dt. and_utc ( ) . timestamp_micros ( )
565+ }
566+
567+ /// Truncate a TimestampNTZ array without any timezone conversion.
568+ /// NTZ values are timezone-independent; we treat the raw microseconds as a naive datetime.
569+ fn timestamp_trunc_ntz < T > (
570+ array : & PrimitiveArray < T > ,
571+ format : String ,
572+ ) -> Result < TimestampMicrosecondArray , SparkError >
573+ where
574+ T : ArrowTemporalType + ArrowNumericType ,
575+ i64 : From < T :: Native > ,
576+ {
577+ let trunc_fn: fn ( NaiveDateTime ) -> Option < NaiveDateTime > = match format. to_uppercase ( ) . as_str ( )
578+ {
579+ "YEAR" | "YYYY" | "YY" => trunc_date_to_year,
580+ "QUARTER" => trunc_date_to_quarter,
581+ "MONTH" | "MON" | "MM" => trunc_date_to_month,
582+ "WEEK" => trunc_date_to_week,
583+ "DAY" | "DD" => trunc_date_to_day,
584+ "HOUR" => trunc_date_to_hour,
585+ "MINUTE" => trunc_date_to_minute,
586+ "SECOND" => trunc_date_to_second,
587+ "MILLISECOND" => trunc_date_to_ms,
588+ "MICROSECOND" => trunc_date_to_microsec,
589+ _ => {
590+ return Err ( SparkError :: Internal ( format ! (
591+ "Unsupported format: {format:?} for function 'timestamp_trunc'"
592+ ) ) )
593+ }
594+ } ;
595+
596+ let result: TimestampMicrosecondArray = array
597+ . iter ( )
598+ . map ( |opt_val| {
599+ opt_val. and_then ( |v| {
600+ let micros: i64 = v. into ( ) ;
601+ micros_to_naive ( micros)
602+ . and_then ( trunc_fn)
603+ . map ( naive_to_micros)
604+ } )
605+ } )
606+ . collect ( ) ;
607+
608+ Ok ( result)
609+ }
610+
611+ /// Truncate a single NTZ value and append to builder
612+ fn timestamp_trunc_ntz_single < F > (
613+ value : Option < i64 > ,
614+ builder : & mut PrimitiveBuilder < TimestampMicrosecondType > ,
615+ op : F ,
616+ ) -> Result < ( ) , SparkError >
617+ where
618+ F : Fn ( NaiveDateTime ) -> Option < NaiveDateTime > ,
619+ {
620+ match value {
621+ Some ( micros) => match micros_to_naive ( micros) . and_then ( op) {
622+ Some ( truncated) => builder. append_value ( naive_to_micros ( truncated) ) ,
623+ None => {
624+ return Err ( SparkError :: Internal (
625+ "Unable to truncate NTZ timestamp" . to_string ( ) ,
626+ ) )
627+ }
628+ } ,
629+ None => builder. append_null ( ) ,
630+ }
631+ Ok ( ( ) )
632+ }
633+
532634pub ( crate ) fn timestamp_trunc < T > (
533635 array : & PrimitiveArray < T > ,
534636 format : String ,
@@ -540,6 +642,10 @@ where
540642 let builder = TimestampMicrosecondBuilder :: with_capacity ( array. len ( ) ) ;
541643 let iter = ArrayIter :: new ( array) ;
542644 match array. data_type ( ) {
645+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
646+ // TimestampNTZ: operate directly on naive microsecond values without timezone
647+ timestamp_trunc_ntz ( array, format)
648+ }
543649 DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( tz) ) => {
544650 match format. to_uppercase ( ) . as_str ( ) {
545651 "YEAR" | "YYYY" | "YY" => {
@@ -687,6 +793,60 @@ macro_rules! timestamp_trunc_array_fmt_helper {
687793 "lengths of values array and format array must be the same"
688794 ) ;
689795 match $datatype {
796+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
797+ // TimestampNTZ: operate directly on naive microsecond values
798+ for ( index, val) in iter. enumerate( ) {
799+ let micros_val = val. map( |v| i64 :: from( v) ) ;
800+ let op_result = match $formats. value( index) . to_uppercase( ) . as_str( ) {
801+ "YEAR" | "YYYY" | "YY" => {
802+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_date_to_year)
803+ }
804+ "QUARTER" => timestamp_trunc_ntz_single(
805+ micros_val,
806+ & mut builder,
807+ trunc_date_to_quarter,
808+ ) ,
809+ "MONTH" | "MON" | "MM" => timestamp_trunc_ntz_single(
810+ micros_val,
811+ & mut builder,
812+ trunc_date_to_month,
813+ ) ,
814+ "WEEK" => {
815+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_date_to_week)
816+ }
817+ "DAY" | "DD" => {
818+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_date_to_day)
819+ }
820+ "HOUR" => {
821+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_date_to_hour)
822+ }
823+ "MINUTE" => timestamp_trunc_ntz_single(
824+ micros_val,
825+ & mut builder,
826+ trunc_date_to_minute,
827+ ) ,
828+ "SECOND" => timestamp_trunc_ntz_single(
829+ micros_val,
830+ & mut builder,
831+ trunc_date_to_second,
832+ ) ,
833+ "MILLISECOND" => {
834+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_date_to_ms)
835+ }
836+ "MICROSECOND" => timestamp_trunc_ntz_single(
837+ micros_val,
838+ & mut builder,
839+ trunc_date_to_microsec,
840+ ) ,
841+ _ => Err ( SparkError :: Internal ( format!(
842+ "Unsupported format: {:?} for function 'timestamp_trunc'" ,
843+ $formats. value( index)
844+ ) ) ) ,
845+ } ;
846+ op_result?
847+ }
848+ Ok ( builder. finish( ) )
849+ }
690850 DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( tz) ) => {
691851 let tz: Tz = tz. parse( ) ?;
692852 for ( index, val) in iter. enumerate( ) {
0 commit comments