Skip to content

Commit 830874b

Browse files
committed
fix
1 parent e2df118 commit 830874b

2 files changed

Lines changed: 15 additions & 23 deletions

File tree

native/spark-expr/src/kernels/temporal.rs

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
//! temporal kernels
1919
2020
use chrono::{
21-
DateTime, Datelike, Duration, LocalResult, NaiveDate, NaiveDateTime, Offset, TimeZone,
22-
Timelike, Utc,
21+
DateTime, Datelike, Duration, LocalResult, NaiveDate, NaiveDateTime, TimeZone, Timelike, Utc,
2322
};
2423

2524
use std::sync::Arc;
@@ -157,29 +156,22 @@ where
157156
}
158157

159158
// Apply the Tz to the naive datetime, convert to UTC, and return microseconds since the Unix epoch.
160-
// This function re-interprets the local datetime in the timezone to ensure the correct DST offset
161-
// is used for the target date (not the original date's offset). This is important when truncation
162-
// changes the date to a different DST period (e.g., from December/PST to October/PDT).
163-
//
164-
// Note: For far-future dates (approximately beyond year 2100), chrono-tz may not accurately
165-
// calculate DST transitions, which can result in incorrect offsets. See the compatibility
166-
// guide for more information.
159+
// After truncation the carried UTC offset may be wrong if the truncated time falls in a different
160+
// DST period than the original (e.g., truncating a December/PST timestamp to QUARTER yields
161+
// October 1, which is in PDT). We re-resolve the naive local time through the timezone so that
162+
// chrono picks the correct offset for the target date.
167163
#[inline]
168164
fn as_micros_from_unix_epoch_utc(dt: Option<DateTime<Tz>>) -> i64 {
169165
let dt = dt.unwrap();
170166
let naive = dt.naive_local();
171167
let tz = dt.timezone();
172168

173-
// Re-interpret the local time in the timezone to get the correct DST offset
174-
// for the truncated date. Use noon to avoid DST gaps that occur around midnight.
175-
let noon = naive.date().and_hms_opt(12, 0, 0).unwrap_or(naive);
176-
177-
let offset = match tz.offset_from_local_datetime(&noon) {
178-
LocalResult::Single(off) | LocalResult::Ambiguous(off, _) => off.fix(),
179-
LocalResult::None => return dt.with_timezone(&Utc).timestamp_micros(),
180-
};
181-
182-
(naive - offset).and_utc().timestamp_micros()
169+
match tz.from_local_datetime(&naive) {
170+
LocalResult::Single(resolved) | LocalResult::Ambiguous(resolved, _) => {
171+
resolved.with_timezone(&Utc).timestamp_micros()
172+
}
173+
LocalResult::None => dt.with_timezone(&Utc).timestamp_micros(),
174+
}
183175
}
184176

185177
#[inline]

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ object CometQuarter extends CometExpressionSerde[Quarter] with CometExprGetDateF
181181
object CometHour extends CometExpressionSerde[Hour] {
182182

183183
override def getSupportLevel(expr: Hour): SupportLevel = {
184-
if (expr.child.dataType.typeName == "timestamp_ntz") {
184+
if (expr.child.dataType == TimestampNTZType) {
185185
Incompatible(
186186
Some(
187187
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
@@ -219,7 +219,7 @@ object CometHour extends CometExpressionSerde[Hour] {
219219
object CometMinute extends CometExpressionSerde[Minute] {
220220

221221
override def getSupportLevel(expr: Minute): SupportLevel = {
222-
if (expr.child.dataType.typeName == "timestamp_ntz") {
222+
if (expr.child.dataType == TimestampNTZType) {
223223
Incompatible(
224224
Some(
225225
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
@@ -257,7 +257,7 @@ object CometMinute extends CometExpressionSerde[Minute] {
257257
object CometSecond extends CometExpressionSerde[Second] {
258258

259259
override def getSupportLevel(expr: Second): SupportLevel = {
260-
if (expr.child.dataType.typeName == "timestamp_ntz") {
260+
if (expr.child.dataType == TimestampNTZType) {
261261
Incompatible(
262262
Some(
263263
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
@@ -297,7 +297,7 @@ object CometUnixTimestamp extends CometExpressionSerde[UnixTimestamp] {
297297
private def isSupportedInputType(expr: UnixTimestamp): Boolean = {
298298
expr.children.head.dataType match {
299299
case TimestampType | DateType => true
300-
case dt if dt.typeName == "timestamp_ntz" => true
300+
case TimestampNTZType => true
301301
case _ => false
302302
}
303303
}

0 commit comments

Comments
 (0)