diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index 2775e34808153..808376d005ce2 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4944,6 +4944,12 @@
     ],
     "sqlState" : "42K0F"
   },
+  "INVALID_TIMESTAMP_LITERAL_PRECISION" : {
+    "message" : [
+      "The timestamp literal <value> has more than 9 fractional-second digits. The maximum supported fractional-second precision of a timestamp literal is 9 (nanoseconds)."
+    ],
+    "sqlState" : "22023"
+  },
   "INVALID_TIMESTAMP_PRECISION" : {
     "message" : [
       "The seconds precision of is invalid. Expected an integer in [7, 9], or parameterless for precision <= 6."
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index d7200715f9374..e263d078e96dc 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -544,6 +544,35 @@ trait SparkDateTimeUtils {
     }
   }
 
+  /**
+   * Returns the number of fractional-second digits in a timestamp/time string, i.e. the count of
+   * decimal digits immediately following the first `.` (0 if there is no fractional part). In a
+   * well-formed timestamp/time string the only `.` is the one that introduces the seconds
+   * fraction, so this is sufficient to derive the precision `p` of a typed literal per the ANSI
+   * SQL rule (the precision of a timestamp literal is the number of digits in its
+   * `<seconds fraction>`). Digits beyond the fractional run (e.g. a trailing time zone) are not
+   * counted.
+   *
+   * This is intentionally a lightweight pre-parse digit counter: it does not validate that `s` is
+   * a well-formed timestamp. Callers use the returned count only to choose a parse path (the
+   * digit count routes between the microsecond path, the nanosecond path, and the ">9 digits"
+   * error); each of those paths then re-parses and validates the whole string, so a malformed
+   * input such as `"abcd.1234"` is still rejected downstream by the chosen parser. Consequently
+   * the result is meaningful only for strings that are otherwise valid timestamp/time literals.
+   */
+  def fractionalSecondsDigits(s: String): Int = {
+    val dot = s.indexOf('.')
+    if (dot < 0) {
+      0
+    } else {
+      var i = dot + 1
+      while (i < s.length && s.charAt(i) >= '0' && s.charAt(i) <= '9') {
+        i += 1
+      }
+      i - (dot + 1)
+    }
+  }
+
   /**
    * Trims and parses a given UTF8 timestamp string to the corresponding timestamp segments, time
    * zone id and whether it is just time without a date. value. The return type is [[Option]] in
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 4879b8ba081ff..1cc050f488f64 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -360,6 +360,15 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
       ctx)
   }
 
+  def timestampLiteralPrecisionExceedsMaxError(
+      value: String,
+      ctx: TypeConstructorContext): Throwable = {
+    new ParseException(
+      errorClass = "INVALID_TIMESTAMP_LITERAL_PRECISION",
+      messageParameters = Map("value" -> toSQLValue(value)),
+      ctx)
+  }
+
   def literalValueTypeUnsupportedError(
       unsupportedType: String,
       supportedTypes: Seq[String],
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ccca53f351e8b..cf6c63409a553 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -46,7 +46,7 @@ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
 import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils}
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, fractionalSecondsDigits, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampLTZNanos, stringToTimestampNTZNanos, stringToTimestampWithoutTimeZone}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogContext, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege}
 import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
@@ -4062,6 +4062,39 @@ class AstBuilder extends DataTypeAstBuilder
       specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
     }
 
+    // ANSI SQL (ISO/IEC 9075-2, Subclause 5.3, Syntax Rule 27): the fractional-seconds precision
+    // of a typed timestamp literal is the number of digits in its `<seconds fraction>`. When the
+    // nanosecond preview is enabled and the literal carries 7-9 fractional digits, build a
+    // nanosecond-capable literal with precision `p` equal to that digit count. Literals with <= 6
+    // fractional digits keep the microsecond behavior; more than 9 digits is rejected.
+    def constructTimestampNTZNanosLiteral(p: Int): Literal =
+      toLiteral(stringToTimestampNTZNanos(_, p), TimestampNTZNanosType(p))
+
+    def constructTimestampLTZNanosLiteral(p: Int): Literal = {
+      val zoneId = getZoneId(conf.sessionLocalTimeZone)
+      toLiteral(stringToTimestampLTZNanos(_, p, zoneId), TimestampLTZNanosType(p))
+    }
+
+    // Returns Some(literal) when the nanos preview flag is on and the literal has 7-9 fractional
+    // digits; throws when there are more than 9; returns None (fall back to the micro path) when
+    // the flag is off or there are <= 6 fractional digits.
+    def nanosLiteralOpt(construct: Int => Literal): Option[Literal] = {
+      if (!SQLConf.get.timestampNanosTypesEnabled) {
+        None
+      } else {
+        val p = fractionalSecondsDigits(value)
+        // Flag is on here, so enforce the 9-digit limit. The check is intentionally flag-gated:
+        // with the flag off, >9 fractional digits keep truncating to microseconds downstream.
+        if (p > TimestampNTZNanosType.MAX_PRECISION) {
+          throw QueryParsingErrors.timestampLiteralPrecisionExceedsMaxError(value, ctx)
+        } else if (p >= TimestampNTZNanosType.MIN_PRECISION) {
+          Some(construct(p))
+        } else {
+          None
+        }
+      }
+    }
+
     valueType match {
       case DATE =>
         val zoneId = getZoneId(conf.sessionLocalTimeZone)
@@ -4069,11 +4102,14 @@ class AstBuilder extends DataTypeAstBuilder
         specialDate.getOrElse(toLiteral(stringToDate, DateType))
       case TIME => toLiteral(stringToTime, TimeType())
       case TIMESTAMP_NTZ =>
-        convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
-          .map(Literal(_, TimestampNTZType))
-          .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
+        nanosLiteralOpt(constructTimestampNTZNanosLiteral).getOrElse {
+          convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
+            .map(Literal(_, TimestampNTZType))
+            .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
+        }
       case TIMESTAMP_LTZ =>
-        constructTimestampLTZLiteral(value)
+        nanosLiteralOpt(constructTimestampLTZNanosLiteral)
+          .getOrElse(constructTimestampLTZLiteral(value))
       case TIMESTAMP =>
         SQLConf.get.timestampType match {
           case TimestampNTZType =>
@@ -4085,14 +4121,17 @@ class AstBuilder extends DataTypeAstBuilder
                 // If the input string contains time zone part, return a timestamp with local time
                 // zone literal.
                 if (containsTimeZonePart) {
-                  constructTimestampLTZLiteral(value)
+                  nanosLiteralOpt(constructTimestampLTZNanosLiteral)
+                    .getOrElse(constructTimestampLTZLiteral(value))
                 } else {
-                  toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)
+                  nanosLiteralOpt(constructTimestampNTZNanosLiteral)
+                    .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
                 }
               }
 
           case TimestampType =>
-            constructTimestampLTZLiteral(value)
+            nanosLiteralOpt(constructTimestampLTZNanosLiteral)
+              .getOrElse(constructTimestampLTZLiteral(value))
         }
 
       case INTERVAL =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 08d64d2db1c56..758e7eaf21582 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.catalyst.parser
 
 import java.sql.{Date, Timestamp}
-import java.time.{Duration, LocalDateTime, LocalTime, Period}
+import java.time.{Duration, LocalDateTime, LocalTime, Period, ZoneOffset}
 import java.util.concurrent.TimeUnit
 
 import scala.language.implicitConversions
@@ -1184,6 +1184,98 @@ class ExpressionParserSuite extends AnalysisTest {
     }
   }
 
+  test("SPARK-57250: nanosecond timestamp typed literals") {
+    import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+    // Expected NTZ / LTZ nanos literals from readable components. The session time zone is fixed
+    // to UTC below so the wall-clock fields of the LTZ literal map to the same instant.
+    def ntz(p: Int, y: Int, mo: Int, d: Int, h: Int, mi: Int, s: Int, nanoOfSec: Int): Literal =
+      Literal(localDateTimeToNanosVal(timestampNTZ(y, mo, d, h, mi, s, nanoOfSec)),
+        TimestampNTZNanosType(p))
+    def ltz(p: Int, y: Int, mo: Int, d: Int, h: Int, mi: Int, s: Int, nanoOfSec: Int): Literal =
+      Literal(instantToNanosVal(timestampLTZ(y, mo, d, h, mi, s, nanoOfSec)),
+        TimestampLTZNanosType(p))
+
+    withSQLConf(
+      SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true",
+      SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      // Precision is derived from the number of fractional digits (ANSI SQL Subclause 5.3 SR 27).
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567'",
+        ntz(7, 2020, 1, 1, 0, 0, 0, 123456700))
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.12345678'",
+        ntz(8, 2020, 1, 1, 0, 0, 0, 123456780))
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789'",
+        ntz(9, 2020, 1, 1, 0, 0, 0, 123456789))
+
+      // TIMESTAMP_LTZ: value interpreted in the session time zone (UTC here).
+      assertEqual("TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789'",
+        ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))
+
+      // TIMESTAMP_LTZ with an explicit zone offset in the literal: the offset takes precedence
+      // over the session timezone. '2020-01-01 00:00:00.123456789+05:00' is the instant
+      // 2019-12-31 19:00:00.123456789 UTC.
+      assertEqual("TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789+05:00'",
+        Literal(
+          instantToNanosVal(timestampLTZ(2020, 1, 1, 0, 0, 0, 123456789, ZoneOffset.of("+05:00"))),
+          TimestampLTZNanosType(9)))
+
+      // Bare TIMESTAMP keyword resolves to LTZ nanos by default (TIMESTAMP_TYPE = LTZ).
+      assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789'",
+        ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))
+
+      // Under the NTZ default, bare TIMESTAMP resolves to NTZ nanos, unless the string carries a
+      // time-zone offset, which flips it to LTZ nanos.
+      withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) {
+        assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789'",
+          ntz(9, 2020, 1, 1, 0, 0, 0, 123456789))
+        assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789+00:00'",
+          ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))
+      }
+
+      // Boundary values: nanosWithinMicro 0 and 999; pre-epoch (1582) and the max year (9999).
+      assertEqual("TIMESTAMP_NTZ '1970-01-01 00:00:00.000000000'",
+        ntz(9, 1970, 1, 1, 0, 0, 0, 0))
+      assertEqual("TIMESTAMP_NTZ '1970-01-01 00:00:00.000000999'",
+        ntz(9, 1970, 1, 1, 0, 0, 0, 999))
+      assertEqual("TIMESTAMP_NTZ '1582-10-15 23:59:59.123456789'",
+        ntz(9, 1582, 10, 15, 23, 59, 59, 123456789))
+      assertEqual("TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999'",
+        ntz(9, 9999, 12, 31, 23, 59, 59, 999999999))
+
+      // Exactly 6 fractional digits stays a microsecond literal.
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456'",
+        Literal(LocalDateTime.parse("2020-01-01T00:00:00.123456")))
+
+      // More than 9 fractional digits is rejected.
+      checkError(
+        exception = parseException("TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567890'"),
+        condition = "INVALID_TIMESTAMP_LITERAL_PRECISION",
+        parameters = Map("value" -> "'2020-01-01 00:00:00.1234567890'"),
+        context = ExpectedContext(
+          fragment = "TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567890'",
+          start = 0,
+          stop = 45))
+
+      // Special values have no fractional part, so nanosLiteralOpt returns None and the
+      // existing special-value path handles them, producing plain microsecond literals.
+      assertEqual("TIMESTAMP_NTZ 'epoch'", Literal(0L, TimestampNTZType))
+    }
+
+    // With the preview flag off, 7-9 digit literals narrow to microseconds (legacy behavior).
+    withSQLConf(
+      SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false",
+      SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789'",
+        Literal(LocalDateTime.parse("2020-01-01T00:00:00.123456")))
+
+      // More than 9 fractional digits is NOT rejected when the flag is off; the strict
+      // INVALID_TIMESTAMP_LITERAL_PRECISION validation is intentionally flag-gated, so the
+      // literal silently narrows to microseconds via the legacy fall-through path.
+      assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567890'",
+        Literal(LocalDateTime.parse("2020-01-01T00:00:00.123456")))
+    }
+  }
+
   test("date literals") {
     DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
       withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 47eb4a1e3e3cb..531a8190f7af7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -412,6 +412,35 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
       None)
   }
 
+  test("SPARK-57250: fractionalSecondsDigits counts digits after the first dot") {
+    // No fractional part.
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00") === 0)
+    assert(fractionalSecondsDigits("2020-01-01") === 0)
+    assert(fractionalSecondsDigits("") === 0)
+
+    // A trailing dot with no digits.
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.") === 0)
+
+    // Boundary digit counts used by the literal precision routing.
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.1") === 1)
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.123456") === 6)
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.1234567") === 7)
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.123456789") === 9)
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.1234567890") === 10)
+
+    // Counting stops at the first non-digit, e.g. a trailing time zone or whitespace.
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.123456789+08:00") === 9)
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.123 ") === 3)
+    assert(fractionalSecondsDigits("12:30:45.5Z") === 1)
+
+    // Only the first dot introduces the fraction; later dots are not counted.
+    assert(fractionalSecondsDigits("2020-01-01 00:00:00.12.34") === 2)
+
+    // The helper does not validate the rest of the string; it just counts the fractional run.
+    assert(fractionalSecondsDigits("abcd.1234") === 4)
+    assert(fractionalSecondsDigits(".789") === 3)
+  }
+
   test("SPARK-15379: special invalid date string") {
     // Test stringToDate
     assert(toDate("2015-02-29 00:00:00").isEmpty)