Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4944,6 +4944,12 @@
],
"sqlState" : "42K0F"
},
"INVALID_TIMESTAMP_LITERAL_PRECISION" : {
"message" : [
"The timestamp literal <value> has more than 9 fractional-second digits. The maximum supported fractional-second precision of a timestamp literal is 9 (nanoseconds)."
],
"sqlState" : "22023"
},
"INVALID_TIMESTAMP_PRECISION" : {
"message" : [
"The seconds precision <precision> of <type> is invalid. Expected an integer in [7, 9], or parameterless <type> for precision <= 6."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,28 @@ trait SparkDateTimeUtils {
}
}

/**
 * Counts the fractional-second digits of a timestamp/time string.
 *
 * The count is the length of the run of ASCII decimal digits (`0`-`9`) that starts right after
 * the first `.` in the string. Anything after that run, such as a trailing time zone, is not
 * counted. In a well-formed timestamp/time string the only `.` introduces the seconds fraction,
 * so the result is the precision `p` of a typed literal under the ANSI SQL rule that a timestamp
 * literal's precision equals the number of digits in its `<seconds fraction>`.
 *
 * @param s the timestamp/time string to inspect
 * @return the number of digits directly following the first `.`, or 0 when the string has no `.`
 *         or the `.` is not followed by a digit
 */
def fractionalSecondsDigits(s: String): Int = {
  // Index of the first character at or after `pos` that is not an ASCII digit
  // (or `s.length` when the digits run to the end of the string).
  @scala.annotation.tailrec
  def digitRunEnd(pos: Int): Int = {
    if (pos < s.length && '0' <= s.charAt(pos) && s.charAt(pos) <= '9') {
      digitRunEnd(pos + 1)
    } else {
      pos
    }
  }

  s.indexOf('.') match {
    case -1 => 0
    case dotPos =>
      val fractionStart = dotPos + 1
      digitRunEnd(fractionStart) - fractionStart
  }
}

/**
* Trims and parses a given UTF8 timestamp string to the corresponding timestamp segments, time
* zone id and whether it is just time without a date. value. The return type is [[Option]] in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
ctx)
}

/**
 * Builds the parse error for a typed timestamp literal that is reported under the
 * `INVALID_TIMESTAMP_LITERAL_PRECISION` error condition.
 *
 * @param value the raw literal text; it is rendered with `toSQLValue` and passed as the
 *              `value` message parameter
 * @param ctx the type-constructor parser context handed to the [[ParseException]]
 * @return the [[ParseException]] for the caller to throw
 */
def timestampLiteralPrecisionExceedsMaxError(
    value: String,
    ctx: TypeConstructorContext): Throwable = {
  val params = Map("value" -> toSQLValue(value))
  new ParseException(
    errorClass = "INVALID_TIMESTAMP_LITERAL_PRECISION",
    messageParameters = params,
    ctx)
}

def literalValueTypeUnsupportedError(
unsupportedType: String,
supportedTypes: Seq[String],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, fractionalSecondsDigits, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampLTZNanos, stringToTimestampNTZNanos, stringToTimestampWithoutTimeZone}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogContext, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege}
import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
Expand Down Expand Up @@ -4062,18 +4062,54 @@ class AstBuilder extends DataTypeAstBuilder
specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType))
}

// Per ANSI SQL (ISO/IEC 9075-2, Subclause 5.3, Syntax Rule 27), a typed timestamp literal's
// fractional-seconds precision equals the number of digits in its `<seconds fraction>`.
// With the nanosecond preview enabled, a literal carrying 7-9 fractional digits becomes a
// nanosecond-capable literal whose precision `p` is that digit count. Literals with at most 6
// fractional digits keep the microsecond behavior, and more than 9 digits is rejected.
//
// Builds the TIMESTAMP_NTZ nanosecond literal of precision `p` from the literal string.
def constructTimestampNTZNanosLiteral(p: Int): Literal = {
  val nanosType = TimestampNTZNanosType(p)
  toLiteral(str => stringToTimestampNTZNanos(str, p), nanosType)
}

// Builds the TIMESTAMP_LTZ nanosecond literal of precision `p` from the literal string,
// passing the session-local time zone to the string parser.
def constructTimestampLTZNanosLiteral(p: Int): Literal = {
  val sessionZone = getZoneId(conf.sessionLocalTimeZone)
  val nanosType = TimestampLTZNanosType(p)
  toLiteral(str => stringToTimestampLTZNanos(str, p, sessionZone), nanosType)
}

// Decides whether the literal takes the nanosecond path:
//  - preview flag off                -> None (caller falls back to the microsecond path)
//  - flag on, <= 6 fractional digits -> None (same fallback)
//  - flag on, 7-9 fractional digits  -> Some(construct(p)), with p = the digit count
//  - flag on, > 9 fractional digits  -> throws INVALID_TIMESTAMP_LITERAL_PRECISION
// Strict validation is intentionally flag-gated: with the flag off, > 9 fractional digits
// silently truncate to microseconds via the fall-through path.
def nanosLiteralOpt(construct: Int => Literal): Option[Literal] = {
  if (SQLConf.get.timestampNanosTypesEnabled) {
    val digits = fractionalSecondsDigits(value)
    if (digits > TimestampNTZNanosType.MAX_PRECISION) {
      throw QueryParsingErrors.timestampLiteralPrecisionExceedsMaxError(value, ctx)
    }
    if (digits < TimestampNTZNanosType.MIN_PRECISION) {
      None
    } else {
      Some(construct(digits))
    }
  } else {
    None
  }
}

valueType match {
case DATE =>
val zoneId = getZoneId(conf.sessionLocalTimeZone)
val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType))
specialDate.getOrElse(toLiteral(stringToDate, DateType))
case TIME => toLiteral(stringToTime, TimeType())
case TIMESTAMP_NTZ =>
convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
.map(Literal(_, TimestampNTZType))
.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
nanosLiteralOpt(constructTimestampNTZNanosLiteral).getOrElse {
convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone))
.map(Literal(_, TimestampNTZType))
.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
}
case TIMESTAMP_LTZ =>
constructTimestampLTZLiteral(value)
nanosLiteralOpt(constructTimestampLTZNanosLiteral)
.getOrElse(constructTimestampLTZLiteral(value))
case TIMESTAMP =>
SQLConf.get.timestampType match {
case TimestampNTZType =>
Expand All @@ -4085,14 +4121,17 @@ class AstBuilder extends DataTypeAstBuilder
// If the input string contains time zone part, return a timestamp with local time
// zone literal.
if (containsTimeZonePart) {
constructTimestampLTZLiteral(value)
nanosLiteralOpt(constructTimestampLTZNanosLiteral)
.getOrElse(constructTimestampLTZLiteral(value))
} else {
toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)
nanosLiteralOpt(constructTimestampNTZNanosLiteral)
.getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType))
}
}

case TimestampType =>
constructTimestampLTZLiteral(value)
nanosLiteralOpt(constructTimestampLTZNanosLiteral)
.getOrElse(constructTimestampLTZLiteral(value))
}

case INTERVAL =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.parser

import java.sql.{Date, Timestamp}
import java.time.{Duration, LocalDateTime, LocalTime, Period}
import java.time.{Duration, LocalDateTime, LocalTime, Period, ZoneOffset}
import java.util.concurrent.TimeUnit

import scala.language.implicitConversions
Expand Down Expand Up @@ -1184,6 +1184,92 @@ class ExpressionParserSuite extends AnalysisTest {
}
}

// Checks how typed timestamp literals are parsed with respect to fractional-second digits:
//  - nanos preview flag on: 7-9 digits yield nanosecond literals whose type precision equals
//    the digit count, 6 digits stay a microsecond literal, and 10 digits fail with
//    INVALID_TIMESTAMP_LITERAL_PRECISION;
//  - nanos preview flag off: a 9-digit literal is narrowed to microseconds.
test("SPARK-57250: nanosecond timestamp typed literals") {
// NOTE(review): timestampNTZ / timestampLTZ / localDateTimeToNanosVal / instantToNanosVal are
// presumably provided by this import -- confirm against TimestampNanosTestUtils.
import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._

// Expected NTZ / LTZ nanos literals from readable components. The session time zone is fixed
// to UTC below so the wall-clock fields of the LTZ literal map to the same instant.
// `p` is the precision of the expected literal's type; `nanoOfSec` is the fraction in nanoseconds.
def ntz(p: Int, y: Int, mo: Int, d: Int, h: Int, mi: Int, s: Int, nanoOfSec: Int): Literal =
Literal(localDateTimeToNanosVal(timestampNTZ(y, mo, d, h, mi, s, nanoOfSec)),
TimestampNTZNanosType(p))
def ltz(p: Int, y: Int, mo: Int, d: Int, h: Int, mi: Int, s: Int, nanoOfSec: Int): Literal =
Literal(instantToNanosVal(timestampLTZ(y, mo, d, h, mi, s, nanoOfSec)),
TimestampLTZNanosType(p))

withSQLConf(
SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true",
SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
// Precision is derived from the number of fractional digits (ANSI SQL Subclause 5.3 SR 27).
// A 7- or 8-digit fraction is expected right-padded with zeros to a nanosecond value
// (e.g. .1234567 -> 123456700 ns).
assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567'",
ntz(7, 2020, 1, 1, 0, 0, 0, 123456700))
assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.12345678'",
ntz(8, 2020, 1, 1, 0, 0, 0, 123456780))
assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789'",
ntz(9, 2020, 1, 1, 0, 0, 0, 123456789))

// TIMESTAMP_LTZ: value interpreted in the session time zone (UTC here).
assertEqual("TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789'",
ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))

// TIMESTAMP_LTZ with an explicit zone offset in the literal: the offset takes precedence
// over the session timezone. '2020-01-01 00:00:00.123456789+05:00' is the instant
// 2019-12-31 19:00:00.123456789 UTC.
assertEqual("TIMESTAMP_LTZ '2020-01-01 00:00:00.123456789+05:00'",
Literal(
instantToNanosVal(timestampLTZ(2020, 1, 1, 0, 0, 0, 123456789, ZoneOffset.of("+05:00"))),
TimestampLTZNanosType(9)))

// Bare TIMESTAMP keyword resolves to LTZ nanos by default (TIMESTAMP_TYPE = LTZ).
assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789'",
ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))

// Under the NTZ default, bare TIMESTAMP resolves to NTZ nanos, unless the string carries a
// time-zone offset, which flips it to LTZ nanos.
withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) {
assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789'",
ntz(9, 2020, 1, 1, 0, 0, 0, 123456789))
assertEqual("TIMESTAMP '2020-01-01 00:00:00.123456789+00:00'",
ltz(9, 2020, 1, 1, 0, 0, 0, 123456789))
}

// Boundary values: nanosWithinMicro 0 and 999; pre-epoch (1582) and the max year (9999).
assertEqual("TIMESTAMP_NTZ '1970-01-01 00:00:00.000000000'",
ntz(9, 1970, 1, 1, 0, 0, 0, 0))
assertEqual("TIMESTAMP_NTZ '1970-01-01 00:00:00.000000999'",
ntz(9, 1970, 1, 1, 0, 0, 0, 999))
assertEqual("TIMESTAMP_NTZ '1582-10-15 23:59:59.123456789'",
ntz(9, 1582, 10, 15, 23, 59, 59, 123456789))
assertEqual("TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999'",
ntz(9, 9999, 12, 31, 23, 59, 59, 999999999))

// Exactly 6 fractional digits stays a microsecond literal.
assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456'",
Literal(LocalDateTime.parse("2020-01-01T00:00:00.123456")))

// More than 9 fractional digits is rejected.
// The SQL fragment is 46 characters long, so `stop = 45` is the index of its last character.
checkError(
exception = parseException("TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567890'"),
condition = "INVALID_TIMESTAMP_LITERAL_PRECISION",
parameters = Map("value" -> "'2020-01-01 00:00:00.1234567890'"),
context = ExpectedContext(
fragment = "TIMESTAMP_NTZ '2020-01-01 00:00:00.1234567890'",
start = 0,
stop = 45))

// Special values have no fractional part, so nanosLiteralOpt returns None and the
// existing special-value path handles them, producing plain microsecond literals.
assertEqual("TIMESTAMP_NTZ 'epoch'", Literal(0L, TimestampNTZType))
}

// With the preview flag off, 7-9 digit literals narrow to microseconds (legacy behavior).
// The expected value keeps only the first six fractional digits (.123456789 -> .123456).
withSQLConf(
SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false",
SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
assertEqual("TIMESTAMP_NTZ '2020-01-01 00:00:00.123456789'",
Literal(LocalDateTime.parse("2020-01-01T00:00:00.123456")))
}
}

test("date literals") {
DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
Expand Down