diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 44780a42bc..00d80a1b91 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -137,10 +137,6 @@ jobs: - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'} - {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'} - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'} - # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946 - exclude: - - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'} - module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"} fail-fast: false name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }} runs-on: ${{ matrix.os }} diff --git a/dev/diffs/4.0.1.diff b/dev/diffs/4.0.1.diff index 700cb148b2..2d542aa354 100644 --- a/dev/diffs/4.0.1.diff +++ b/dev/diffs/4.0.1.diff @@ -3903,16 +3903,44 @@ index 52abd248f3a..b4e096cae24 100644 case d: DynamicPruningExpression => d.child } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala -index 4b27082e188..6710c90c789 100644 +index 4b27082e188..057b2430872 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala -@@ -147,7 +147,9 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS +@@ -17,7 +17,7 @@ + + package org.apache.spark.sql.hive + +-import org.apache.spark.sql.{QueryTest, Row} ++import org.apache.spark.sql.{IgnoreCometSuite, QueryTest, Row} + import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} + import 
org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper + import org.apache.spark.sql.hive.test.TestHiveSingleton +@@ -26,7 +26,13 @@ import org.apache.spark.sql.types.{IntegerType, StringType} + import org.apache.spark.util.ArrayImplicits._ + import org.apache.spark.util.Utils + +-class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ++// Comet: mix in IgnoreCometSuite so these tests are reported as ignored when Comet is enabled ++// (ENABLE_COMET=true). The jar these tests depend on (`hive-test-udfs.jar`) is stripped from the ++// Spark 4.0.1 release source tag per the ASF binary-artifact policy, so the tests cannot run in ++// Comet's CI. Ignoring keeps the suite passing without masking real regressions; the upstream ++// tests still run in non-Comet Spark builds that ship the jar on branch-4.0. ++class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton ++ with IgnoreCometSuite { + + case class UDFTestInformation( + identifier: String, +@@ -147,7 +153,13 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS // This jar file should not be placed to the classpath. val jarPath = "src/test/noclasspath/hive-test-udfs.jar" - assume(new java.io.File(jarPath).exists) -+ // Comet: hive-test-udfs.jar files has been removed from Apache Spark repository -+ // comment out the following line for now ++ // Comet: the upstream `assume(...)` runs here in the suite constructor (inside this foreach, ++ // before `test(...)` registers a case). When the jar is missing - as it is on the v4.0.1 ++ // release tag - `assume` throws TestCanceledException out of `<init>`, which ScalaTest ++ // reports as a suite abort (not a per-test cancel) and fails the whole job. The ++ // IgnoreCometSuite mixin above already reroutes these tests to `ignore` under Comet, so ++ // the jar presence check is unnecessary; comment it out to avoid the constructor-time abort. 
+ // assume(new java.io.File(jarPath).exists) val jarUrl = s"file://${System.getProperty("user.dir")}/$jarPath" diff --git a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala b/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala index fc7d8277ed..46bdd2ec03 100644 --- a/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala +++ b/spark/src/main/spark-4.0/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala @@ -19,11 +19,12 @@ package org.apache.spark.sql.comet.shims +import java.io.FileNotFoundException + import scala.util.matching.Regex import org.apache.spark.QueryContext import org.apache.spark.SparkException -import org.apache.spark.SparkFileNotFoundException import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -292,17 +293,13 @@ trait ShimSparkErrorConverter { case "FileNotFound" => val msg = params("message").toString - // Extract file path from native error message and format like Hadoop's - // FileNotFoundException: "File does not exist" val path = ShimSparkErrorConverter.ObjectLocationPattern .findFirstMatchIn(msg) .map(_.group(1)) .getOrElse(msg) - // readCurrentFileNotFoundError was removed in Spark 4.0; construct directly Some( - new SparkFileNotFoundException( - errorClass = "_LEGACY_ERROR_TEMP_2055", - messageParameters = Map("message" -> s"File $path does not exist"))) + QueryExecutionErrors + .fileNotExistError(path, new FileNotFoundException(s"File $path does not exist"))) case _ => // Unknown error type - return None to trigger fallback