Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/spark_sql_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,6 @@ jobs:
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'}
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
exclude:
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto'}
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
fail-fast: false
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
runs-on: ${{ matrix.os }}
Expand Down
36 changes: 32 additions & 4 deletions dev/diffs/4.0.1.diff
Original file line number Diff line number Diff line change
Expand Up @@ -3903,16 +3903,44 @@ index 52abd248f3a..b4e096cae24 100644
case d: DynamicPruningExpression => d.child
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
index 4b27082e188..6710c90c789 100644
index 4b27082e188..057b2430872 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala
@@ -147,7 +147,9 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS
@@ -17,7 +17,7 @@

package org.apache.spark.sql.hive

-import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.{IgnoreCometSuite, QueryTest, Row}
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
import org.apache.spark.sql.hive.test.TestHiveSingleton
@@ -26,7 +26,13 @@ import org.apache.spark.sql.types.{IntegerType, StringType}
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.Utils

-class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
+// Comet: mix in IgnoreCometSuite so these tests are reported as ignored when Comet is enabled
+// (ENABLE_COMET=true). The jar these tests depend on (`hive-test-udfs.jar`) is stripped from the
+// Spark 4.0.1 release source tag per the ASF binary-artifact policy, so the tests cannot run in
+// Comet's CI. Ignoring keeps the suite passing without masking real regressions; the upstream
+// tests still run in non-Comet Spark builds that ship the jar on branch-4.0.
+class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
+ with IgnoreCometSuite {

case class UDFTestInformation(
identifier: String,
@@ -147,7 +153,13 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS

// This jar file should not be placed to the classpath.
val jarPath = "src/test/noclasspath/hive-test-udfs.jar"
- assume(new java.io.File(jarPath).exists)
+ // Comet: the hive-test-udfs.jar file has been removed from the Apache Spark repository
+ // comment out the following line for now
+ // Comet: the upstream `assume(...)` runs here in the suite constructor (inside this foreach,
+ // before `test(...)` registers a case). When the jar is missing - as it is on the v4.0.1
+ // release tag - `assume` throws TestCanceledException out of `<init>`, which ScalaTest
+ // reports as a suite abort (not a per-test cancel) and fails the whole job. The
+ // IgnoreCometSuite mixin above already reroutes these tests to `ignore` under Comet, so
+ // the jar presence check is unnecessary; comment it out to avoid the constructor-time abort.
+ // assume(new java.io.File(jarPath).exists)
val jarUrl = s"file://${System.getProperty("user.dir")}/$jarPath"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@

package org.apache.spark.sql.comet.shims

import java.io.FileNotFoundException

import scala.util.matching.Regex

import org.apache.spark.QueryContext
import org.apache.spark.SparkException
import org.apache.spark.SparkFileNotFoundException
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
Expand Down Expand Up @@ -292,17 +293,13 @@ trait ShimSparkErrorConverter {

case "FileNotFound" =>
val msg = params("message").toString
// Extract file path from native error message and format like Hadoop's
// FileNotFoundException: "File <path> does not exist"
val path = ShimSparkErrorConverter.ObjectLocationPattern
.findFirstMatchIn(msg)
.map(_.group(1))
.getOrElse(msg)
// readCurrentFileNotFoundError was removed in Spark 4.0; construct directly
Some(
new SparkFileNotFoundException(
errorClass = "_LEGACY_ERROR_TEMP_2055",
messageParameters = Map("message" -> s"File $path does not exist")))
QueryExecutionErrors
.fileNotExistError(path, new FileNotFoundException(s"File $path does not exist")))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Spark 4.0 will use FAILED_READ_FILE.FILE_NOT_EXIST instead of the legacy error class. Not a blocker (See https://github.com/apache/spark/blob/c241d5ad4a2372bbddc7dd8339987a09f501dc36/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala#L879)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is what this code change achieves? Maybe I am not understanding something here though


case _ =>
// Unknown error type - return None to trigger fallback
Expand Down
Loading