Skip to content

Commit ec42eb9

Browse files
andygrove and claude committed
fix: unignore input_file_name Spark SQL tests for native_datafusion
The native_datafusion scan now correctly falls back to Spark's FileSourceScanExec when metadata columns (like input_file_name) are present, so the 3 input_file_name tests no longer need to be ignored. For ExtractPythonUDFsSuite, the issue was that the test's collect pattern didn't match CometNativeScanExec. Fixed by adding CometNativeScanExec to the collect and dataFilters match blocks. Closes #3312. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 599af33 commit ec42eb9

1 file changed

Lines changed: 9 additions & 120 deletions

File tree

dev/diffs/3.5.8.diff

Lines changed: 9 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -238,20 +238,6 @@ index e5494726695..00937f025c2 100644
238238
}
239239

240240
test("A cached table preserves the partitioning and ordering of its cached SparkPlan") {
241-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
242-
index 9e8d77c53f3..855e3ada7d1 100644
243-
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
244-
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
245-
@@ -790,7 +790,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
246-
}
247-
}
248-
249-
- test("input_file_name, input_file_block_start, input_file_block_length - FileScanRDD") {
250-
+ test("input_file_name, input_file_block_start, input_file_block_length - FileScanRDD",
251-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3312")) {
252-
withTempPath { dir =>
253-
val data = sparkContext.parallelize(0 to 10).toDF("id")
254-
data.write.parquet(dir.getCanonicalPath)
255241
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
256242
index 6f3090d8908..c08a60fb0c2 100644
257243
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -588,57 +574,6 @@ index 93275487f29..510e3087e0f 100644
588574
}.flatten
589575
assert(filters.contains(GreaterThan(scan.logicalPlan.output.head, Literal(5L))))
590576
}
591-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IgnoreComet.scala b/sql/core/src/test/scala/org/apache/spark/sql/IgnoreComet.scala
592-
new file mode 100644
593-
index 00000000000..1ee842b6f62
594-
--- /dev/null
595-
+++ b/sql/core/src/test/scala/org/apache/spark/sql/IgnoreComet.scala
596-
@@ -0,0 +1,45 @@
597-
+/*
598-
+ * Licensed to the Apache Software Foundation (ASF) under one or more
599-
+ * contributor license agreements. See the NOTICE file distributed with
600-
+ * this work for additional information regarding copyright ownership.
601-
+ * The ASF licenses this file to You under the Apache License, Version 2.0
602-
+ * (the "License"); you may not use this file except in compliance with
603-
+ * the License. You may obtain a copy of the License at
604-
+ *
605-
+ * http://www.apache.org/licenses/LICENSE-2.0
606-
+ *
607-
+ * Unless required by applicable law or agreed to in writing, software
608-
+ * distributed under the License is distributed on an "AS IS" BASIS,
609-
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
610-
+ * See the License for the specific language governing permissions and
611-
+ * limitations under the License.
612-
+ */
613-
+
614-
+package org.apache.spark.sql
615-
+
616-
+import org.scalactic.source.Position
617-
+import org.scalatest.Tag
618-
+
619-
+import org.apache.spark.sql.test.SQLTestUtils
620-
+
621-
+/**
622-
+ * Tests with this tag will be ignored when Comet is enabled (e.g., via `ENABLE_COMET`).
623-
+ */
624-
+case class IgnoreComet(reason: String) extends Tag("DisableComet")
625-
+case class IgnoreCometNativeIcebergCompat(reason: String) extends Tag("DisableComet")
626-
+case class IgnoreCometNativeDataFusion(reason: String) extends Tag("DisableComet")
627-
+case class IgnoreCometNativeScan(reason: String) extends Tag("DisableComet")
628-
+
629-
+/**
630-
+ * Helper trait that disables Comet for all tests regardless of default config values.
631-
+ */
632-
+trait IgnoreCometSuite extends SQLTestUtils {
633-
+ override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit
634-
+ pos: Position): Unit = {
635-
+ if (isCometEnabled) {
636-
+ ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun)
637-
+ } else {
638-
+ super.test(testName, testTags: _*)(testFun)
639-
+ }
640-
+ }
641-
+}
642577
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala
643578
index 7af826583bd..3c3def1eb67 100644
644579
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala
@@ -1084,20 +1019,6 @@ index 04702201f82..5ee11f83ecf 100644
10841019
}
10851020
assert(exchanges.size === 1)
10861021
}
1087-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
1088-
index 9f8e979e3fb..3bc9dab8023 100644
1089-
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
1090-
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
1091-
@@ -87,7 +87,8 @@ class UDFSuite extends QueryTest with SharedSparkSession {
1092-
spark.catalog.dropTempView("tmp_table")
1093-
}
1094-
1095-
- test("SPARK-8005 input_file_name") {
1096-
+ test("SPARK-8005 input_file_name",
1097-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3312")) {
1098-
withTempPath { dir =>
1099-
val data = sparkContext.parallelize(0 to 10, 2).toDF("id")
1100-
data.write.parquet(dir.getCanonicalPath)
11011022
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala
11021023
index d269290e616..13726a31e07 100644
11031024
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala
@@ -2504,42 +2425,32 @@ index 5cdbdc27b32..307fba16578 100644
25042425
spark.range(10).selectExpr("id", "id % 3 as p")
25052426
.write.partitionBy("p").saveAsTable("testDataForScan")
25062427
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
2507-
index 0ab8691801d..7b81f3a8f6d 100644
2428+
index 0ab8691801d..b18a5bea944 100644
25082429
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
25092430
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
2510-
@@ -17,7 +17,9 @@
2511-
2431+
@@ -18,6 +18,7 @@
25122432
package org.apache.spark.sql.execution.python
25132433

2514-
+import org.apache.spark.sql.IgnoreCometNativeDataFusion
25152434
import org.apache.spark.sql.catalyst.plans.logical.{ArrowEvalPython, BatchEvalPython, Limit, LocalLimit}
25162435
+import org.apache.spark.sql.comet._
25172436
import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan, SparkPlanTest}
25182437
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
25192438
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
2520-
@@ -93,7 +95,8 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
2521-
assert(arrowEvalNodes.size == 2)
2522-
}
2523-
2524-
- test("Python UDF should not break column pruning/filter pushdown -- Parquet V1") {
2525-
+ test("Python UDF should not break column pruning/filter pushdown -- Parquet V1",
2526-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3312")) {
2527-
withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") {
2528-
withTempPath { f =>
2529-
spark.range(10).select($"id".as("a"), $"id".as("b"))
2530-
@@ -108,6 +111,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
2439+
@@ -108,6 +109,8 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
25312440

25322441
val scanNodes = query.queryExecution.executedPlan.collect {
25332442
case scan: FileSourceScanExec => scan
25342443
+ case scan: CometScanExec => scan
2444+
+ case scan: CometNativeScanExec => scan
25352445
}
25362446
assert(scanNodes.length == 1)
25372447
assert(scanNodes.head.output.map(_.name) == Seq("a"))
2538-
@@ -120,11 +124,16 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
2448+
@@ -120,11 +123,18 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
25392449

25402450
val scanNodes = query.queryExecution.executedPlan.collect {
25412451
case scan: FileSourceScanExec => scan
25422452
+ case scan: CometScanExec => scan
2453+
+ case scan: CometNativeScanExec => scan
25432454
}
25442455
assert(scanNodes.length == 1)
25452456
// $"a" is not null and $"a" > 1
@@ -2548,21 +2459,22 @@ index 0ab8691801d..7b81f3a8f6d 100644
25482459
+ val dataFilters = scanNodes.head match {
25492460
+ case scan: FileSourceScanExec => scan.dataFilters
25502461
+ case scan: CometScanExec => scan.dataFilters
2462+
+ case scan: CometNativeScanExec => scan.dataFilters
25512463
+ }
25522464
+ assert(dataFilters.length == 2)
25532465
+ assert(dataFilters.flatMap(_.references.map(_.name)).distinct == Seq("a"))
25542466
}
25552467
}
25562468
}
2557-
@@ -145,6 +154,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
2469+
@@ -145,6 +155,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
25582470

25592471
val scanNodes = query.queryExecution.executedPlan.collect {
25602472
case scan: BatchScanExec => scan
25612473
+ case scan: CometBatchScanExec => scan
25622474
}
25632475
assert(scanNodes.length == 1)
25642476
assert(scanNodes.head.output.map(_.name) == Seq("a"))
2565-
@@ -157,6 +167,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
2477+
@@ -157,6 +168,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession {
25662478

25672479
val scanNodes = query.queryExecution.executedPlan.collect {
25682480
case scan: BatchScanExec => scan
@@ -3243,29 +3155,6 @@ index de3b1ffccf0..2a76d127093 100644
32433155

32443156
override def beforeEach(): Unit = {
32453157
super.beforeEach()
3246-
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
3247-
index f3be79f9022..b4b1ea8dbc4 100644
3248-
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
3249-
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
3250-
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
3251-
import org.apache.hadoop.io.{LongWritable, Writable}
3252-
3253-
import org.apache.spark.{SparkException, SparkFiles, TestUtils}
3254-
-import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
3255-
+import org.apache.spark.sql.{AnalysisException, IgnoreCometNativeDataFusion, QueryTest, Row}
3256-
import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
3257-
import org.apache.spark.sql.catalyst.plans.logical.Project
3258-
import org.apache.spark.sql.execution.WholeStageCodegenExec
3259-
@@ -448,7 +448,8 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
3260-
}
3261-
}
3262-
3263-
- test("SPARK-11522 select input_file_name from non-parquet table") {
3264-
+ test("SPARK-11522 select input_file_name from non-parquet table",
3265-
+ IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3312")) {
3266-
3267-
withTempDir { tempDir =>
3268-
32693158
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
32703159
index 6160c3e5f6c..0956d7d9edc 100644
32713160
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

0 commit comments

Comments (0)