Skip to content

Commit 88f8b68

Browse files
committed
build: Add spark-4.1 profile and shims
1 parent 1ec3563 commit 88f8b68

28 files changed

Lines changed: 973 additions & 9 deletions

.github/workflows/pr_build_linux.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ jobs:
9898
java_version: "17"
9999
maven_opts: "-Pspark-4.0"
100100
scan_impl: "native_comet"
101+
102+
- name: "Spark 4.1, JDK 17"
103+
java_version: "17"
104+
maven_opts: "-Pspark-4.1"
105+
scan_impl: "native_comet"
101106
suite:
102107
- name: "fuzz"
103108
value: |
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet.shims
21+
22+
import org.apache.spark.sql.internal.types.StringTypeWithCollation
23+
import org.apache.spark.sql.types.DataType
24+
25+
/**
 * Spark-version-specific type checks for the spark-4.1 shim layer.
 */
trait CometTypeShim {

  /**
   * Returns true when `dt` is a collated string type, i.e. an instance of
   * Spark's `StringTypeWithCollation` (imported from
   * `org.apache.spark.sql.internal.types`); false for every other `DataType`.
   */
  def isStringCollationType(dt: DataType): Boolean = dt match {
    case _: StringTypeWithCollation => true
    case _ => false
  }
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet.shims
21+
22+
import org.apache.spark.paths.SparkPath
23+
import org.apache.spark.sql.catalyst.InternalRow
24+
import org.apache.spark.sql.execution.datasources.PartitionedFile
25+
26+
/**
 * Shim for constructing Spark `PartitionedFile` instances (spark-4.1).
 */
object ShimBatchReader {

  /**
   * Builds a `PartitionedFile` that covers the whole of `file`.
   *
   * Both start and length are set to the sentinel -1, which this codebase
   * uses to mean "read the entire file". Block locations are left empty and
   * modification time / file size are zero; presumably callers do not rely
   * on those fields — TODO confirm against call sites.
   */
  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile = {
    val sparkPath = SparkPath.fromUrlString(file)
    val wholeFile = -1L // sentinel: read the entire file
    PartitionedFile(partitionValues, sparkPath, wholeFile, wholeFile, Array.empty[String], 0, 0)
  }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet.shims
21+
22+
/**
 * Compile-time configuration defaults contributed by the spark-4.1 shim.
 */
trait ShimCometConf {
  // On this Spark version, schema evolution is enabled by default.
  protected val COMET_SCHEMA_EVOLUTION_ENABLED_DEFAULT: Boolean = true
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet.shims
21+
22+
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
23+
import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil
24+
import org.apache.spark.sql.types.StructType
25+
26+
/**
 * Shim around Spark's Parquet file-format internals (spark-4.1).
 */
object ShimFileFormat {

  /**
   * Name of the temporary column that holds row indexes computed by the
   * file-format reader until they can be placed in the `_metadata` struct.
   * Forwarded from `ParquetFileFormat` so callers need not depend on the
   * Spark-version-specific location of the constant.
   */
  val ROW_INDEX_TEMPORARY_COLUMN_NAME =
    ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME

  /**
   * Locates the row-index column within `sparkSchema` by delegating to
   * Spark's `ParquetRowIndexUtil`; returns whatever index that utility
   * reports (presumably a negative value when absent — TODO confirm).
   */
  def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = {
    ParquetRowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema)
  }
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.comet.shims
21+
22+
import org.apache.spark.executor.TaskMetrics
23+
import org.apache.spark.util.AccumulatorV2
24+
25+
/**
 * Shim for reaching into Spark task-metric internals (spark-4.1).
 *
 * NOTE(review): this object lives under the `org.apache.spark` package tree,
 * presumably so it can access package-private members such as
 * `TaskMetrics._externalAccums` — confirm against the Spark 4.1 sources.
 */
object ShimTaskMetrics {

  /**
   * Returns the most recently registered external accumulator of
   * `taskMetrics`, or `None` when its external accumulator list is empty.
   */
  def getTaskAccumulator(taskMetrics: TaskMetrics): Option[AccumulatorV2[_, _]] = {
    val externalAccums = taskMetrics._externalAccums
    externalAccums.lastOption
  }
}

pom.xml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,33 @@ under the License.
651651
</properties>
652652
</profile>
653653

654+
<profile>
655+
<!-- FIXME: this is WIP. Tests may fail https://github.com/apache/datafusion-comet/issues/551 -->
656+
<id>spark-4.1</id>
657+
<properties>
658+
<!-- Use Scala 2.13 by default -->
659+
<scala.version>2.13.17</scala.version>
660+
<scala.binary.version>2.13</scala.binary.version>
661+
<spark.version>4.1.0</spark.version>
662+
<spark.version.short>4.1</spark.version.short>
663+
<parquet.version>1.16.0</parquet.version>
664+
<semanticdb.version>4.13.9</semanticdb.version>
665+
<slf4j.version>2.0.17</slf4j.version>
666+
<shims.majorVerSrc>spark-4.1</shims.majorVerSrc>
667+
<shims.minorVerSrc>not-needed-yet</shims.minorVerSrc>
668+
<!-- Use jdk17 by default -->
669+
<java.version>17</java.version>
670+
<maven.compiler.source>${java.version}</maven.compiler.source>
671+
<maven.compiler.target>${java.version}</maven.compiler.target>
672+
</properties>
673+
<repositories>
674+
<repository>
675+
<id>apache-staging</id>
676+
<url>https://repository.apache.org/content/repositories/orgapachespark-1506/</url>
677+
</repository>
678+
</repositories>
679+
</profile>
680+
654681
<profile>
655682
<id>scala-2.12</id>
656683
</profile>

spark/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,19 @@ under the License.
217217
</dependency>
218218
</dependencies>
219219
</profile>
220+
221+
<profile>
222+
<id>spark-4.1</id>
223+
<dependencies>
224+
<dependency>
225+
<groupId>org.apache.iceberg</groupId>
226+
<!-- TODO: Upgrade after iceberg-spark-runtime-4.1_2.13 release -->
227+
<artifactId>iceberg-spark-runtime-4.0_${scala.binary.version}</artifactId>
228+
<version>1.10.0</version>
229+
<scope>test</scope>
230+
</dependency>
231+
</dependencies>
232+
</profile>
220233
</profiles>
221234

222235
<build>

spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometBypassMergeSortShuffleWriter.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
172172
.commitAllPartitions(ShuffleChecksumHelper.EMPTY_CHECKSUM_VALUE)
173173
.getPartitionLengths();
174174
mapStatus =
175-
MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId);
175+
MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId, 0);
176176
return;
177177
}
178178
final long openStartTime = System.nanoTime();
@@ -261,7 +261,8 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
261261

262262
// TODO: We probably can move checksum generation here when concatenating partition files
263263
partitionLengths = writePartitionedData(mapOutputWriter);
264-
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId);
264+
mapStatus =
265+
MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId, 0);
265266
} catch (Exception e) {
266267
try {
267268
mapOutputWriter.abort(e);

spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometUnsafeShuffleWriter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,8 @@ void closeAndWriteOutput() throws IOException {
288288
}
289289
}
290290
}
291-
mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId);
291+
mapStatus =
292+
MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths, mapId, 0);
292293
}
293294

294295
@VisibleForTesting

0 commit comments

Comments
 (0)