From b8f932c507f07c07eb4194268304fe0a27168abe Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Sun, 24 May 2026 09:42:05 +0300
Subject: [PATCH 1/9] [VL][Delta] Add JVM Delta DV scan handoff

---
 .../velox/VeloxDeltaMetadataUtils.scala       | 157 +++++++++
 .../sql/delta/PreprocessTableWithDVs.scala    | 228 +++++++++++++
 .../sql/delta/stats/PrepareDeltaScan.scala    |  58 ++++
 .../DeltaDeletionVectorHandoffSuite.scala     | 124 +++++++
 .../apache/spark/sql/delta/DeltaSuite.scala   |  28 ++
 .../velox/VeloxDeltaMetadataUtils.scala       | 172 ++++++++++
 .../DeltaDeletionVectorHandoffSuite.scala     | 157 +++++++++
 .../backendsapi/velox/VeloxIteratorApi.scala  | 147 +++++++-
 .../velox/VeloxSplitInfoWithPayloads.scala    |  31 ++
 cpp/core/compute/Runtime.h                    |   2 +
 cpp/core/jni/JniWrapper.cc                    |  26 ++
 cpp/velox/compute/VeloxPlanConverter.cc       |  72 +++-
 cpp/velox/compute/VeloxPlanConverter.h        |   3 +-
 cpp/velox/compute/VeloxRuntime.cc             |  11 +-
 cpp/velox/compute/VeloxRuntime.h              |   3 +
 cpp/velox/compute/WholeStageResultIterator.cc | 121 ++++++-
 cpp/velox/substrait/SubstraitToVeloxPlan.cc   |  26 +-
 cpp/velox/substrait/SubstraitToVeloxPlan.h    |   4 +
 .../vectorized/ColumnarBatchOutIterator.java  |   6 +
 .../vectorized/NativePlanEvaluator.java       |  21 +-
 .../vectorized/PlanEvaluatorJniWrapper.java   |   3 +
 .../execution/DeltaScanTransformer.scala      | 316 +++++++++++++++++-
 .../extension/DeltaPostTransformRules.scala   | 274 +++++++++------
 .../gluten/extension/OffloadDeltaScan.scala   |  54 ++-
 .../apache/gluten/execution/DeltaSuite.scala  |  44 ++-
 .../utils/DeltaDeletionVectorRegistry.scala   |  41 +++
 .../execution/GlutenFallbackReporter.scala    |  20 ++
 .../GlutenQueryExecutionListener.scala        |  13 +
 28 files changed, 2031 insertions(+), 131 deletions(-)
 create mode 100644 backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
 create mode 100644 backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
 create mode 100644 backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
 create mode 100644 backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
 create mode 100644 backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
 create mode 100644 backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
 create mode 100644 backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala
 create mode 100644 gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala

diff --git a/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
new file mode 100644
index 00000000000..699fe2d61b0
--- /dev/null
+++ b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.backendsapi.velox
+
+import org.apache.gluten.backendsapi.velox.VeloxIteratorApi.unescapePathName
+import org.apache.gluten.sql.shims.SparkShimLoader
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
+import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArrayFormat, StoredBitmap}
+import org.apache.spark.sql.delta.storage.dv.HadoopFileSystemDVStore
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+import org.apache.hadoop.fs.Path
+
+import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
+
+import scala.collection.JavaConverters._
+import scala.util.Try
+import scala.util.control.NonFatal
+
+object VeloxDeltaMetadataUtils {
+  val DeltaDvCardinality = "delta_dv_cardinality"
+  val DeltaDvPayloadIndex = "delta_dv_payload_index"
+
+  private val RowIndexFilterIdEncoded = "row_index_filter_id_encoded"
+  private val RowIndexFilterType = "row_index_filter_type"
+  private val RowIndexFilterTypeIfContained = "IF_CONTAINED"
+
+  final class NormalizedSplitMetadata(
+      val otherMetadataColumns: JList[JMap[String, Object]],
+      val deletionVectorPayloads: Array[Array[Byte]])
+    extends Serializable
+
+  private def decodeDescriptor(
+      normalizedMetadata: JMap[String, Object]): Option[DeletionVectorDescriptor] = {
+    Option(normalizedMetadata.get(RowIndexFilterIdEncoded))
+      .map(_.toString)
+      .filter(_.nonEmpty)
+      .flatMap(encoded => Try(DeletionVectorDescriptor.deserializeFromBase64(encoded)).toOption)
+  }
+
+  private def serializePayload(
+      dvStore: HadoopFileSystemDVStore,
+      tablePath: Path,
+      descriptor: DeletionVectorDescriptor): Array[Byte] = {
+    if (tablePath == null) {
+      throw new IllegalStateException(
+        "Unable to resolve Delta table path while materializing deletion vector payload")
+    }
+    StoredBitmap
+      .create(descriptor, tablePath)
+      .load(dvStore)
+      .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
+  }
+
+  private def normalizeMetadataWithDescriptor(
+      metadata: JMap[String, Object],
+      descriptor: DeletionVectorDescriptor): JMap[String, Object] = {
+    val normalized = new JHashMap[String, Object]()
+    if (metadata != null) {
+      normalized.putAll(metadata)
+    }
+    normalized.put(DeltaDvCardinality, Long.box(descriptor.cardinality))
+    normalized.remove(RowIndexFilterIdEncoded)
+    if (!normalized.containsKey(RowIndexFilterType)) {
+      normalized.put(RowIndexFilterType, RowIndexFilterTypeIfContained)
+    }
+    normalized
+  }
+
+  def normalizeSplitMetadata(
+      partitionColumnCount: Int,
+      files: JList[PartitionedFile]): NormalizedSplitMetadata = {
+    val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
+    val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
+    val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
+
+    files.asScala.foreach {
+      file =>
+        val otherMetadata =
+          SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(file)
+        val metadataWithDecodedPayload = new JHashMap[String, Object]()
+        if (otherMetadata != null) {
+          metadataWithDecodedPayload.putAll(otherMetadata)
+        }
+
+        val descriptor = decodeDescriptor(metadataWithDecodedPayload)
+
+        descriptor match {
+          case Some(descriptor) =>
+            val normalized = normalizeMetadataWithDescriptor(metadataWithDecodedPayload, descriptor)
+            val payloadTablePath = resolveTablePath(partitionColumnCount, file)
+            val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
+            normalized.put(DeltaDvPayloadIndex, Int.box(deletionVectorPayloads.length))
+            deletionVectorPayloads += serializedPayload
+            normalizedMetadataColumns.add(normalized)
+          case None =>
+            normalizedMetadataColumns.add(metadataWithDecodedPayload)
+        }
+    }
+
+    new NormalizedSplitMetadata(normalizedMetadataColumns, deletionVectorPayloads.toArray)
+  }
+
+  private def activeSpark: SparkSession = {
+    SparkSession.getActiveSession
+      .orElse(SparkSession.getDefaultSession)
+      .getOrElse {
+        throw new IllegalStateException(
+          "Active SparkSession is required to materialize Delta deletion vectors")
+      }
+  }
+
+  private def resolveTablePath(partitionColumnCount: Int, file: PartitionedFile): Path = {
+    val fileParent = new Path(unescapePathName(file.filePath.toString)).getParent
+    var tablePath = fileParent
+    for (_ <- 0 until partitionColumnCount) {
+      tablePath = tablePath.getParent
+    }
+    val spark = activeSpark
+    if (tablePath != null && isDeltaTablePath(spark, tablePath)) {
+      return tablePath
+    }
+
+    // Spark can report a partition column count that does not map 1:1 to path depth for
+    // prepared Delta scans. Find the nearest ancestor of the file path that has _delta_log.
+    var candidate = fileParent
+    while (candidate != null && !isDeltaTablePath(spark, candidate)) {
+      candidate = candidate.getParent
+    }
+    if (candidate != null) candidate else tablePath
+  }
+
+  private def isDeltaTablePath(spark: SparkSession, tablePath: Path): Boolean = {
+    val deltaLogPath = new Path(tablePath, "_delta_log")
+    try {
+      deltaLogPath.getFileSystem(spark.sessionState.newHadoopConf()).exists(deltaLogPath)
+    } catch {
+      case NonFatal(_) => false
+    }
+  }
+}
diff --git a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
new file mode 100644
index 00000000000..4c353c4a575
--- /dev/null
+++ b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
+import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
+import org.apache.spark.sql.delta.DeltaParquetFileFormat._
+import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable
+import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeLogFileIndex}
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.execution.datasources.FileFormat.METADATA_NAME
+import org.apache.spark.sql.execution.datasources.HadoopFsRelation
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Rewrites Delta scans over DV-enabled tables to request the backend-specific skip-row metadata
+ * column only when the snapshot actually contains DVs.
+ */
+trait PreprocessTableWithDVs extends SubqueryTransformerHelper {
+  def preprocessTablesWithDVs(plan: LogicalPlan): LogicalPlan = {
+    transformWithSubqueries(plan) { case ScanWithDeletionVectors(dvScan) => dvScan }
+  }
+}
+
+object ScanWithDeletionVectors {
+  def unapply(a: LogicalRelation): Option[LogicalPlan] = a match {
+    case scan @ LogicalRelation(
+          relation @ HadoopFsRelation(
+            index: TahoeFileIndex,
+            _,
+            _,
+            _,
+            format: DeltaParquetFileFormat,
+            _),
+          _,
+          _,
+          _) =>
+      dvEnabledScanFor(scan, relation, format, index)
+    case scan @ LogicalRelation(
+          relation @ HadoopFsRelation(
+            index: TahoeFileIndex,
+            _,
+            _,
+            _,
+            format: GlutenDeltaParquetFileFormat,
+            _),
+          _,
+          _,
+          _) =>
+      dvEnabledScanFor(scan, relation, format, index)
+    case _ => None
+  }
+
+  def dvEnabledScanFor(
+      scan: LogicalRelation,
+      hadoopRelation: HadoopFsRelation,
+      fileFormat: DeltaParquetFileFormat,
+      index: TahoeFileIndex): Option[LogicalPlan] = {
+    if (!deletionVectorsReadable(index.protocol, index.metadata)) {
+      return None
+    }
+
+    require(
+      !index.isInstanceOf[TahoeLogFileIndex],
+      "Cannot work with a non-pinned table snapshot of the TahoeFileIndex")
+
+    if (fileFormat.hasTablePath) {
+      return None
+    }
+
+    val filesWithDVs = index
+      .matchingFiles(partitionFilters = Seq(TrueLiteral), dataFilters = Seq(TrueLiteral))
+      .filter(_.deletionVector != null)
+    if (filesWithDVs.isEmpty) {
+      return None
+    }
+
+    val planOutput = scan.output
+    val spark = SparkSession.getActiveSession.get
+    val newScan = createScanWithSkipRowColumn(spark, scan, fileFormat, index, hadoopRelation)
+    val rowIndexFilter = createRowIndexFilterNode(newScan)
+    Some(Project(planOutput, rowIndexFilter))
+  }
+
+  def dvEnabledScanFor(
+      scan: LogicalRelation,
+      hadoopRelation: HadoopFsRelation,
+      fileFormat: GlutenDeltaParquetFileFormat,
+      index: TahoeFileIndex): Option[LogicalPlan] = {
+    if (!deletionVectorsReadable(index.protocol, index.metadata)) {
+      return None
+    }
+
+    require(
+      !index.isInstanceOf[TahoeLogFileIndex],
+      "Cannot work with a non-pinned table snapshot of the TahoeFileIndex")
+
+    if (fileFormat.hasTablePath) {
+      return None
+    }
+
+    val filesWithDVs = index
+      .matchingFiles(partitionFilters = Seq(TrueLiteral), dataFilters = Seq(TrueLiteral))
+      .filter(_.deletionVector != null)
+    if (filesWithDVs.isEmpty) {
+      return None
+    }
+
+    val planOutput = scan.output
+    val spark = SparkSession.getActiveSession.get
+    val newScan = createScanWithSkipRowColumn(spark, scan, fileFormat, index, hadoopRelation)
+    val rowIndexFilter = createRowIndexFilterNode(newScan)
+    Some(Project(planOutput, rowIndexFilter))
+  }
+
+  private def addRowIndexIfMissing(attribute: AttributeReference): AttributeReference = {
+    require(attribute.name == METADATA_NAME)
+
+    val dataType = attribute.dataType.asInstanceOf[StructType]
+    if (dataType.fieldNames.contains(ParquetFileFormat.ROW_INDEX)) {
+      return attribute
+    }
+
+    val newDatatype = dataType.add(ParquetFileFormat.ROW_INDEX_FIELD)
+    attribute.copy(dataType = newDatatype)(
+      exprId = attribute.exprId,
+      qualifier = attribute.qualifier)
+  }
+
+  private def createScanWithSkipRowColumn(
+      spark: SparkSession,
+      inputScan: LogicalRelation,
+      fileFormat: DeltaParquetFileFormat,
+      tahoeFileIndex: TahoeFileIndex,
+      hadoopFsRelation: HadoopFsRelation): LogicalRelation = {
+    val useMetadataRowIndex =
+      spark.sessionState.conf.getConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX)
+
+    val skipRowField = IS_ROW_DELETED_STRUCT_FIELD
+    val scanOutputWithMetadata = if (useMetadataRowIndex) {
+      if (inputScan.output.map(_.name).contains(METADATA_NAME)) {
+        inputScan.output.collect {
+          case a: AttributeReference if a.name == METADATA_NAME => addRowIndexIfMissing(a)
+          case o => o
+        }
+      } else {
+        inputScan.output :+ fileFormat.createFileMetadataCol()
+      }
+    } else {
+      inputScan.output
+    }
+
+    val newScanOutput =
+      scanOutputWithMetadata :+ AttributeReference(skipRowField.name, skipRowField.dataType)()
+    val newDataSchema = hadoopFsRelation.dataSchema.add(skipRowField)
+    val newFileFormat = fileFormat.copyWithDVInfo(
+      tablePath = tahoeFileIndex.path.toString,
+      optimizationsEnabled = useMetadataRowIndex)
+
+    val newRelation = hadoopFsRelation.copy(fileFormat = newFileFormat, dataSchema = newDataSchema)(
+      hadoopFsRelation.sparkSession)
+
+    inputScan.copy(relation = newRelation, output = newScanOutput)
+  }
+
+  private def createScanWithSkipRowColumn(
+      spark: SparkSession,
+      inputScan: LogicalRelation,
+      fileFormat: GlutenDeltaParquetFileFormat,
+      tahoeFileIndex: TahoeFileIndex,
+      hadoopFsRelation: HadoopFsRelation): LogicalRelation = {
+    val useMetadataRowIndex =
+      spark.sessionState.conf.getConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX)
+
+    val skipRowField = GlutenDeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD
+    val scanOutputWithMetadata = if (useMetadataRowIndex) {
+      if (inputScan.output.map(_.name).contains(METADATA_NAME)) {
+        inputScan.output.collect {
+          case a: AttributeReference if a.name == METADATA_NAME => addRowIndexIfMissing(a)
+          case o => o
+        }
+      } else {
+        inputScan.output :+ fileFormat.createFileMetadataCol()
+      }
+    } else {
+      inputScan.output
+    }
+
+    val newScanOutput =
+      scanOutputWithMetadata :+ AttributeReference(skipRowField.name, skipRowField.dataType)()
+    val newDataSchema = hadoopFsRelation.dataSchema.add(skipRowField)
+    val newFileFormat = fileFormat.copyWithDVInfo(
+      tablePath = tahoeFileIndex.path.toString,
+      optimizationsEnabled = useMetadataRowIndex)
+
+    val newRelation = hadoopFsRelation.copy(fileFormat = newFileFormat, dataSchema = newDataSchema)(
+      hadoopFsRelation.sparkSession)
+
+    inputScan.copy(relation = newRelation, output = newScanOutput)
+  }
+
+  private def createRowIndexFilterNode(newScan: LogicalRelation): Filter = {
+    val skipRowColumnRefs = newScan.output.filter(_.name == IS_ROW_DELETED_COLUMN_NAME)
+    require(
+      skipRowColumnRefs.size == 1,
+      s"Expected only one column with name=$IS_ROW_DELETED_COLUMN_NAME")
+    val skipRowColumnRef = skipRowColumnRefs.head
+    Filter(EqualTo(skipRowColumnRef, Literal(RowIndexFilter.KEEP_ROW_VALUE)), newScan)
+  }
+}
diff --git a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
new file mode 100644
index 00000000000..37a46147432
--- /dev/null
+++ b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.stats
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, V2WriteCommand}
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.delta.{DeltaTable, OptimisticTransaction, PreprocessTableWithDVs}
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+
+/** Shadow Delta's PrepareDeltaScan to inject backend-specific DV preprocessing. */
+class PrepareDeltaScan(protected val spark: SparkSession)
+  extends Rule[LogicalPlan]
+  with PrepareDeltaScanBase
+  with PreprocessTableWithDVs {
+
+  override def apply(plan0: LogicalPlan): LogicalPlan = {
+    var plan = plan0
+
+    val isSubquery = isSubqueryRoot(plan)
+    val isDataSourceV2 = plan.isInstanceOf[V2WriteCommand]
+    if (isSubquery || isDataSourceV2) {
+      return plan
+    }
+
+    val updatedPlan = if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING)) {
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED)) {
+        plan = optimizeQueryWithMetadata(plan)
+      }
+      prepareDeltaScan(plan)
+    } else {
+      OptimisticTransaction.getActive.foreach {
+        txn =>
+          val logsInPlan = plan.collect { case DeltaTable(fileIndex) => fileIndex.deltaLog }
+          if (logsInPlan.exists(_.isSameLogAs(txn.deltaLog))) {
+            txn.readWholeTable()
+          }
+      }
+      plan
+    }
+
+    preprocessTablesWithDVs(updatedPlan)
+  }
+}
diff --git a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
new file mode 100644
index 00000000000..9a73b97d573
--- /dev/null
+++ b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils
+import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils.{DeltaDvCardinality, DeltaDvPayloadIndex}
+
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.delta.test.{DeltaSQLCommandTest, DeltaSQLTestUtils}
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.ExtendedSQLTest
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.JavaConverters._
+
+@ExtendedSQLTest
+class DeltaDeletionVectorHandoffSuite
+  extends QueryTest
+  with SharedSparkSession
+  with DeltaSQLTestUtils
+  with DeltaSQLCommandTest {
+
+  import testImplicits._
+
+  test("Spark 3.5 Delta DV handoff should materialize serialized payloads from scan metadata") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+        val log = DeltaLog.forTable(spark, new Path(path))
+        val addFileWithDv = log.update().allFiles.collect().find(_.deletionVector != null)
+        assert(addFileWithDv.nonEmpty)
+
+        val dataFile = addFileWithDv.get
+        val basePartitionedFile = PartitionedFile(
+          partitionValues = InternalRow.empty,
+          filePath = SparkPath.fromPath(new Path(path, dataFile.path)),
+          start = 0L,
+          length = dataFile.size,
+          fileSize = dataFile.size)
+        val partitionedFile = basePartitionedFile.copy(
+          otherConstantMetadataColumnValues = Map[String, Object](
+            GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED ->
+              dataFile.deletionVector.serializeToBase64(),
+            GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE -> "IF_CONTAINED"
+          ))
+        val normalized = VeloxDeltaMetadataUtils.normalizeSplitMetadata(
+          partitionColumnCount = 0,
+          files = Seq(partitionedFile).asJava)
+        val metadata = normalized.otherMetadataColumns.get(0)
+
+        assert(normalized.deletionVectorPayloads.length == 1)
+        assert(normalized.deletionVectorPayloads.head.nonEmpty)
+        assert(metadata.get(DeltaDvPayloadIndex) == Int.box(0))
+        assert(metadata.get(DeltaDvCardinality) == Long.box(dataFile.deletionVector.cardinality))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+    }
+  }
+
+  test("Spark 3.5 Delta DV handoff should skip payload materialization without scan metadata") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+        val log = DeltaLog.forTable(spark, new Path(path))
+        val addFileWithDv = log.update().allFiles.collect().find(_.deletionVector != null)
+        assert(addFileWithDv.nonEmpty)
+
+        val dataFile = addFileWithDv.get
+        val partitionedFile = PartitionedFile(
+          partitionValues = InternalRow.empty,
+          filePath = SparkPath.fromPath(new Path(path, dataFile.path)),
+          start = 0L,
+          length = dataFile.size,
+          fileSize = dataFile.size)
+        val normalized = VeloxDeltaMetadataUtils.normalizeSplitMetadata(
+          partitionColumnCount = 0,
+          files = Seq(partitionedFile).asJava)
+        val metadata = normalized.otherMetadataColumns.get(0)
+
+        assert(normalized.deletionVectorPayloads.isEmpty)
+        assert(!metadata.containsKey(DeltaDvPayloadIndex))
+        assert(!metadata.containsKey(DeltaDvCardinality))
+    }
+  }
+}
diff --git a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
index f265168ddbd..2ef0a9b4acf 100644
--- a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
+++ b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
@@ -197,6 +197,34 @@ class DeltaSuite
     checkAnswer(data.toDF(), Row(1) :: Row(2) :: Row(3) :: Row(4) :: Row(5) :: Row(6) :: Nil)
   }
 
+  test("native DV scan when metadata row index is disabled") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+
+        withSQLConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX.key -> "false") {
+          spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+          val log = DeltaLog.forTable(spark, new Path(path))
+          assert(log.update().allFiles.collect().exists(_.deletionVector != null))
+
+          val df = spark.read.format("delta").load(path)
+          val executedPlan = df.queryExecution.executedPlan
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
+          checkAnswer(df, Seq(Row(1, "a"), Row(2, "b")))
+        }
+    }
+  }
+
   test("partitioned append - nulls") {
     val tempDir = Utils.createTempDir()
     Seq(Some(1), None)
diff --git a/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
new file mode 100644
index 00000000000..335b88e7fd7
--- /dev/null
+++ b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.backendsapi.velox
+
+import org.apache.gluten.backendsapi.velox.VeloxIteratorApi.unescapePathName
+import org.apache.gluten.sql.shims.SparkShimLoader
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
+import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArrayFormat, StoredBitmap}
+import org.apache.spark.sql.delta.storage.dv.HadoopFileSystemDVStore
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+
+import org.apache.hadoop.fs.Path
+
+import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
+
+import scala.collection.JavaConverters._
+import scala.util.Try
+import scala.util.control.NonFatal
+
+object VeloxDeltaMetadataUtils {
+  val DeltaDvCardinality = "delta_dv_cardinality"
+  val DeltaDvPayloadIndex = "delta_dv_payload_index"
+
+  private val RowIndexFilterIdEncoded = "row_index_filter_id_encoded"
+  private val RowIndexFilterType = "row_index_filter_type"
+  private val RowIndexFilterTypeIfContained = "IF_CONTAINED"
+
+  final class NormalizedSplitMetadata(
+      val otherMetadataColumns: JList[JMap[String, Object]],
+      val deletionVectorPayloads: Array[Array[Byte]])
+    extends Serializable
+
+  private def decodeDescriptor(
+      normalizedMetadata: JMap[String, Object]): Option[DeletionVectorDescriptor] = {
+    Option(normalizedMetadata.get(RowIndexFilterIdEncoded))
+      .map(_.toString)
+      .filter(_.nonEmpty)
+      .flatMap(parseDescriptor)
+  }
+
+  private def parseDescriptor(encodedDescriptor: String): Option[DeletionVectorDescriptor] = {
+    val methods = Seq("deserializeFromBase64", "fromJson")
+    methods.iterator
+      .map {
+        methodName =>
+          Try {
+            val method = DeletionVectorDescriptor.getClass.getMethod(methodName, classOf[String])
+            method
+              .invoke(DeletionVectorDescriptor, encodedDescriptor)
+              .asInstanceOf[DeletionVectorDescriptor]
+          }.toOption
+      }
+      .collectFirst { case Some(descriptor) => descriptor }
+  }
+
+  private def serializePayload(
+      dvStore: HadoopFileSystemDVStore,
+      tablePath: Path,
+      descriptor: DeletionVectorDescriptor): Array[Byte] = {
+    if (tablePath == null) {
+      throw new IllegalStateException(
+        "Unable to resolve Delta table path while materializing deletion vector payload")
+    }
+    StoredBitmap
+      .create(descriptor, tablePath)
+      .load(dvStore)
+      .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
+  }
+
+  private def normalizeMetadataWithDescriptor(
+      metadata: JMap[String, Object],
+      descriptor: DeletionVectorDescriptor): JMap[String, Object] = {
+    val normalized = new JHashMap[String, Object]()
+    if (metadata != null) {
+      normalized.putAll(metadata)
+    }
+    normalized.put(DeltaDvCardinality, Long.box(descriptor.cardinality))
+    normalized.remove(RowIndexFilterIdEncoded)
+    if (!normalized.containsKey(RowIndexFilterType)) {
+      normalized.put(RowIndexFilterType, RowIndexFilterTypeIfContained)
+    }
+    normalized
+  }
+
+  def normalizeSplitMetadata(
+      partitionColumnCount: Int,
+      files: JList[PartitionedFile]): NormalizedSplitMetadata = {
+    val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
+    val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
+    val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
+
+    files.asScala.foreach {
+      file =>
+        val otherMetadata =
+          SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(file)
+        val metadataWithDecodedPayload = new JHashMap[String, Object]()
+        if (otherMetadata != null) {
+          metadataWithDecodedPayload.putAll(otherMetadata)
+        }
+
+        val descriptor = decodeDescriptor(metadataWithDecodedPayload)
+
+        descriptor match {
+          case Some(descriptor) =>
+            val normalized = normalizeMetadataWithDescriptor(metadataWithDecodedPayload, descriptor)
+            val payloadTablePath = resolveTablePath(partitionColumnCount, file)
+            val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
+            normalized.put(DeltaDvPayloadIndex, Int.box(deletionVectorPayloads.length))
+            deletionVectorPayloads += serializedPayload
+            normalizedMetadataColumns.add(normalized)
+          case None =>
+            normalizedMetadataColumns.add(metadataWithDecodedPayload)
+        }
+    }
+
+    new NormalizedSplitMetadata(normalizedMetadataColumns, deletionVectorPayloads.toArray)
+  }
+
+  private def activeSpark: SparkSession = {
+    SparkSession.getActiveSession
+      .orElse(SparkSession.getDefaultSession)
+      .getOrElse {
+        throw new IllegalStateException(
+          "Active SparkSession is required to materialize Delta deletion vectors")
+      }
+  }
+
+  private def resolveTablePath(partitionColumnCount: Int, file: PartitionedFile): Path = {
+    val fileParent = new Path(unescapePathName(file.filePath.toString)).getParent
+    var tablePath = fileParent
+    for (_ <- 0 until partitionColumnCount) {
+      tablePath = tablePath.getParent
+    }
+    val spark = activeSpark
+    if (tablePath != null && isDeltaTablePath(spark, tablePath)) {
+      return tablePath
+    }
+
+    // Spark can report a partition column count that does not map 1:1 to path depth for
+    // prepared Delta scans. Find the nearest ancestor of the file path that has _delta_log.
+    var candidate = fileParent
+    while (candidate != null && !isDeltaTablePath(spark, candidate)) {
+      candidate = candidate.getParent
+    }
+    if (candidate != null) candidate else tablePath
+  }
+
+  private def isDeltaTablePath(spark: SparkSession, tablePath: Path): Boolean = {
+    val deltaLogPath = new Path(tablePath, "_delta_log")
+    try {
+      deltaLogPath.getFileSystem(spark.sessionState.newHadoopConf()).exists(deltaLogPath)
+    } catch {
+      case NonFatal(_) => false
+    }
+  }
+}
diff --git a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
new file mode 100644
index 00000000000..46f87765c17
--- /dev/null
+++ b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils
+import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils.{DeltaDvCardinality, DeltaDvPayloadIndex}
+import org.apache.gluten.execution.DeltaScanTransformer
+
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.delta.test.{DeltaSQLCommandTest, DeltaSQLTestUtils}
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.ExtendedSQLTest
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.JavaConverters._
+
+@ExtendedSQLTest
+class DeltaDeletionVectorHandoffSuite
+  extends QueryTest
+  with SharedSparkSession
+  with DeltaSQLTestUtils
+  with DeltaSQLCommandTest {
+
+  import testImplicits._
+
+  test("Spark 4 Delta DV scan should stay native when metadata row index is disabled") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+
+        withSQLConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX.key -> "false") {
+          spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+          val log = DeltaLog.forTable(spark, new Path(path))
+          assert(log.update().allFiles.collect().exists(_.deletionVector != null))
+
+          val df = spark.read.format("delta").load(path)
+          val executedPlan = df.queryExecution.executedPlan
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
+          checkAnswer(df, Seq((1, "a"), (2, "b")).toDF())
+        }
+    }
+  }
+
+  test("Spark 4 Delta DV handoff should materialize serialized payloads from scan metadata") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+        val log = DeltaLog.forTable(spark, new Path(path))
+        val addFileWithDv = log.update().allFiles.collect().find(_.deletionVector != null)
+        assert(addFileWithDv.nonEmpty)
+
+        val dataFile = addFileWithDv.get
+        val basePartitionedFile = PartitionedFile(
+          partitionValues = InternalRow.empty,
+          filePath = SparkPath.fromPath(new Path(path, dataFile.path)),
+          start = 0L,
+          length = dataFile.size,
+          fileSize = dataFile.size)
+        val partitionedFile = basePartitionedFile.copy(
+          otherConstantMetadataColumnValues = Map[String, Object](
+            GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED ->
+              dataFile.deletionVector.serializeToBase64(),
+            GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE -> "IF_CONTAINED"
+          ))
+        val normalized = VeloxDeltaMetadataUtils.normalizeSplitMetadata(
+          partitionColumnCount = 0,
+          files = Seq(partitionedFile).asJava)
+        val metadata = normalized.otherMetadataColumns.get(0)
+
+        assert(normalized.deletionVectorPayloads.length == 1)
+        assert(normalized.deletionVectorPayloads.head.nonEmpty)
+        assert(metadata.get(DeltaDvPayloadIndex) == Int.box(0))
+        assert(metadata.get(DeltaDvCardinality) == Long.box(dataFile.deletionVector.cardinality))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+
+        val df = spark.read.format("delta").load(path)
+        checkAnswer(df, Seq((1, "a"), (2, "b")).toDF())
+    }
+  }
+
+  test("Spark 4 Delta DV handoff should skip payload materialization without scan metadata") {
+    withTempDir {
+      tempDir =>
+        val path = tempDir.getCanonicalPath
+        Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"))
+          .toDF("id", "value")
+          .coalesce(1)
+          .write
+          .format("delta")
+          .save(path)
+
+        spark.sql(
+          s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
+
+        val log = DeltaLog.forTable(spark, new Path(path))
+        val addFileWithDv = log.update().allFiles.collect().find(_.deletionVector != null)
+        assert(addFileWithDv.nonEmpty)
+
+        val dataFile = addFileWithDv.get
+        val partitionedFile = PartitionedFile(
+          partitionValues = InternalRow.empty,
+          filePath = SparkPath.fromPath(new Path(path, dataFile.path)),
+          start = 0L,
+          length = dataFile.size,
+          fileSize = dataFile.size)
+        val normalized = VeloxDeltaMetadataUtils.normalizeSplitMetadata(
+          partitionColumnCount = 0,
+          files = Seq(partitionedFile).asJava)
+        val metadata = normalized.otherMetadataColumns.get(0)
+
+        assert(normalized.deletionVectorPayloads.isEmpty)
+        assert(!metadata.containsKey(DeltaDvPayloadIndex))
+        assert(!metadata.containsKey(DeltaDvCardinality))
+    }
+  }
+}
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index d8b23b358fa..719bb8758cd 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -26,6 +26,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.plan.PlanNode
 import org.apache.gluten.substrait.rel.{LocalFilesBuilder, LocalFilesNode, SplitInfo}
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
+import org.apache.gluten.utils.DeltaDeletionVectorRegistry
 import org.apache.gluten.vectorized._
 
 import org.apache.spark.{Partition, SparkConf, TaskContext}
@@ -40,15 +41,21 @@ import org.apache.spark.sql.utils.SparkInputMetricsUtil.InputMetricsWrapper
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.SparkDirectoryUtil
 
+import org.apache.hadoop.fs.Path
+
 import java.lang.{Long => JLong}
+import java.nio.ByteBuffer
 import java.nio.charset.StandardCharsets
 import java.time.ZoneOffset
-import java.util.UUID
+import java.util.{ArrayList => JArrayList, HashMap => JHashMap, UUID}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
+import scala.util.Try
 
 class VeloxIteratorApi extends IteratorApi with Logging {
+  private val deltaMetadataUtilsClassName =
+    "org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils$"
 
   private def setFileSchemaForLocalFiles(
       localFilesNode: LocalFilesNode,
@@ -94,10 +101,18 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     val metadataColumns = partitionFiles
       .map(
         f => SparkShimLoader.getSparkShims.generateMetadataColumns(f, metadataColumnNames).asJava)
-    val otherMetadataColumns = partitionFiles
-      .map(f => SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(f))
+    val (otherMetadataColumns, deletionVectorPayloads) =
+      normalizeRegisteredDeltaSplitMetadata(partitionFiles, properties)
+        .orElse(normalizeDeltaSplitMetadata(partitionSchema.fields.length, partitionFiles))
+        .getOrElse {
+          (
+            partitionFiles.map {
+              f => SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(f)
+            },
+            Array.empty[Array[Byte]])
+        }
 
-    setFileSchemaForLocalFiles(
+    val localFiles = setFileSchemaForLocalFiles(
       LocalFilesBuilder.makeLocalFiles(
         partitionIndex,
         paths.asJava,
@@ -115,6 +130,12 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       dataSchema,
       fileFormat
     )
+
+    if (deletionVectorPayloads.nonEmpty) {
+      VeloxSplitInfoWithPayloads(localFiles, deletionVectorPayloads)
+    } else {
+      localFiles
+    }
   }
 
   /** Generate native row partition. */
@@ -179,6 +200,120 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     NativePlanEvaluator.injectWriteFilesTempPath(path, fileName)
   }
 
+  private def buildSplitPayloadBuffers(splitInfos: Array[SplitInfo]): Array[Array[ByteBuffer]] = {
+    val payloadBuffers = splitInfos.map {
+      case splitInfoWithPayloads: VeloxSplitInfoWithPayloads
+          if splitInfoWithPayloads.deletionVectorPayloads.nonEmpty =>
+        splitInfoWithPayloads.deletionVectorPayloads.map(toDirectByteBuffer)
+      case _ =>
+        null
+    }
+    if (payloadBuffers.exists(_ != null)) payloadBuffers else null
+  }
+
+  private def toDirectByteBuffer(bytes: Array[Byte]): ByteBuffer = {
+    val directBuffer = ByteBuffer.allocateDirect(bytes.length)
+    directBuffer.put(bytes)
+    directBuffer.flip()
+    directBuffer
+  }
+
+  private def normalizeDeltaSplitMetadata(
+      partitionColumnCount: Int,
+      partitionFiles: Seq[PartitionedFile])
+      : Option[(Seq[java.util.Map[String, Object]], Array[Array[Byte]])] = {
+    try {
+      // scalastyle:off classforname
+      val moduleClass = Class.forName(deltaMetadataUtilsClassName)
+      // scalastyle:on classforname
+      val module = moduleClass.getField("MODULE$").get(null)
+      val normalizeMethod =
+        moduleClass.getMethod("normalizeSplitMetadata", classOf[Int], classOf[java.util.List[_]])
+      val normalized =
+        normalizeMethod.invoke(module, Int.box(partitionColumnCount), partitionFiles.asJava)
+      val metadataMethod = normalized.getClass.getMethod("otherMetadataColumns")
+      val payloadsMethod = normalized.getClass.getMethod("deletionVectorPayloads")
+      Some(
+        metadataMethod
+          .invoke(normalized)
+          .asInstanceOf[java.util.List[java.util.Map[String, Object]]]
+          .asScala
+          .toSeq,
+        payloadsMethod.invoke(normalized).asInstanceOf[Array[Array[Byte]]]
+      )
+    } catch {
+      case _: ClassNotFoundException | _: NoSuchMethodException =>
+        None
+    }
+  }
+
+  private def normalizeRegisteredDeltaSplitMetadata(
+      partitionFiles: Seq[PartitionedFile],
+      properties: Map[String, String])
+      : Option[(Seq[java.util.Map[String, Object]], Array[Array[Byte]])] = {
+    properties
+      .get(DeltaDeletionVectorRegistry.RegistryIdProperty)
+      .flatMap(DeltaDeletionVectorRegistry.get)
+      .flatMap {
+        registeredEntries =>
+          val normalizedMetadataColumns = new JArrayList[java.util.Map[String, Object]]()
+          val deletionVectorPayloads = mutable.ArrayBuffer.empty[Array[Byte]]
+          var matchedDeletionVectors = 0
+          partitionFiles.foreach {
+            file =>
+              val metadata = new JHashMap[String, Object]()
+              val baseMetadata =
+                SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(file)
+              if (baseMetadata != null) {
+                metadata.putAll(baseMetadata)
+              }
+              lookupRegisteredDeltaDeletionVector(file, registeredEntries).foreach {
+                entry =>
+                  metadata.put("delta_dv_cardinality", Long.box(entry.cardinality))
+                  metadata.put("row_index_filter_type", entry.filterType)
+                  metadata.put("delta_dv_payload_index", Int.box(deletionVectorPayloads.length))
+                  deletionVectorPayloads += entry.payload
+                  matchedDeletionVectors += 1
+              }
+              normalizedMetadataColumns.add(metadata)
+          }
+          if (matchedDeletionVectors == 0) {
+            None
+          } else {
+            Some((normalizedMetadataColumns.asScala.toSeq, deletionVectorPayloads.toArray))
+          }
+      }
+  }
+
+  private def lookupRegisteredDeltaDeletionVector(
+      file: PartitionedFile,
+      registeredEntries: Map[String, DeltaDeletionVectorRegistry.Entry])
+      : Option[DeltaDeletionVectorRegistry.Entry] = {
+    deltaDeletionVectorPathCandidates(file).iterator
+      .map(registeredEntries.get)
+      .collectFirst { case Some(entry) => entry }
+  }
+
+  private def deltaDeletionVectorPathCandidates(file: PartitionedFile): Seq[String] = {
+    val rawPath = unescapePathName(file.filePath.toString)
+    val path = new Path(rawPath)
+    val pathUri = partitionedFilePathUri(file)
+    Seq(
+      pathUri.map(_.toASCIIString),
+      pathUri.map(_.getPath),
+      Some(rawPath),
+      Some(path.toUri.toASCIIString),
+      Some(path.toUri.getPath),
+      Some(rawPath.stripPrefix("/"))
+    ).flatten
+      .map(DeltaDeletionVectorRegistry.normalizePathKey(_))
+      .filter(_.nonEmpty)
+      .distinct
+  }
+
+  private def partitionedFilePathUri(file: PartitionedFile): Option[java.net.URI] =
+    Try(file.getClass.getMethod("pathUri").invoke(file).asInstanceOf[java.net.URI]).toOption
+
   /** Generate Iterator[ColumnarBatch] for first stage. */
   override def genFirstStageIterator(
       inputPartition: BaseGlutenPartition,
@@ -205,6 +340,8 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       .splitInfos
       .map(splitInfo => splitInfo.toProtobuf.toByteArray)
       .toArray
+    val splitPayloadBuffers =
+      buildSplitPayloadBuffers(inputPartition.asInstanceOf[GlutenPartition].splitInfos)
     val spillDirPath = SparkDirectoryUtil
       .get()
       .namespace("gluten-spill")
@@ -214,6 +351,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       transKernel.createKernelWithBatchIterator(
         inputPartition.plan,
         if (splitInfoByteArray.nonEmpty) splitInfoByteArray else null,
+        splitPayloadBuffers,
         if (columnarNativeIterators.nonEmpty) columnarNativeIterators.toArray else null,
         partitionIndex,
         BackendsApiManager.getSparkPlanExecApiInstance.rewriteSpillPath(spillDirPath)
@@ -268,6 +406,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       transKernel.createKernelWithBatchIterator(
         rootNode.toProtobuf.toByteArray,
         null,
+        null,
         if (columnarNativeIterator.nonEmpty) columnarNativeIterator.toArray else null,
         partitionIndex,
         BackendsApiManager.getSparkPlanExecApiInstance.rewriteSpillPath(spillDirPath)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala
new file mode 100644
index 00000000000..c34fd89da7d
--- /dev/null
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.backendsapi.velox
+
+import org.apache.gluten.substrait.rel.SplitInfo
+
+import com.google.protobuf.Message
+
+final case class VeloxSplitInfoWithPayloads(
+    delegate: SplitInfo,
+    deletionVectorPayloads: Array[Array[Byte]])
+  extends SplitInfo {
+
+  override def preferredLocations(): java.util.List[String] = delegate.preferredLocations()
+
+  override def toProtobuf(): Message = delegate.toProtobuf()
+}
diff --git a/cpp/core/compute/Runtime.h b/cpp/core/compute/Runtime.h
index 4ab944898bd..b8d6fd3e18a 100644
--- a/cpp/core/compute/Runtime.h
+++ b/cpp/core/compute/Runtime.h
@@ -97,6 +97,8 @@ class Runtime : public std::enable_shared_from_this<Runtime> {
     throw GlutenException("Not implemented");
   }
 
+  virtual void setSplitPayloads(int32_t idx, std::vector<SplitPayloadBufferView> payloads) {}
+
   virtual std::string planString(bool details, const std::unordered_map<std::string, std::string>& sessionConf) {
     throw GlutenException("Not implemented");
   }
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 46b9d7603ce..2136d11a654 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <filesystem>
+#include <limits>
 
 #include "compute/Runtime.h"
 #include "config/GlutenConfig.h"
@@ -463,6 +464,7 @@ Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeCreateKernelWith
     jobject wrapper,
     jbyteArray planArr,
     jobjectArray splitInfosArr,
+    jobjectArray splitPayloadsArr,
     jobjectArray batchItrArray,
     jint stageId,
     jint partitionId,
@@ -493,6 +495,30 @@ Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeCreateKernelWith
       auto safeSplitArray = getByteArrayElementsSafe(env, splitInfoArray);
       auto splitInfoData = safeSplitArray.elems();
 
+      if (splitPayloadsArr != nullptr) {
+        jobjectArray splitPayloadArray = static_cast<jobjectArray>(env->GetObjectArrayElement(splitPayloadsArr, i));
+        if (splitPayloadArray != nullptr) {
+          std::vector<SplitPayloadBufferView> splitPayloads;
+          splitPayloads.reserve(env->GetArrayLength(splitPayloadArray));
+          for (jsize payloadIndex = 0, payloadCount = env->GetArrayLength(splitPayloadArray);
+               payloadIndex < payloadCount;
+               ++payloadIndex) {
+            jobject payloadBuffer = env->GetObjectArrayElement(splitPayloadArray, payloadIndex);
+            GLUTEN_CHECK(payloadBuffer != nullptr, "Split payload buffer must not be null");
+            auto* payloadData = reinterpret_cast<const uint8_t*>(env->GetDirectBufferAddress(payloadBuffer));
+            const auto payloadCapacity = env->GetDirectBufferCapacity(payloadBuffer);
+            GLUTEN_CHECK(payloadData != nullptr, "Split payload buffer must be a direct ByteBuffer");
+            GLUTEN_CHECK(
+                payloadCapacity >= 0 && payloadCapacity <= std::numeric_limits<int32_t>::max(),
+                "Split payload buffer capacity must fit int32_t");
+            splitPayloads.push_back({payloadData, static_cast<int32_t>(payloadCapacity)});
+            env->DeleteLocalRef(payloadBuffer);
+          }
+          ctx->setSplitPayloads(i, std::move(splitPayloads));
+          env->DeleteLocalRef(splitPayloadArray);
+        }
+      }
+
       ctx->parseSplitInfo(splitInfoData, splitInfoSize, i);
     }
   }
diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc
index f3ffab59a6a..fe442046908 100644
--- a/cpp/velox/compute/VeloxPlanConverter.cc
+++ b/cpp/velox/compute/VeloxPlanConverter.cc
@@ -17,7 +17,10 @@
 
 #include "VeloxPlanConverter.h"
 #include <filesystem>
+#include <optional>
 
+#include <google/protobuf/any.pb.h>
+#include <google/protobuf/wrappers.pb.h>
 #include "config/GlutenConfig.h"
 #include "iceberg/IcebergPlanConverter.h"
 #include "operators/plannodes/IteratorSplit.h"
@@ -48,9 +51,41 @@ VeloxPlanConverter::VeloxPlanConverter(
 }
 
 namespace {
+const std::string kDeltaDvPayloadIndex = "delta_dv_payload_index";
+
+std::optional<std::string> unpackMetadataValue(const google::protobuf::Any& value) {
+  google::protobuf::BytesValue bytesValue;
+  if (value.UnpackTo(&bytesValue)) {
+    return bytesValue.value();
+  }
+
+  google::protobuf::StringValue stringValue;
+  if (value.UnpackTo(&stringValue)) {
+    return stringValue.value();
+  }
+
+  google::protobuf::Int32Value int32Value;
+  if (value.UnpackTo(&int32Value)) {
+    return std::to_string(int32Value.value());
+  }
+
+  google::protobuf::Int64Value int64Value;
+  if (value.UnpackTo(&int64Value)) {
+    return std::to_string(int64Value.value());
+  }
+
+  google::protobuf::DoubleValue doubleValue;
+  if (value.UnpackTo(&doubleValue)) {
+    return std::to_string(doubleValue.value());
+  }
+
+  return std::nullopt;
+}
+
 std::shared_ptr<SplitInfo> parseScanSplitInfo(
     const facebook::velox::config::ConfigBase* veloxCfg,
-    const google::protobuf::RepeatedPtrField<substrait::ReadRel_LocalFiles_FileOrFiles>& fileList) {
+    const google::protobuf::RepeatedPtrField<substrait::ReadRel_LocalFiles_FileOrFiles>& fileList,
+    const std::vector<SplitPayloadBufferView>* splitPayloads) {
   using SubstraitFileFormatCase = ::substrait::ReadRel_LocalFiles_FileOrFiles::FileFormatCase;
 
   auto splitInfo = std::make_shared<SplitInfo>();
@@ -61,6 +96,7 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
   splitInfo->partitionColumns.reserve(fileList.size());
   splitInfo->properties.reserve(fileList.size());
   splitInfo->metadataColumns.reserve(fileList.size());
+  splitInfo->deletionVectorPayloads.reserve(fileList.size());
   for (const auto& file : fileList) {
     // Expect all Partitions share the same index.
     splitInfo->partitionIndex = file.partition_index();
@@ -75,6 +111,25 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
     for (const auto& metadataColumn : file.metadata_columns()) {
       metadataColumnMap[metadataColumn.key()] = metadataColumn.value();
     }
+    for (const auto& otherMetadataColumn : file.other_const_metadata_columns()) {
+      if (auto unpackedValue = unpackMetadataValue(otherMetadataColumn.value())) {
+        metadataColumnMap[otherMetadataColumn.key()] = std::move(*unpackedValue);
+      }
+    }
+    if (auto payloadIndexIt = metadataColumnMap.find(kDeltaDvPayloadIndex); payloadIndexIt != metadataColumnMap.end()) {
+      VELOX_USER_CHECK_NOT_NULL(splitPayloads, "Split payload index found without an external payload buffer");
+      const auto payloadIndex = static_cast<size_t>(std::stoul(payloadIndexIt->second));
+      VELOX_USER_CHECK_LT(
+          payloadIndex,
+          splitPayloads->size(),
+          "Split payload index {} is out of range for {} payload buffers",
+          payloadIndex,
+          splitPayloads->size());
+      splitInfo->deletionVectorPayloads.emplace_back(splitPayloads->at(payloadIndex));
+      metadataColumnMap.erase(payloadIndexIt);
+    } else {
+      splitInfo->deletionVectorPayloads.emplace_back(std::nullopt);
+    }
     splitInfo->metadataColumns.emplace_back(metadataColumnMap);
 
     splitInfo->paths.emplace_back(file.uri_file());
@@ -138,12 +193,16 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
 void parseLocalFileNodes(
     SubstraitToVeloxPlanConverter* planConverter,
     const facebook::velox::config::ConfigBase* veloxCfg,
-    std::vector<::substrait::ReadRel_LocalFiles>& localFiles) {
+    std::vector<::substrait::ReadRel_LocalFiles>& localFiles,
+    const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads) {
   std::vector<std::shared_ptr<SplitInfo>> splitInfos;
   splitInfos.reserve(localFiles.size());
-  for (const auto& localFile : localFiles) {
+  for (size_t splitIndex = 0; splitIndex < localFiles.size(); ++splitIndex) {
+    const auto& localFile = localFiles[splitIndex];
     const auto& fileList = localFile.items();
-    splitInfos.push_back(parseScanSplitInfo(veloxCfg, fileList));
+    auto payloadIt = splitPayloads.find(splitIndex);
+    splitInfos.push_back(
+        parseScanSplitInfo(veloxCfg, fileList, payloadIt == splitPayloads.end() ? nullptr : &payloadIt->second));
   }
 
   planConverter->setSplitInfos(std::move(splitInfos));
@@ -152,9 +211,10 @@ void parseLocalFileNodes(
 
 std::shared_ptr<const facebook::velox::core::PlanNode> VeloxPlanConverter::toVeloxPlan(
     const ::substrait::Plan& substraitPlan,
-    std::vector<::substrait::ReadRel_LocalFiles> localFiles) {
+    std::vector<::substrait::ReadRel_LocalFiles> localFiles,
+    const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads) {
   if (!validationMode_) {
-    parseLocalFileNodes(&substraitVeloxPlanConverter_, veloxCfg_, localFiles);
+    parseLocalFileNodes(&substraitVeloxPlanConverter_, veloxCfg_, localFiles, splitPayloads);
   }
 
   return substraitVeloxPlanConverter_.toVeloxPlan(substraitPlan);
diff --git a/cpp/velox/compute/VeloxPlanConverter.h b/cpp/velox/compute/VeloxPlanConverter.h
index 1aee2c36bd1..fa1ec0f9e04 100644
--- a/cpp/velox/compute/VeloxPlanConverter.h
+++ b/cpp/velox/compute/VeloxPlanConverter.h
@@ -41,7 +41,8 @@ class VeloxPlanConverter {
 
   std::shared_ptr<const facebook::velox::core::PlanNode> toVeloxPlan(
       const ::substrait::Plan& substraitPlan,
-      std::vector<::substrait::ReadRel_LocalFiles> localFiles);
+      std::vector<::substrait::ReadRel_LocalFiles> localFiles,
+      const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads = {});
 
   const std::unordered_map<facebook::velox::core::PlanNodeId, std::shared_ptr<SplitInfo>>& splitInfos() {
     return substraitVeloxPlanConverter_.splitInfos();
diff --git a/cpp/velox/compute/VeloxRuntime.cc b/cpp/velox/compute/VeloxRuntime.cc
index 62e6820e9c3..ca51e6f35ba 100644
--- a/cpp/velox/compute/VeloxRuntime.cc
+++ b/cpp/velox/compute/VeloxRuntime.cc
@@ -363,6 +363,13 @@ void VeloxRuntime::parseSplitInfo(const uint8_t* data, int32_t size, int32_t spl
   localFiles_.push_back(localFile);
 }
 
+void VeloxRuntime::setSplitPayloads(int32_t splitIndex, std::vector<SplitPayloadBufferView> payloads) {
+  if (payloads.empty()) {
+    return;
+  }
+  splitPayloads_[splitIndex] = std::move(payloads);
+}
+
 void VeloxRuntime::getInfoAndIds(
     const std::unordered_map<velox::core::PlanNodeId, std::shared_ptr<SplitInfo>>& splitInfoMap,
     const std::unordered_set<velox::core::PlanNodeId>& leafPlanNodeIds,
@@ -398,7 +405,7 @@ std::string VeloxRuntime::planString(bool details, const std::unordered_map<std:
   auto veloxMemoryPool = gluten::defaultLeafVeloxMemoryPool();
   VeloxPlanConverter veloxPlanConverter(
       veloxMemoryPool.get(), veloxCfg_.get(), {}, connectorIds_, std::nullopt, std::nullopt, true);
-  auto veloxPlan = veloxPlanConverter.toVeloxPlan(substraitPlan_, localFiles_);
+  auto veloxPlan = veloxPlanConverter.toVeloxPlan(substraitPlan_, localFiles_, splitPayloads_);
   return veloxPlan->toString(details, true);
 }
 
@@ -420,7 +427,7 @@ std::shared_ptr<ResultIterator> VeloxRuntime::createResultIterator(
       connectorIds_,
       *localWriteFilesTempPath(),
       *localWriteFileName());
-  veloxPlan_ = veloxPlanConverter.toVeloxPlan(substraitPlan_, std::move(localFiles_));
+  veloxPlan_ = veloxPlanConverter.toVeloxPlan(substraitPlan_, std::move(localFiles_), splitPayloads_);
   LOG_IF(INFO, debugModeEnabled_ && taskInfo_.has_value())
       << "############### Velox plan for task " << taskInfo_.value() << " ###############" << std::endl
       << veloxPlan_->toString(true, true);
diff --git a/cpp/velox/compute/VeloxRuntime.h b/cpp/velox/compute/VeloxRuntime.h
index 37f4da33439..cfba81db92e 100644
--- a/cpp/velox/compute/VeloxRuntime.h
+++ b/cpp/velox/compute/VeloxRuntime.h
@@ -56,6 +56,8 @@ class VeloxRuntime final : public Runtime {
 
   void parseSplitInfo(const uint8_t* data, int32_t size, int32_t splitIndex) override;
 
+  void setSplitPayloads(int32_t splitIndex, std::vector<SplitPayloadBufferView> payloads) override;
+
   VeloxMemoryManager* memoryManager() override;
 
   // FIXME This is not thread-safe?
@@ -159,6 +161,7 @@ class VeloxRuntime final : public Runtime {
   std::unique_ptr<folly::Executor> spillExecutor_;
   std::unique_ptr<folly::Executor> ioExecutor_;
   VeloxConnectorIds connectorIds_;
+  std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>> splitPayloads_;
 
   std::unordered_map<int32_t, std::shared_ptr<VeloxColumnarBatch>> emptySchemaBatchLoopUp_;
 };
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index ccc1917f417..990451ec410 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -15,9 +15,13 @@
  * limitations under the License.
  */
 #include "WholeStageResultIterator.h"
+#include <algorithm>
+#include <optional>
 #include "VeloxBackend.h"
 #include "VeloxPlanConverter.h"
 #include "VeloxRuntime.h"
+#include "compute/delta/DeltaConnector.h"
+#include "compute/delta/DeltaSplit.h"
 #include "config/VeloxConfig.h"
 #include "utils/ConfigExtractor.h"
 #include "velox/connectors/hive/HiveConfig.h"
@@ -66,6 +70,90 @@ const std::string kWriteIOTime = "writeIOWallNanos";
 
 // others
 const std::string kHiveDefaultPartition = "__HIVE_DEFAULT_PARTITION__";
+const std::string kDeltaTableFormat = "delta";
+const std::string kTableFormatKey = "table_format";
+const std::string kDeltaDvCardinality = "delta_dv_cardinality";
+const std::string kRowIndexFilterType = "row_index_filter_type";
+
+bool isDeltaMetadata(const std::unordered_map<std::string, std::string>& metadata) {
+  auto tableFormatIt = metadata.find(kTableFormatKey);
+  return (tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat) ||
+      metadata.find(kDeltaDvCardinality) != metadata.end() || metadata.find(kRowIndexFilterType) != metadata.end();
+}
+
+bool isDeltaScanInfo(const std::shared_ptr<SplitInfo>& splitInfo) {
+  for (const auto& metadata : splitInfo->metadataColumns) {
+    if (isDeltaMetadata(metadata)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+const velox::core::TableScanNode* findTableScanNodeById(
+    const std::shared_ptr<const velox::core::PlanNode>& planNode,
+    const velox::core::PlanNodeId& nodeId) {
+  if (planNode == nullptr) {
+    return nullptr;
+  }
+
+  if (planNode->id() == nodeId) {
+    return dynamic_cast<const velox::core::TableScanNode*>(planNode.get());
+  }
+
+  for (const auto& source : planNode->sources()) {
+    if (const auto* found = findTableScanNodeById(source, nodeId)) {
+      return found;
+    }
+  }
+  return nullptr;
+}
+
+std::string connectorIdForScanNode(
+    const std::shared_ptr<const velox::core::PlanNode>& planNode,
+    const velox::core::PlanNodeId& nodeId) {
+  const auto* tableScanNode = findTableScanNodeById(planNode, nodeId);
+  if (tableScanNode == nullptr) {
+    return "";
+  }
+  return tableScanNode->tableHandle()->connectorId();
+}
+
+std::optional<uint64_t> getOptionalUint64(
+    const std::unordered_map<std::string, std::string>& metadata,
+    const std::string& key) {
+  auto it = metadata.find(key);
+  if (it == metadata.end() || it->second.empty()) {
+    return std::nullopt;
+  }
+  return static_cast<uint64_t>(std::stoull(it->second));
+}
+
+std::optional<gluten::delta::DeltaDeletionVectorDescriptor> parseDeltaDeletionVector(
+    const std::unordered_map<std::string, std::string>& metadata,
+    std::optional<SplitPayloadBufferView> serializedPayloadView) {
+  if (!serializedPayloadView.has_value()) {
+    return std::nullopt;
+  }
+
+  const auto cardinality = getOptionalUint64(metadata, kDeltaDvCardinality);
+  return gluten::delta::DeltaDeletionVectorDescriptor::serialized(cardinality, serializedPayloadView);
+}
+
+gluten::delta::DeltaRowIndexFilterType parseDeltaRowIndexFilterType(
+    const std::unordered_map<std::string, std::string>& metadata) {
+  auto it = metadata.find(kRowIndexFilterType);
+  if (it == metadata.end()) {
+    return gluten::delta::DeltaRowIndexFilterType::kKeepAll;
+  }
+  if (it->second == "IF_CONTAINED") {
+    return gluten::delta::DeltaRowIndexFilterType::kIfContained;
+  }
+  if (it->second == "IF_NOT_CONTAINED") {
+    return gluten::delta::DeltaRowIndexFilterType::kIfNotContained;
+  }
+  return gluten::delta::DeltaRowIndexFilterType::kKeepAll;
+}
 
 } // namespace
 
@@ -131,7 +219,8 @@ WholeStageResultIterator::WholeStageResultIterator(
     throw std::runtime_error("Invalid scan information.");
   }
 
-  for (const auto& scanInfo : scanInfos) {
+  for (size_t scanInfoIdx = 0; scanInfoIdx < scanInfos.size(); ++scanInfoIdx) {
+    const auto& scanInfo = scanInfos[scanInfoIdx];
     // Get the information for TableScan.
     // Partition index in scan info is not used.
     const auto& paths = scanInfo->paths;
@@ -141,6 +230,13 @@ WholeStageResultIterator::WholeStageResultIterator(
     const auto& format = scanInfo->format;
     const auto& partitionColumns = scanInfo->partitionColumns;
     const auto& metadataColumns = scanInfo->metadataColumns;
+    const auto scanNodeConnectorId = connectorIdForScanNode(veloxPlan_, scanNodeIds_[scanInfoIdx]);
+    const bool isDeltaScan = scanNodeConnectorId == connectorIds_.delta || isDeltaScanInfo(scanInfo);
+    const auto deltaMetadataFiles = std::count_if(
+        metadataColumns.begin(), metadataColumns.end(), [](const auto& metadata) { return isDeltaMetadata(metadata); });
+    LOG(INFO) << "WholeStageResultIterator scanInfo[" << scanInfoIdx << "] nodeId=" << scanNodeIds_[scanInfoIdx]
+              << " files=" << paths.size() << " connectorId=" << scanNodeConnectorId << " isDeltaScan=" << isDeltaScan
+              << " deltaMetadataFiles=" << deltaMetadataFiles;
 #ifdef GLUTEN_ENABLE_GPU
     // Under the pre-condition that all the split infos has same partition column and format.
     const auto canUseCudfConnector = scanInfo->canUseCudfConnector();
@@ -174,10 +270,29 @@ WholeStageResultIterator::WholeStageResultIterator(
             deleteFiles,
             metadataColumn,
             properties[idx]);
+      } else if (isDeltaScan) {
+        std::unordered_map<std::string, std::string> customSplitInfo{{"table_format", kDeltaTableFormat}};
+        split = std::make_shared<gluten::delta::HiveDeltaSplit>(
+            connectorIds_.delta,
+            paths[idx],
+            format,
+            starts[idx],
+            lengths[idx],
+            partitionKeys,
+            std::nullopt,
+            customSplitInfo,
+            nullptr,
+            std::unordered_map<std::string, std::string>(),
+            true,
+            parseDeltaDeletionVector(metadataColumn, scanInfo->deletionVectorPayloads[idx]),
+            std::nullopt,
+            parseDeltaRowIndexFilterType(metadataColumn),
+            metadataColumn,
+            properties[idx]);
       } else {
-        auto connectorId = connectorIds_.hive;
+        auto connectorId = scanNodeConnectorId.empty() ? connectorIds_.hive : scanNodeConnectorId;
 #ifdef GLUTEN_ENABLE_GPU
-        if (canUseCudfConnector && enableCudf_ &&
+        if (connectorId == connectorIds_.hive && canUseCudfConnector && enableCudf_ &&
             veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault)) {
           connectorId = connectorIds_.cudfHive;
         }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 5477176ce85..6aabd1076aa 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -19,6 +19,7 @@
 
 #include "TypeUtils.h"
 #include "VariantToVectorConverter.h"
+#include "compute/delta/DeltaConnector.h"
 #include "jni/JniHashTable.h"
 #include "operators/hashjoin/HashTableBuilder.h"
 #include "operators/plannodes/RowVectorStream.h"
@@ -46,6 +47,11 @@ using namespace cudf_velox::connector::hive;
 namespace gluten {
 namespace {
 
+const std::string kDeltaTableFormat = "delta";
+const std::string kTableFormatKey = "table_format";
+const std::string kDeltaDvCardinality = "delta_dv_cardinality";
+const std::string kRowIndexFilterType = "row_index_filter_type";
+
 bool useCudfTableHandle(const std::vector<std::shared_ptr<SplitInfo>>& splitInfos) {
 #ifdef GLUTEN_ENABLE_GPU
   if (splitInfos.empty()) {
@@ -57,6 +63,21 @@ bool useCudfTableHandle(const std::vector<std::shared_ptr<SplitInfo>>& splitInfo
 #endif
 }
 
+bool isDeltaMetadata(const std::unordered_map<std::string, std::string>& metadata) {
+  auto tableFormatIt = metadata.find(kTableFormatKey);
+  return (tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat) ||
+      metadata.find(kDeltaDvCardinality) != metadata.end() || metadata.find(kRowIndexFilterType) != metadata.end();
+}
+
+bool isDeltaSplitInfo(const std::shared_ptr<SplitInfo>& splitInfo) {
+  for (const auto& metadata : splitInfo->metadataColumns) {
+    if (isDeltaMetadata(metadata)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 core::SortOrder toSortOrder(const ::substrait::SortField& sortField) {
   switch (sortField.direction()) {
     case ::substrait::SortField_SortDirection_SORT_DIRECTION_ASC_NULLS_FIRST:
@@ -1573,8 +1594,9 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
 
   connector::ConnectorTableHandlePtr tableHandle;
   auto remainingFilter = readRel.has_filter() ? exprConverter_->toVeloxExpr(readRel.filter(), baseSchema) : nullptr;
-  auto connectorId = connectorIds_.hive;
-  if (useCudfTableHandle(splitInfos_) && veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
+  auto connectorId = isDeltaSplitInfo(splitInfo) ? connectorIds_.delta : connectorIds_.hive;
+  if (connectorId == connectorIds_.hive && useCudfTableHandle(splitInfos_) &&
+      veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
       veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
 #ifdef GLUTEN_ENABLE_GPU
     connectorId = connectorIds_.cudfHive;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h
index 373601916d4..f8dad3ed158 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h
@@ -19,6 +19,7 @@
 
 #include "SubstraitToVeloxExpr.h"
 #include "TypeUtils.h"
+#include "compute/Runtime.h"
 #include "compute/VeloxConnectorIds.h"
 #include "velox/connectors/hive/FileProperties.h"
 #include "velox/connectors/hive/TableHandle.h"
@@ -50,6 +51,9 @@ struct SplitInfo {
   /// The metadata columns associated with partitioned table.
   std::vector<std::unordered_map<std::string, std::string>> metadataColumns;
 
+  /// Optional externally provided deletion vector payloads aligned with metadataColumns.
+  std::vector<std::optional<SplitPayloadBufferView>> deletionVectorPayloads;
+
   /// The file paths to be scanned.
   std::vector<std::string> paths;
 
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
index d2513718411..3734222a08f 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
@@ -31,11 +31,17 @@ public class ColumnarBatchOutIterator extends ClosableIterator<ColumnarBatch>
     implements RuntimeAware {
   private final Runtime runtime;
   private final long iterHandle;
+  private final Object retainedReference;
 
   public ColumnarBatchOutIterator(Runtime runtime, long iterHandle) {
+    this(runtime, iterHandle, null);
+  }
+
+  public ColumnarBatchOutIterator(Runtime runtime, long iterHandle, Object retainedReference) {
     super();
     this.runtime = runtime;
     this.iterHandle = iterHandle;
+    this.retainedReference = retainedReference;
   }
 
   @Override
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
index 6d2c90896b2..d94409c4777 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
@@ -27,6 +27,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -78,17 +79,30 @@ public ColumnarBatchOutIterator createKernelWithBatchIterator(
       int partitionIndex,
       String spillDirPath)
       throws RuntimeException {
+    return createKernelWithBatchIterator(
+        wsPlan, splitInfo, null, iterList, partitionIndex, spillDirPath);
+  }
+
+  public ColumnarBatchOutIterator createKernelWithBatchIterator(
+      byte[] wsPlan,
+      byte[][] splitInfo,
+      ByteBuffer[][] splitPayloads,
+      ColumnarBatchInIterator[] iterList,
+      int partitionIndex,
+      String spillDirPath)
+      throws RuntimeException {
     final long itrHandle =
         jniWrapper.nativeCreateKernelWithIterator(
             wsPlan,
             splitInfo,
+            splitPayloads,
             iterList,
             TaskContext.get().stageId(),
             partitionIndex, // TaskContext.getPartitionId(),
             TaskContext.get().taskAttemptId(),
             DebugUtil.isDumpingEnabledForTask(),
             spillDirPath);
-    final ColumnarBatchOutIterator out = createOutIterator(runtime, itrHandle);
+    final ColumnarBatchOutIterator out = createOutIterator(runtime, itrHandle, splitPayloads);
     runtime
         .memoryManager()
         .addSpiller(
@@ -110,7 +124,8 @@ public long spill(MemoryTarget self, Spiller.Phase phase, long size) {
     return out;
   }
 
-  private ColumnarBatchOutIterator createOutIterator(Runtime runtime, long itrHandle) {
-    return new ColumnarBatchOutIterator(runtime, itrHandle);
+  private ColumnarBatchOutIterator createOutIterator(
+      Runtime runtime, long itrHandle, Object retainedReference) {
+    return new ColumnarBatchOutIterator(runtime, itrHandle, retainedReference);
   }
 }
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
index a8082906798..c68aab4d757 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
@@ -20,6 +20,8 @@
 import org.apache.gluten.runtime.RuntimeAware;
 import org.apache.gluten.validate.NativePlanValidationInfo;
 
+import java.nio.ByteBuffer;
+
 /**
  * This class is implemented in JNI. This provides the Java interface to invoke functions in JNI.
  * This file is used to generate the .h files required for jni. Avoid all external dependencies in
@@ -72,6 +74,7 @@ public long rtHandle() {
   public native long nativeCreateKernelWithIterator(
       byte[] wsPlan,
       byte[][] splitInfo,
+      ByteBuffer[][] splitPayloads,
       ColumnarBatchInIterator[] batchItr,
       int stageId,
       int partitionId,
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala b/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
index 1be03dd404a..0ed2f3a36e4 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
@@ -18,16 +18,28 @@ package org.apache.gluten.execution
 
 import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
+import org.apache.gluten.utils.DeltaDeletionVectorRegistry
 
+import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.connector.read.streaming.SparkDataStream
+import org.apache.spark.sql.delta.actions.AddFile
+import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
+import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArrayFormat, StoredBitmap}
+import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
+import org.apache.spark.sql.delta.storage.dv.HadoopFileSystemDVStore
 import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.datasources.HadoopFsRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.collection.BitSet
 
+import org.apache.hadoop.fs.Path
+
+import scala.collection.mutable.ListBuffer
+import scala.util.control.NonFatal
+
 case class DeltaScanTransformer(
     @transient override val relation: HadoopFsRelation,
     @transient stream: Option[SparkDataStream],
@@ -55,16 +67,30 @@ case class DeltaScanTransformer(
 
   override lazy val fileFormat: ReadFileFormat = ReadFileFormat.ParquetReadFormat
 
+  private lazy val deltaDeletionVectorRegistration
+      : DeltaScanTransformer.DeletionVectorRegistration =
+    DeltaScanTransformer.registerDeletionVectorsFromFileFormat(relation)
+
+  private lazy val deltaDeletionVectorRegistryId: Option[String] =
+    deltaDeletionVectorRegistration.registryId
+
   override protected def doValidateInternal(): ValidationResult = {
-    if (
-      requiredSchema.fields.exists(
-        _.name == "__delta_internal_is_row_deleted") || requiredSchema.fields.exists(
-        _.name == "__delta_internal_row_index")
-    ) {
-      return ValidationResult.failed(s"Deletion vector is not supported in native.")
+    val validationResult = super.doValidateInternal()
+    if (!validationResult.ok()) {
+      return validationResult
     }
 
-    super.doValidateInternal()
+    if (!deltaDeletionVectorRegistration.isValid) {
+      return ValidationResult.failed(deltaDeletionVectorRegistration.failureReason)
+    }
+
+    ValidationResult.succeeded
+  }
+
+  override def getProperties: Map[String, String] = {
+    super.getProperties ++ deltaDeletionVectorRegistryId
+      .map(DeltaDeletionVectorRegistry.RegistryIdProperty -> _)
+      .toMap
   }
 
   override def doCanonicalize(): DeltaScanTransformer = {
@@ -90,6 +116,282 @@ case class DeltaScanTransformer(
 }
 
 object DeltaScanTransformer {
+  private val IfContainedFilterType = "IF_CONTAINED"
+
+  private[execution] case class DeletionVectorRegistration(
+      attempted: Boolean,
+      deletionVectorCount: Int,
+      registryId: Option[String],
+      failures: Seq[String]) {
+    def isValid: Boolean = failures.isEmpty && (deletionVectorCount == 0 || registryId.nonEmpty)
+
+    def failureReason: String = {
+      val details =
+        if (failures.isEmpty) {
+          "no deletion vector payloads were registered"
+        } else {
+          failures.take(3).mkString("; ")
+        }
+      s"Unable to materialize Delta deletion vector payloads for native scan: $details"
+    }
+  }
+
+  private val NotAttemptedDeletionVectorRegistration =
+    DeletionVectorRegistration(
+      attempted = false,
+      deletionVectorCount = 0,
+      registryId = None,
+      failures = Nil)
+
+  private def registerDeletionVectorsFromFileFormat(
+      relation: HadoopFsRelation): DeletionVectorRegistration = {
+    val broadcastRegistration = registerDeletionVectorsFromBroadcastMap(relation)
+    if (broadcastRegistration.attempted) {
+      broadcastRegistration
+    } else {
+      registerDeletionVectorsFromPreparedScan(relation)
+    }
+  }
+
+  private def registerDeletionVectorsFromBroadcastMap(
+      relation: HadoopFsRelation): DeletionVectorRegistration = {
+    val format = relation.fileFormat
+    val broadcastDvMap: Option[scala.collection.Map[_, _]] =
+      try {
+        Option(format.getClass.getMethod("broadcastDvMap").invoke(format))
+          .collect { case o: Option[_] => o }
+          .flatten
+          .collect { case b: Broadcast[_] => b.value }
+          .collect { case m: scala.collection.Map[_, _] => m }
+      } catch {
+        case _: NoSuchMethodException =>
+          None
+        case NonFatal(e) =>
+          return DeletionVectorRegistration(
+            attempted = true,
+            deletionVectorCount = 1,
+            registryId = None,
+            failures =
+              Seq(s"failed to read Delta deletion vector broadcast map: ${errorMessage(e)}"))
+      }
+
+    val uriToDvDescriptor: Map[java.net.URI, Any] =
+      broadcastDvMap
+        .map {
+          _.asInstanceOf[scala.collection.Map[Any, Any]]
+            .collect { case (uri: java.net.URI, value) => uri -> value }
+            .toMap
+        }
+        .getOrElse(Map.empty[java.net.URI, Any])
+
+    if (uriToDvDescriptor.isEmpty) {
+      return NotAttemptedDeletionVectorRegistration
+    }
+
+    val tablePath = tablePathFromFileFormat(relation).orNull
+    if (tablePath == null) {
+      return DeletionVectorRegistration(
+        attempted = true,
+        deletionVectorCount = uriToDvDescriptor.size,
+        registryId = None,
+        failures = Seq("unable to resolve Delta table path for broadcast deletion vectors")
+      )
+    }
+
+    val dvStore = new HadoopFileSystemDVStore(relation.sparkSession.sessionState.newHadoopConf())
+    val registeredEntries = ListBuffer.empty[(String, DeltaDeletionVectorRegistry.Entry)]
+    val failures = ListBuffer.empty[String]
+    uriToDvDescriptor.foreach {
+      case (uri, dvDescriptorWithFilterType) =>
+        try {
+          val descriptor = dvDescriptorWithFilterType.getClass
+            .getMethod("descriptor")
+            .invoke(dvDescriptorWithFilterType)
+            .asInstanceOf[DeletionVectorDescriptor]
+          val filterType = dvDescriptorWithFilterType.getClass
+            .getMethod("filterType")
+            .invoke(dvDescriptorWithFilterType)
+            .toString
+          val payload = materializePayload(descriptor, tablePath, dvStore)
+          val aliases = pathAliases(uri)
+          if (aliases.isEmpty) {
+            failures += s"no file path aliases for deletion vector $uri"
+          } else {
+            registeredEntries ++= aliases.map {
+              _ -> DeltaDeletionVectorRegistry.Entry(descriptor.cardinality, filterType, payload)
+            }
+          }
+        } catch {
+          case NonFatal(e) =>
+            failures += s"$uri: ${errorMessage(e)}"
+        }
+    }
+
+    deletionVectorRegistration(
+      attempted = true,
+      deletionVectorCount = uriToDvDescriptor.size,
+      registeredEntries = registeredEntries.toSeq,
+      failures = failures.toSeq)
+  }
+
+  private def tablePathFromFileFormat(relation: HadoopFsRelation): Option[Path] = {
+    val tablePathFromFormat =
+      try {
+        Option(relation.fileFormat.getClass.getMethod("tablePath").invoke(relation.fileFormat))
+          .collect { case o: Option[_] => o }
+          .flatten
+          .map(_.toString)
+      } catch {
+        case _: NoSuchMethodException => None
+        case NonFatal(_) => None
+      }
+    tablePathFromFormat
+      .orElse(relation.location.rootPaths.headOption.map(_.toString))
+      .map(new Path(_))
+  }
+
+  private def materializePayload(
+      descriptor: DeletionVectorDescriptor,
+      tablePath: Path,
+      dvStore: HadoopFileSystemDVStore): Array[Byte] = {
+    StoredBitmap
+      .create(descriptor, tablePath)
+      .load(dvStore)
+      .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
+  }
+
+  private def deletionVectorRegistration(
+      attempted: Boolean,
+      deletionVectorCount: Int,
+      registeredEntries: Seq[(String, DeltaDeletionVectorRegistry.Entry)],
+      failures: Seq[String]): DeletionVectorRegistration = {
+    val registryId =
+      if (registeredEntries.isEmpty) {
+        None
+      } else {
+        Some(DeltaDeletionVectorRegistry.register(registeredEntries.toMap))
+      }
+    DeletionVectorRegistration(attempted, deletionVectorCount, registryId, failures)
+  }
+
+  private def registerDeletionVectorsFromPreparedScan(
+      relation: HadoopFsRelation): DeletionVectorRegistration = {
+    relation.location match {
+      case preparedIndex: PreparedDeltaFileIndex =>
+        val tablePath =
+          Option(preparedIndex.path)
+            .orElse(relation.location.rootPaths.headOption)
+            .orNull
+        if (tablePath == null) {
+          return DeletionVectorRegistration(
+            attempted = true,
+            deletionVectorCount = preparedIndex.preparedScan.files.count(_.deletionVector != null),
+            registryId = None,
+            failures = Seq("unable to resolve Delta table path for prepared deletion vector scan")
+          )
+        }
+
+        val dvStore =
+          new HadoopFileSystemDVStore(relation.sparkSession.sessionState.newHadoopConf())
+        val preparedFiles = preparedIndex.preparedScan.files
+        registerDeletionVectorsFromAddFiles(preparedFiles.iterator, tablePath, dvStore)
+      case _ =>
+        NotAttemptedDeletionVectorRegistration
+    }
+  }
+
+  private def registerDeletionVectorsFromAddFiles(
+      files: Iterator[AddFile],
+      tablePath: Path,
+      dvStore: HadoopFileSystemDVStore): DeletionVectorRegistration = {
+    val registeredEntries = ListBuffer.empty[(String, DeltaDeletionVectorRegistry.Entry)]
+    val failures = ListBuffer.empty[String]
+    var deletionVectorCount = 0
+
+    files.foreach {
+      addFile =>
+        Option(addFile.deletionVector).foreach {
+          descriptor =>
+            deletionVectorCount += 1
+            try {
+              val payload = materializePayload(descriptor, tablePath, dvStore)
+              val absolutePath = new Path(tablePath, addFile.path)
+              val aliases = pathAliases(absolutePath.toUri, absolutePath.toString)
+              if (aliases.isEmpty) {
+                failures += s"no file path aliases for deletion vector ${addFile.path}"
+              } else {
+                registeredEntries ++= aliases.map {
+                  _ -> DeltaDeletionVectorRegistry.Entry(
+                    descriptor.cardinality,
+                    IfContainedFilterType,
+                    payload)
+                }
+              }
+            } catch {
+              case NonFatal(e) =>
+                failures += s"${addFile.path}: ${errorMessage(e)}"
+            }
+        }
+    }
+
+    deletionVectorRegistration(
+      attempted = deletionVectorCount > 0,
+      deletionVectorCount = deletionVectorCount,
+      registeredEntries = registeredEntries.toSeq,
+      failures = failures.toSeq)
+  }
+
+  private def errorMessage(error: Throwable): String = {
+    val message = Option(error.getMessage).filter(_.nonEmpty).getOrElse(error.toString)
+    s"${error.getClass.getSimpleName}: $message"
+  }
+
+  private def pathAliases(uri: java.net.URI, extraAliases: String*): Seq[String] = {
+    val decodedExtraAliases = extraAliases.map(percentUnescapePathName)
+    (Seq(uri.toASCIIString, uri.getPath, Option(uri.getPath).map(_.stripPrefix("/")).orNull) ++
+      extraAliases ++
+      decodedExtraAliases ++
+      extraAliases.map(_.stripPrefix("/")) ++
+      decodedExtraAliases.map(_.stripPrefix("/")))
+      .filter(_ != null)
+      .map(DeltaDeletionVectorRegistry.normalizePathKey)
+      .filter(_.nonEmpty)
+      .distinct
+  }
+
+  private def percentUnescapePathName(path: String): String = {
+    if (path == null || path.isEmpty) {
+      return path
+    }
+    var plaintextEndIdx = path.indexOf('%')
+    val length = path.length
+    if (plaintextEndIdx == -1 || plaintextEndIdx + 2 >= length) {
+      path
+    } else {
+      val sb = new java.lang.StringBuilder(length)
+      var plaintextStartIdx = 0
+      while (plaintextEndIdx != -1 && plaintextEndIdx + 2 < length) {
+        if (plaintextEndIdx > plaintextStartIdx) sb.append(path, plaintextStartIdx, plaintextEndIdx)
+        if (
+          java.lang.Character.digit(path.charAt(plaintextEndIdx + 1), 16) != -1 &&
+          java.lang.Character.digit(path.charAt(plaintextEndIdx + 2), 16) != -1
+        ) {
+          sb.append(
+            ((java.lang.Character.digit(path.charAt(plaintextEndIdx + 1), 16) << 4) |
+              java.lang.Character.digit(path.charAt(plaintextEndIdx + 2), 16)).toChar)
+          plaintextStartIdx = plaintextEndIdx + 3
+        } else {
+          sb.append('%')
+          plaintextStartIdx = plaintextEndIdx + 1
+        }
+        plaintextEndIdx = path.indexOf('%', plaintextStartIdx)
+      }
+      if (plaintextStartIdx < length) {
+        sb.append(path, plaintextStartIdx, length)
+      }
+      sb.toString
+    }
+  }
 
   def apply(scanExec: FileSourceScanExec): DeltaScanTransformer = {
     new DeltaScanTransformer(
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala
index e16a6d12fda..f6a414db0f3 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala
@@ -16,28 +16,37 @@
  */
 package org.apache.gluten.extension
 
-import org.apache.gluten.execution.{DeltaScanTransformer, ProjectExecTransformer}
+import org.apache.gluten.backendsapi.BackendsApiManager
+import org.apache.gluten.execution.{DeltaScanTransformer, FilterExecTransformerBase, ProjectExecTransformer}
 import org.apache.gluten.extension.columnar.transition.RemoveTransitions
 
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, CreateNamedStruct, Expression, GetStructField, If, InputFileBlockLength, InputFileBlockStart, InputFileName, IsNull, LambdaFunction, Literal, NamedLambdaVariable}
-import org.apache.spark.sql.catalyst.expressions.{ArrayTransform, TransformKeys, TransformValues}
+import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, NamedExpression}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaParquetFileFormat, NoMapping}
-import org.apache.spark.sql.execution.{ProjectExec, SparkPlan}
+import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan}
 import org.apache.spark.sql.execution.datasources.FileFormat
-import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
+import org.apache.spark.sql.types.StructType
 
-import scala.collection.mutable
 import scala.collection.mutable.ListBuffer
 
 object DeltaPostTransformRules {
   def rules: Seq[Rule[SparkPlan]] =
-    RemoveTransitions :: pushDownInputFileExprRule :: columnMappingRule :: Nil
+    RemoveTransitions ::
+      nativeDeletionVectorRule ::
+      pushDownInputFileExprRule ::
+      columnMappingRule :: Nil
+
+  private val deletionVectorDeletedRowColumnName = "__delta_internal_is_row_deleted"
+  private val deletionVectorRowIndexColumnName = "__delta_internal_row_index"
+  private val deletionVectorInternalColumnNames =
+    Set(deletionVectorDeletedRowColumnName, deletionVectorRowIndexColumnName)
 
   private val COLUMN_MAPPING_RULE_TAG: TreeNodeTag[String] =
     TreeNodeTag[String]("org.apache.gluten.delta.column.mapping")
+  private val PRESERVE_DELETION_VECTOR_ROW_INDEX_TAG: TreeNodeTag[Boolean] =
+    TreeNodeTag[Boolean]("org.apache.gluten.delta.preserve.deletion.vector.row.index")
 
   private def notAppliedColumnMappingRule(plan: SparkPlan): Boolean = {
     plan.getTagValue(COLUMN_MAPPING_RULE_TAG).isEmpty
@@ -65,6 +74,87 @@ object DeltaPostTransformRules {
         child.copy(output = p.output)
     }
 
+  /**
+   * Spark Delta injects synthetic deletion-vector predicates and columns into the plan. Those are
+   * needed for the JVM reader path, but for the native Delta scan path they must be stripped or
+   * they will be applied twice with incompatible semantics.
+   */
+  val nativeDeletionVectorRule: Rule[SparkPlan] = (plan: SparkPlan) => {
+    tagRowIndexRequiredSubtrees(plan)
+    plan.transformUp {
+      case scan: DeltaScanTransformer =>
+        val cleanedDataFilters = scan.dataFilters.flatMap(stripDeletionVectorPredicate)
+        val cleanedPushDownFilters =
+          scan.pushDownFilters.map(_.flatMap(stripDeletionVectorPredicate))
+        val preserveRowIndex = shouldPreserveDeletionVectorRowIndex(scan)
+        val cleanedOutput = stripDeletionVectorInternalOutput(scan.output, preserveRowIndex)
+        val cleanedRequiredSchema =
+          stripDeletionVectorInternalSchema(scan.requiredSchema, preserveRowIndex)
+        if (
+          cleanedDataFilters == scan.dataFilters &&
+          cleanedPushDownFilters == scan.pushDownFilters &&
+          cleanedOutput == scan.output &&
+          cleanedRequiredSchema == scan.requiredSchema
+        ) {
+          scan
+        } else {
+          scan.copy(
+            output = cleanedOutput,
+            requiredSchema = cleanedRequiredSchema,
+            dataFilters = cleanedDataFilters,
+            pushDownFilters = cleanedPushDownFilters)
+        }
+      case project: ProjectExecTransformer if containsNativeDeltaScan(project.child) =>
+        val cleanedProjectList = stripDeletionVectorInternalProjectList(
+          project.projectList,
+          shouldPreserveDeletionVectorRowIndex(project))
+        if (cleanedProjectList == project.projectList) {
+          project
+        } else if (cleanedProjectList.isEmpty) {
+          project.child
+        } else {
+          ProjectExecTransformer(cleanedProjectList, project.child)
+        }
+      case project: ProjectExec if containsNativeDeltaScan(project.child) =>
+        val cleanedProjectList = stripDeletionVectorInternalProjectList(
+          project.projectList,
+          shouldPreserveDeletionVectorRowIndex(project))
+        if (cleanedProjectList == project.projectList) {
+          project
+        } else if (cleanedProjectList.isEmpty) {
+          project.child
+        } else {
+          ProjectExec(cleanedProjectList, project.child)
+        }
+      case filter: FilterExecTransformerBase if containsNativeDeltaScan(filter.child) =>
+        stripDeletionVectorPredicate(filter.cond) match {
+          case Some(cleanCondition) if cleanCondition != filter.cond =>
+            BackendsApiManager.getSparkPlanExecApiInstance
+              .genFilterExecTransformer(cleanCondition, filter.child)
+          case Some(_) =>
+            filter
+          case None =>
+            filter.child
+        }
+      case filter: FilterExec if containsNativeDeltaScan(filter.child) =>
+        stripDeletionVectorPredicate(filter.condition) match {
+          case Some(cleanCondition) if cleanCondition != filter.condition =>
+            FilterExec(cleanCondition, filter.child)
+          case Some(_) =>
+            filter
+          case None =>
+            filter.child
+        }
+    }
+  }
+
+  private def containsNativeDeltaScan(plan: SparkPlan): Boolean = {
+    plan.exists {
+      case _: DeltaScanTransformer => true
+      case _ => false
+    }
+  }
+
   private def isDeltaColumnMappingFileFormat(fileFormat: FileFormat): Boolean = fileFormat match {
     case d: DeltaParquetFileFormat if d.columnMappingMode != NoMapping =>
       true
@@ -79,6 +169,82 @@ object DeltaPostTransformRules {
     }
   }
 
+  private def referencesDeletionVectorInternalColumn(expr: Expression): Boolean = {
+    expr.references.exists(attr => deletionVectorInternalColumnNames.contains(attr.name))
+  }
+
+  private def referencesDeletionVectorRowIndex(expr: Expression): Boolean = {
+    expr.references.exists(_.name == deletionVectorRowIndexColumnName)
+  }
+
+  private def tagRowIndexRequiredSubtrees(plan: SparkPlan): Unit = {
+    def tagSubtree(subtree: SparkPlan): Unit = {
+      subtree.foreach(_.setTagValue(PRESERVE_DELETION_VECTOR_ROW_INDEX_TAG, true))
+    }
+
+    def visit(node: SparkPlan): Unit = {
+      val shouldPreserveRowIndex =
+        node.expressions.exists(containsIncrementMetricExpr) ||
+          node.expressions.exists(referencesDeletionVectorRowIndex)
+      if (shouldPreserveRowIndex) {
+        node.children.foreach(tagSubtree)
+      }
+      node.children.foreach(visit)
+    }
+
+    visit(plan)
+  }
+
+  private def shouldPreserveDeletionVectorRowIndex(plan: SparkPlan): Boolean = {
+    plan.getTagValue(PRESERVE_DELETION_VECTOR_ROW_INDEX_TAG).contains(true) ||
+    plan.expressions.exists(containsIncrementMetricExpr) ||
+    plan.expressions.exists(referencesDeletionVectorRowIndex)
+  }
+
+  private def shouldStripDeletionVectorInternalColumn(
+      columnName: String,
+      preserveRowIndex: Boolean): Boolean = {
+    columnName == deletionVectorDeletedRowColumnName ||
+    (!preserveRowIndex && columnName == deletionVectorRowIndexColumnName)
+  }
+
+  private def stripDeletionVectorInternalOutput(
+      output: Seq[Attribute],
+      preserveRowIndex: Boolean): Seq[Attribute] = {
+    output.filterNot(attr => shouldStripDeletionVectorInternalColumn(attr.name, preserveRowIndex))
+  }
+
+  private def stripDeletionVectorInternalProjectList(
+      projectList: Seq[NamedExpression],
+      preserveRowIndex: Boolean): Seq[NamedExpression] = {
+    projectList.filterNot(
+      expr => shouldStripDeletionVectorInternalColumn(expr.name, preserveRowIndex))
+  }
+
+  private def stripDeletionVectorInternalSchema(
+      schema: StructType,
+      preserveRowIndex: Boolean): StructType = {
+    StructType(
+      schema.filterNot(
+        field => shouldStripDeletionVectorInternalColumn(field.name, preserveRowIndex)))
+  }
+
+  private def stripDeletionVectorPredicate(expr: Expression): Option[Expression] = {
+    expr match {
+      case And(left, right) =>
+        (stripDeletionVectorPredicate(left), stripDeletionVectorPredicate(right)) match {
+          case (Some(cleanLeft), Some(cleanRight)) => Some(And(cleanLeft, cleanRight))
+          case (Some(cleanLeft), None) => Some(cleanLeft)
+          case (None, Some(cleanRight)) => Some(cleanRight)
+          case (None, None) => None
+        }
+      case other if referencesDeletionVectorInternalColumn(other) =>
+        None
+      case other =>
+        Some(other)
+    }
+  }
+
   private def isInputFileRelatedAttribute(attr: Attribute): Boolean = {
     attr match {
       case AttributeReference(name, _, _, _) =>
@@ -96,73 +262,6 @@ object DeltaPostTransformRules {
     }
   }
 
-  /**
-   * Checks whether two structurally compatible DataTypes have different struct field names at any
-   * nesting level.
-   */
-  private def nestedFieldNamesDiffer(logical: DataType, physical: DataType): Boolean = {
-    (logical, physical) match {
-      case (l: StructType, p: StructType) if l.length == p.length =>
-        l.zip(p).exists {
-          case (lf, pf) =>
-            lf.name != pf.name || nestedFieldNamesDiffer(lf.dataType, pf.dataType)
-        }
-      case (l: ArrayType, p: ArrayType) =>
-        nestedFieldNamesDiffer(l.elementType, p.elementType)
-      case (l: MapType, p: MapType) =>
-        nestedFieldNamesDiffer(l.keyType, p.keyType) ||
-        nestedFieldNamesDiffer(l.valueType, p.valueType)
-      case _ => false
-    }
-  }
-
-  /**
-   * Rebuilds an expression tree so that nested struct field names match the logical schema. Uses
-   * positional extraction (GetStructField) and reconstruction (CreateNamedStruct) instead of Cast,
-   * so correctness does not depend on Velox's cast_match_struct_by_name config.
-   */
-  private def reconcileFieldNames(
-      expr: Expression,
-      logical: DataType,
-      physical: DataType): Expression = {
-    (logical, physical) match {
-      case (l: StructType, p: StructType) if l.length == p.length =>
-        val rebuiltFields = l.zip(p).zipWithIndex.flatMap {
-          case ((lf, pf), i) =>
-            val extracted = GetStructField(expr, i, None)
-            val reconciled = reconcileFieldNames(extracted, lf.dataType, pf.dataType)
-            Seq(Literal(lf.name), reconciled)
-        }
-        val rebuilt = CreateNamedStruct(rebuiltFields)
-        If(IsNull(expr), Literal.create(null, l), rebuilt)
-      case (l: ArrayType, p: ArrayType) if nestedFieldNamesDiffer(l.elementType, p.elementType) =>
-        val lambdaVar = NamedLambdaVariable("element", p.elementType, p.containsNull)
-        val body = reconcileFieldNames(lambdaVar, l.elementType, p.elementType)
-        ArrayTransform(expr, LambdaFunction(body, Seq(lambdaVar)))
-      case (l: MapType, p: MapType) =>
-        val needKeys = nestedFieldNamesDiffer(l.keyType, p.keyType)
-        val needValues = nestedFieldNamesDiffer(l.valueType, p.valueType)
-        var result = expr
-        if (needValues) {
-          val keyVar = NamedLambdaVariable("key", p.keyType, false)
-          val valueVar = NamedLambdaVariable("value", p.valueType, p.valueContainsNull)
-          val body = reconcileFieldNames(valueVar, l.valueType, p.valueType)
-          result = TransformValues(result, LambdaFunction(body, Seq(keyVar, valueVar)))
-        }
-        if (needKeys) {
-          val keyVar = NamedLambdaVariable("key", p.keyType, false)
-          val valueVar = NamedLambdaVariable(
-            "value",
-            if (needValues) l.valueType else p.valueType,
-            p.valueContainsNull)
-          val body = reconcileFieldNames(keyVar, l.keyType, p.keyType)
-          result = TransformKeys(result, LambdaFunction(body, Seq(keyVar, valueVar)))
-        }
-        result
-      case _ => expr
-    }
-  }
-
   /**
    * This method is only used for Delta ColumnMapping FileFormat(e.g. nameMapping and idMapping)
    * transform the metadata of Delta into Parquet's, each plan should only be transformed once.
@@ -185,9 +284,8 @@ object DeltaPostTransformRules {
       )(SparkSession.active)
       // transform output's name into physical name so Reader can read data correctly
       // should keep the columns order the same as the origin output
-      case class ColumnMapping(logicalName: String, logicalType: DataType, physicalAttr: Attribute)
-      val columnMappings = ListBuffer.empty[ColumnMapping]
-      val seenNames = mutable.Set.empty[String]
+      val originColumnNames = ListBuffer.empty[String]
+      val transformedAttrs = ListBuffer.empty[Attribute]
       def mapAttribute(attr: Attribute) = {
         val newAttr = if (plan.isMetadataColumn(attr)) {
           attr
@@ -198,8 +296,9 @@ object DeltaPostTransformRules {
             .createPhysicalAttributes(Seq(attr), fmt.referenceSchema, fmt.columnMappingMode)
             .head
         }
-        if (seenNames.add(attr.name)) {
-          columnMappings += ColumnMapping(attr.name, attr.dataType, newAttr)
+        if (!originColumnNames.contains(attr.name)) {
+          transformedAttrs += newAttr
+          originColumnNames += attr.name
         }
         newAttr
       }
@@ -239,20 +338,9 @@ object DeltaPostTransformRules {
       scanExecTransformer.copyTagsFrom(plan)
       tagColumnMappingRule(scanExecTransformer)
 
-      // Alias physical names back to logical names. For struct-typed columns, Delta column
-      // mapping renames internal field names to physical UUIDs. A top-level Alias only restores
-      // the column name, not the struct's internal field names. We rebuild the struct with
-      // logical field names using positional extraction (GetStructField/CreateNamedStruct)
-      // instead of Cast, so correctness does not depend on any Velox cast config.
-      val expr = columnMappings.map {
-        cm =>
-          val projectedExpr: Expression =
-            if (nestedFieldNamesDiffer(cm.logicalType, cm.physicalAttr.dataType)) {
-              reconcileFieldNames(cm.physicalAttr, cm.logicalType, cm.physicalAttr.dataType)
-            } else {
-              cm.physicalAttr
-            }
-          Alias(projectedExpr, cm.logicalName)(exprId = cm.physicalAttr.exprId)
+      // alias physicalName into tableName
+      val expr = (transformedAttrs, originColumnNames).zipped.map {
+        (attr, columnName) => Alias(attr, columnName)(exprId = attr.exprId)
       }
       val projectExecTransformer = ProjectExecTransformer(expr.toSeq, scanExecTransformer)
       projectExecTransformer
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
index 5fe1b4ba86e..be4fcbedb17 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
@@ -17,16 +17,66 @@
 package org.apache.gluten.extension
 
 import org.apache.gluten.execution.DeltaScanTransformer
+import org.apache.gluten.extension.columnar.FallbackTags
 import org.apache.gluten.extension.columnar.offload.OffloadSingleNode
 
 import org.apache.spark.sql.delta.DeltaParquetFileFormat
+import org.apache.spark.sql.delta.SnapshotDescriptor
+import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable
+import org.apache.spark.sql.delta.files.TahoeFileIndex
+import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
 import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
+import org.apache.spark.util.SparkVersionUtil
 
 case class OffloadDeltaScan() extends OffloadSingleNode {
   override def offload(plan: SparkPlan): SparkPlan = plan match {
-    case scan: FileSourceScanExec
-        if scan.relation.fileFormat.getClass == classOf[DeltaParquetFileFormat] =>
+    case scan: FileSourceScanExec if isDeltaLogScan(scan) =>
+      FallbackTags.add(scan, "fallback Delta _delta_log scan")
+      scan
+    case scan: FileSourceScanExec if shouldFallbackSpark34DeletionVectorScan(scan) =>
+      FallbackTags.add(scan, "fallback Spark 3.4 Delta DV scan")
+      scan
+    case scan: FileSourceScanExec if isDeltaScan(scan) =>
       DeltaScanTransformer(scan)
     case other => other
   }
+
+  private def isDeltaScan(scan: FileSourceScanExec): Boolean = {
+    isDeltaFileIndex(scan) || isDeltaParquetScan(scan)
+  }
+
+  private def isDeltaParquetScan(scan: FileSourceScanExec): Boolean = {
+    val fileFormatClass = scan.relation.fileFormat.getClass
+    fileFormatClass == classOf[DeltaParquetFileFormat] ||
+    fileFormatClass.getSimpleName == "GlutenDeltaParquetFileFormat"
+  }
+
+  private def isDeltaFileIndex(scan: FileSourceScanExec): Boolean = {
+    scan.relation.location.isInstanceOf[TahoeFileIndex] ||
+    scan.relation.location.isInstanceOf[PreparedDeltaFileIndex]
+  }
+
+  private def isDeltaLogScan(scan: FileSourceScanExec): Boolean = {
+    scan.relation.location.rootPaths.exists {
+      path =>
+        val root = path.toString
+        root.contains("/_delta_log") || root.contains("\\_delta_log") || root.endsWith("_delta_log")
+    }
+  }
+
+  private def shouldFallbackSpark34DeletionVectorScan(scan: FileSourceScanExec): Boolean = {
+    if (SparkVersionUtil.gteSpark35) {
+      return false
+    }
+
+    scan.relation.location match {
+      case preparedIndex: PreparedDeltaFileIndex =>
+        preparedIndex.preparedScan.files.exists(_.deletionVector != null)
+      case index: TahoeFileIndex =>
+        val snapshot = index.asInstanceOf[SnapshotDescriptor]
+        deletionVectorsReadable(snapshot.protocol, snapshot.metadata)
+      case _ =>
+        false
+    }
+  }
 }
diff --git a/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala b/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala
index 031bf460347..fda594ef84d 100644
--- a/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala
+++ b/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala
@@ -18,7 +18,10 @@ package org.apache.gluten.execution
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.FileSourceScanExec
+import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.types._
+import org.apache.spark.util.SparkVersionUtil
 
 import scala.collection.JavaConverters._
 
@@ -37,6 +40,7 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
       .set("spark.memory.offHeap.size", "2g")
       .set("spark.unsafe.exceptionOnMemoryLeak", "true")
       .set("spark.sql.autoBroadcastJoinThreshold", "-1")
+      .set("spark.sql.ansi.enabled", "false")
       .set("spark.sql.sources.useV1SourceList", "avro")
       .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
       .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
@@ -209,16 +213,40 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
           s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
         checkAnswer(spark.read.format("delta").load(path), df1.union(df2))
         spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (${values2.mkString(", ")})")
-        import org.apache.spark.sql.execution.GlutenImplicits._
         val df = spark.read.format("delta").load(path)
-        assert(
-          df.fallbackSummary.fallbackNodeToReason
-            .flatMap(_.values)
-            .exists(_.contains("Deletion vector is not supported in native")))
+        val executedPlan = df.queryExecution.executedPlan
+        if (SparkVersionUtil.gteSpark35) {
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
+          val planText = executedPlan.toString()
+          assert(!planText.contains("__delta_internal_is_row_deleted"))
+          assert(!planText.contains("__delta_internal_row_index"))
+        } else {
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.isEmpty)
+        }
         checkAnswer(df, df1)
     }
   }
 
+  testWithMinSparkVersion("delta: _delta_log scan should fallback", "3.4") {
+    withTempPath {
+      p =>
+        import testImplicits._
+        val path = p.getCanonicalPath
+        Seq((1, "a"), (2, "b")).toDF("id", "value").write.format("delta").save(path)
+
+        val deltaLogDf = spark.read.json(s"$path/_delta_log/*.json")
+        val executedPlan = deltaLogDf.queryExecution.executedPlan
+
+        assert(executedPlan.collect { case _: FileSourceScanExecTransformerBase => true }.isEmpty)
+        assert(executedPlan.collect { case _: BatchScanExecTransformerBase => true }.isEmpty)
+        assert(executedPlan.collect {
+          case _: FileSourceScanExec => true
+          case _: BatchScanExec => true
+        }.nonEmpty)
+        assert(deltaLogDf.count() > 0)
+    }
+  }
+
   testWithMinSparkVersion("delta: push down input_file_name expression", "3.2") {
     withTable("source_table") {
       withTable("target_table") {
@@ -320,13 +348,13 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
     withSQLConf("spark.gluten.sql.columnar.scanOnly" -> "true") {
       withTable("delta_pf") {
         spark.sql(s"""
-                     |create table test (id int, name string) using delta
+                     |create table delta_pf (id int, name string) using delta
                      |""".stripMargin)
         spark.sql(s"""
-                     |insert into test values (1, "v1"), (2, "v2"), (3, "v1"), (4, "v2")
+                     |insert into delta_pf values (1, "v1"), (2, "v2"), (3, "v1"), (4, "v2")
                      |""".stripMargin)
         runQueryAndCompare(
-          "select id from test where name > 'v1'",
+          "select id from delta_pf where name > 'v1'",
           compareResult = true,
           noFallBack = false) {
           df =>
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala
new file mode 100644
index 00000000000..a6f9bcb9612
--- /dev/null
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.utils
+
+import java.util.UUID
+import java.util.concurrent.ConcurrentHashMap
+
+object DeltaDeletionVectorRegistry {
+  val RegistryIdProperty = "gluten.delta.dv.registry.id"
+
+  final case class Entry(cardinality: Long, filterType: String, payload: Array[Byte])
+    extends Serializable
+
+  private val registry = new ConcurrentHashMap[String, Map[String, Entry]]()
+
+  def register(entries: Map[String, Entry]): String = {
+    val id = UUID.randomUUID().toString
+    registry.put(id, entries)
+    id
+  }
+
+  def get(id: String): Option[Map[String, Entry]] = Option(registry.get(id))
+
+  def normalizePathKey(path: String): String = {
+    path.replace('\\', '/').stripSuffix("/")
+  }
+}
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
index 0ef0b6a28c3..2b36cac94b7 100644
--- a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenFallbackReporter.scala
@@ -39,6 +39,9 @@ case class GlutenFallbackReporter(glutenConf: GlutenConfig, spark: SparkSession)
     if (!glutenConf.enableFallbackReport) {
       return plan
     }
+    if (GlutenFallbackReporter.containsInternalDeltaLogScan(plan)) {
+      return plan
+    }
     printFallbackReason(plan)
     if (GlutenUIUtils.uiEnabled(spark.sparkContext)) {
       postFallbackReason(plan)
@@ -96,3 +99,20 @@ case class GlutenFallbackReporter(glutenConf: GlutenConfig, spark: SparkSession)
     GlutenUIUtils.postEvent(sc, event)
   }
 }
+
+object GlutenFallbackReporter {
+  private[execution] def containsInternalDeltaLogScan(plan: SparkPlan): Boolean = {
+    plan.exists {
+      case scan: FileSourceScanExec =>
+        scan.relation.location.rootPaths.exists {
+          path =>
+            val root = path.toString
+            root.contains("/_delta_log") ||
+            root.contains("\\_delta_log") ||
+            root.endsWith("_delta_log")
+        }
+      case _ =>
+        false
+    }
+  }
+}
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenQueryExecutionListener.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenQueryExecutionListener.scala
index 30aac6a8f38..f2529da82f1 100644
--- a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenQueryExecutionListener.scala
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenQueryExecutionListener.scala
@@ -22,6 +22,7 @@ import org.apache.gluten.events.GlutenPlanFallbackEvent
 import org.apache.spark.SparkContext
 import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
+import org.apache.spark.sql.catalyst.plans.logical.CommandResult
 import org.apache.spark.sql.execution.ui.{GlutenUIUtils, SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}
 
 import scala.collection.mutable
@@ -79,6 +80,9 @@ class GlutenQueryExecutionListener(sc: SparkContext) extends SparkListener with
       if (!enabledAtStart) {
         return
       }
+      if (shouldSkipInternalDeltaLogQuery(qe)) {
+        return
+      }
 
       val summary =
         GlutenImplicits.collectQueryExecutionFallbackSummary(qe.sparkSession, qe)
@@ -107,6 +111,15 @@ class GlutenQueryExecutionListener(sc: SparkContext) extends SparkListener with
           e)
     }
   }
+
+  private def shouldSkipInternalDeltaLogQuery(qe: QueryExecution): Boolean = {
+    qe.commandExecuted.exists {
+      case r: CommandResult =>
+        GlutenFallbackReporter.containsInternalDeltaLogScan(r.commandPhysicalPlan)
+      case _ =>
+        false
+    } || GlutenFallbackReporter.containsInternalDeltaLogScan(qe.executedPlan)
+  }
 }
 
 object GlutenQueryExecutionListener {

From d8c3f61983fd95fa3d576b9399bec9f10c46b428 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Sun, 24 May 2026 18:09:31 +0300
Subject: [PATCH 2/9] [VL][Delta] Fix DV scan CI coverage

---
 .../DeltaDeletionVectorHandoffSuite.scala     | 12 ++++++----
 .../gluten/extension/OffloadDeltaScan.scala   | 24 +++++++++++++++++++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index 46f87765c17..0b0158a4ede 100644
--- a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -55,13 +55,15 @@ class DeltaDeletionVectorHandoffSuite
 
         spark.sql(
           s"ALTER TABLE delta.`$path` SET TBLPROPERTIES ('delta.enableDeletionVectors' = true)")
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
 
-        withSQLConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX.key -> "false") {
-          spark.sql(s"DELETE FROM delta.`$path` WHERE id IN (3, 4)")
-
-          val log = DeltaLog.forTable(spark, new Path(path))
-          assert(log.update().allFiles.collect().exists(_.deletionVector != null))
+        val log = DeltaLog.forTable(spark, new Path(path))
+        assert(log.update().allFiles.collect().exists(_.deletionVector != null))
 
+        // This covers scan behavior over an existing DV. Delta 4 may choose a non-DV DELETE
+        // path when metadata row indexes are disabled during DML, so keep DV creation on the
+        // default path and disable metadata-row-index only for the read.
+        withSQLConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX.key -> "false") {
           val df = spark.read.format("delta").load(path)
           val executedPlan = df.queryExecution.executedPlan
           assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
index be4fcbedb17..eff54a83e8d 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.delta.DeltaParquetFileFormat
 import org.apache.spark.sql.delta.SnapshotDescriptor
 import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable
 import org.apache.spark.sql.delta.files.TahoeFileIndex
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
 import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
 import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
 import org.apache.spark.util.SparkVersionUtil
@@ -36,6 +37,10 @@ case class OffloadDeltaScan() extends OffloadSingleNode {
     case scan: FileSourceScanExec if shouldFallbackSpark34DeletionVectorScan(scan) =>
       FallbackTags.add(scan, "fallback Spark 3.4 Delta DV scan")
       scan
+    case scan: FileSourceScanExec if shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
+          scan) =>
+      FallbackTags.add(scan, "fallback Spark 3.5 Delta DV scan without metadata row index")
+      scan
     case scan: FileSourceScanExec if isDeltaScan(scan) =>
       DeltaScanTransformer(scan)
     case other => other
@@ -69,6 +74,25 @@ case class OffloadDeltaScan() extends OffloadSingleNode {
       return false
     }
 
+    containsDeletionVector(scan)
+  }
+
+  private def shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
+      scan: FileSourceScanExec): Boolean = {
+    if (!SparkVersionUtil.gteSpark35 || SparkVersionUtil.gteSpark40) {
+      return false
+    }
+
+    // Delta 3.3/Spark 3.5 DML tests force this path and rely on Spark's injected
+    // row-index filter column for correctness. Keep it on Spark until the native path can
+    // prove the same contract for UPDATE-generated DVs.
+    val useMetadataRowIndex =
+      scan.relation.sparkSession.sessionState.conf.getConf(
+        DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX)
+    !useMetadataRowIndex && containsDeletionVector(scan)
+  }
+
+  private def containsDeletionVector(scan: FileSourceScanExec): Boolean = {
     scan.relation.location match {
       case preparedIndex: PreparedDeltaFileIndex =>
         preparedIndex.preparedScan.files.exists(_.deletionVector != null)

From f30d47a2cdaa45ba060c477d37a192f324035096 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Sun, 24 May 2026 18:22:40 +0300
Subject: [PATCH 3/9] [VL][Delta] Fix Scala format

---
 .../scala/org/apache/gluten/extension/OffloadDeltaScan.scala   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
index eff54a83e8d..141505f306c 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
@@ -37,7 +37,8 @@ case class OffloadDeltaScan() extends OffloadSingleNode {
     case scan: FileSourceScanExec if shouldFallbackSpark34DeletionVectorScan(scan) =>
       FallbackTags.add(scan, "fallback Spark 3.4 Delta DV scan")
       scan
-    case scan: FileSourceScanExec if shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
+    case scan: FileSourceScanExec
+        if shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
           scan) =>
       FallbackTags.add(scan, "fallback Spark 3.5 Delta DV scan without metadata row index")
       scan

From d142569a5bd6b50444e7d8098856b9f6932c0291 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Sun, 24 May 2026 23:53:08 +0300
Subject: [PATCH 4/9] [VL][Delta] Fix DV scan CI fallbacks

---
 .../apache/spark/sql/delta/DeltaSuite.scala   |  4 ++--
 .../DeltaDeletionVectorHandoffSuite.scala     |  9 ++++----
 .../gluten/extension/OffloadDeltaScan.scala   | 22 ++++++++++---------
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
index 2ef0a9b4acf..a60054031c6 100644
--- a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
+++ b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaSuite.scala
@@ -197,7 +197,7 @@ class DeltaSuite
     checkAnswer(data.toDF(), Row(1) :: Row(2) :: Row(3) :: Row(4) :: Row(5) :: Row(6) :: Nil)
   }
 
-  test("native DV scan when metadata row index is disabled") {
+  test("DV scan without metadata row index falls back and stays correct") {
     withTempDir {
       tempDir =>
         val path = tempDir.getCanonicalPath
@@ -219,7 +219,7 @@ class DeltaSuite
 
           val df = spark.read.format("delta").load(path)
           val executedPlan = df.queryExecution.executedPlan
-          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.isEmpty)
           checkAnswer(df, Seq(Row(1, "a"), Row(2, "b")))
         }
     }
diff --git a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index 0b0158a4ede..e550df266cf 100644
--- a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -42,7 +42,7 @@ class DeltaDeletionVectorHandoffSuite
 
   import testImplicits._
 
-  test("Spark 4 Delta DV scan should stay native when metadata row index is disabled") {
+  test("Spark 4 Delta DV scan should fall back when metadata row index is disabled") {
     withTempDir {
       tempDir =>
         val path = tempDir.getCanonicalPath
@@ -60,13 +60,12 @@ class DeltaDeletionVectorHandoffSuite
         val log = DeltaLog.forTable(spark, new Path(path))
         assert(log.update().allFiles.collect().exists(_.deletionVector != null))
 
-        // This covers scan behavior over an existing DV. Delta 4 may choose a non-DV DELETE
-        // path when metadata row indexes are disabled during DML, so keep DV creation on the
-        // default path and disable metadata-row-index only for the read.
+        // This covers scan behavior over an existing DV. Keep the no-metadata-row-index
+        // path on Spark until the native path can prove the same contract for DML DVs.
         withSQLConf(DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX.key -> "false") {
           val df = spark.read.format("delta").load(path)
           val executedPlan = df.queryExecution.executedPlan
-          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.nonEmpty)
+          assert(executedPlan.collect { case _: DeltaScanTransformer => true }.isEmpty)
           checkAnswer(df, Seq((1, "a"), (2, "b")).toDF())
         }
     }
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
index 141505f306c..ebafb0c08c3 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/extension/OffloadDeltaScan.scala
@@ -24,12 +24,14 @@ import org.apache.spark.sql.delta.DeltaParquetFileFormat
 import org.apache.spark.sql.delta.SnapshotDescriptor
 import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable
 import org.apache.spark.sql.delta.files.TahoeFileIndex
-import org.apache.spark.sql.delta.sources.DeltaSQLConf
 import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
 import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
 import org.apache.spark.util.SparkVersionUtil
 
 case class OffloadDeltaScan() extends OffloadSingleNode {
+  private val DeletionVectorsUseMetadataRowIndexKey =
+    "spark.databricks.delta.deletionVectors.useMetadataRowIndex"
+
   override def offload(plan: SparkPlan): SparkPlan = plan match {
     case scan: FileSourceScanExec if isDeltaLogScan(scan) =>
       FallbackTags.add(scan, "fallback Delta _delta_log scan")
@@ -38,9 +40,8 @@ case class OffloadDeltaScan() extends OffloadSingleNode {
       FallbackTags.add(scan, "fallback Spark 3.4 Delta DV scan")
       scan
     case scan: FileSourceScanExec
-        if shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
-          scan) =>
-      FallbackTags.add(scan, "fallback Spark 3.5 Delta DV scan without metadata row index")
+        if shouldFallbackDeletionVectorScanWithoutMetadataRowIndex(scan) =>
+      FallbackTags.add(scan, "fallback Delta DV scan without metadata row index")
       scan
     case scan: FileSourceScanExec if isDeltaScan(scan) =>
       DeltaScanTransformer(scan)
@@ -78,18 +79,19 @@ case class OffloadDeltaScan() extends OffloadSingleNode {
     containsDeletionVector(scan)
   }
 
-  private def shouldFallbackSpark35DeletionVectorScanWithoutMetadataRowIndex(
+  private def shouldFallbackDeletionVectorScanWithoutMetadataRowIndex(
       scan: FileSourceScanExec): Boolean = {
-    if (!SparkVersionUtil.gteSpark35 || SparkVersionUtil.gteSpark40) {
+    if (!SparkVersionUtil.gteSpark35) {
       return false
     }
 
-    // Delta 3.3/Spark 3.5 DML tests force this path and rely on Spark's injected
+    // Delta DML tests force this path and rely on Spark's injected
     // row-index filter column for correctness. Keep it on Spark until the native path can
-    // prove the same contract for UPDATE-generated DVs.
+    // prove the same contract for DML-generated DVs.
     val useMetadataRowIndex =
-      scan.relation.sparkSession.sessionState.conf.getConf(
-        DeltaSQLConf.DELETION_VECTORS_USE_METADATA_ROW_INDEX)
+      scan.relation.sparkSession.sessionState.conf
+        .getConfString(DeletionVectorsUseMetadataRowIndexKey, "true")
+        .toBoolean
     !useMetadataRowIndex && containsDeletionVector(scan)
   }
 

From c6f05f0c535d53005816019a5df38830cd90b71c Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Tue, 26 May 2026 18:11:42 +0300
Subject: [PATCH 5/9] [VL][Delta] Simplify DV scan payload handoff

---
 .../backendsapi/velox/VeloxIteratorApi.scala  |  76 +----
 cpp/velox/compute/WholeStageResultIterator.cc |   8 +-
 .../execution/DeltaScanTransformer.scala      | 315 ------------------
 .../utils/DeltaDeletionVectorRegistry.scala   |  41 ---
 4 files changed, 3 insertions(+), 437 deletions(-)
 delete mode 100644 gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index 719bb8758cd..bd2bb12dc9e 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -26,7 +26,6 @@ import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.plan.PlanNode
 import org.apache.gluten.substrait.rel.{LocalFilesBuilder, LocalFilesNode, SplitInfo}
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
-import org.apache.gluten.utils.DeltaDeletionVectorRegistry
 import org.apache.gluten.vectorized._
 
 import org.apache.spark.{Partition, SparkConf, TaskContext}
@@ -41,17 +40,14 @@ import org.apache.spark.sql.utils.SparkInputMetricsUtil.InputMetricsWrapper
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.SparkDirectoryUtil
 
-import org.apache.hadoop.fs.Path
-
 import java.lang.{Long => JLong}
 import java.nio.ByteBuffer
 import java.nio.charset.StandardCharsets
 import java.time.ZoneOffset
-import java.util.{ArrayList => JArrayList, HashMap => JHashMap, UUID}
+import java.util.UUID
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.util.Try
 
 class VeloxIteratorApi extends IteratorApi with Logging {
   private val deltaMetadataUtilsClassName =
@@ -102,8 +98,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       .map(
         f => SparkShimLoader.getSparkShims.generateMetadataColumns(f, metadataColumnNames).asJava)
     val (otherMetadataColumns, deletionVectorPayloads) =
-      normalizeRegisteredDeltaSplitMetadata(partitionFiles, properties)
-        .orElse(normalizeDeltaSplitMetadata(partitionSchema.fields.length, partitionFiles))
+      normalizeDeltaSplitMetadata(partitionSchema.fields.length, partitionFiles)
         .getOrElse {
           (
             partitionFiles.map {
@@ -247,73 +242,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     }
   }
 
-  private def normalizeRegisteredDeltaSplitMetadata(
-      partitionFiles: Seq[PartitionedFile],
-      properties: Map[String, String])
-      : Option[(Seq[java.util.Map[String, Object]], Array[Array[Byte]])] = {
-    properties
-      .get(DeltaDeletionVectorRegistry.RegistryIdProperty)
-      .flatMap(DeltaDeletionVectorRegistry.get)
-      .flatMap {
-        registeredEntries =>
-          val normalizedMetadataColumns = new JArrayList[java.util.Map[String, Object]]()
-          val deletionVectorPayloads = mutable.ArrayBuffer.empty[Array[Byte]]
-          var matchedDeletionVectors = 0
-          partitionFiles.foreach {
-            file =>
-              val metadata = new JHashMap[String, Object]()
-              val baseMetadata =
-                SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(file)
-              if (baseMetadata != null) {
-                metadata.putAll(baseMetadata)
-              }
-              lookupRegisteredDeltaDeletionVector(file, registeredEntries).foreach {
-                entry =>
-                  metadata.put("delta_dv_cardinality", Long.box(entry.cardinality))
-                  metadata.put("row_index_filter_type", entry.filterType)
-                  metadata.put("delta_dv_payload_index", Int.box(deletionVectorPayloads.length))
-                  deletionVectorPayloads += entry.payload
-                  matchedDeletionVectors += 1
-              }
-              normalizedMetadataColumns.add(metadata)
-          }
-          if (matchedDeletionVectors == 0) {
-            None
-          } else {
-            Some((normalizedMetadataColumns.asScala.toSeq, deletionVectorPayloads.toArray))
-          }
-      }
-  }
-
-  private def lookupRegisteredDeltaDeletionVector(
-      file: PartitionedFile,
-      registeredEntries: Map[String, DeltaDeletionVectorRegistry.Entry])
-      : Option[DeltaDeletionVectorRegistry.Entry] = {
-    deltaDeletionVectorPathCandidates(file).iterator
-      .map(registeredEntries.get)
-      .collectFirst { case Some(entry) => entry }
-  }
-
-  private def deltaDeletionVectorPathCandidates(file: PartitionedFile): Seq[String] = {
-    val rawPath = unescapePathName(file.filePath.toString)
-    val path = new Path(rawPath)
-    val pathUri = partitionedFilePathUri(file)
-    Seq(
-      pathUri.map(_.toASCIIString),
-      pathUri.map(_.getPath),
-      Some(rawPath),
-      Some(path.toUri.toASCIIString),
-      Some(path.toUri.getPath),
-      Some(rawPath.stripPrefix("/"))
-    ).flatten
-      .map(DeltaDeletionVectorRegistry.normalizePathKey(_))
-      .filter(_.nonEmpty)
-      .distinct
-  }
-
-  private def partitionedFilePathUri(file: PartitionedFile): Option[java.net.URI] =
-    Try(file.getClass.getMethod("pathUri").invoke(file).asInstanceOf[java.net.URI]).toOption
-
   /** Generate Iterator[ColumnarBatch] for first stage. */
   override def genFirstStageIterator(
       inputPartition: BaseGlutenPartition,
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index 990451ec410..46ac12b841d 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -15,7 +15,6 @@
  * limitations under the License.
  */
 #include "WholeStageResultIterator.h"
-#include <algorithm>
 #include <optional>
 #include "VeloxBackend.h"
 #include "VeloxPlanConverter.h"
@@ -232,11 +231,6 @@ WholeStageResultIterator::WholeStageResultIterator(
     const auto& metadataColumns = scanInfo->metadataColumns;
     const auto scanNodeConnectorId = connectorIdForScanNode(veloxPlan_, scanNodeIds_[scanInfoIdx]);
     const bool isDeltaScan = scanNodeConnectorId == connectorIds_.delta || isDeltaScanInfo(scanInfo);
-    const auto deltaMetadataFiles = std::count_if(
-        metadataColumns.begin(), metadataColumns.end(), [](const auto& metadata) { return isDeltaMetadata(metadata); });
-    LOG(INFO) << "WholeStageResultIterator scanInfo[" << scanInfoIdx << "] nodeId=" << scanNodeIds_[scanInfoIdx]
-              << " files=" << paths.size() << " connectorId=" << scanNodeConnectorId << " isDeltaScan=" << isDeltaScan
-              << " deltaMetadataFiles=" << deltaMetadataFiles;
 #ifdef GLUTEN_ENABLE_GPU
     // Under the pre-condition that all the split infos has same partition column and format.
     const auto canUseCudfConnector = scanInfo->canUseCudfConnector();
@@ -290,7 +284,7 @@ WholeStageResultIterator::WholeStageResultIterator(
             metadataColumn,
             properties[idx]);
       } else {
-        auto connectorId = scanNodeConnectorId.empty() ? connectorIds_.hive : scanNodeConnectorId;
+        auto connectorId = connectorIds_.hive;
 #ifdef GLUTEN_ENABLE_GPU
         if (connectorId == connectorIds_.hive && canUseCudfConnector && enableCudf_ &&
             veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault)) {
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala b/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
index 0ed2f3a36e4..5f2a5300130 100644
--- a/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
+++ b/gluten-delta/src/main/scala/org/apache/gluten/execution/DeltaScanTransformer.scala
@@ -18,28 +18,16 @@ package org.apache.gluten.execution
 
 import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
-import org.apache.gluten.utils.DeltaDeletionVectorRegistry
 
-import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.connector.read.streaming.SparkDataStream
-import org.apache.spark.sql.delta.actions.AddFile
-import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
-import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArrayFormat, StoredBitmap}
-import org.apache.spark.sql.delta.stats.PreparedDeltaFileIndex
-import org.apache.spark.sql.delta.storage.dv.HadoopFileSystemDVStore
 import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.datasources.HadoopFsRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.collection.BitSet
 
-import org.apache.hadoop.fs.Path
-
-import scala.collection.mutable.ListBuffer
-import scala.util.control.NonFatal
-
 case class DeltaScanTransformer(
     @transient override val relation: HadoopFsRelation,
     @transient stream: Option[SparkDataStream],
@@ -67,32 +55,6 @@ case class DeltaScanTransformer(
 
   override lazy val fileFormat: ReadFileFormat = ReadFileFormat.ParquetReadFormat
 
-  private lazy val deltaDeletionVectorRegistration
-      : DeltaScanTransformer.DeletionVectorRegistration =
-    DeltaScanTransformer.registerDeletionVectorsFromFileFormat(relation)
-
-  private lazy val deltaDeletionVectorRegistryId: Option[String] =
-    deltaDeletionVectorRegistration.registryId
-
-  override protected def doValidateInternal(): ValidationResult = {
-    val validationResult = super.doValidateInternal()
-    if (!validationResult.ok()) {
-      return validationResult
-    }
-
-    if (!deltaDeletionVectorRegistration.isValid) {
-      return ValidationResult.failed(deltaDeletionVectorRegistration.failureReason)
-    }
-
-    ValidationResult.succeeded
-  }
-
-  override def getProperties: Map[String, String] = {
-    super.getProperties ++ deltaDeletionVectorRegistryId
-      .map(DeltaDeletionVectorRegistry.RegistryIdProperty -> _)
-      .toMap
-  }
-
   override def doCanonicalize(): DeltaScanTransformer = {
     DeltaScanTransformer(
       relation,
@@ -116,283 +78,6 @@ case class DeltaScanTransformer(
 }
 
 object DeltaScanTransformer {
-  private val IfContainedFilterType = "IF_CONTAINED"
-
-  private[execution] case class DeletionVectorRegistration(
-      attempted: Boolean,
-      deletionVectorCount: Int,
-      registryId: Option[String],
-      failures: Seq[String]) {
-    def isValid: Boolean = failures.isEmpty && (deletionVectorCount == 0 || registryId.nonEmpty)
-
-    def failureReason: String = {
-      val details =
-        if (failures.isEmpty) {
-          "no deletion vector payloads were registered"
-        } else {
-          failures.take(3).mkString("; ")
-        }
-      s"Unable to materialize Delta deletion vector payloads for native scan: $details"
-    }
-  }
-
-  private val NotAttemptedDeletionVectorRegistration =
-    DeletionVectorRegistration(
-      attempted = false,
-      deletionVectorCount = 0,
-      registryId = None,
-      failures = Nil)
-
-  private def registerDeletionVectorsFromFileFormat(
-      relation: HadoopFsRelation): DeletionVectorRegistration = {
-    val broadcastRegistration = registerDeletionVectorsFromBroadcastMap(relation)
-    if (broadcastRegistration.attempted) {
-      broadcastRegistration
-    } else {
-      registerDeletionVectorsFromPreparedScan(relation)
-    }
-  }
-
-  private def registerDeletionVectorsFromBroadcastMap(
-      relation: HadoopFsRelation): DeletionVectorRegistration = {
-    val format = relation.fileFormat
-    val broadcastDvMap: Option[scala.collection.Map[_, _]] =
-      try {
-        Option(format.getClass.getMethod("broadcastDvMap").invoke(format))
-          .collect { case o: Option[_] => o }
-          .flatten
-          .collect { case b: Broadcast[_] => b.value }
-          .collect { case m: scala.collection.Map[_, _] => m }
-      } catch {
-        case _: NoSuchMethodException =>
-          None
-        case NonFatal(e) =>
-          return DeletionVectorRegistration(
-            attempted = true,
-            deletionVectorCount = 1,
-            registryId = None,
-            failures =
-              Seq(s"failed to read Delta deletion vector broadcast map: ${errorMessage(e)}"))
-      }
-
-    val uriToDvDescriptor: Map[java.net.URI, Any] =
-      broadcastDvMap
-        .map {
-          _.asInstanceOf[scala.collection.Map[Any, Any]]
-            .collect { case (uri: java.net.URI, value) => uri -> value }
-            .toMap
-        }
-        .getOrElse(Map.empty[java.net.URI, Any])
-
-    if (uriToDvDescriptor.isEmpty) {
-      return NotAttemptedDeletionVectorRegistration
-    }
-
-    val tablePath = tablePathFromFileFormat(relation).orNull
-    if (tablePath == null) {
-      return DeletionVectorRegistration(
-        attempted = true,
-        deletionVectorCount = uriToDvDescriptor.size,
-        registryId = None,
-        failures = Seq("unable to resolve Delta table path for broadcast deletion vectors")
-      )
-    }
-
-    val dvStore = new HadoopFileSystemDVStore(relation.sparkSession.sessionState.newHadoopConf())
-    val registeredEntries = ListBuffer.empty[(String, DeltaDeletionVectorRegistry.Entry)]
-    val failures = ListBuffer.empty[String]
-    uriToDvDescriptor.foreach {
-      case (uri, dvDescriptorWithFilterType) =>
-        try {
-          val descriptor = dvDescriptorWithFilterType.getClass
-            .getMethod("descriptor")
-            .invoke(dvDescriptorWithFilterType)
-            .asInstanceOf[DeletionVectorDescriptor]
-          val filterType = dvDescriptorWithFilterType.getClass
-            .getMethod("filterType")
-            .invoke(dvDescriptorWithFilterType)
-            .toString
-          val payload = materializePayload(descriptor, tablePath, dvStore)
-          val aliases = pathAliases(uri)
-          if (aliases.isEmpty) {
-            failures += s"no file path aliases for deletion vector $uri"
-          } else {
-            registeredEntries ++= aliases.map {
-              _ -> DeltaDeletionVectorRegistry.Entry(descriptor.cardinality, filterType, payload)
-            }
-          }
-        } catch {
-          case NonFatal(e) =>
-            failures += s"$uri: ${errorMessage(e)}"
-        }
-    }
-
-    deletionVectorRegistration(
-      attempted = true,
-      deletionVectorCount = uriToDvDescriptor.size,
-      registeredEntries = registeredEntries.toSeq,
-      failures = failures.toSeq)
-  }
-
-  private def tablePathFromFileFormat(relation: HadoopFsRelation): Option[Path] = {
-    val tablePathFromFormat =
-      try {
-        Option(relation.fileFormat.getClass.getMethod("tablePath").invoke(relation.fileFormat))
-          .collect { case o: Option[_] => o }
-          .flatten
-          .map(_.toString)
-      } catch {
-        case _: NoSuchMethodException => None
-        case NonFatal(_) => None
-      }
-    tablePathFromFormat
-      .orElse(relation.location.rootPaths.headOption.map(_.toString))
-      .map(new Path(_))
-  }
-
-  private def materializePayload(
-      descriptor: DeletionVectorDescriptor,
-      tablePath: Path,
-      dvStore: HadoopFileSystemDVStore): Array[Byte] = {
-    StoredBitmap
-      .create(descriptor, tablePath)
-      .load(dvStore)
-      .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
-  }
-
-  private def deletionVectorRegistration(
-      attempted: Boolean,
-      deletionVectorCount: Int,
-      registeredEntries: Seq[(String, DeltaDeletionVectorRegistry.Entry)],
-      failures: Seq[String]): DeletionVectorRegistration = {
-    val registryId =
-      if (registeredEntries.isEmpty) {
-        None
-      } else {
-        Some(DeltaDeletionVectorRegistry.register(registeredEntries.toMap))
-      }
-    DeletionVectorRegistration(attempted, deletionVectorCount, registryId, failures)
-  }
-
-  private def registerDeletionVectorsFromPreparedScan(
-      relation: HadoopFsRelation): DeletionVectorRegistration = {
-    relation.location match {
-      case preparedIndex: PreparedDeltaFileIndex =>
-        val tablePath =
-          Option(preparedIndex.path)
-            .orElse(relation.location.rootPaths.headOption)
-            .orNull
-        if (tablePath == null) {
-          return DeletionVectorRegistration(
-            attempted = true,
-            deletionVectorCount = preparedIndex.preparedScan.files.count(_.deletionVector != null),
-            registryId = None,
-            failures = Seq("unable to resolve Delta table path for prepared deletion vector scan")
-          )
-        }
-
-        val dvStore =
-          new HadoopFileSystemDVStore(relation.sparkSession.sessionState.newHadoopConf())
-        val preparedFiles = preparedIndex.preparedScan.files
-        registerDeletionVectorsFromAddFiles(preparedFiles.iterator, tablePath, dvStore)
-      case _ =>
-        NotAttemptedDeletionVectorRegistration
-    }
-  }
-
-  private def registerDeletionVectorsFromAddFiles(
-      files: Iterator[AddFile],
-      tablePath: Path,
-      dvStore: HadoopFileSystemDVStore): DeletionVectorRegistration = {
-    val registeredEntries = ListBuffer.empty[(String, DeltaDeletionVectorRegistry.Entry)]
-    val failures = ListBuffer.empty[String]
-    var deletionVectorCount = 0
-
-    files.foreach {
-      addFile =>
-        Option(addFile.deletionVector).foreach {
-          descriptor =>
-            deletionVectorCount += 1
-            try {
-              val payload = materializePayload(descriptor, tablePath, dvStore)
-              val absolutePath = new Path(tablePath, addFile.path)
-              val aliases = pathAliases(absolutePath.toUri, absolutePath.toString)
-              if (aliases.isEmpty) {
-                failures += s"no file path aliases for deletion vector ${addFile.path}"
-              } else {
-                registeredEntries ++= aliases.map {
-                  _ -> DeltaDeletionVectorRegistry.Entry(
-                    descriptor.cardinality,
-                    IfContainedFilterType,
-                    payload)
-                }
-              }
-            } catch {
-              case NonFatal(e) =>
-                failures += s"${addFile.path}: ${errorMessage(e)}"
-            }
-        }
-    }
-
-    deletionVectorRegistration(
-      attempted = deletionVectorCount > 0,
-      deletionVectorCount = deletionVectorCount,
-      registeredEntries = registeredEntries.toSeq,
-      failures = failures.toSeq)
-  }
-
-  private def errorMessage(error: Throwable): String = {
-    val message = Option(error.getMessage).filter(_.nonEmpty).getOrElse(error.toString)
-    s"${error.getClass.getSimpleName}: $message"
-  }
-
-  private def pathAliases(uri: java.net.URI, extraAliases: String*): Seq[String] = {
-    val decodedExtraAliases = extraAliases.map(percentUnescapePathName)
-    (Seq(uri.toASCIIString, uri.getPath, Option(uri.getPath).map(_.stripPrefix("/")).orNull) ++
-      extraAliases ++
-      decodedExtraAliases ++
-      extraAliases.map(_.stripPrefix("/")) ++
-      decodedExtraAliases.map(_.stripPrefix("/")))
-      .filter(_ != null)
-      .map(DeltaDeletionVectorRegistry.normalizePathKey)
-      .filter(_.nonEmpty)
-      .distinct
-  }
-
-  private def percentUnescapePathName(path: String): String = {
-    if (path == null || path.isEmpty) {
-      return path
-    }
-    var plaintextEndIdx = path.indexOf('%')
-    val length = path.length
-    if (plaintextEndIdx == -1 || plaintextEndIdx + 2 >= length) {
-      path
-    } else {
-      val sb = new java.lang.StringBuilder(length)
-      var plaintextStartIdx = 0
-      while (plaintextEndIdx != -1 && plaintextEndIdx + 2 < length) {
-        if (plaintextEndIdx > plaintextStartIdx) sb.append(path, plaintextStartIdx, plaintextEndIdx)
-        if (
-          java.lang.Character.digit(path.charAt(plaintextEndIdx + 1), 16) != -1 &&
-          java.lang.Character.digit(path.charAt(plaintextEndIdx + 2), 16) != -1
-        ) {
-          sb.append(
-            ((java.lang.Character.digit(path.charAt(plaintextEndIdx + 1), 16) << 4) |
-              java.lang.Character.digit(path.charAt(plaintextEndIdx + 2), 16)).toChar)
-          plaintextStartIdx = plaintextEndIdx + 3
-        } else {
-          sb.append('%')
-          plaintextStartIdx = plaintextEndIdx + 1
-        }
-        plaintextEndIdx = path.indexOf('%', plaintextStartIdx)
-      }
-      if (plaintextStartIdx < length) {
-        sb.append(path, plaintextStartIdx, length)
-      }
-      sb.toString
-    }
-  }
-
   def apply(scanExec: FileSourceScanExec): DeltaScanTransformer = {
     new DeltaScanTransformer(
       scanExec.relation,
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala
deleted file mode 100644
index a6f9bcb9612..00000000000
--- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DeltaDeletionVectorRegistry.scala
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gluten.utils
-
-import java.util.UUID
-import java.util.concurrent.ConcurrentHashMap
-
-object DeltaDeletionVectorRegistry {
-  val RegistryIdProperty = "gluten.delta.dv.registry.id"
-
-  final case class Entry(cardinality: Long, filterType: String, payload: Array[Byte])
-    extends Serializable
-
-  private val registry = new ConcurrentHashMap[String, Map[String, Entry]]()
-
-  def register(entries: Map[String, Entry]): String = {
-    val id = UUID.randomUUID().toString
-    registry.put(id, entries)
-    id
-  }
-
-  def get(id: String): Option[Map[String, Entry]] = Option(registry.get(id))
-
-  def normalizePathKey(path: String): String = {
-    path.replace('\\', '/').stripSuffix("/")
-  }
-}

From fa2833bf6471092fa630924e17e847177af8cab3 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Wed, 27 May 2026 15:39:17 +0300
Subject: [PATCH 6/9] [VL][Delta] Type DV scan split metadata

---
 .../velox/VeloxDeltaMetadataUtils.scala       |  53 +++++---
 .../sql/delta/stats/PrepareDeltaScan.scala    |  10 +-
 .../DeltaDeletionVectorHandoffSuite.scala     |  15 ++-
 .../velox/VeloxDeltaMetadataUtils.scala       |  53 +++++---
 .../DeltaDeletionVectorHandoffSuite.scala     |  15 ++-
 .../backendsapi/velox/VeloxIteratorApi.scala  |  42 +++++-
 cpp/velox/compute/VeloxPlanConverter.cc       |  65 ++++++---
 cpp/velox/compute/WholeStageResultIterator.cc |  58 +++-----
 cpp/velox/compute/delta/DeltaSplitInfo.h      |  42 ++++++
 cpp/velox/substrait/SubstraitToVeloxPlan.cc   |   9 +-
 cpp/velox/substrait/SubstraitToVeloxPlan.h    |   3 -
 .../vectorized/ColumnarBatchOutIterator.java  |  10 +-
 .../vectorized/NativePlanEvaluator.java       |   4 +-
 .../substrait/rel/DeltaLocalFilesBuilder.java |  56 ++++++++
 .../substrait/rel/DeltaLocalFilesNode.java    | 127 ++++++++++++++++++
 .../substrait/proto/substrait/algebra.proto   |  13 ++
 16 files changed, 454 insertions(+), 121 deletions(-)
 create mode 100644 cpp/velox/compute/delta/DeltaSplitInfo.h
 create mode 100644 gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesBuilder.java
 create mode 100644 gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java

diff --git a/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
index 699fe2d61b0..bd13f89681f 100644
--- a/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
+++ b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.backendsapi.velox
 
 import org.apache.gluten.backendsapi.velox.VeloxIteratorApi.unescapePathName
 import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.{DeltaFileReadOptions, RowIndexFilterType}
 
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
@@ -34,15 +35,14 @@ import scala.util.Try
 import scala.util.control.NonFatal
 
 object VeloxDeltaMetadataUtils {
-  val DeltaDvCardinality = "delta_dv_cardinality"
-  val DeltaDvPayloadIndex = "delta_dv_payload_index"
-
   private val RowIndexFilterIdEncoded = "row_index_filter_id_encoded"
-  private val RowIndexFilterType = "row_index_filter_type"
+  private val RowIndexFilterTypeKey = "row_index_filter_type"
   private val RowIndexFilterTypeIfContained = "IF_CONTAINED"
+  private val RowIndexFilterTypeIfNotContained = "IF_NOT_CONTAINED"
 
   final class NormalizedSplitMetadata(
       val otherMetadataColumns: JList[JMap[String, Object]],
+      val deltaReadOptions: JList[DeltaFileReadOptions],
       val deletionVectorPayloads: Array[Array[Byte]])
     extends Serializable
 
@@ -68,26 +68,31 @@ object VeloxDeltaMetadataUtils {
       .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
   }
 
-  private def normalizeMetadataWithDescriptor(
-      metadata: JMap[String, Object],
-      descriptor: DeletionVectorDescriptor): JMap[String, Object] = {
+  private def normalizeMetadata(metadata: JMap[String, Object]): JMap[String, Object] = {
     val normalized = new JHashMap[String, Object]()
     if (metadata != null) {
       normalized.putAll(metadata)
     }
-    normalized.put(DeltaDvCardinality, Long.box(descriptor.cardinality))
     normalized.remove(RowIndexFilterIdEncoded)
-    if (!normalized.containsKey(RowIndexFilterType)) {
-      normalized.put(RowIndexFilterType, RowIndexFilterTypeIfContained)
-    }
+    normalized.remove(RowIndexFilterTypeKey)
     normalized
   }
 
+  private def parseRowIndexFilterType(
+      metadata: JMap[String, Object]): RowIndexFilterType = {
+    Option(metadata.get(RowIndexFilterTypeKey)).map(_.toString) match {
+      case Some(RowIndexFilterTypeIfContained) => RowIndexFilterType.IF_CONTAINED
+      case Some(RowIndexFilterTypeIfNotContained) => RowIndexFilterType.IF_NOT_CONTAINED
+      case _ => RowIndexFilterType.KEEP_ALL
+    }
+  }
+
   def normalizeSplitMetadata(
       partitionColumnCount: Int,
       files: JList[PartitionedFile]): NormalizedSplitMetadata = {
     val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
     val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
+    val deltaReadOptions = new JArrayList[DeltaFileReadOptions](files.size())
     val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
 
     files.asScala.foreach {
@@ -100,21 +105,37 @@ object VeloxDeltaMetadataUtils {
         }
 
         val descriptor = decodeDescriptor(metadataWithDecodedPayload)
+        val rowIndexFilterType = parseRowIndexFilterType(metadataWithDecodedPayload)
+        val normalizedMetadata = normalizeMetadata(metadataWithDecodedPayload)
 
         descriptor match {
           case Some(descriptor) =>
-            val normalized = normalizeMetadataWithDescriptor(metadataWithDecodedPayload, descriptor)
             val payloadTablePath = resolveTablePath(partitionColumnCount, file)
             val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
-            normalized.put(DeltaDvPayloadIndex, Int.box(deletionVectorPayloads.length))
+            deltaReadOptions.add(
+              new DeltaFileReadOptions(
+                rowIndexFilterType,
+                true,
+                descriptor.cardinality,
+                deletionVectorPayloads.length))
             deletionVectorPayloads += serializedPayload
-            normalizedMetadataColumns.add(normalized)
+            normalizedMetadataColumns.add(normalizedMetadata)
           case None =>
-            normalizedMetadataColumns.add(metadataWithDecodedPayload)
+            deltaReadOptions.add(
+              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, -1))
+            normalizedMetadataColumns.add(normalizedMetadata)
         }
     }
 
-    new NormalizedSplitMetadata(normalizedMetadataColumns, deletionVectorPayloads.toArray)
+    val deltaOptions = if (deletionVectorPayloads.nonEmpty) {
+      deltaReadOptions
+    } else {
+      new JArrayList[DeltaFileReadOptions]()
+    }
+    new NormalizedSplitMetadata(
+      normalizedMetadataColumns,
+      deltaOptions,
+      deletionVectorPayloads.toArray)
   }
 
   private def activeSpark: SparkSession = {
diff --git a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
index 37a46147432..326df10bae3 100644
--- a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
+++ b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
@@ -22,7 +22,15 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.delta.{DeltaTable, OptimisticTransaction, PreprocessTableWithDVs}
 import org.apache.spark.sql.delta.sources.DeltaSQLConf
 
-/** Shadow Delta's PrepareDeltaScan to inject backend-specific DV preprocessing. */
+/**
+ * Delta 3.3 compatibility shim around Delta's PrepareDeltaScan.
+ *
+ * This preserves Delta's normal scan preparation first, including stats skipping, metadata-query
+ * optimization, and transaction read tracking. After that, Gluten runs DV preprocessing so the scan
+ * exposes Delta's internal DV row-deleted column and row-index metadata. The native physical rule
+ * later strips Spark's synthetic DV predicate once the materialized DV payload is attached to the
+ * native split, so Velox applies the DV filter exactly once.
+ */
 class PrepareDeltaScan(protected val spark: SparkSession)
   extends Rule[LogicalPlan]
   with PrepareDeltaScanBase
diff --git a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index 9a73b97d573..dbd226814cf 100644
--- a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.delta
 
 import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils
-import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils.{DeltaDvCardinality, DeltaDvPayloadIndex}
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.{RowIndexFilterType => GlutenRowIndexFilterType}
 
 import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.QueryTest
@@ -76,12 +76,16 @@ class DeltaDeletionVectorHandoffSuite
           partitionColumnCount = 0,
           files = Seq(partitionedFile).asJava)
         val metadata = normalized.otherMetadataColumns.get(0)
+        val deltaReadOptions = normalized.deltaReadOptions.get(0)
 
         assert(normalized.deletionVectorPayloads.length == 1)
         assert(normalized.deletionVectorPayloads.head.nonEmpty)
-        assert(metadata.get(DeltaDvPayloadIndex) == Int.box(0))
-        assert(metadata.get(DeltaDvCardinality) == Long.box(dataFile.deletionVector.cardinality))
+        assert(deltaReadOptions.hasDeletionVector())
+        assert(deltaReadOptions.deletionVectorPayloadIndex() == 0)
+        assert(deltaReadOptions.deletionVectorCardinality() == dataFile.deletionVector.cardinality)
+        assert(deltaReadOptions.rowIndexFilterType() == GlutenRowIndexFilterType.IF_CONTAINED)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
     }
   }
 
@@ -117,8 +121,9 @@ class DeltaDeletionVectorHandoffSuite
         val metadata = normalized.otherMetadataColumns.get(0)
 
         assert(normalized.deletionVectorPayloads.isEmpty)
-        assert(!metadata.containsKey(DeltaDvPayloadIndex))
-        assert(!metadata.containsKey(DeltaDvCardinality))
+        assert(normalized.deltaReadOptions.isEmpty)
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
     }
   }
 }
diff --git a/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
index 335b88e7fd7..00df6e602fb 100644
--- a/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
+++ b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.backendsapi.velox
 
 import org.apache.gluten.backendsapi.velox.VeloxIteratorApi.unescapePathName
 import org.apache.gluten.sql.shims.SparkShimLoader
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.{DeltaFileReadOptions, RowIndexFilterType}
 
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor
@@ -34,15 +35,14 @@ import scala.util.Try
 import scala.util.control.NonFatal
 
 object VeloxDeltaMetadataUtils {
-  val DeltaDvCardinality = "delta_dv_cardinality"
-  val DeltaDvPayloadIndex = "delta_dv_payload_index"
-
   private val RowIndexFilterIdEncoded = "row_index_filter_id_encoded"
-  private val RowIndexFilterType = "row_index_filter_type"
+  private val RowIndexFilterTypeKey = "row_index_filter_type"
   private val RowIndexFilterTypeIfContained = "IF_CONTAINED"
+  private val RowIndexFilterTypeIfNotContained = "IF_NOT_CONTAINED"
 
   final class NormalizedSplitMetadata(
       val otherMetadataColumns: JList[JMap[String, Object]],
+      val deltaReadOptions: JList[DeltaFileReadOptions],
       val deletionVectorPayloads: Array[Array[Byte]])
     extends Serializable
 
@@ -83,26 +83,31 @@ object VeloxDeltaMetadataUtils {
       .serializeAsByteArray(RoaringBitmapArrayFormat.Portable)
   }
 
-  private def normalizeMetadataWithDescriptor(
-      metadata: JMap[String, Object],
-      descriptor: DeletionVectorDescriptor): JMap[String, Object] = {
+  private def normalizeMetadata(metadata: JMap[String, Object]): JMap[String, Object] = {
     val normalized = new JHashMap[String, Object]()
     if (metadata != null) {
       normalized.putAll(metadata)
     }
-    normalized.put(DeltaDvCardinality, Long.box(descriptor.cardinality))
     normalized.remove(RowIndexFilterIdEncoded)
-    if (!normalized.containsKey(RowIndexFilterType)) {
-      normalized.put(RowIndexFilterType, RowIndexFilterTypeIfContained)
-    }
+    normalized.remove(RowIndexFilterTypeKey)
     normalized
   }
 
+  private def parseRowIndexFilterType(
+      metadata: JMap[String, Object]): RowIndexFilterType = {
+    Option(metadata.get(RowIndexFilterTypeKey)).map(_.toString) match {
+      case Some(RowIndexFilterTypeIfContained) => RowIndexFilterType.IF_CONTAINED
+      case Some(RowIndexFilterTypeIfNotContained) => RowIndexFilterType.IF_NOT_CONTAINED
+      case _ => RowIndexFilterType.KEEP_ALL
+    }
+  }
+
   def normalizeSplitMetadata(
       partitionColumnCount: Int,
       files: JList[PartitionedFile]): NormalizedSplitMetadata = {
     val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
     val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
+    val deltaReadOptions = new JArrayList[DeltaFileReadOptions](files.size())
     val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
 
     files.asScala.foreach {
@@ -115,21 +120,37 @@ object VeloxDeltaMetadataUtils {
         }
 
         val descriptor = decodeDescriptor(metadataWithDecodedPayload)
+        val rowIndexFilterType = parseRowIndexFilterType(metadataWithDecodedPayload)
+        val normalizedMetadata = normalizeMetadata(metadataWithDecodedPayload)
 
         descriptor match {
           case Some(descriptor) =>
-            val normalized = normalizeMetadataWithDescriptor(metadataWithDecodedPayload, descriptor)
             val payloadTablePath = resolveTablePath(partitionColumnCount, file)
             val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
-            normalized.put(DeltaDvPayloadIndex, Int.box(deletionVectorPayloads.length))
+            deltaReadOptions.add(
+              new DeltaFileReadOptions(
+                rowIndexFilterType,
+                true,
+                descriptor.cardinality,
+                deletionVectorPayloads.length))
             deletionVectorPayloads += serializedPayload
-            normalizedMetadataColumns.add(normalized)
+            normalizedMetadataColumns.add(normalizedMetadata)
           case None =>
-            normalizedMetadataColumns.add(metadataWithDecodedPayload)
+            deltaReadOptions.add(
+              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, -1))
+            normalizedMetadataColumns.add(normalizedMetadata)
         }
     }
 
-    new NormalizedSplitMetadata(normalizedMetadataColumns, deletionVectorPayloads.toArray)
+    val deltaOptions = if (deletionVectorPayloads.nonEmpty) {
+      deltaReadOptions
+    } else {
+      new JArrayList[DeltaFileReadOptions]()
+    }
+    new NormalizedSplitMetadata(
+      normalizedMetadataColumns,
+      deltaOptions,
+      deletionVectorPayloads.toArray)
   }
 
   private def activeSpark: SparkSession = {
diff --git a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index e550df266cf..061619252a1 100644
--- a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -17,8 +17,8 @@
 package org.apache.spark.sql.delta
 
 import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils
-import org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils.{DeltaDvCardinality, DeltaDvPayloadIndex}
 import org.apache.gluten.execution.DeltaScanTransformer
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.{RowIndexFilterType => GlutenRowIndexFilterType}
 
 import org.apache.spark.paths.SparkPath
 import org.apache.spark.sql.QueryTest
@@ -107,12 +107,16 @@ class DeltaDeletionVectorHandoffSuite
           partitionColumnCount = 0,
           files = Seq(partitionedFile).asJava)
         val metadata = normalized.otherMetadataColumns.get(0)
+        val deltaReadOptions = normalized.deltaReadOptions.get(0)
 
         assert(normalized.deletionVectorPayloads.length == 1)
         assert(normalized.deletionVectorPayloads.head.nonEmpty)
-        assert(metadata.get(DeltaDvPayloadIndex) == Int.box(0))
-        assert(metadata.get(DeltaDvCardinality) == Long.box(dataFile.deletionVector.cardinality))
+        assert(deltaReadOptions.hasDeletionVector())
+        assert(deltaReadOptions.deletionVectorPayloadIndex() == 0)
+        assert(deltaReadOptions.deletionVectorCardinality() == dataFile.deletionVector.cardinality)
+        assert(deltaReadOptions.rowIndexFilterType() == GlutenRowIndexFilterType.IF_CONTAINED)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
 
         val df = spark.read.format("delta").load(path)
         checkAnswer(df, Seq((1, "a"), (2, "b")).toDF())
@@ -151,8 +155,9 @@ class DeltaDeletionVectorHandoffSuite
         val metadata = normalized.otherMetadataColumns.get(0)
 
         assert(normalized.deletionVectorPayloads.isEmpty)
-        assert(!metadata.containsKey(DeltaDvPayloadIndex))
-        assert(!metadata.containsKey(DeltaDvCardinality))
+        assert(normalized.deltaReadOptions.isEmpty)
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
+        assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
     }
   }
 }
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index bd2bb12dc9e..4245c4baf88 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -24,7 +24,8 @@ import org.apache.gluten.iterator.Iterators
 import org.apache.gluten.metrics.{IMetrics, IteratorMetricsJniWrapper}
 import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.plan.PlanNode
-import org.apache.gluten.substrait.rel.{LocalFilesBuilder, LocalFilesNode, SplitInfo}
+import org.apache.gluten.substrait.rel.{DeltaLocalFilesBuilder, LocalFilesBuilder, LocalFilesNode, SplitInfo}
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.DeltaFileReadOptions
 import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
 import org.apache.gluten.vectorized._
 
@@ -50,6 +51,9 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 class VeloxIteratorApi extends IteratorApi with Logging {
+  private type NormalizedDeltaSplitMetadata =
+    (Seq[java.util.Map[String, Object]], Seq[DeltaFileReadOptions], Array[Array[Byte]])
+
   private val deltaMetadataUtilsClassName =
     "org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils$"
 
@@ -97,17 +101,34 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     val metadataColumns = partitionFiles
       .map(
         f => SparkShimLoader.getSparkShims.generateMetadataColumns(f, metadataColumnNames).asJava)
-    val (otherMetadataColumns, deletionVectorPayloads) =
+    val (otherMetadataColumns, deltaReadOptions, deletionVectorPayloads) =
       normalizeDeltaSplitMetadata(partitionSchema.fields.length, partitionFiles)
         .getOrElse {
           (
             partitionFiles.map {
               f => SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(f)
             },
+            Seq.empty[DeltaFileReadOptions],
             Array.empty[Array[Byte]])
         }
 
-    val localFiles = setFileSchemaForLocalFiles(
+    val localFilesNode = if (deltaReadOptions.nonEmpty) {
+      DeltaLocalFilesBuilder.makeDeltaLocalFiles(
+        partitionIndex,
+        paths.asJava,
+        starts.asJava,
+        lengths.asJava,
+        fileSizes.asJava,
+        modificationTimes.asJava,
+        partitionColumns.map(_.asJava).asJava,
+        metadataColumns.asJava,
+        fileFormat,
+        locations.toList.asJava,
+        mapAsJavaMap(properties),
+        otherMetadataColumns.asJava,
+        deltaReadOptions.asJava
+      )
+    } else {
       LocalFilesBuilder.makeLocalFiles(
         partitionIndex,
         paths.asJava,
@@ -121,7 +142,11 @@ class VeloxIteratorApi extends IteratorApi with Logging {
         locations.toList.asJava,
         mapAsJavaMap(properties),
         otherMetadataColumns.asJava
-      ),
+      )
+    }
+
+    val localFiles = setFileSchemaForLocalFiles(
+      localFilesNode,
       dataSchema,
       fileFormat
     )
@@ -215,8 +240,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
 
   private def normalizeDeltaSplitMetadata(
       partitionColumnCount: Int,
-      partitionFiles: Seq[PartitionedFile])
-      : Option[(Seq[java.util.Map[String, Object]], Array[Array[Byte]])] = {
+      partitionFiles: Seq[PartitionedFile]): Option[NormalizedDeltaSplitMetadata] = {
     try {
       // scalastyle:off classforname
       val moduleClass = Class.forName(deltaMetadataUtilsClassName)
@@ -227,6 +251,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       val normalized =
         normalizeMethod.invoke(module, Int.box(partitionColumnCount), partitionFiles.asJava)
       val metadataMethod = normalized.getClass.getMethod("otherMetadataColumns")
+      val deltaOptionsMethod = normalized.getClass.getMethod("deltaReadOptions")
       val payloadsMethod = normalized.getClass.getMethod("deletionVectorPayloads")
       Some(
         metadataMethod
@@ -234,6 +259,11 @@ class VeloxIteratorApi extends IteratorApi with Logging {
           .asInstanceOf[java.util.List[java.util.Map[String, Object]]]
           .asScala
           .toSeq,
+        deltaOptionsMethod
+          .invoke(normalized)
+          .asInstanceOf[java.util.List[DeltaFileReadOptions]]
+          .asScala
+          .toSeq,
         payloadsMethod.invoke(normalized).asInstanceOf[Array[Array[Byte]]]
       )
     } catch {
diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc
index fe442046908..c4a4e725014 100644
--- a/cpp/velox/compute/VeloxPlanConverter.cc
+++ b/cpp/velox/compute/VeloxPlanConverter.cc
@@ -22,6 +22,7 @@
 #include <google/protobuf/any.pb.h>
 #include <google/protobuf/wrappers.pb.h>
 #include "config/GlutenConfig.h"
+#include "delta/DeltaSplitInfo.h"
 #include "iceberg/IcebergPlanConverter.h"
 #include "operators/plannodes/IteratorSplit.h"
 
@@ -51,8 +52,6 @@ VeloxPlanConverter::VeloxPlanConverter(
 }
 
 namespace {
-const std::string kDeltaDvPayloadIndex = "delta_dv_payload_index";
-
 std::optional<std::string> unpackMetadataValue(const google::protobuf::Any& value) {
   google::protobuf::BytesValue bytesValue;
   if (value.UnpackTo(&bytesValue)) {
@@ -82,6 +81,50 @@ std::optional<std::string> unpackMetadataValue(const google::protobuf::Any& valu
   return std::nullopt;
 }
 
+delta::DeltaRowIndexFilterType parseDeltaRowIndexFilterType(int filterType) {
+  switch (filterType) {
+    case 1:
+      return delta::DeltaRowIndexFilterType::kIfContained;
+    case 2:
+      return delta::DeltaRowIndexFilterType::kIfNotContained;
+    case 0:
+    default:
+      return delta::DeltaRowIndexFilterType::kKeepAll;
+  }
+}
+
+std::shared_ptr<DeltaSplitInfo> parseDeltaSplitInfo(
+    const substrait::ReadRel_LocalFiles_FileOrFiles& file,
+    std::shared_ptr<SplitInfo> splitInfo,
+    const std::vector<SplitPayloadBufferView>* splitPayloads) {
+  auto deltaSplitInfo = std::dynamic_pointer_cast<DeltaSplitInfo>(splitInfo)
+      ? std::dynamic_pointer_cast<DeltaSplitInfo>(splitInfo)
+      : std::make_shared<DeltaSplitInfo>(*splitInfo);
+
+  deltaSplitInfo->format = dwio::common::FileFormat::PARQUET;
+  const auto& deltaReadOptions = file.delta();
+  deltaSplitInfo->rowIndexFilterTypes.emplace_back(
+      parseDeltaRowIndexFilterType(deltaReadOptions.row_index_filter_type()));
+
+  if (!deltaReadOptions.has_deletion_vector()) {
+    deltaSplitInfo->deletionVectors.emplace_back(std::nullopt);
+    return deltaSplitInfo;
+  }
+
+  VELOX_USER_CHECK_NOT_NULL(splitPayloads, "Delta split has a deletion vector without an external payload buffer");
+  const auto payloadIndex = static_cast<size_t>(deltaReadOptions.deletion_vector_payload_index());
+  VELOX_USER_CHECK_LT(
+      payloadIndex,
+      splitPayloads->size(),
+      "Delta deletion vector payload index {} is out of range for {} payload buffers",
+      payloadIndex,
+      splitPayloads->size());
+  const auto cardinality = static_cast<uint64_t>(deltaReadOptions.deletion_vector_cardinality());
+  deltaSplitInfo->deletionVectors.emplace_back(
+      delta::DeltaDeletionVectorDescriptor::serialized(cardinality, splitPayloads->at(payloadIndex)));
+  return deltaSplitInfo;
+}
+
 std::shared_ptr<SplitInfo> parseScanSplitInfo(
     const facebook::velox::config::ConfigBase* veloxCfg,
     const google::protobuf::RepeatedPtrField<substrait::ReadRel_LocalFiles_FileOrFiles>& fileList,
@@ -96,7 +139,6 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
   splitInfo->partitionColumns.reserve(fileList.size());
   splitInfo->properties.reserve(fileList.size());
   splitInfo->metadataColumns.reserve(fileList.size());
-  splitInfo->deletionVectorPayloads.reserve(fileList.size());
   for (const auto& file : fileList) {
     // Expect all Partitions share the same index.
     splitInfo->partitionIndex = file.partition_index();
@@ -116,20 +158,6 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
         metadataColumnMap[otherMetadataColumn.key()] = std::move(*unpackedValue);
       }
     }
-    if (auto payloadIndexIt = metadataColumnMap.find(kDeltaDvPayloadIndex); payloadIndexIt != metadataColumnMap.end()) {
-      VELOX_USER_CHECK_NOT_NULL(splitPayloads, "Split payload index found without an external payload buffer");
-      const auto payloadIndex = static_cast<size_t>(std::stoul(payloadIndexIt->second));
-      VELOX_USER_CHECK_LT(
-          payloadIndex,
-          splitPayloads->size(),
-          "Split payload index {} is out of range for {} payload buffers",
-          payloadIndex,
-          splitPayloads->size());
-      splitInfo->deletionVectorPayloads.emplace_back(splitPayloads->at(payloadIndex));
-      metadataColumnMap.erase(payloadIndexIt);
-    } else {
-      splitInfo->deletionVectorPayloads.emplace_back(std::nullopt);
-    }
     splitInfo->metadataColumns.emplace_back(metadataColumnMap);
 
     splitInfo->paths.emplace_back(file.uri_file());
@@ -158,6 +186,9 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
       case SubstraitFileFormatCase::kIceberg:
         splitInfo = IcebergPlanConverter::parseIcebergSplitInfo(file, std::move(splitInfo));
         break;
+      case SubstraitFileFormatCase::kDelta:
+        splitInfo = parseDeltaSplitInfo(file, std::move(splitInfo), splitPayloads);
+        break;
       default:
         splitInfo->format = dwio::common::FileFormat::UNKNOWN;
         break;
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index c5ac67cf44b..8499d56b352 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -21,6 +21,7 @@
 #include "VeloxRuntime.h"
 #include "compute/delta/DeltaConnector.h"
 #include "compute/delta/DeltaSplit.h"
+#include "compute/delta/DeltaSplitInfo.h"
 #include "config/VeloxConfig.h"
 #include "utils/ConfigExtractor.h"
 #include "velox/connectors/hive/HiveConfig.h"
@@ -74,13 +75,10 @@ const std::string kWriteIOTime = "writeIOWallNanos";
 const std::string kHiveDefaultPartition = "__HIVE_DEFAULT_PARTITION__";
 const std::string kDeltaTableFormat = "delta";
 const std::string kTableFormatKey = "table_format";
-const std::string kDeltaDvCardinality = "delta_dv_cardinality";
-const std::string kRowIndexFilterType = "row_index_filter_type";
 
 bool isDeltaMetadata(const std::unordered_map<std::string, std::string>& metadata) {
   auto tableFormatIt = metadata.find(kTableFormatKey);
-  return (tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat) ||
-      metadata.find(kDeltaDvCardinality) != metadata.end() || metadata.find(kRowIndexFilterType) != metadata.end();
+  return tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat;
 }
 
 bool isDeltaScanInfo(const std::shared_ptr<SplitInfo>& splitInfo) {
@@ -121,42 +119,6 @@ std::string connectorIdForScanNode(
   return tableScanNode->tableHandle()->connectorId();
 }
 
-std::optional<uint64_t> getOptionalUint64(
-    const std::unordered_map<std::string, std::string>& metadata,
-    const std::string& key) {
-  auto it = metadata.find(key);
-  if (it == metadata.end() || it->second.empty()) {
-    return std::nullopt;
-  }
-  return static_cast<uint64_t>(std::stoull(it->second));
-}
-
-std::optional<gluten::delta::DeltaDeletionVectorDescriptor> parseDeltaDeletionVector(
-    const std::unordered_map<std::string, std::string>& metadata,
-    std::optional<SplitPayloadBufferView> serializedPayloadView) {
-  if (!serializedPayloadView.has_value()) {
-    return std::nullopt;
-  }
-
-  const auto cardinality = getOptionalUint64(metadata, kDeltaDvCardinality);
-  return gluten::delta::DeltaDeletionVectorDescriptor::serialized(cardinality, serializedPayloadView);
-}
-
-gluten::delta::DeltaRowIndexFilterType parseDeltaRowIndexFilterType(
-    const std::unordered_map<std::string, std::string>& metadata) {
-  auto it = metadata.find(kRowIndexFilterType);
-  if (it == metadata.end()) {
-    return gluten::delta::DeltaRowIndexFilterType::kKeepAll;
-  }
-  if (it->second == "IF_CONTAINED") {
-    return gluten::delta::DeltaRowIndexFilterType::kIfContained;
-  }
-  if (it->second == "IF_NOT_CONTAINED") {
-    return gluten::delta::DeltaRowIndexFilterType::kIfNotContained;
-  }
-  return gluten::delta::DeltaRowIndexFilterType::kKeepAll;
-}
-
 } // namespace
 
 WholeStageResultIterator::WholeStageResultIterator(
@@ -233,7 +195,9 @@ WholeStageResultIterator::WholeStageResultIterator(
     const auto& partitionColumns = scanInfo->partitionColumns;
     const auto& metadataColumns = scanInfo->metadataColumns;
     const auto scanNodeConnectorId = connectorIdForScanNode(veloxPlan_, scanNodeIds_[scanInfoIdx]);
-    const bool isDeltaScan = scanNodeConnectorId == connectorIds_.delta || isDeltaScanInfo(scanInfo);
+    const auto deltaSplitInfo = std::dynamic_pointer_cast<DeltaSplitInfo>(scanInfo);
+    const bool isDeltaScan =
+        scanNodeConnectorId == connectorIds_.delta || deltaSplitInfo != nullptr || isDeltaScanInfo(scanInfo);
 #ifdef GLUTEN_ENABLE_GPU
     // Under the pre-condition that all the split infos has same partition column and format.
     const auto canUseCudfConnector = scanInfo->canUseCudfConnector();
@@ -269,6 +233,14 @@ WholeStageResultIterator::WholeStageResultIterator(
             properties[idx]);
       } else if (isDeltaScan) {
         std::unordered_map<std::string, std::string> customSplitInfo{{"table_format", kDeltaTableFormat}};
+        std::optional<gluten::delta::DeltaDeletionVectorDescriptor> deletionVector = std::nullopt;
+        auto rowIndexFilterType = gluten::delta::DeltaRowIndexFilterType::kKeepAll;
+        if (deltaSplitInfo != nullptr) {
+          VELOX_USER_CHECK_LT(idx, deltaSplitInfo->deletionVectors.size());
+          VELOX_USER_CHECK_LT(idx, deltaSplitInfo->rowIndexFilterTypes.size());
+          deletionVector = deltaSplitInfo->deletionVectors[idx];
+          rowIndexFilterType = deltaSplitInfo->rowIndexFilterTypes[idx];
+        }
         split = std::make_shared<gluten::delta::HiveDeltaSplit>(
             connectorIds_.delta,
             paths[idx],
@@ -281,9 +253,9 @@ WholeStageResultIterator::WholeStageResultIterator(
             nullptr,
             std::unordered_map<std::string, std::string>(),
             true,
-            parseDeltaDeletionVector(metadataColumn, scanInfo->deletionVectorPayloads[idx]),
+            deletionVector,
             std::nullopt,
-            parseDeltaRowIndexFilterType(metadataColumn),
+            rowIndexFilterType,
             metadataColumn,
             properties[idx]);
       } else {
diff --git a/cpp/velox/compute/delta/DeltaSplitInfo.h b/cpp/velox/compute/delta/DeltaSplitInfo.h
new file mode 100644
index 00000000000..623202412ce
--- /dev/null
+++ b/cpp/velox/compute/delta/DeltaSplitInfo.h
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <optional>
+#include <vector>
+
+#include "compute/delta/DeltaSplit.h"
+#include "substrait/SubstraitToVeloxPlan.h"
+
+namespace gluten {
+
+struct DeltaSplitInfo : SplitInfo {
+  std::vector<std::optional<delta::DeltaDeletionVectorDescriptor>> deletionVectors;
+  std::vector<delta::DeltaRowIndexFilterType> rowIndexFilterTypes;
+
+  DeltaSplitInfo(const SplitInfo& splitInfo) : SplitInfo(splitInfo) {
+    deletionVectors.reserve(splitInfo.paths.capacity());
+    rowIndexFilterTypes.reserve(splitInfo.paths.capacity());
+
+    const auto previousFileCount = splitInfo.paths.empty() ? 0 : splitInfo.paths.size() - 1;
+    deletionVectors.resize(previousFileCount, std::nullopt);
+    rowIndexFilterTypes.resize(previousFileCount, delta::DeltaRowIndexFilterType::kKeepAll);
+  }
+};
+
+} // namespace gluten
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 6aabd1076aa..7d577c12a63 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -20,6 +20,7 @@
 #include "TypeUtils.h"
 #include "VariantToVectorConverter.h"
 #include "compute/delta/DeltaConnector.h"
+#include "compute/delta/DeltaSplitInfo.h"
 #include "jni/JniHashTable.h"
 #include "operators/hashjoin/HashTableBuilder.h"
 #include "operators/plannodes/RowVectorStream.h"
@@ -49,8 +50,6 @@ namespace {
 
 const std::string kDeltaTableFormat = "delta";
 const std::string kTableFormatKey = "table_format";
-const std::string kDeltaDvCardinality = "delta_dv_cardinality";
-const std::string kRowIndexFilterType = "row_index_filter_type";
 
 bool useCudfTableHandle(const std::vector<std::shared_ptr<SplitInfo>>& splitInfos) {
 #ifdef GLUTEN_ENABLE_GPU
@@ -65,11 +64,13 @@ bool useCudfTableHandle(const std::vector<std::shared_ptr<SplitInfo>>& splitInfo
 
 bool isDeltaMetadata(const std::unordered_map<std::string, std::string>& metadata) {
   auto tableFormatIt = metadata.find(kTableFormatKey);
-  return (tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat) ||
-      metadata.find(kDeltaDvCardinality) != metadata.end() || metadata.find(kRowIndexFilterType) != metadata.end();
+  return tableFormatIt != metadata.end() && tableFormatIt->second == kDeltaTableFormat;
 }
 
 bool isDeltaSplitInfo(const std::shared_ptr<SplitInfo>& splitInfo) {
+  if (std::dynamic_pointer_cast<DeltaSplitInfo>(splitInfo) != nullptr) {
+    return true;
+  }
   for (const auto& metadata : splitInfo->metadataColumns) {
     if (isDeltaMetadata(metadata)) {
       return true;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h
index f8dad3ed158..65a6f8e0872 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h
@@ -51,9 +51,6 @@ struct SplitInfo {
   /// The metadata columns associated with partitioned table.
   std::vector<std::unordered_map<std::string, std::string>> metadataColumns;
 
-  /// Optional externally provided deletion vector payloads aligned with metadataColumns.
-  std::vector<std::optional<SplitPayloadBufferView>> deletionVectorPayloads;
-
   /// The file paths to be scanned.
   std::vector<std::string> paths;
 
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
index 3734222a08f..9c682fd7bcc 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
@@ -26,22 +26,26 @@
 import org.apache.spark.sql.vectorized.ColumnarBatch;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 
 public class ColumnarBatchOutIterator extends ClosableIterator<ColumnarBatch>
     implements RuntimeAware {
   private final Runtime runtime;
   private final long iterHandle;
-  private final Object retainedReference;
+  // Keeps Java-owned direct buffers reachable while Velox holds raw native views into them.
+  @SuppressWarnings("unused")
+  private final ByteBuffer[][] retainedSplitPayloadBuffers;
 
   public ColumnarBatchOutIterator(Runtime runtime, long iterHandle) {
     this(runtime, iterHandle, null);
   }
 
-  public ColumnarBatchOutIterator(Runtime runtime, long iterHandle, Object retainedReference) {
+  public ColumnarBatchOutIterator(
+      Runtime runtime, long iterHandle, ByteBuffer[][] retainedSplitPayloadBuffers) {
     super();
     this.runtime = runtime;
     this.iterHandle = iterHandle;
-    this.retainedReference = retainedReference;
+    this.retainedSplitPayloadBuffers = retainedSplitPayloadBuffers;
   }
 
   @Override
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
index d94409c4777..1d025ba11cf 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
@@ -125,7 +125,7 @@ public long spill(MemoryTarget self, Spiller.Phase phase, long size) {
   }
 
   private ColumnarBatchOutIterator createOutIterator(
-      Runtime runtime, long itrHandle, Object retainedReference) {
-    return new ColumnarBatchOutIterator(runtime, itrHandle, retainedReference);
+      Runtime runtime, long itrHandle, ByteBuffer[][] retainedSplitPayloadBuffers) {
+    return new ColumnarBatchOutIterator(runtime, itrHandle, retainedSplitPayloadBuffers);
   }
 }
diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesBuilder.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesBuilder.java
new file mode 100644
index 00000000000..fc75285eddb
--- /dev/null
+++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesBuilder.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.substrait.rel;
+
+import org.apache.gluten.substrait.rel.DeltaLocalFilesNode.DeltaFileReadOptions;
+
+import java.util.List;
+import java.util.Map;
+
+public class DeltaLocalFilesBuilder {
+  private DeltaLocalFilesBuilder() {}
+
+  public static DeltaLocalFilesNode makeDeltaLocalFiles(
+      Integer index,
+      List<String> paths,
+      List<Long> starts,
+      List<Long> lengths,
+      List<Long> fileSizes,
+      List<Long> modificationTimes,
+      List<Map<String, String>> partitionColumns,
+      List<Map<String, String>> metadataColumns,
+      LocalFilesNode.ReadFileFormat fileFormat,
+      List<String> preferredLocations,
+      Map<String, String> properties,
+      List<Map<String, Object>> otherMetadataColumns,
+      List<DeltaFileReadOptions> deltaReadOptions) {
+    return new DeltaLocalFilesNode(
+        index,
+        paths,
+        starts,
+        lengths,
+        fileSizes,
+        modificationTimes,
+        partitionColumns,
+        metadataColumns,
+        fileFormat,
+        preferredLocations,
+        properties,
+        otherMetadataColumns,
+        deltaReadOptions);
+  }
+}
diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
new file mode 100644
index 00000000000..47952593c3d
--- /dev/null
+++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.substrait.rel;
+
+import io.substrait.proto.ReadRel;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class DeltaLocalFilesNode extends LocalFilesNode {
+  private final List<DeltaFileReadOptions> deltaReadOptions = new ArrayList<>();
+
+  DeltaLocalFilesNode(
+      Integer index,
+      List<String> paths,
+      List<Long> starts,
+      List<Long> lengths,
+      List<Long> fileSizes,
+      List<Long> modificationTimes,
+      List<Map<String, String>> partitionColumns,
+      List<Map<String, String>> metadataColumns,
+      ReadFileFormat fileFormat,
+      List<String> preferredLocations,
+      Map<String, String> properties,
+      List<Map<String, Object>> otherMetadataColumns,
+      List<DeltaFileReadOptions> deltaReadOptions) {
+    super(
+        index,
+        paths,
+        starts,
+        lengths,
+        fileSizes,
+        modificationTimes,
+        partitionColumns,
+        metadataColumns,
+        fileFormat,
+        preferredLocations,
+        properties,
+        otherMetadataColumns);
+    this.deltaReadOptions.addAll(deltaReadOptions);
+  }
+
+  @Override
+  protected void processFileBuilder(ReadRel.LocalFiles.FileOrFiles.Builder fileBuilder, int index) {
+    DeltaFileReadOptions options = deltaReadOptions.get(index);
+    ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.Builder deltaBuilder =
+        ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.newBuilder()
+            .setRowIndexFilterType(toProtoRowIndexFilterType(options.rowIndexFilterType()))
+            .setHasDeletionVector(options.hasDeletionVector());
+
+    if (options.hasDeletionVector()) {
+      deltaBuilder
+          .setDeletionVectorCardinality(options.deletionVectorCardinality())
+          .setDeletionVectorPayloadIndex(options.deletionVectorPayloadIndex());
+    }
+
+    fileBuilder.setDelta(deltaBuilder.build());
+  }
+
+  private static ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.RowIndexFilterType
+      toProtoRowIndexFilterType(RowIndexFilterType rowIndexFilterType) {
+    switch (rowIndexFilterType) {
+      case IF_CONTAINED:
+        return ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.RowIndexFilterType.IF_CONTAINED;
+      case IF_NOT_CONTAINED:
+        return ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.RowIndexFilterType.IF_NOT_CONTAINED;
+      case KEEP_ALL:
+      default:
+        return ReadRel.LocalFiles.FileOrFiles.DeltaReadOptions.RowIndexFilterType.KEEP_ALL;
+    }
+  }
+
+  public enum RowIndexFilterType {
+    KEEP_ALL,
+    IF_CONTAINED,
+    IF_NOT_CONTAINED
+  }
+
+  public static class DeltaFileReadOptions {
+    private final RowIndexFilterType rowIndexFilterType;
+    private final boolean hasDeletionVector;
+    private final long deletionVectorCardinality;
+    private final int deletionVectorPayloadIndex;
+
+    public DeltaFileReadOptions(
+        RowIndexFilterType rowIndexFilterType,
+        boolean hasDeletionVector,
+        long deletionVectorCardinality,
+        int deletionVectorPayloadIndex) {
+      this.rowIndexFilterType = rowIndexFilterType;
+      this.hasDeletionVector = hasDeletionVector;
+      this.deletionVectorCardinality = deletionVectorCardinality;
+      this.deletionVectorPayloadIndex = deletionVectorPayloadIndex;
+    }
+
+    public RowIndexFilterType rowIndexFilterType() {
+      return rowIndexFilterType;
+    }
+
+    public boolean hasDeletionVector() {
+      return hasDeletionVector;
+    }
+
+    public long deletionVectorCardinality() {
+      return deletionVectorCardinality;
+    }
+
+    public int deletionVectorPayloadIndex() {
+      return deletionVectorPayloadIndex;
+    }
+  }
+}
diff --git a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
index 2bfb68e0979..a74f41a21eb 100644
--- a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
+++ b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
@@ -197,6 +197,18 @@ message ReadRel {
         repeated DeleteFile delete_files = 3;
       }
 
+      message DeltaReadOptions {
+        enum RowIndexFilterType {
+          KEEP_ALL = 0;
+          IF_CONTAINED = 1;
+          IF_NOT_CONTAINED = 2;
+        }
+        RowIndexFilterType row_index_filter_type = 1;
+        bool has_deletion_vector = 2;
+        uint64 deletion_vector_cardinality = 3;
+        uint32 deletion_vector_payload_index = 4;
+      }
+
       // File reading options
       oneof file_format {
         ParquetReadOptions parquet = 9;
@@ -207,6 +219,7 @@ message ReadRel {
         TextReadOptions text = 14;
         JsonReadOptions json = 15;
         IcebergReadOptions iceberg = 16;
+        DeltaReadOptions delta = 22;
       }
 
       message partitionColumn {

From 32af86b037c66e0742fff7a5902202ad79be15f6 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Wed, 27 May 2026 17:11:40 +0300
Subject: [PATCH 7/9] [VL][Delta] Fix DV split metadata serialization

---
 .../apache/gluten/vectorized/ColumnarBatchOutIterator.java   | 1 +
 .../org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
index 9c682fd7bcc..cdcca05a8d2 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
@@ -32,6 +32,7 @@ public class ColumnarBatchOutIterator extends ClosableIterator<ColumnarBatch>
     implements RuntimeAware {
   private final Runtime runtime;
   private final long iterHandle;
+
   // Keeps Java-owned direct buffers reachable while Velox holds raw native views into them.
   @SuppressWarnings("unused")
   private final ByteBuffer[][] retainedSplitPayloadBuffers;
diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
index 47952593c3d..4fcca1fde17 100644
--- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
+++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
@@ -18,6 +18,7 @@
 
 import io.substrait.proto.ReadRel;
 
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -91,7 +92,9 @@ public enum RowIndexFilterType {
     IF_NOT_CONTAINED
   }
 
-  public static class DeltaFileReadOptions {
+  public static class DeltaFileReadOptions implements Serializable {
+    private static final long serialVersionUID = 1L;
+
     private final RowIndexFilterType rowIndexFilterType;
     private final boolean hasDeletionVector;
     private final long deletionVectorCardinality;

From 5e5aadf6a5b90d205cc5d38db0516eb45fff6a66 Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Thu, 28 May 2026 16:08:31 +0300
Subject: [PATCH 8/9] [VL][Delta] Embed DV payload in scan read options

---
 .../component/VeloxDeltaComponent.scala       | 25 +++++++
 .../velox/VeloxDeltaMetadataUtils.scala       | 18 ++---
 ...eprocessDeltaScanWithDeletionVectors.scala | 36 ++++++++++
 .../sql/delta/stats/PrepareDeltaScan.scala    | 66 -------------------
 .../DeltaDeletionVectorHandoffSuite.scala     |  5 +-
 .../velox/VeloxDeltaMetadataUtils.scala       | 18 ++---
 .../DeltaDeletionVectorHandoffSuite.scala     |  5 +-
 .../backendsapi/velox/VeloxIteratorApi.scala  | 40 ++---------
 .../velox/VeloxSplitInfoWithPayloads.scala    | 31 ---------
 cpp/core/compute/Runtime.h                    |  2 -
 cpp/core/jni/JniWrapper.cc                    | 25 -------
 cpp/velox/compute/VeloxPlanConverter.cc       | 44 ++++++-------
 cpp/velox/compute/VeloxPlanConverter.h        |  3 +-
 cpp/velox/compute/VeloxRuntime.cc             | 11 +---
 cpp/velox/compute/VeloxRuntime.h              |  3 -
 cpp/velox/compute/delta/DeltaSplitInfo.h      |  4 ++
 .../vectorized/ColumnarBatchOutIterator.java  | 11 ----
 .../vectorized/NativePlanEvaluator.java       | 21 +-----
 .../vectorized/PlanEvaluatorJniWrapper.java   |  3 -
 .../substrait/rel/DeltaLocalFilesNode.java    | 14 ++--
 .../substrait/proto/substrait/algebra.proto   |  2 +-
 21 files changed, 121 insertions(+), 266 deletions(-)
 create mode 100644 backends-velox/src-delta33/main/scala/org/apache/gluten/extension/PreprocessDeltaScanWithDeletionVectors.scala
 delete mode 100644 backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
 delete mode 100644 backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala

diff --git a/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala b/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
index 0587e8b07f7..4c6254d273d 100644
--- a/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
+++ b/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
@@ -23,9 +23,15 @@ import org.apache.gluten.extension.columnar.heuristic.HeuristicTransform
 import org.apache.gluten.extension.columnar.validator.Validators
 import org.apache.gluten.extension.injector.Injector
 
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.util.SparkReflectionUtil
 
 class VeloxDeltaComponent extends Component {
+  private val deltaDvPreprocessRuleClassName =
+    "org.apache.gluten.extension.PreprocessDeltaScanWithDeletionVectors"
+
   override def name(): String = "velox-delta"
 
   override def dependencies(): Seq[Class[_ <: Component]] = classOf[VeloxBackend] :: Nil
@@ -36,6 +42,7 @@ class VeloxDeltaComponent extends Component {
 
   override def injectRules(injector: Injector): Unit = {
     val legacy = injector.gluten.legacy
+    injector.spark.injectOptimizerRule(deltaDvPreprocessRule)
     legacy.injectTransform {
       c =>
         val offload = Seq(OffloadDeltaScan(), OffloadDeltaProject(), OffloadDeltaFilter())
@@ -46,4 +53,22 @@ class VeloxDeltaComponent extends Component {
     }
     DeltaPostTransformRules.rules.foreach(r => legacy.injectPostTransform(_ => r))
   }
+
+  private def deltaDvPreprocessRule(spark: SparkSession): Rule[LogicalPlan] = {
+    if (!SparkReflectionUtil.isClassPresent(deltaDvPreprocessRuleClassName)) {
+      return VeloxDeltaComponent.IdentityRule
+    }
+
+    Class
+      .forName(deltaDvPreprocessRuleClassName)
+      .getConstructor(classOf[SparkSession])
+      .newInstance(spark)
+      .asInstanceOf[Rule[LogicalPlan]]
+  }
+}
+
+object VeloxDeltaComponent {
+  private object IdentityRule extends Rule[LogicalPlan] {
+    override def apply(plan: LogicalPlan): LogicalPlan = plan
+  }
 }
diff --git a/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
index bd13f89681f..d1733beb117 100644
--- a/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
+++ b/backends-velox/src-delta33/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -42,8 +42,7 @@ object VeloxDeltaMetadataUtils {
 
   final class NormalizedSplitMetadata(
       val otherMetadataColumns: JList[JMap[String, Object]],
-      val deltaReadOptions: JList[DeltaFileReadOptions],
-      val deletionVectorPayloads: Array[Array[Byte]])
+      val deltaReadOptions: JList[DeltaFileReadOptions])
     extends Serializable
 
   private def decodeDescriptor(
@@ -93,7 +92,7 @@ object VeloxDeltaMetadataUtils {
     val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
     val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
     val deltaReadOptions = new JArrayList[DeltaFileReadOptions](files.size())
-    val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
+    var hasDeletionVectors = false
 
     files.asScala.foreach {
       file =>
@@ -110,6 +109,7 @@ object VeloxDeltaMetadataUtils {
 
         descriptor match {
           case Some(descriptor) =>
+            hasDeletionVectors = true
             val payloadTablePath = resolveTablePath(partitionColumnCount, file)
             val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
             deltaReadOptions.add(
@@ -117,25 +117,21 @@ object VeloxDeltaMetadataUtils {
                 rowIndexFilterType,
                 true,
                 descriptor.cardinality,
-                deletionVectorPayloads.length))
-            deletionVectorPayloads += serializedPayload
+                serializedPayload))
             normalizedMetadataColumns.add(normalizedMetadata)
           case None =>
             deltaReadOptions.add(
-              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, -1))
+              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, Array.emptyByteArray))
             normalizedMetadataColumns.add(normalizedMetadata)
         }
     }
 
-    val deltaOptions = if (deletionVectorPayloads.nonEmpty) {
+    val deltaOptions = if (hasDeletionVectors) {
       deltaReadOptions
     } else {
       new JArrayList[DeltaFileReadOptions]()
     }
-    new NormalizedSplitMetadata(
-      normalizedMetadataColumns,
-      deltaOptions,
-      deletionVectorPayloads.toArray)
+    new NormalizedSplitMetadata(normalizedMetadataColumns, deltaOptions)
   }
 
   private def activeSpark: SparkSession = {
diff --git a/backends-velox/src-delta33/main/scala/org/apache/gluten/extension/PreprocessDeltaScanWithDeletionVectors.scala b/backends-velox/src-delta33/main/scala/org/apache/gluten/extension/PreprocessDeltaScanWithDeletionVectors.scala
new file mode 100644
index 00000000000..26a98865627
--- /dev/null
+++ b/backends-velox/src-delta33/main/scala/org/apache/gluten/extension/PreprocessDeltaScanWithDeletionVectors.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.extension
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.delta.PreprocessTableWithDVs
+
+/**
+ * Delta 3.3 compatibility rule for DV scan metadata.
+ *
+ * Delta's own PrepareDeltaScan still runs normally. This Gluten-scoped rule only adds the
+ * backend-visible DV metadata columns after Delta has prepared the scan, so the physical Delta scan
+ * handoff can materialize the per-file DV payload for Velox without replacing Delta classes.
+ */
+class PreprocessDeltaScanWithDeletionVectors(protected val spark: SparkSession)
+  extends Rule[LogicalPlan]
+  with PreprocessTableWithDVs {
+
+  override def apply(plan: LogicalPlan): LogicalPlan = preprocessTablesWithDVs(plan)
+}
diff --git a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
deleted file mode 100644
index 326df10bae3..00000000000
--- a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.delta.stats
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, V2WriteCommand}
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.delta.{DeltaTable, OptimisticTransaction, PreprocessTableWithDVs}
-import org.apache.spark.sql.delta.sources.DeltaSQLConf
-
-/**
- * Delta 3.3 compatibility shim around Delta's PrepareDeltaScan.
- *
- * This preserves Delta's normal scan preparation first, including stats skipping, metadata-query
- * optimization, and transaction read tracking. After that, Gluten runs DV preprocessing so the scan
- * exposes Delta's internal DV row-deleted column and row-index metadata. The native physical rule
- * later strips Spark's synthetic DV predicate once the materialized DV payload is attached to the
- * native split, so Velox applies the DV filter exactly once.
- */
-class PrepareDeltaScan(protected val spark: SparkSession)
-  extends Rule[LogicalPlan]
-  with PrepareDeltaScanBase
-  with PreprocessTableWithDVs {
-
-  override def apply(plan0: LogicalPlan): LogicalPlan = {
-    var plan = plan0
-
-    val isSubquery = isSubqueryRoot(plan)
-    val isDataSourceV2 = plan.isInstanceOf[V2WriteCommand]
-    if (isSubquery || isDataSourceV2) {
-      return plan
-    }
-
-    val updatedPlan = if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING)) {
-      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED)) {
-        plan = optimizeQueryWithMetadata(plan)
-      }
-      prepareDeltaScan(plan)
-    } else {
-      OptimisticTransaction.getActive.foreach {
-        txn =>
-          val logsInPlan = plan.collect { case DeltaTable(fileIndex) => fileIndex.deltaLog }
-          if (logsInPlan.exists(_.isSameLogAs(txn.deltaLog))) {
-            txn.readWholeTable()
-          }
-      }
-      plan
-    }
-
-    preprocessTablesWithDVs(updatedPlan)
-  }
-}
diff --git a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index dbd226814cf..bc32b9b3e72 100644
--- a/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -78,10 +78,8 @@ class DeltaDeletionVectorHandoffSuite
         val metadata = normalized.otherMetadataColumns.get(0)
         val deltaReadOptions = normalized.deltaReadOptions.get(0)
 
-        assert(normalized.deletionVectorPayloads.length == 1)
-        assert(normalized.deletionVectorPayloads.head.nonEmpty)
         assert(deltaReadOptions.hasDeletionVector())
-        assert(deltaReadOptions.deletionVectorPayloadIndex() == 0)
+        assert(deltaReadOptions.serializedDeletionVector().nonEmpty)
         assert(deltaReadOptions.deletionVectorCardinality() == dataFile.deletionVector.cardinality)
         assert(deltaReadOptions.rowIndexFilterType() == GlutenRowIndexFilterType.IF_CONTAINED)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
@@ -120,7 +118,6 @@ class DeltaDeletionVectorHandoffSuite
           files = Seq(partitionedFile).asJava)
         val metadata = normalized.otherMetadataColumns.get(0)
 
-        assert(normalized.deletionVectorPayloads.isEmpty)
         assert(normalized.deltaReadOptions.isEmpty)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
diff --git a/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
index 00df6e602fb..af5f7c8b40d 100644
--- a/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
+++ b/backends-velox/src-delta40/main/scala/org/apache/gluten/backendsapi/velox/VeloxDeltaMetadataUtils.scala
@@ -42,8 +42,7 @@ object VeloxDeltaMetadataUtils {
 
   final class NormalizedSplitMetadata(
       val otherMetadataColumns: JList[JMap[String, Object]],
-      val deltaReadOptions: JList[DeltaFileReadOptions],
-      val deletionVectorPayloads: Array[Array[Byte]])
+      val deltaReadOptions: JList[DeltaFileReadOptions])
     extends Serializable
 
   private def decodeDescriptor(
@@ -108,7 +107,7 @@ object VeloxDeltaMetadataUtils {
     val dvStore = new HadoopFileSystemDVStore(activeSpark.sessionState.newHadoopConf())
     val normalizedMetadataColumns = new JArrayList[JMap[String, Object]](files.size())
     val deltaReadOptions = new JArrayList[DeltaFileReadOptions](files.size())
-    val deletionVectorPayloads = scala.collection.mutable.ArrayBuffer.empty[Array[Byte]]
+    var hasDeletionVectors = false
 
     files.asScala.foreach {
       file =>
@@ -125,6 +124,7 @@ object VeloxDeltaMetadataUtils {
 
         descriptor match {
           case Some(descriptor) =>
+            hasDeletionVectors = true
             val payloadTablePath = resolveTablePath(partitionColumnCount, file)
             val serializedPayload = serializePayload(dvStore, payloadTablePath, descriptor)
             deltaReadOptions.add(
@@ -132,25 +132,21 @@ object VeloxDeltaMetadataUtils {
                 rowIndexFilterType,
                 true,
                 descriptor.cardinality,
-                deletionVectorPayloads.length))
-            deletionVectorPayloads += serializedPayload
+                serializedPayload))
             normalizedMetadataColumns.add(normalizedMetadata)
           case None =>
             deltaReadOptions.add(
-              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, -1))
+              new DeltaFileReadOptions(rowIndexFilterType, false, 0L, Array.emptyByteArray))
             normalizedMetadataColumns.add(normalizedMetadata)
         }
     }
 
-    val deltaOptions = if (deletionVectorPayloads.nonEmpty) {
+    val deltaOptions = if (hasDeletionVectors) {
       deltaReadOptions
     } else {
       new JArrayList[DeltaFileReadOptions]()
     }
-    new NormalizedSplitMetadata(
-      normalizedMetadataColumns,
-      deltaOptions,
-      deletionVectorPayloads.toArray)
+    new NormalizedSplitMetadata(normalizedMetadataColumns, deltaOptions)
   }
 
   private def activeSpark: SparkSession = {
diff --git a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
index 061619252a1..975d38384f9 100644
--- a/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
+++ b/backends-velox/src-delta40/test/scala/org/apache/spark/sql/delta/DeltaDeletionVectorHandoffSuite.scala
@@ -109,10 +109,8 @@ class DeltaDeletionVectorHandoffSuite
         val metadata = normalized.otherMetadataColumns.get(0)
         val deltaReadOptions = normalized.deltaReadOptions.get(0)
 
-        assert(normalized.deletionVectorPayloads.length == 1)
-        assert(normalized.deletionVectorPayloads.head.nonEmpty)
         assert(deltaReadOptions.hasDeletionVector())
-        assert(deltaReadOptions.deletionVectorPayloadIndex() == 0)
+        assert(deltaReadOptions.serializedDeletionVector().nonEmpty)
         assert(deltaReadOptions.deletionVectorCardinality() == dataFile.deletionVector.cardinality)
         assert(deltaReadOptions.rowIndexFilterType() == GlutenRowIndexFilterType.IF_CONTAINED)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
@@ -154,7 +152,6 @@ class DeltaDeletionVectorHandoffSuite
           files = Seq(partitionedFile).asJava)
         val metadata = normalized.otherMetadataColumns.get(0)
 
-        assert(normalized.deletionVectorPayloads.isEmpty)
         assert(normalized.deltaReadOptions.isEmpty)
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_ID_ENCODED))
         assert(!metadata.containsKey(GlutenDeltaParquetFileFormat.FILE_ROW_INDEX_FILTER_TYPE))
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
index 4245c4baf88..c116cadab57 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala
@@ -42,7 +42,6 @@ import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.SparkDirectoryUtil
 
 import java.lang.{Long => JLong}
-import java.nio.ByteBuffer
 import java.nio.charset.StandardCharsets
 import java.time.ZoneOffset
 import java.util.UUID
@@ -52,7 +51,7 @@ import scala.collection.mutable
 
 class VeloxIteratorApi extends IteratorApi with Logging {
   private type NormalizedDeltaSplitMetadata =
-    (Seq[java.util.Map[String, Object]], Seq[DeltaFileReadOptions], Array[Array[Byte]])
+    (Seq[java.util.Map[String, Object]], Seq[DeltaFileReadOptions])
 
   private val deltaMetadataUtilsClassName =
     "org.apache.gluten.backendsapi.velox.VeloxDeltaMetadataUtils$"
@@ -101,15 +100,14 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     val metadataColumns = partitionFiles
       .map(
         f => SparkShimLoader.getSparkShims.generateMetadataColumns(f, metadataColumnNames).asJava)
-    val (otherMetadataColumns, deltaReadOptions, deletionVectorPayloads) =
+    val (otherMetadataColumns, deltaReadOptions) =
       normalizeDeltaSplitMetadata(partitionSchema.fields.length, partitionFiles)
         .getOrElse {
           (
             partitionFiles.map {
               f => SparkShimLoader.getSparkShims.getOtherConstantMetadataColumnValues(f)
             },
-            Seq.empty[DeltaFileReadOptions],
-            Array.empty[Array[Byte]])
+            Seq.empty[DeltaFileReadOptions])
         }
 
     val localFilesNode = if (deltaReadOptions.nonEmpty) {
@@ -151,11 +149,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       fileFormat
     )
 
-    if (deletionVectorPayloads.nonEmpty) {
-      VeloxSplitInfoWithPayloads(localFiles, deletionVectorPayloads)
-    } else {
-      localFiles
-    }
+    localFiles
   }
 
   /** Generate native row partition. */
@@ -220,24 +214,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
     NativePlanEvaluator.injectWriteFilesTempPath(path, fileName)
   }
 
-  private def buildSplitPayloadBuffers(splitInfos: Array[SplitInfo]): Array[Array[ByteBuffer]] = {
-    val payloadBuffers = splitInfos.map {
-      case splitInfoWithPayloads: VeloxSplitInfoWithPayloads
-          if splitInfoWithPayloads.deletionVectorPayloads.nonEmpty =>
-        splitInfoWithPayloads.deletionVectorPayloads.map(toDirectByteBuffer)
-      case _ =>
-        null
-    }
-    if (payloadBuffers.exists(_ != null)) payloadBuffers else null
-  }
-
-  private def toDirectByteBuffer(bytes: Array[Byte]): ByteBuffer = {
-    val directBuffer = ByteBuffer.allocateDirect(bytes.length)
-    directBuffer.put(bytes)
-    directBuffer.flip()
-    directBuffer
-  }
-
   private def normalizeDeltaSplitMetadata(
       partitionColumnCount: Int,
       partitionFiles: Seq[PartitionedFile]): Option[NormalizedDeltaSplitMetadata] = {
@@ -252,7 +228,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
         normalizeMethod.invoke(module, Int.box(partitionColumnCount), partitionFiles.asJava)
       val metadataMethod = normalized.getClass.getMethod("otherMetadataColumns")
       val deltaOptionsMethod = normalized.getClass.getMethod("deltaReadOptions")
-      val payloadsMethod = normalized.getClass.getMethod("deletionVectorPayloads")
       Some(
         metadataMethod
           .invoke(normalized)
@@ -263,8 +238,7 @@ class VeloxIteratorApi extends IteratorApi with Logging {
           .invoke(normalized)
           .asInstanceOf[java.util.List[DeltaFileReadOptions]]
           .asScala
-          .toSeq,
-        payloadsMethod.invoke(normalized).asInstanceOf[Array[Array[Byte]]]
+          .toSeq
       )
     } catch {
       case _: ClassNotFoundException | _: NoSuchMethodException =>
@@ -298,8 +272,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       .splitInfos
       .map(splitInfo => splitInfo.toProtobuf.toByteArray)
       .toArray
-    val splitPayloadBuffers =
-      buildSplitPayloadBuffers(inputPartition.asInstanceOf[GlutenPartition].splitInfos)
     val spillDirPath = SparkDirectoryUtil
       .get()
       .namespace("gluten-spill")
@@ -309,7 +281,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       transKernel.createKernelWithBatchIterator(
         inputPartition.plan,
         if (splitInfoByteArray.nonEmpty) splitInfoByteArray else null,
-        splitPayloadBuffers,
         if (columnarNativeIterators.nonEmpty) columnarNativeIterators.toArray else null,
         partitionIndex,
         BackendsApiManager.getSparkPlanExecApiInstance.rewriteSpillPath(spillDirPath)
@@ -364,7 +335,6 @@ class VeloxIteratorApi extends IteratorApi with Logging {
       transKernel.createKernelWithBatchIterator(
         rootNode.toProtobuf.toByteArray,
         null,
-        null,
         if (columnarNativeIterator.nonEmpty) columnarNativeIterator.toArray else null,
         partitionIndex,
         BackendsApiManager.getSparkPlanExecApiInstance.rewriteSpillPath(spillDirPath)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala
deleted file mode 100644
index c34fd89da7d..00000000000
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSplitInfoWithPayloads.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gluten.backendsapi.velox
-
-import org.apache.gluten.substrait.rel.SplitInfo
-
-import com.google.protobuf.Message
-
-final case class VeloxSplitInfoWithPayloads(
-    delegate: SplitInfo,
-    deletionVectorPayloads: Array[Array[Byte]])
-  extends SplitInfo {
-
-  override def preferredLocations(): java.util.List[String] = delegate.preferredLocations()
-
-  override def toProtobuf(): Message = delegate.toProtobuf()
-}
diff --git a/cpp/core/compute/Runtime.h b/cpp/core/compute/Runtime.h
index b8d6fd3e18a..4ab944898bd 100644
--- a/cpp/core/compute/Runtime.h
+++ b/cpp/core/compute/Runtime.h
@@ -97,8 +97,6 @@ class Runtime : public std::enable_shared_from_this<Runtime> {
     throw GlutenException("Not implemented");
   }
 
-  virtual void setSplitPayloads(int32_t idx, std::vector<SplitPayloadBufferView> payloads) {}
-
   virtual std::string planString(bool details, const std::unordered_map<std::string, std::string>& sessionConf) {
     throw GlutenException("Not implemented");
   }
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 2acc2952565..c0678b33d2c 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -464,7 +464,6 @@ Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeCreateKernelWith
     jobject wrapper,
     jbyteArray planArr,
     jobjectArray splitInfosArr,
-    jobjectArray splitPayloadsArr,
     jobjectArray batchItrArray,
     jint stageId,
     jint partitionId,
@@ -495,30 +494,6 @@ Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeCreateKernelWith
       auto safeSplitArray = getByteArrayElementsSafe(env, splitInfoArray);
       auto splitInfoData = safeSplitArray.elems();
 
-      if (splitPayloadsArr != nullptr) {
-        jobjectArray splitPayloadArray = static_cast<jobjectArray>(env->GetObjectArrayElement(splitPayloadsArr, i));
-        if (splitPayloadArray != nullptr) {
-          std::vector<SplitPayloadBufferView> splitPayloads;
-          splitPayloads.reserve(env->GetArrayLength(splitPayloadArray));
-          for (jsize payloadIndex = 0, payloadCount = env->GetArrayLength(splitPayloadArray);
-               payloadIndex < payloadCount;
-               ++payloadIndex) {
-            jobject payloadBuffer = env->GetObjectArrayElement(splitPayloadArray, payloadIndex);
-            GLUTEN_CHECK(payloadBuffer != nullptr, "Split payload buffer must not be null");
-            auto* payloadData = reinterpret_cast<const uint8_t*>(env->GetDirectBufferAddress(payloadBuffer));
-            const auto payloadCapacity = env->GetDirectBufferCapacity(payloadBuffer);
-            GLUTEN_CHECK(payloadData != nullptr, "Split payload buffer must be a direct ByteBuffer");
-            GLUTEN_CHECK(
-                payloadCapacity >= 0 && payloadCapacity <= std::numeric_limits<int32_t>::max(),
-                "Split payload buffer capacity must fit int32_t");
-            splitPayloads.push_back({payloadData, static_cast<int32_t>(payloadCapacity)});
-            env->DeleteLocalRef(payloadBuffer);
-          }
-          ctx->setSplitPayloads(i, std::move(splitPayloads));
-          env->DeleteLocalRef(splitPayloadArray);
-        }
-      }
-
       ctx->parseSplitInfo(splitInfoData, splitInfoSize, i);
     }
   }
diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc
index c4a4e725014..befa4e9dbd1 100644
--- a/cpp/velox/compute/VeloxPlanConverter.cc
+++ b/cpp/velox/compute/VeloxPlanConverter.cc
@@ -17,6 +17,7 @@
 
 #include "VeloxPlanConverter.h"
 #include <filesystem>
+#include <limits>
 #include <optional>
 
 #include <google/protobuf/any.pb.h>
@@ -95,8 +96,7 @@ delta::DeltaRowIndexFilterType parseDeltaRowIndexFilterType(int filterType) {
 
 std::shared_ptr<DeltaSplitInfo> parseDeltaSplitInfo(
     const substrait::ReadRel_LocalFiles_FileOrFiles& file,
-    std::shared_ptr<SplitInfo> splitInfo,
-    const std::vector<SplitPayloadBufferView>* splitPayloads) {
+    std::shared_ptr<SplitInfo> splitInfo) {
   auto deltaSplitInfo = std::dynamic_pointer_cast<DeltaSplitInfo>(splitInfo)
       ? std::dynamic_pointer_cast<DeltaSplitInfo>(splitInfo)
       : std::make_shared<DeltaSplitInfo>(*splitInfo);
@@ -111,24 +111,25 @@ std::shared_ptr<DeltaSplitInfo> parseDeltaSplitInfo(
     return deltaSplitInfo;
   }
 
-  VELOX_USER_CHECK_NOT_NULL(splitPayloads, "Delta split has a deletion vector without an external payload buffer");
-  const auto payloadIndex = static_cast<size_t>(deltaReadOptions.deletion_vector_payload_index());
-  VELOX_USER_CHECK_LT(
-      payloadIndex,
-      splitPayloads->size(),
-      "Delta deletion vector payload index {} is out of range for {} payload buffers",
-      payloadIndex,
-      splitPayloads->size());
+  auto serializedPayload = deltaReadOptions.serialized_deletion_vector();
+  VELOX_USER_CHECK(!serializedPayload.empty(), "Delta split has a deletion vector without a serialized payload");
+  VELOX_USER_CHECK_LE(
+      serializedPayload.size(),
+      static_cast<size_t>(std::numeric_limits<int32_t>::max()),
+      "Delta deletion vector serialized payload is too large");
   const auto cardinality = static_cast<uint64_t>(deltaReadOptions.deletion_vector_cardinality());
+  auto payload = std::make_shared<std::string>(std::move(serializedPayload));
+  const SplitPayloadBufferView payloadView{
+      reinterpret_cast<const uint8_t*>(payload->data()), static_cast<int32_t>(payload->size())};
   deltaSplitInfo->deletionVectors.emplace_back(
-      delta::DeltaDeletionVectorDescriptor::serialized(cardinality, splitPayloads->at(payloadIndex)));
+      delta::DeltaDeletionVectorDescriptor::serialized(cardinality, payloadView));
+  deltaSplitInfo->deletionVectorPayloads.emplace_back(std::move(payload));
   return deltaSplitInfo;
 }
 
 std::shared_ptr<SplitInfo> parseScanSplitInfo(
     const facebook::velox::config::ConfigBase* veloxCfg,
-    const google::protobuf::RepeatedPtrField<substrait::ReadRel_LocalFiles_FileOrFiles>& fileList,
-    const std::vector<SplitPayloadBufferView>* splitPayloads) {
+    const google::protobuf::RepeatedPtrField<substrait::ReadRel_LocalFiles_FileOrFiles>& fileList) {
   using SubstraitFileFormatCase = ::substrait::ReadRel_LocalFiles_FileOrFiles::FileFormatCase;
 
   auto splitInfo = std::make_shared<SplitInfo>();
@@ -187,7 +188,7 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
         splitInfo = IcebergPlanConverter::parseIcebergSplitInfo(file, std::move(splitInfo));
         break;
       case SubstraitFileFormatCase::kDelta:
-        splitInfo = parseDeltaSplitInfo(file, std::move(splitInfo), splitPayloads);
+        splitInfo = parseDeltaSplitInfo(file, std::move(splitInfo));
         break;
       default:
         splitInfo->format = dwio::common::FileFormat::UNKNOWN;
@@ -224,16 +225,12 @@ std::shared_ptr<SplitInfo> parseScanSplitInfo(
 void parseLocalFileNodes(
     SubstraitToVeloxPlanConverter* planConverter,
     const facebook::velox::config::ConfigBase* veloxCfg,
-    std::vector<::substrait::ReadRel_LocalFiles>& localFiles,
-    const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads) {
+    std::vector<::substrait::ReadRel_LocalFiles>& localFiles) {
   std::vector<std::shared_ptr<SplitInfo>> splitInfos;
   splitInfos.reserve(localFiles.size());
-  for (size_t splitIndex = 0; splitIndex < localFiles.size(); ++splitIndex) {
-    const auto& localFile = localFiles[splitIndex];
+  for (const auto& localFile : localFiles) {
     const auto& fileList = localFile.items();
-    auto payloadIt = splitPayloads.find(splitIndex);
-    splitInfos.push_back(
-        parseScanSplitInfo(veloxCfg, fileList, payloadIt == splitPayloads.end() ? nullptr : &payloadIt->second));
+    splitInfos.push_back(parseScanSplitInfo(veloxCfg, fileList));
   }
 
   planConverter->setSplitInfos(std::move(splitInfos));
@@ -242,10 +239,9 @@ void parseLocalFileNodes(
 
 std::shared_ptr<const facebook::velox::core::PlanNode> VeloxPlanConverter::toVeloxPlan(
     const ::substrait::Plan& substraitPlan,
-    std::vector<::substrait::ReadRel_LocalFiles> localFiles,
-    const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads) {
+    std::vector<::substrait::ReadRel_LocalFiles> localFiles) {
   if (!validationMode_) {
-    parseLocalFileNodes(&substraitVeloxPlanConverter_, veloxCfg_, localFiles, splitPayloads);
+    parseLocalFileNodes(&substraitVeloxPlanConverter_, veloxCfg_, localFiles);
   }
 
   return substraitVeloxPlanConverter_.toVeloxPlan(substraitPlan);
diff --git a/cpp/velox/compute/VeloxPlanConverter.h b/cpp/velox/compute/VeloxPlanConverter.h
index fa1ec0f9e04..1aee2c36bd1 100644
--- a/cpp/velox/compute/VeloxPlanConverter.h
+++ b/cpp/velox/compute/VeloxPlanConverter.h
@@ -41,8 +41,7 @@ class VeloxPlanConverter {
 
   std::shared_ptr<const facebook::velox::core::PlanNode> toVeloxPlan(
       const ::substrait::Plan& substraitPlan,
-      std::vector<::substrait::ReadRel_LocalFiles> localFiles,
-      const std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>>& splitPayloads = {});
+      std::vector<::substrait::ReadRel_LocalFiles> localFiles);
 
   const std::unordered_map<facebook::velox::core::PlanNodeId, std::shared_ptr<SplitInfo>>& splitInfos() {
     return substraitVeloxPlanConverter_.splitInfos();
diff --git a/cpp/velox/compute/VeloxRuntime.cc b/cpp/velox/compute/VeloxRuntime.cc
index ca51e6f35ba..62e6820e9c3 100644
--- a/cpp/velox/compute/VeloxRuntime.cc
+++ b/cpp/velox/compute/VeloxRuntime.cc
@@ -363,13 +363,6 @@ void VeloxRuntime::parseSplitInfo(const uint8_t* data, int32_t size, int32_t spl
   localFiles_.push_back(localFile);
 }
 
-void VeloxRuntime::setSplitPayloads(int32_t splitIndex, std::vector<SplitPayloadBufferView> payloads) {
-  if (payloads.empty()) {
-    return;
-  }
-  splitPayloads_[splitIndex] = std::move(payloads);
-}
-
 void VeloxRuntime::getInfoAndIds(
     const std::unordered_map<velox::core::PlanNodeId, std::shared_ptr<SplitInfo>>& splitInfoMap,
     const std::unordered_set<velox::core::PlanNodeId>& leafPlanNodeIds,
@@ -405,7 +398,7 @@ std::string VeloxRuntime::planString(bool details, const std::unordered_map<std:
   auto veloxMemoryPool = gluten::defaultLeafVeloxMemoryPool();
   VeloxPlanConverter veloxPlanConverter(
       veloxMemoryPool.get(), veloxCfg_.get(), {}, connectorIds_, std::nullopt, std::nullopt, true);
-  auto veloxPlan = veloxPlanConverter.toVeloxPlan(substraitPlan_, localFiles_, splitPayloads_);
+  auto veloxPlan = veloxPlanConverter.toVeloxPlan(substraitPlan_, localFiles_);
   return veloxPlan->toString(details, true);
 }
 
@@ -427,7 +420,7 @@ std::shared_ptr<ResultIterator> VeloxRuntime::createResultIterator(
       connectorIds_,
       *localWriteFilesTempPath(),
       *localWriteFileName());
-  veloxPlan_ = veloxPlanConverter.toVeloxPlan(substraitPlan_, std::move(localFiles_), splitPayloads_);
+  veloxPlan_ = veloxPlanConverter.toVeloxPlan(substraitPlan_, std::move(localFiles_));
   LOG_IF(INFO, debugModeEnabled_ && taskInfo_.has_value())
       << "############### Velox plan for task " << taskInfo_.value() << " ###############" << std::endl
       << veloxPlan_->toString(true, true);
diff --git a/cpp/velox/compute/VeloxRuntime.h b/cpp/velox/compute/VeloxRuntime.h
index cfba81db92e..37f4da33439 100644
--- a/cpp/velox/compute/VeloxRuntime.h
+++ b/cpp/velox/compute/VeloxRuntime.h
@@ -56,8 +56,6 @@ class VeloxRuntime final : public Runtime {
 
   void parseSplitInfo(const uint8_t* data, int32_t size, int32_t splitIndex) override;
 
-  void setSplitPayloads(int32_t splitIndex, std::vector<SplitPayloadBufferView> payloads) override;
-
   VeloxMemoryManager* memoryManager() override;
 
   // FIXME This is not thread-safe?
@@ -161,7 +159,6 @@ class VeloxRuntime final : public Runtime {
   std::unique_ptr<folly::Executor> spillExecutor_;
   std::unique_ptr<folly::Executor> ioExecutor_;
   VeloxConnectorIds connectorIds_;
-  std::unordered_map<int32_t, std::vector<SplitPayloadBufferView>> splitPayloads_;
 
   std::unordered_map<int32_t, std::shared_ptr<VeloxColumnarBatch>> emptySchemaBatchLoopUp_;
 };
diff --git a/cpp/velox/compute/delta/DeltaSplitInfo.h b/cpp/velox/compute/delta/DeltaSplitInfo.h
index 623202412ce..c02e52f6b88 100644
--- a/cpp/velox/compute/delta/DeltaSplitInfo.h
+++ b/cpp/velox/compute/delta/DeltaSplitInfo.h
@@ -17,7 +17,9 @@
 
 #pragma once
 
+#include <memory>
 #include <optional>
+#include <string>
 #include <vector>
 
 #include "compute/delta/DeltaSplit.h"
@@ -26,11 +28,13 @@
 namespace gluten {
 
 struct DeltaSplitInfo : SplitInfo {
+  std::vector<std::shared_ptr<std::string>> deletionVectorPayloads;
   std::vector<std::optional<delta::DeltaDeletionVectorDescriptor>> deletionVectors;
   std::vector<delta::DeltaRowIndexFilterType> rowIndexFilterTypes;
 
   DeltaSplitInfo(const SplitInfo& splitInfo) : SplitInfo(splitInfo) {
     deletionVectors.reserve(splitInfo.paths.capacity());
+    deletionVectorPayloads.reserve(splitInfo.paths.capacity());
     rowIndexFilterTypes.reserve(splitInfo.paths.capacity());
 
     const auto previousFileCount = splitInfo.paths.empty() ? 0 : splitInfo.paths.size() - 1;
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
index cdcca05a8d2..d2513718411 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
@@ -26,27 +26,16 @@
 import org.apache.spark.sql.vectorized.ColumnarBatch;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
 
 public class ColumnarBatchOutIterator extends ClosableIterator<ColumnarBatch>
     implements RuntimeAware {
   private final Runtime runtime;
   private final long iterHandle;
 
-  // Keeps Java-owned direct buffers reachable while Velox holds raw native views into them.
-  @SuppressWarnings("unused")
-  private final ByteBuffer[][] retainedSplitPayloadBuffers;
-
   public ColumnarBatchOutIterator(Runtime runtime, long iterHandle) {
-    this(runtime, iterHandle, null);
-  }
-
-  public ColumnarBatchOutIterator(
-      Runtime runtime, long iterHandle, ByteBuffer[][] retainedSplitPayloadBuffers) {
     super();
     this.runtime = runtime;
     this.iterHandle = iterHandle;
-    this.retainedSplitPayloadBuffers = retainedSplitPayloadBuffers;
   }
 
   @Override
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
index 1d025ba11cf..6d2c90896b2 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
@@ -27,7 +27,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -79,30 +78,17 @@ public ColumnarBatchOutIterator createKernelWithBatchIterator(
       int partitionIndex,
       String spillDirPath)
       throws RuntimeException {
-    return createKernelWithBatchIterator(
-        wsPlan, splitInfo, null, iterList, partitionIndex, spillDirPath);
-  }
-
-  public ColumnarBatchOutIterator createKernelWithBatchIterator(
-      byte[] wsPlan,
-      byte[][] splitInfo,
-      ByteBuffer[][] splitPayloads,
-      ColumnarBatchInIterator[] iterList,
-      int partitionIndex,
-      String spillDirPath)
-      throws RuntimeException {
     final long itrHandle =
         jniWrapper.nativeCreateKernelWithIterator(
             wsPlan,
             splitInfo,
-            splitPayloads,
             iterList,
             TaskContext.get().stageId(),
             partitionIndex, // TaskContext.getPartitionId(),
             TaskContext.get().taskAttemptId(),
             DebugUtil.isDumpingEnabledForTask(),
             spillDirPath);
-    final ColumnarBatchOutIterator out = createOutIterator(runtime, itrHandle, splitPayloads);
+    final ColumnarBatchOutIterator out = createOutIterator(runtime, itrHandle);
     runtime
         .memoryManager()
         .addSpiller(
@@ -124,8 +110,7 @@ public long spill(MemoryTarget self, Spiller.Phase phase, long size) {
     return out;
   }
 
-  private ColumnarBatchOutIterator createOutIterator(
-      Runtime runtime, long itrHandle, ByteBuffer[][] retainedSplitPayloadBuffers) {
-    return new ColumnarBatchOutIterator(runtime, itrHandle, retainedSplitPayloadBuffers);
+  private ColumnarBatchOutIterator createOutIterator(Runtime runtime, long itrHandle) {
+    return new ColumnarBatchOutIterator(runtime, itrHandle);
   }
 }
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
index c68aab4d757..a8082906798 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/PlanEvaluatorJniWrapper.java
@@ -20,8 +20,6 @@
 import org.apache.gluten.runtime.RuntimeAware;
 import org.apache.gluten.validate.NativePlanValidationInfo;
 
-import java.nio.ByteBuffer;
-
 /**
  * This class is implemented in JNI. This provides the Java interface to invoke functions in JNI.
  * This file is used to generate the .h files required for jni. Avoid all external dependencies in
@@ -74,7 +72,6 @@ public long rtHandle() {
   public native long nativeCreateKernelWithIterator(
       byte[] wsPlan,
       byte[][] splitInfo,
-      ByteBuffer[][] splitPayloads,
       ColumnarBatchInIterator[] batchItr,
       int stageId,
       int partitionId,
diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
index 4fcca1fde17..dd34838261c 100644
--- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
+++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/rel/DeltaLocalFilesNode.java
@@ -16,6 +16,7 @@
  */
 package org.apache.gluten.substrait.rel;
 
+import com.google.protobuf.ByteString;
 import io.substrait.proto.ReadRel;
 
 import java.io.Serializable;
@@ -67,7 +68,7 @@ protected void processFileBuilder(ReadRel.LocalFiles.FileOrFiles.Builder fileBui
     if (options.hasDeletionVector()) {
       deltaBuilder
           .setDeletionVectorCardinality(options.deletionVectorCardinality())
-          .setDeletionVectorPayloadIndex(options.deletionVectorPayloadIndex());
+          .setSerializedDeletionVector(ByteString.copyFrom(options.serializedDeletionVector()));
     }
 
     fileBuilder.setDelta(deltaBuilder.build());
@@ -98,17 +99,18 @@ public static class DeltaFileReadOptions implements Serializable {
     private final RowIndexFilterType rowIndexFilterType;
     private final boolean hasDeletionVector;
     private final long deletionVectorCardinality;
-    private final int deletionVectorPayloadIndex;
+    private final byte[] serializedDeletionVector;
 
     public DeltaFileReadOptions(
         RowIndexFilterType rowIndexFilterType,
         boolean hasDeletionVector,
         long deletionVectorCardinality,
-        int deletionVectorPayloadIndex) {
+        byte[] serializedDeletionVector) {
       this.rowIndexFilterType = rowIndexFilterType;
       this.hasDeletionVector = hasDeletionVector;
       this.deletionVectorCardinality = deletionVectorCardinality;
-      this.deletionVectorPayloadIndex = deletionVectorPayloadIndex;
+      this.serializedDeletionVector =
+          serializedDeletionVector == null ? new byte[0] : serializedDeletionVector;
     }
 
     public RowIndexFilterType rowIndexFilterType() {
@@ -123,8 +125,8 @@ public long deletionVectorCardinality() {
       return deletionVectorCardinality;
     }
 
-    public int deletionVectorPayloadIndex() {
-      return deletionVectorPayloadIndex;
+    public byte[] serializedDeletionVector() {
+      return serializedDeletionVector;
     }
   }
 }
diff --git a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
index a74f41a21eb..02c7f4cc5c6 100644
--- a/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
+++ b/gluten-substrait/src/main/resources/substrait/proto/substrait/algebra.proto
@@ -206,7 +206,7 @@ message ReadRel {
         RowIndexFilterType row_index_filter_type = 1;
         bool has_deletion_vector = 2;
         uint64 deletion_vector_cardinality = 3;
-        uint32 deletion_vector_payload_index = 4;
+        bytes serialized_deletion_vector = 4;
       }
 
       // File reading options

From 6291be97169e4847670efc4a427eb9c75cd0f23c Mon Sep 17 00:00:00 2001
From: Mohammad Linjawi <Mohammad.Linjawi@ibm.com>
Date: Thu, 28 May 2026 18:08:38 +0300
Subject: [PATCH 9/9] [VL][Delta] Limit DV scan preprocessing scope

---
 .../spark/sql/delta/PreprocessTableWithDVs.scala   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
index 4c353c4a575..56bae355005 100644
--- a/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
+++ b/backends-velox/src-delta33/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.types.StructType
  */
 trait PreprocessTableWithDVs extends SubqueryTransformerHelper {
   def preprocessTablesWithDVs(plan: LogicalPlan): LogicalPlan = {
-    transformWithSubqueries(plan) { case ScanWithDeletionVectors(dvScan) => dvScan }
+    plan.transformDown { case ScanWithDeletionVectors(dvScan) => dvScan }
   }
 }
 
@@ -78,9 +78,9 @@ object ScanWithDeletionVectors {
       return None
     }
 
-    require(
-      !index.isInstanceOf[TahoeLogFileIndex],
-      "Cannot work with a non-pinned table snapshot of the TahoeFileIndex")
+    if (index.isInstanceOf[TahoeLogFileIndex]) {
+      return None
+    }
 
     if (fileFormat.hasTablePath) {
       return None
@@ -109,9 +109,9 @@ object ScanWithDeletionVectors {
       return None
     }
 
-    require(
-      !index.isInstanceOf[TahoeLogFileIndex],
-      "Cannot work with a non-pinned table snapshot of the TahoeFileIndex")
+    if (index.isInstanceOf[TahoeLogFileIndex]) {
+      return None
+    }
 
     if (fileFormat.hasTablePath) {
       return None