diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/FileSourceOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/FileSourceOptions.scala
index 22f84a1cad63d..0747a8045e7d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/FileSourceOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/FileSourceOptions.scala
@@ -45,6 +45,14 @@ class FileSourceOptions(
 
   val ignoreMissingFiles: Boolean = parameters.get(IGNORE_MISSING_FILES).map(_.toBoolean)
     .getOrElse(SQLConf.get.ignoreMissingFiles)
+
+  /**
+   * Whether the data source may read tar archives (.tar/.tar.gz/.tgz) by streaming their entries.
+   * Gated by [[SQLConf.ARCHIVE_FORMAT_READER_ENABLED]] and resolved at construction (on the driver,
+   * where SQLConf is instantiated) so the value is stable once the options are serialized to
+   * executors. Only the CSV data source currently honors this.
+   */
+  val archiveFormatEnabled: Boolean = SQLConf.get.getConf(SQLConf.ARCHIVE_FORMAT_READER_ENABLED)
 }
 
 object FileSourceOptions {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 0aed28e92558f..7971795c29bab 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2695,6 +2695,16 @@ object SQLConf {
     .bytesConf(ByteUnit.BYTE)
     .createWithDefaultString("128MB") // parquet.block.size
 
+  val ARCHIVE_FORMAT_READER_ENABLED = buildConf("spark.sql.files.archive.reader.enabled")
+    .doc("When true, the CSV data source can read tar archives (.tar, .tar.gz, .tgz): each " +
+      "archive is read as a single split and its entries are streamed through the CSV parser " +
+      "(never unpacked to disk), as if the entries were separate CSV files. Only the CSV data " +
+      "source supports reading archives.")
+    .version("5.0.0")
+    .withBindingPolicy(ConfigBindingPolicy.SESSION)
+    .booleanConf
+    .createWithDefault(false)
+
   val FILES_OPEN_COST_IN_BYTES = buildConf("spark.sql.files.openCostInBytes")
     .internal()
     .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in" +
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 1261200a9173c..e6673c9069f42 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -293,6 +293,10 @@
       <artifactId>bcprov-jdk18on</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcpkix-jdk18on</artifactId>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ArchiveReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ArchiveReader.scala
new file mode 100644
index 0000000000000..e9737c87e1a2c
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ArchiveReader.scala
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.{Closeable, InputStream}
+import java.util.Locale
+import java.util.zip.GZIPInputStream
+
+import scala.util.control.NonFatal
+
+import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
+import org.apache.commons.io.ByteOrderMark
+import org.apache.commons.io.input.{BOMInputStream, CloseShieldInputStream}
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.util.LineReader
+
+import org.apache.spark.TaskContext
+import org.apache.spark.util.HadoopFSUtils
+
+/**
+ * Streaming reader for a single archive file. The archive is opened once and decompressed/unpacked
+ * as a stream -- entries are never materialized to local disk. [[readEntries]] hands each entry's
+ * bytes to a caller-supplied parse function as a bounded [[InputStream]] and concatenates the
+ * per-entry results into a single iterator, advancing to the next entry only once the current one
+ * is fully consumed. At most one entry is in flight at a time, so memory stays bounded regardless
+ * of archive size.
+ *
+ * This is format-agnostic: a data source whose per-file reader can consume an `InputStream` wires
+ * up archive support by calling [[readEntries]] from its read/inference paths and supplying a
+ * `parseEntry` that turns one entry stream into rows (or tokens). Formats that need random access
+ * within a file (e.g. Parquet/ORC footers) cannot use this streaming path.
+ *
+ * A concrete subclass implements [[readEntries]] for a specific archive format. Obtain the reader
+ * for a path via `ArchiveReader(path)`, which selects the implementation by file extension; new
+ * archive formats are added by writing another subclass rather than modifying existing ones.
+ */
+abstract class ArchiveReader(path: Path) {
+
+  /**
+   * Streams the archive entry by entry, applying `parseEntry` to each non-skipped entry's
+   * `(name, stream)` and concatenating the results into a single iterator. The next entry is opened
+   * only once the current entry's iterator is exhausted, so nothing is buffered to disk and at most
+   * one entry's bytes are read at a time. The archive stream is closed when the returned iterator
+   * is exhausted, when [[Closeable.close]] is called on it, and (defensively) on task completion.
+   */
+  def readEntries[T](
+      conf: Configuration)(
+      parseEntry: (String, InputStream) => Iterator[T]): Iterator[T]
+}
+
+object ArchiveReader {
+
+  /**
+   * Whether `path` names an archive this reader can stream. Dispatched purely on the file
+   * extension -- `.tar`, `.tar.gz`, or `.tgz` -- since the bytes are not inspected here.
+   */
+  def isArchivePath(path: Path): Boolean = {
+    val lowered = path.getName.toLowerCase(Locale.ROOT)
+    Seq(".tar", ".tar.gz", ".tgz").exists(ext => lowered.endsWith(ext))
+  }
+
+  /**
+   * Returns the [[ArchiveReader]] for `path`. Tar is the only implementation so far, so the path
+   * is not inspected here; callers should check [[isArchivePath]] first. New formats add a case.
+   */
+  def apply(path: Path): ArchiveReader = new TarArchiveReader(path)
+
+  /**
+   * Splits one already-decompressed archive entry's bytes into lines, for any line-oriented
+   * format. The entry stream is not closed here (the reader owns the underlying stream).
+   *
+   * @param in bytes of one archive entry.
+   * @param lineSeparatorInRead the explicit read line separator, or `None` to detect line breaks.
+   * @return an iterator over the entry's lines, without the trailing separator. Every element is
+   *         the same reused [[Text]] instance, so copy or decode a line before advancing.
+   */
+  def lineIterator(in: InputStream, lineSeparatorInRead: Option[Array[Byte]]): Iterator[Text] = {
+    // A leading byte-order mark is stripped (LineReader does not strip it on its own) so the lines
+    // match the non-archive read path.
+    val bomInputStream = BOMInputStream.builder()
+      .setInputStream(in)
+      .setByteOrderMarks(
+        ByteOrderMark.UTF_8,
+        ByteOrderMark.UTF_16LE,
+        ByteOrderMark.UTF_16BE,
+        ByteOrderMark.UTF_32LE,
+        ByteOrderMark.UTF_32BE)
+      .setInclude(false)
+      .get()
+    val reader = lineSeparatorInRead match {
+      case Some(sep) => new LineReader(bomInputStream, sep)
+      case _ => new LineReader(bomInputStream)
+    }
+    new Iterator[Text] {
+      private val text = new Text()
+      private var finished = false
+      private var hasValue = false
+
+      override def hasNext: Boolean = {
+        if (!finished && !hasValue) {
+          finished = reader.readLine(text) == 0
+          hasValue = !finished
+        }
+        !finished
+      }
+
+      override def next(): Text = {
+        if (!hasNext) throw new NoSuchElementException
+        hasValue = false
+        text
+      }
+    }
+  }
+}
+
+/**
+ * [[ArchiveReader]] for tar archives: plain `.tar`, gzipped `.tar.gz`, and `.tgz`.
+ *
+ * Gzip handling: Hadoop's `CompressionCodecFactory` matches the trailing `.gz` extension and
+ * auto-decompresses `.tar.gz` via `CodecStreams`, so we just wrap that stream in
+ * `TarArchiveInputStream`. `.tgz` is not a registered Hadoop codec extension, so the gzip layer is
+ * unwrapped explicitly here.
+ */
+class TarArchiveReader(path: Path) extends ArchiveReader(path) {
+
+  // Paths Hadoop's codec factory won't auto-decompress: we apply the gzip layer here.
+  private def needsExplicitGunzip: Boolean =
+    path.getName.toLowerCase(Locale.ROOT).endsWith(".tgz")
+
+  /**
+   * Whether an entry is not a real data file and must be skipped: a directory, or a name Spark's
+   * own file listing would filter out. Reusing [[HadoopFSUtils.shouldFilterOutPathName]] (the
+   * `InMemoryFileIndex` filter) keeps archive reads in parity with reading the same entries as
+   * loose files: `.`-prefixed sidecars (macOS `._x`, `.DS_Store`) and `_`-prefixed markers
+   * (`_SUCCESS`, `_committed_*`) are skipped, while data files are kept.
+   */
+  private def shouldSkipEntry(entry: TarArchiveEntry): Boolean = {
+    // Directories carry no data. For anything else only the last path segment is tested, so an
+    // entry nested in sub-directories is judged by its own file name.
+    def leafName: String = entry.getName.substring(entry.getName.lastIndexOf('/') + 1)
+    entry.isDirectory || HadoopFSUtils.shouldFilterOutPathName(leafName)
+  }
+
+  /** Opens the tar stream (decompressing `.tar.gz`/`.tgz`); closes the file if setup fails. */
+  private def openTarStream(conf: Configuration): TarArchiveInputStream = {
+    val base = CodecStreams.createInputStreamWithCloseResource(conf, path)
+    try new TarArchiveInputStream(if (needsExplicitGunzip) new GZIPInputStream(base) else base)
+    catch { case NonFatal(e) => try base.close() catch { case NonFatal(_) => }; throw e }
+  }
+
+  /**
+   * Wraps the shared tar stream as a view over exactly the current entry's bytes
+   * (`TarArchiveInputStream.read` returns -1 at the entry boundary). [[CloseShieldInputStream]]
+   * ignores `close()`, so a parser closing its input does not close the underlying archive; any
+   * unread remainder of an entry is skipped by `getNextEntry()` when advancing.
+   */
+  private def entryStream(tar: TarArchiveInputStream): InputStream =
+    CloseShieldInputStream.wrap(tar)
+
+  override def readEntries[T](
+      conf: Configuration)(
+      parseEntry: (String, InputStream) => Iterator[T]): Iterator[T] = {
+    val tar = openTarStream(conf)
+    var closed = false
+
+    def cleanup(): Unit = if (!closed) {
+      closed = true
+      try tar.close() catch { case NonFatal(_) => }
+    }
+
+    Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit](_ => cleanup()))
+
+    new Iterator[T] with Closeable {
+      private var currentIter: Iterator[T] = Iterator.empty
+      private var done = false
+
+      // Best-effort release of the current entry's iterator when the parser returned a Closeable.
+      private def closeCurrent(): Unit = currentIter match {
+        case c: Closeable => try c.close() catch { case NonFatal(_) => }
+        case _ =>
+      }
+
+      // Move to the next entry whose iterator has elements, or mark the stream done once entries
+      // run out. This is driven by `hasNext` rather than run eagerly after `next`, so a parser
+      // that reuses one mutable row cannot overwrite a row the caller has not copied yet. On
+      // failure the archive is closed at once: there may be no task listener or caller to do it.
+      private def advance(): Unit = try {
+        while (!done && !currentIter.hasNext) {
+          closeCurrent()
+          var entry = tar.getNextEntry
+          while (entry != null && shouldSkipEntry(entry)) entry = tar.getNextEntry
+          if (entry == null) {
+            done = true
+            cleanup()
+          } else {
+            currentIter = parseEntry(entry.getName, entryStream(tar))
+          }
+        }
+      } catch { case NonFatal(e) => cleanup(); throw e }
+
+      // Open the first entry eagerly so construction reflects the archive's first entry.
+      advance()
+
+      override def hasNext: Boolean = {
+        advance()
+        !done && currentIter.hasNext
+      }
+
+      override def next(): T = {
+        if (!hasNext) throw new NoSuchElementException
+        currentIter.next()
+      }
+
+      override def close(): Unit = {
+        done = true
+        closeCurrent()
+        currentIter = Iterator.empty
+        cleanup()
+      }
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
index 596edc8beaa34..76a34630a4d5d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.datasources.csv
 
-import java.io.{FileNotFoundException, IOException}
+import java.io.{FileNotFoundException, InputStream, IOException}
 import java.nio.charset.{Charset, StandardCharsets}
 
 import scala.util.control.NonFatal
@@ -71,7 +71,14 @@ abstract class CSVDataSource extends Serializable {
     parsedOptions.singleVariantColumn match {
       case Some(columnName) => Some(StructType(Array(StructField(columnName, VariantType))))
       case None =>
-        if (inputPaths.nonEmpty) {
+        if (parsedOptions.archiveFormatEnabled &&
+            inputPaths.exists(f => ArchiveReader.isArchivePath(f.getPath))) {
+          // Schema inference is not yet supported for tar archives. Returning None makes Spark
+          // raise its standard "Unable to infer schema ... It must be specified manually" error
+          // (UNABLE_TO_INFER_SCHEMA), so reading an archive requires an explicit `.schema(...)`.
+          // Inferring a schema by streaming archive entries is planned as a follow-up.
+          None
+        } else if (inputPaths.nonEmpty) {
           Some(infer(sparkSession, inputPaths, parsedOptions))
         } else {
           None
@@ -83,6 +90,46 @@ abstract class CSVDataSource extends Serializable {
       sparkSession: SparkSession,
       inputPaths: Seq[FileStatus],
       parsedOptions: CSVOptions): StructType
+
+  /**
+   * Streams a tar archive (`.tar`/`.tar.gz`/`.tgz`) entry by entry through the CSV parser without
+   * unpacking it to disk. The whole archive is a single split (see `CSVFileFormat.isSplitable`); a
+   * fresh header checker and parser are built per entry so each entry is parsed exactly like a
+   * standalone CSV file -- its header, if any, validated and dropped independently. The
+   * mode-specific implementation turns one entry into rows via `parseStream` / `parseIterator`.
+   *
+   * @param getParser builds a fresh [[UnivocityParser]].
+   * @param getHeaderChecker builds a fresh [[CSVHeaderChecker]] for `(isStartOfFile, source)`.
+   */
+  def readArchive(
+      conf: Configuration,
+      file: PartitionedFile,
+      getParser: () => UnivocityParser,
+      getHeaderChecker: (Boolean, String) => CSVHeaderChecker,
+      requiredSchema: StructType): Iterator[InternalRow]
+
+  /**
+   * Common engine behind both [[readArchive]] implementations. For each non-skipped archive entry
+   * it creates a new parser and a new header checker (always flagged as start-of-file), then lets
+   * `parseEntry` turn `(parser, headerChecker, stream)` into rows. Errors name the entry through
+   * the checker's `source`, formatted as `CSV archive entry: <archive>!/<entryName>`.
+   */
+  protected def streamArchiveEntries(
+      conf: Configuration,
+      file: PartitionedFile,
+      getParser: () => UnivocityParser,
+      getHeaderChecker: (Boolean, String) => CSVHeaderChecker)(
+      parseEntry: (UnivocityParser, CSVHeaderChecker, InputStream) => Iterator[InternalRow])
+    : Iterator[InternalRow] = {
+    ArchiveReader(file.toPath).readEntries(conf) { (entryName, entryBytes) =>
+      val source = s"CSV archive entry: ${file.urlEncodedPath}!/$entryName"
+      val checker = getHeaderChecker(true, source)
+      val entryParser = getParser()
+      checker.setHeaderForSingleVariantColumn =
+        CSVDataSource.setHeaderForSingleVariantColumn(conf, file, entryParser)
+      parseEntry(entryParser, checker, entryBytes)
+    }
+  }
 }
 
 object CSVDataSource extends Logging {
@@ -144,6 +191,36 @@ object TextInputCSVDataSource extends CSVDataSource {
     UnivocityParser.parseIterator(lines, parser, headerChecker, requiredSchema)
   }
 
+  override def readArchive(
+      conf: Configuration,
+      file: PartitionedFile,
+      getParser: () => UnivocityParser,
+      getHeaderChecker: (Boolean, String) => CSVHeaderChecker,
+      requiredSchema: StructType): Iterator[InternalRow] =
+    // Line mode: each entry is cut into lines and parsed as if it were a standalone CSV file,
+    // with the parser and header checker that `streamArchiveEntries` creates for that entry.
+    streamArchiveEntries(conf, file, getParser, getHeaderChecker) { (entryParser, checker, in) =>
+      val lines = entryLines(in, entryParser.options)
+      UnivocityParser.parseIterator(lines, entryParser, checker, requiredSchema)
+    }
+
+  /**
+   * Decodes one archive entry's bytes into the same CSV line strings the non-archive [[readFile]]
+   * path feeds to the parser: [[ArchiveReader.lineIterator]] splits the entry into lines (honoring
+   * a custom line separator) and each line is decoded with the configured charset. Like `readFile`,
+   * the decoded lines are fed to `UnivocityParser.parseIterator` without a re-appended terminator.
+   *
+   * @param in bytes of one already-decompressed archive entry; not closed here (the archive owns
+   *           the underlying stream).
+   * @param options CSV options supplying the read line separator and charset.
+   * @return an iterator over the entry's lines.
+   */
+  private def entryLines(in: InputStream, options: CSVOptions): Iterator[String] = {
+    // Decode each line right away: the line source reuses a single Text across calls.
+    val rawLines = ArchiveReader.lineIterator(in, options.lineSeparatorInRead)
+    rawLines.map(raw => new String(raw.getBytes, 0, raw.getLength, options.charset))
+  }
+
   override def infer(
       sparkSession: SparkSession,
       inputPaths: Seq[FileStatus],
@@ -227,6 +304,18 @@ object MultiLineCSVDataSource extends CSVDataSource with Logging {
       requiredSchema)
   }
 
+  override def readArchive(
+      conf: Configuration,
+      file: PartitionedFile,
+      getParser: () => UnivocityParser,
+      getHeaderChecker: (Boolean, String) => CSVHeaderChecker,
+      requiredSchema: StructType): Iterator[InternalRow] =
+    // Stream each tar entry whole through the multi-line parser (a fresh parser/header checker is
+    // built per entry).
+    streamArchiveEntries(conf, file, getParser, getHeaderChecker) { (parser, headerChecker, in) =>
+      UnivocityParser.parseStream(in, parser, headerChecker, requiredSchema)
+    }
+
   override def infer(
       sparkSession: SparkSession,
       inputPaths: Seq[FileStatus],
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala
index 77a0c53ae4699..ab570de8d998f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala
@@ -44,6 +44,11 @@ case class CSVFileFormat() extends TextBasedFileFormat with DataSourceRegister {
       options: Map[String, String],
       path: Path): Boolean = {
     val parsedOptions = getCsvOptions(sparkSession, options)
+    // A tar archive is decompressed/unpacked as a sequential stream, so it must be read as a
+    // single split rather than carved into byte ranges.
+    if (parsedOptions.archiveFormatEnabled && ArchiveReader.isArchivePath(path)) {
+      return false
+    }
     CSVDataSource(parsedOptions).isSplitable && super.isSplitable(sparkSession, options, path)
   }
 
@@ -119,24 +124,26 @@ case class CSVFileFormat() extends TextBasedFileFormat with DataSourceRegister {
         dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord))
       val actualRequiredSchema = StructType(
         requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord))
-      val parser = new UnivocityParser(
-        actualDataSchema,
-        actualRequiredSchema,
-        parsedOptions,
-        actualFilters)
       // Use column pruning when specified by Catalyst, except when one or more columns have
       // existence default value(s), since in that case we instruct the CSV parser to disable column
       // pruning and instead read each entire row in order to correctly assign the default value(s).
       val schema = if (isColumnPruningEnabled) actualRequiredSchema else actualDataSchema
-      val isStartOfFile = file.start == 0
-      val headerChecker = new CSVHeaderChecker(
-        schema, parsedOptions, source = s"CSV file: ${file.urlEncodedPath}", isStartOfFile)
-      CSVDataSource(parsedOptions).readFile(
-        conf,
-        file,
-        parser,
-        headerChecker,
-        requiredSchema)
+
+      def newParser(): UnivocityParser =
+        new UnivocityParser(actualDataSchema, actualRequiredSchema, parsedOptions, actualFilters)
+      def getHeaderChecker(isStartOfFile: Boolean, source: String): CSVHeaderChecker =
+        new CSVHeaderChecker(schema, parsedOptions, source, isStartOfFile)
+
+      // A tar archive (always a single split, see `isSplitable`) is streamed entry by entry when
+      // archive reads are enabled; otherwise the file is parsed directly.
+      if (parsedOptions.archiveFormatEnabled && ArchiveReader.isArchivePath(file.toPath)) {
+        CSVDataSource(parsedOptions).readArchive(
+          conf, file, () => newParser(), getHeaderChecker, requiredSchema)
+      } else {
+        val parser = newParser()
+        val headerChecker = getHeaderChecker(file.start == 0, s"CSV file: ${file.urlEncodedPath}")
+        CSVDataSource(parsedOptions).readFile(conf, file, parser, headerChecker, requiredSchema)
+      }
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReadSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReadSuiteBase.scala
new file mode 100644
index 0000000000000..9d0300ec41177
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReadSuiteBase.scala
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.File
+import java.nio.file.Files
+
+import org.apache.spark.{SparkConf, SparkException}
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.util.Utils
+
+/**
+ * Format- and archive-agnostic end-to-end tests for reading archives of data files through the
+ * streaming [[ArchiveReader]] path. Entries are streamed (never unpacked to disk), and the central
+ * contract verified throughout is parity with reading the same files from a directory.
+ *
+ * A concrete suite binds the abstract hooks below by mixing in a file-format trait (e.g.
+ * [[org.apache.spark.sql.execution.datasources.CSVArchiveReadBase]]) and an archive-format trait
+ * (e.g. [[TarArchiveReadBase]]), so the same tests run for every (file format, archive format)
+ * pair we support. New formats are added by writing the per-format trait once, not by duplicating
+ * these tests:
+ * {{{
+ *   class CSVHeaderTarArchiveReadSuite
+ *     extends ArchiveReadSuiteBase with CSVHeaderArchiveReadBase with TarArchiveReadBase
+ * }}}
+ */
+trait ArchiveReadSuiteBase extends QueryTest with SharedSparkSession {
+
+  override def sparkConf: SparkConf =
+    super.sparkConf.set(SQLConf.ARCHIVE_FORMAT_READER_ENABLED.key, "true")
+
+  import testImplicits._
+
+  // ----- file-format hooks (bound by e.g. CSVArchiveReadBase) ----------------
+
+  /** The `DataFrameReader`/`DataFrameWriter` format name, e.g. "csv". */
+  protected def format: String
+
+  /** Extension of a single data file of [[format]] inside an archive, e.g. "csv". */
+  protected def fileExtension: String
+
+  /** Read options applied to every read (e.g. CSV `header`). */
+  protected def readOptions: Map[String, String]
+
+  /** Schema used to read the sample data produced by [[sampleDf]]. */
+  protected def readSchema: String
+
+  /** Encodes `df` as the bytes of a single data file of [[format]], honoring `writeOptions`. */
+  protected def encodeFile(df: DataFrame, writeOptions: Map[String, String]): Array[Byte]
+
+  /** Encodes `df` as a single data file using only the format's default write options. */
+  protected final def encodeFile(df: DataFrame): Array[Byte] = encodeFile(df, Map.empty)
+
+  // ----- archive-format hooks (bound by e.g. TarArchiveReadBase) -------------
+
+  /** Archive extensions to exercise, e.g. Seq("tar", "tar.gz", "tgz"). The head is the default. */
+  protected def archiveExtensions: Seq[String]
+
+  /** Writes `entries` (name -> bytes) into the archive at `dest`; compression follows the ext. */
+  protected def writeArchive(dest: File, entries: Seq[(String, Array[Byte])]): Unit
+
+  /** Writes bytes that are not a readable archive at `dest` (of [[corruptArchiveExtension]]). */
+  protected def writeCorruptArchive(dest: File): Unit
+
+  /** An archive extension whose reader fails on corrupt bytes (used by the corrupt-file tests). */
+  protected def corruptArchiveExtension: String
+
+  // ----- helpers -------------------------------------------------------------
+
+  /** Sample two-column data; the column names line up with [[readSchema]]. */
+  protected def sampleDf(rows: (Int, String)*): DataFrame = rows.toDF("id", "name")
+
+  /** Entry file name for the i-th data file in an archive. */
+  protected def entryName(i: Int): String = s"part-$i.$fileExtension"
+
+  /** Provides an archive-extensioned path inside a fresh temp dir to `f`. */
+  protected def withArchiveFile(
+      extension: String = archiveExtensions.head)(f: File => Unit): Unit = {
+    val dir = Utils.createTempDir(namePrefix = "archive-test")
+    try f(new File(dir, s"archive.$extension")) finally Utils.deleteRecursively(dir)
+  }
+
+  /** Reads `path` with the format, [[readOptions]] (plus `extraOptions`), and `schema`. */
+  protected def read(
+      path: String,
+      extraOptions: Map[String, String] = Map.empty,
+      schema: String = readSchema): DataFrame =
+    spark.read.format(format).options(readOptions ++ extraOptions).schema(schema).load(path)
+
+  /**
+   * Writes `entries` both into an archive and as loose files in a directory, then asserts the
+   * archive read produces exactly the same rows as the directory read.
+   */
+  protected def assertArchiveMatchesDir(
+      entries: Seq[(String, Array[Byte])],
+      extraOptions: Map[String, String] = Map.empty,
+      schema: String = readSchema): Unit = {
+    withArchiveFile() { archive =>
+      writeArchive(archive, entries)
+      val fromArchive = read(archive.getCanonicalPath, extraOptions, schema)
+      withTempDir { dir =>
+        entries.foreach { case (name, b) => Files.write(new File(dir, name).toPath, b) }
+        checkAnswer(fromArchive, read(dir.getCanonicalPath, extraOptions, schema).collect().toSeq)
+      }
+    }
+  }
+
+  // ----- tests ---------------------------------------------------------------
+
+  test("read an archive of multiple entries matches the union of the inputs") {
+    archiveExtensions.foreach { ext =>
+      withArchiveFile(ext) { archive =>
+        val parts = Seq(
+          sampleDf((1, "Alice"), (2, "Bob")),
+          sampleDf((3, "Carol")),
+          sampleDf((4, "Dan"), (5, "Eve")))
+        writeArchive(
+          archive, parts.zipWithIndex.map { case (p, i) => entryName(i) -> encodeFile(p) })
+        checkAnswer(read(archive.getCanonicalPath), parts.reduce(_ union _))
+      }
+    }
+  }
+
+  test("archive entries parse like a directory of the same files") {
+    val parts = Seq(sampleDf((1, "Alice"), (2, "Bob")), sampleDf((3, "Carol")))
+    assertArchiveMatchesDir(parts.zipWithIndex.map { case (p, i) => entryName(i) -> encodeFile(p) })
+  }
+
+  test("column pruning selects a subset of columns") {
+    withArchiveFile() { archive =>
+      val data = sampleDf((1, "Alice"), (2, "Bob"))
+      writeArchive(archive, Seq(entryName(0) -> encodeFile(data)))
+      checkAnswer(read(archive.getCanonicalPath).select("name"), Seq(Row("Alice"), Row("Bob")))
+    }
+  }
+
+  test("multiple entries and multiple loose files under a partitioned dir, plus an empty archive") {
+    withTempDir { rootDir =>
+      val partitionDir = new File(rootDir, "dt=2024-01-01")
+      assert(partitionDir.mkdirs())
+
+      val inArchive = Seq(sampleDf((1, "in-archive-a")), sampleDf((2, "in-archive-b")))
+      val loose = Seq(sampleDf((3, "loose-a")), sampleDf((4, "loose-b")))
+      val ext = archiveExtensions.head
+
+      writeArchive(
+        new File(partitionDir, s"data.$ext"),
+        inArchive.zipWithIndex.map { case (p, i) => entryName(i) -> encodeFile(p) })
+      // An empty archive in the same directory must contribute no rows.
+      writeArchive(new File(partitionDir, s"empty.$ext"), Seq.empty)
+      loose.zipWithIndex.foreach { case (p, i) =>
+        Files.write(new File(partitionDir, s"loose-$i.$fileExtension").toPath, encodeFile(p))
+      }
+
+      val expected = (inArchive ++ loose).reduce(_ union _)
+      checkAnswer(read(rootDir.getCanonicalPath).select("id", "name"), expected)
+    }
+  }
+
+  test("a directory of only empty archives yields no rows") {
+    withTempDir { dir =>
+      archiveExtensions.foreach { ext =>
+        writeArchive(new File(dir, s"empty-${ext.replace('.', '_')}.$ext"), Seq.empty)
+      }
+      checkAnswer(read(dir.getCanonicalPath), Seq.empty[Row])
+    }
+  }
+
+  test("an empty archive yields no rows") {
+    withArchiveFile() { archive =>
+      writeArchive(archive, Seq.empty)
+      checkAnswer(read(archive.getCanonicalPath), Seq.empty[Row])
+    }
+  }
+
+  test("an archive always yields a single partition regardless of size") {
+    withArchiveFile() { archive =>
+      val big = sampleDf((1 to 1000).map(i => (i, s"value-$i")): _*)
+      writeArchive(archive, (0 until 4).map(i => entryName(i) -> encodeFile(big)))
+      withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> "1024") {
+        val readDf = read(archive.getCanonicalPath)
+        assert(readDf.rdd.getNumPartitions == 1,
+          s"archive should be a single partition; got ${readDf.rdd.getNumPartitions}")
+        assert(readDf.count() == 4L * big.count())
+      }
+    }
+  }
+
+  Seq(true, false).foreach { ignoreCorrupt =>
+    test(s"ignoreCorruptFiles=$ignoreCorrupt controls whether a corrupt archive is skipped") {
+      withArchiveFile(corruptArchiveExtension) { archive =>
+        writeCorruptArchive(archive)
+        withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> ignoreCorrupt.toString) {
+          if (ignoreCorrupt) {
+            checkAnswer(read(archive.getCanonicalPath), Seq.empty[Row])
+          } else {
+            intercept[SparkException](read(archive.getCanonicalPath).collect())
+          }
+        }
+      }
+    }
+  }
+
+  test("a corrupt archive among good ones is skipped whole, not per entry (ignoreCorruptFiles)") {
+    withTempDir { dir =>
+      val good = sampleDf((1, "Alice"), (2, "Bob"))
+      writeArchive(new File(dir, s"good.${archiveExtensions.head}"),
+        Seq(entryName(0) -> encodeFile(good)))
+      writeCorruptArchive(new File(dir, s"bad.$corruptArchiveExtension"))
+      // A tar is one non-splittable unit, so corrupt handling is archive-granular: the corrupt
+      // archive is skipped in its entirety while the good archive's rows are still returned.
+      withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "true") {
+        checkAnswer(read(dir.getCanonicalPath), good)
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReaderSuite.scala
new file mode 100644
index 0000000000000..48482aa1f5b3e
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ArchiveReaderSuite.scala
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.{ByteArrayOutputStream, Closeable, File, FileOutputStream, InputStream, OutputStream}
+import java.nio.charset.StandardCharsets
+import java.util.Properties
+import java.util.zip.GZIPOutputStream
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveOutputStream}
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.{SparkFunSuite, TaskContext, TaskContextImpl}
+
+/**
+ * Unit tests for the streaming [[ArchiveReader]] core: `isArchivePath` dispatch and `readEntries`
+ * (entry ordering, gzip handling, dir/dotfile skipping, lazy advance, the non-closing entry
+ * stream, and cleanup). Nothing here touches local disk -- entries are consumed as streams.
+ */
+class ArchiveReaderSuite extends SparkFunSuite {
+
+  private case class Entry(name: String, data: Array[Byte], isDir: Boolean = false)
+
+  private def writeTar(file: File, entries: Seq[Entry]): Unit =
+    writeTarTo(new FileOutputStream(file), entries)
+
+  /** Write a gzipped tar, used to verify the `.tar.gz` / `.tgz` archive paths. */
+  private def writeTarGz(file: File, entries: Seq[Entry]): Unit =
+    writeTarTo(new GZIPOutputStream(new FileOutputStream(file)), entries)
+
+  // Writes `entries` as a tar to `rawOut` and closes it. With Using.resource a failed write
+  // stays the primary exception; a close() that fails as well is only added as suppressed.
+  private def writeTarTo(rawOut: OutputStream, entries: Seq[Entry]): Unit =
+    scala.util.Using.resource(new TarArchiveOutputStream(rawOut)) { out =>
+      entries.foreach { e =>
+        // TarArchiveEntry treats a trailing slash in the name as a directory marker.
+        val rawName = if (e.isDir && !e.name.endsWith("/")) e.name + "/" else e.name
+        val tarEntry = new TarArchiveEntry(rawName)
+        if (!e.isDir) tarEntry.setSize(e.data.length.toLong)
+        out.putArchiveEntry(tarEntry)
+        if (!e.isDir) out.write(e.data)
+        out.closeArchiveEntry()
+      }
+      out.finish()
+    }
+
+  private def textEntry(name: String, body: String): Entry =
+    Entry(name = name, data = body.getBytes(StandardCharsets.UTF_8))
+
+  private def readAll(in: InputStream): Array[Byte] = {
+    val sink = new ByteArrayOutputStream()
+    val chunk = new Array[Byte](4096)
+    // Keep pulling chunks until read() reports end-of-stream with a negative count.
+    Iterator
+      .continually(in.read(chunk))
+      .takeWhile(_ >= 0)
+      .foreach(count => sink.write(chunk, 0, count))
+    sink.toByteArray
+  }
+
+  /** Reads `file` through `ArchiveReader.readEntries` into `(entry name, UTF-8 text)` pairs. */
+  private def collect(file: File): Seq[(String, String)] =
+    ArchiveReader(new Path(file.toURI)).readEntries(new Configuration()) { (entry, stream) =>
+      Iterator.single(entry -> new String(readAll(stream), StandardCharsets.UTF_8))
+    }.toList
+
+  // ----- isArchivePath ------------------------------------------------------
+
+  test("isArchivePath: positive cases") {
+    val archivePaths = Seq(
+      "foo.tar", "FOO.TAR", "/a/b/c/x.tar", "weird.TaR",
+      "foo.tar.gz", "FOO.TAR.GZ", "mixed.Tar.Gz", "/a/b/c/x.tar.gz",
+      "foo.tgz", "FOO.TGZ", "/a/b/c/x.tgz")
+    for (p <- archivePaths) {
+      assert(ArchiveReader.isArchivePath(new Path(p)), s"expected archive match for $p")
+    }
+  }
+
+  test("isArchivePath: negative cases") {
+    val nonArchivePaths = Seq("foo.csv", "foo.gz", "foo", "dir/", "foo.tarball", "data.zip",
+      "foo.tar.bz2", "foo.targz")
+    nonArchivePaths.foreach(p =>
+      assert(!ArchiveReader.isArchivePath(new Path(p)), s"expected non-match for $p"))
+  }
+
+  // ----- readEntries --------------------------------------------------------
+
+  test("readEntries: empty tar yields empty iterator") {
+    withTempDir { tmp =>
+      val emptyTar = new File(tmp, "empty.tar")
+      writeTar(emptyTar, Nil)
+      assert(collect(emptyTar) == Nil)
+    }
+  }
+
+  test("readEntries: single entry exposes its name and bytes") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "single.tar")
+      writeTar(archive, Seq(textEntry("only.csv", "hello\n")))
+      assert(collect(archive) == Seq(("only.csv", "hello\n")))
+    }
+  }
+
+  test("readEntries: multiple entries chained in tar order") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "multi.tar")
+      writeTar(archive, Seq("a", "b", "c").map(id => textEntry(s"$id.csv", id)))
+      assert(collect(archive) == Seq(("a.csv", "a"), ("b.csv", "b"), ("c.csv", "c")))
+    }
+  }
+
+  test("readEntries: gzipped tar (.tar.gz) via Hadoop codec factory") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "data.tar.gz")
+      writeTarGz(archive, Seq("a", "b").map(id => textEntry(s"$id.csv", id)))
+      assert(collect(archive) == Seq(("a.csv", "a"), ("b.csv", "b")))
+    }
+  }
+
+  test("readEntries: gzipped tar (.tgz) via explicit GZIPInputStream wrap") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "data.tgz")
+      writeTarGz(archive, Seq("a", "b").map(id => textEntry(s"$id.csv", id)))
+      assert(collect(archive) == Seq(("a.csv", "a"), ("b.csv", "b")))
+    }
+  }
+
+  test("readEntries: directory entries are skipped") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "dirs.tar")
+      // A directory record followed by a regular file stored under that directory.
+      val dirEntry = Entry("subdir", Array.emptyByteArray, isDir = true)
+      writeTar(archive, Seq(dirEntry, textEntry("subdir/data.csv", "x")))
+      assert(collect(archive) == Seq(("subdir/data.csv", "x")))
+    }
+  }
+
+  test("readEntries: dotfile entries (e.g. macOS ._foo) are skipped") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "dots.tar")
+      writeTar(archive, Seq(
+        textEntry("._real.csv", "junk"),          // macOS AppleDouble sidecar
+        textEntry(".hidden", "ignored"),          // bare dotfile
+        textEntry("real.csv", "kept"),            // the only entry expected back
+        textEntry("nested/._sidecar", "junk2")))  // dotfile in a subdir
+      assert(collect(archive) == Seq(("real.csv", "kept")))
+    }
+  }
+
+  test("readEntries: advances lazily, one entry at a time") {
+    withTempDir { dir =>
+      val tar = new File(dir, "lazy.tar")
+      writeTar(tar, Seq(textEntry("a.csv", "a"), textEntry("b.csv", "b"), textEntry("c.csv", "c")))
+
+      val opened = ArrayBuffer[String]()
+      // parseEntry records the entry name and yields exactly one element without reading the
+      // stream, so `opened` shows precisely when the reader moves on to each entry.
+      val it = ArchiveReader(new Path(tar.toURI)).readEntries(new Configuration()) { (name, _) =>
+        opened += name
+        Iterator.single(name)
+      }
+
+      // Construction already opens the first entry, and only that one; the remaining entries
+      // are opened on demand as iteration crosses each entry boundary (never all upfront).
+      assert(opened.toList == List("a.csv"))
+      assert(it.hasNext)
+      assert(it.next() == "a.csv")
+      // Handing out a.csv's single element does not open b.csv yet; the next pull does that.
+      assert(opened.toList == List("a.csv"))
+      assert(it.next() == "b.csv")
+      assert(opened.toList == List("a.csv", "b.csv"))
+      assert(it.next() == "c.csv")
+      assert(opened.toList == List("a.csv", "b.csv", "c.csv"))
+      assert(!it.hasNext)
+      assert(opened.size == 3)
+    }
+  }
+
+  test("readEntries: a parseEntry that closes its stream still advances to the next entry") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "close.tar")
+      writeTar(archive, Seq(textEntry("a.csv", "a"), textEntry("b.csv", "b")))
+      val bodies = ArrayBuffer.empty[String]
+      val reader = ArchiveReader(new Path(archive.toURI))
+      val names = reader.readEntries(new Configuration()) { (name, in) =>
+        val text = new String(readAll(in), StandardCharsets.UTF_8)
+        in.close() // must NOT close the underlying archive
+        bodies += text
+        Iterator.single(name)
+      }
+      assert(names.toList == List("a.csv", "b.csv"))
+      assert(bodies.toList == List("a", "b"))
+    }
+  }
+
+  test("readEntries: close() is safe, idempotent, and stops iteration") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "closeable.tar")
+      writeTar(archive, Seq(textEntry("a.csv", "a"), textEntry("b.csv", "b")))
+      val reader = ArchiveReader(new Path(archive.toURI))
+      val names = reader.readEntries(new Configuration()) { (name, _) => Iterator.single(name) }
+      assert(names.hasNext)
+      // The test relies on the returned iterator also being a Closeable.
+      val closeable = names.asInstanceOf[Closeable]
+      closeable.close()
+      closeable.close() // idempotent
+      assert(!names.hasNext)
+    }
+  }
+
+  test("readEntries: TaskContext completion cleans up without error") {
+    withTempDir { tmp =>
+      val archive = new File(tmp, "ctx.tar")
+      writeTar(archive, Seq(textEntry("a.csv", "a"), textEntry("b.csv", "b")))
+      // Hand-built task context (no memory manager or metrics system); it is installed as the
+      // active TaskContext below and unset again in the `finally` block.
+      val taskContext = new TaskContextImpl(
+        stageId = 0,
+        stageAttemptNumber = 0,
+        partitionId = 0,
+        taskAttemptId = 1L,
+        attemptNumber = 0,
+        numPartitions = 0,
+        taskMemoryManager = null,
+        localProperties = new Properties,
+        metricsSystem = null,
+        cpus = 1)
+      TaskContext.setTaskContext(taskContext)
+      try {
+        val reader = ArchiveReader(new Path(archive.toURI))
+        val names = reader.readEntries(new Configuration()) { (name, _) => Iterator.single(name) }
+        assert(names.hasNext)
+        names.next() // open the archive and register the completion listener
+        // Simulate task completion without exhausting/closing the iterator.
+        taskContext.markTaskCompleted(None)
+      } finally {
+        TaskContext.unset()
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVArchiveReadBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVArchiveReadBase.scala
new file mode 100644
index 0000000000000..93dc8db5750f0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVArchiveReadBase.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.util.Utils
+
+/**
+ * Binds [[ArchiveReadSuiteBase]]'s file-format hooks to CSV. The header-mode-specific tests live in
+ * the [[CSVHeaderArchiveReadBase]] and [[CSVHeaderlessArchiveReadBase]] sub-traits, so the shared
+ * archive tests from [[ArchiveReadSuiteBase]] run for both modes.
+ */
+trait CSVArchiveReadBase extends ArchiveReadSuiteBase {
+
+  /** True when the archived CSV files are both written and read with a header row. */
+  protected def header: Boolean
+
+  override protected def format: String = "csv"
+
+  override protected def fileExtension: String = "csv"
+
+  override protected def readOptions: Map[String, String] = Map("header" -> header.toString)
+
+  override protected def readSchema: String = "id INT, name STRING"
+
+  override protected def encodeFile(
+      df: DataFrame,
+      writeOptions: Map[String, String]): Array[Byte] = {
+    val outDir = Utils.createTempDir(namePrefix = "archive-test-encode")
+    try {
+      df.coalesce(1).write.format("csv").mode("overwrite")
+        .options(Map("header" -> header.toString) ++ writeOptions).save(outDir.getCanonicalPath)
+      // coalesce(1) should leave one data file; `_`-prefixed, hidden and .crc files are ignored.
+      val dataFiles = outDir.listFiles().filter { f =>
+        val name = f.getName
+        f.isFile && !(name.startsWith("_") || name.startsWith(".") || name.endsWith(".crc"))
+      }
+      assert(dataFiles.length == 1,
+        s"expected exactly one data file, got: ${dataFiles.map(_.getName).toList}")
+      Files.readAllBytes(dataFiles.head.toPath)
+    } finally Utils.deleteRecursively(outDir)
+  }
+
+  /** UTF-8 bytes of a literal CSV text, for tests that need exact control over the row layout. */
+  protected def csvBytes(s: String): Array[Byte] = s.getBytes(StandardCharsets.UTF_8)
+
+  test("CSV: reading an archive without a schema fails (inference not yet supported)") {
+    // Archives do not support schema inference yet (planned follow-up), so a read without an
+    // explicit schema is expected to fail with Spark's standard UNABLE_TO_INFER_SCHEMA error.
+    withArchiveFile() { archive =>
+      writeArchive(archive, Seq(entryName(0) -> encodeFile(sampleDf((1, "Alice"), (2, "Bob")))))
+      val error = intercept[AnalysisException] {
+        spark.read.format(format).options(readOptions).load(archive.getCanonicalPath)
+      }
+      assert(error.getCondition == "UNABLE_TO_INFER_SCHEMA",
+        s"expected UNABLE_TO_INFER_SCHEMA, got ${error.getCondition}: ${error.getMessage}")
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderArchiveReadBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderArchiveReadBase.scala
new file mode 100644
index 0000000000000..32ad6e818076f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderArchiveReadBase.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+/**
+ * [[CSVArchiveReadBase]] reading CSV files that carry a header row, plus header-specific archive
+ * tests (mismatched headers, and delimiter/multiline cases whose first row is a header).
+ */
+trait CSVHeaderArchiveReadBase extends CSVArchiveReadBase {
+
+  import testImplicits._
+
+  override protected def header: Boolean = true
+
+  test("CSV: entries with mismatched headers behave like standalone files") {
+    val matching = encodeFile(sampleDf((1, "Alice"), (2, "Bob")))
+    // This entry's header says "nickname", so the schema's "name" column is absent from it.
+    val mismatched = encodeFile(Seq((3, "Carol")).toDF("id", "nickname"))
+    assertArchiveMatchesDir(
+      Seq(entryName(0) -> matching, entryName(1) -> mismatched))
+  }
+
+  test("CSV: custom delimiter matches a directory read") {
+    val semicolonCsv = csvBytes("id;name\n1;Alice\n2;Bob\n")
+    assertArchiveMatchesDir(
+      Seq("a.csv" -> semicolonCsv), extraOptions = Map("delimiter" -> ";"))
+  }
+
+  test("CSV: multiline quoted fields with embedded newlines match a directory read") {
+    // Quoted values contain raw newlines, so one record can span several physical lines.
+    val entries = Seq(
+      "a.csv" -> csvBytes("id,note\n1,\"line1\nline2\"\n2,\"plain\"\n"),
+      "b.csv" -> csvBytes("id,note\n3,\"a\nb\nc\"\n"))
+    assertArchiveMatchesDir(
+      entries, extraOptions = Map("multiLine" -> "true"), schema = "id INT, note STRING")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderTarArchiveReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderTarArchiveReadSuite.scala
new file mode 100644
index 0000000000000..7ac2ad4084aa0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderTarArchiveReadSuite.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+/**
+ * Concrete suite for header-carrying CSV files packed in tar archives (`.tar`/`.tar.gz`/`.tgz`).
+ * Declares no tests itself; it runs the shared tests from [[ArchiveReadSuiteBase]] and the
+ * header-mode tests from [[CSVHeaderArchiveReadBase]] on tars built by [[TarArchiveReadBase]].
+ */
+class CSVHeaderTarArchiveReadSuite
+  extends ArchiveReadSuiteBase
+  with CSVHeaderArchiveReadBase
+  with TarArchiveReadBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessArchiveReadBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessArchiveReadBase.scala
new file mode 100644
index 0000000000000..ba4d7f63bb464
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessArchiveReadBase.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+/**
+ * [[CSVArchiveReadBase]] reading headerless CSV files (columns are positional), plus headerless
+ * delimiter/multiline archive tests. The shared archive tests from [[ArchiveReadSuiteBase]] cover
+ * the common headerless read paths.
+ */
+trait CSVHeaderlessArchiveReadBase extends CSVArchiveReadBase {
+
+  override protected def header: Boolean = false
+
+  test("CSV: headerless custom delimiter matches a directory read") {
+    val entries = Seq("a.csv" -> csvBytes("1;Alice\n2;Bob\n"), "b.csv" -> csvBytes("3;Carol\n"))
+    assertArchiveMatchesDir(
+      entries, extraOptions = Map("delimiter" -> ";"))
+  }
+
+  test("CSV: headerless multiline quoted fields with embedded newlines match a directory read") {
+    // Quoted values contain raw newlines, so one record can span several physical lines.
+    val entries = Seq(
+      "a.csv" -> csvBytes("1,\"line1\nline2\"\n2,\"plain\"\n"),
+      "b.csv" -> csvBytes("3,\"a\nb\nc\"\n"))
+    assertArchiveMatchesDir(
+      entries, extraOptions = Map("multiLine" -> "true"), schema = "id INT, note STRING")
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessTarArchiveReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessTarArchiveReadSuite.scala
new file mode 100644
index 0000000000000..28316c97ca8c9
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CSVHeaderlessTarArchiveReadSuite.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+/**
+ * Concrete suite for headerless CSV files packed in tar archives (`.tar`/`.tar.gz`/`.tgz`).
+ * Declares no tests itself; it runs the shared tests from [[ArchiveReadSuiteBase]] and the
+ * headerless tests from [[CSVHeaderlessArchiveReadBase]] on tars built by [[TarArchiveReadBase]].
+ */
+class CSVHeaderlessTarArchiveReadSuite
+  extends ArchiveReadSuiteBase
+  with CSVHeaderlessArchiveReadBase
+  with TarArchiveReadBase
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/TarArchiveReadBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/TarArchiveReadBase.scala
new file mode 100644
index 0000000000000..60a73b0891bae
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/TarArchiveReadBase.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.{File, FileOutputStream, OutputStream}
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.util.Locale
+import java.util.zip.GZIPOutputStream
+
+import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveOutputStream}
+
+/**
+ * Binds [[ArchiveReadSuiteBase]]'s archive-format hooks to tar containers: plain `.tar`, gzipped
+ * `.tar.gz`, and `.tgz`. Reusable across file formats -- a `<format>TarArchiveReadSuite` mixes this
+ * in alongside the file-format trait.
+ */
+trait TarArchiveReadBase extends ArchiveReadSuiteBase {
+
+  override protected def archiveExtensions: Seq[String] = Seq("tar", "tar.gz", "tgz")
+
+  override protected def corruptArchiveExtension: String = "tar.gz"
+
+  override protected def writeArchive(dest: File, entries: Seq[(String, Array[Byte])]): Unit = {
+    val lowerName = dest.getName.toLowerCase(Locale.ROOT)
+    val gzipped = lowerName.endsWith(".gz") || lowerName.endsWith(".tgz")
+    // Using.resource keeps a write failure as the primary exception (a failing close() is only
+    // attached as suppressed) and closes the file even if the gzip wrapper cannot be created.
+    scala.util.Using.resource(new FileOutputStream(dest)) { fileOut =>
+      val rawOut: OutputStream = if (gzipped) new GZIPOutputStream(fileOut) else fileOut
+      scala.util.Using.resource(new TarArchiveOutputStream(rawOut)) { out =>
+        entries.foreach { case (path, bytes) =>
+          val entry = new TarArchiveEntry(path)
+          entry.setSize(bytes.length.toLong)
+          out.putArchiveEntry(entry)
+          out.write(bytes)
+          out.closeArchiveEntry()
+        }
+        out.finish()
+      }
+    }
+  }
+
+  override protected def writeCorruptArchive(dest: File): Unit =
+    Files.write(dest.toPath, "this is not a valid gzip-compressed tar archive"
+      .getBytes(StandardCharsets.UTF_8))
+}