Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -8171,6 +8171,12 @@
],
"sqlState" : "42K0E"
},
"UNSUPPORTED_HIVE_METASTORE_VERSION_FOR_JAVA" : {
"message" : [
"Hive metastore version <version> requires Java <requiredJavaVersion> or later, but the current JVM is Java <currentJavaVersion>. Please upgrade your Java version or use an earlier Hive metastore version."
],
"sqlState" : "0A000"
},
"UNSUPPORTED_INSERT" : {
"message" : [
"Can't insert into the target."
Expand Down
3 changes: 2 additions & 1 deletion docs/sql-data-sources-hive-tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ The following options can be used to configure the version of Hive that is used
<td><code>2.3.10</code></td>
<td>
Version of the Hive metastore. Available
options are <code>2.0.0</code> through <code>2.3.10</code>, <code>3.0.0</code> through <code>3.1.3</code>, and <code>4.0.0</code> through <code>4.1.0</code>.
options are <code>2.0.0</code> through <code>2.3.10</code>, <code>3.0.0</code> through <code>3.1.3</code>, and <code>4.0.0</code> through <code>4.2.0</code>.
Note: Hive 4.2 requires Java 21 or later.
</td>
<td>1.4.0</td>
</tr>
Expand Down
2 changes: 1 addition & 1 deletion docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ Python UDF registration is unchanged.
Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs.
Currently, Hive SerDes and UDFs are based on built-in Hive,
and Spark SQL can be connected to different versions of Hive Metastore
(from 2.0.0 to 2.3.10 and 3.0.0 to 4.1.0. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore).
(from 2.0.0 to 2.3.10 and 3.0.0 to 4.2.0). Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore).

#### Deploying in Existing Hive Warehouses
{:.no_toc}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,18 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
"key" -> key))
}

/**
 * Builds the exception reported when the requested Hive metastore version cannot be used
 * on the running JVM because it needs a newer Java release.
 *
 * The three arguments fill the `version`, `requiredJavaVersion` and `currentJavaVersion`
 * message parameters of the `UNSUPPORTED_HIVE_METASTORE_VERSION_FOR_JAVA` error class.
 *
 * @param version the Hive metastore version that was requested
 * @param requiredJavaVersion the minimum Java version that Hive version needs
 * @param currentJavaVersion the Java version of the running JVM
 * @return the exception to throw; this method only constructs it
 */
def unsupportedHiveMetastoreVersionForJavaError(
    version: String,
    requiredJavaVersion: Int,
    currentJavaVersion: Int): SparkUnsupportedOperationException = {
  // Message parameters are passed as strings, so the integer versions are rendered here.
  val params: Map[String, String] = Map(
    "version" -> version,
    "requiredJavaVersion" -> requiredJavaVersion.toString,
    "currentJavaVersion" -> currentJavaVersion.toString)

  new SparkUnsupportedOperationException(
    errorClass = "UNSUPPORTED_HIVE_METASTORE_VERSION_FOR_JAVA",
    messageParameters = params)
}

def loadHiveClientCausesNoClassDefFoundError(
cnf: NoClassDefFoundError,
execJars: Seq[URL],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ private[spark] object HiveUtils extends Logging {
.doc("Version of the Hive metastore. Available options are " +
"<code>2.0.0</code> through <code>2.3.10</code>, " +
"<code>3.0.0</code> through <code>3.1.3</code> and " +
"<code>4.0.0</code> through <code>4.1.0</code>.")
"<code>4.0.0</code> through <code>4.2.0</code>. " +
"Note: Hive 4.2 requires Java 21 or later.")
.version("1.4.0")
.stringConf
.checkValue(isCompatibleHiveVersion, "Unsupported Hive Metastore version")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ private[hive] class HiveClientImpl(
case hive.v3_1 => new Shim_v3_1()
case hive.v4_0 => new Shim_v4_0()
case hive.v4_1 => new Shim_v4_1()
case hive.v4_2 => new Shim_v4_2()
}

// Create an internal session state for this HiveClientImpl.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1545,3 +1545,5 @@ private[client] class Shim_v4_0 extends Shim_v3_1 {
}

/**
 * Shim selected for Hive 4.1 clients. It declares no members of its own, so every
 * operation is inherited unchanged from [[Shim_v4_0]].
 */
private[client] class Shim_v4_1 extends Shim_v4_0

/**
 * Shim selected for Hive 4.2 clients. It declares no members of its own, so every
 * operation is inherited unchanged from its parent shim.
 */
private[client] class Shim_v4_2 extends Shim_v4_1
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ private[hive] object IsolatedClientLoader extends Logging {
case (3, 1, _) => Some(hive.v3_1)
case (4, 0, _) => Some(hive.v4_0)
case (4, 1, _) => Some(hive.v4_1)
case (4, 2, _) => Some(hive.v4_2)
case _ => None
}.getOrElse {
throw QueryExecutionErrors.unsupportedHiveMetastoreVersionError(
Expand Down Expand Up @@ -195,6 +196,14 @@ private[hive] class IsolatedClientLoader(
val barrierPrefixes: Seq[String] = Seq.empty)
extends Logging {

// Fail fast when a Hive 4.2 client is requested on a JVM older than Java 21, which
// Hive 4.2 requires. The guard lives on the client-construction path rather than in
// IsolatedClientLoader.hiveVersion, which is also used for version-string validation,
// so the actionable message reaches the user instead of being swallowed.
if (version == hive.v4_2 && !Utils.isJavaVersionAtLeast21) {
// Arguments: the full Hive version string, the required Java version, and the feature
// (major) version of the running JVM.
throw QueryExecutionErrors.unsupportedHiveMetastoreVersionForJavaError(
version.fullVersion, 21, Runtime.version().feature())
}

/**
* This controls whether the generated clients maintain an independent/isolated copy of the
* Hive `SessionState`. If false, the Hive will leverage the global/static copy of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,38 @@ package object client {
}
})

// Descriptor for the Hive 4.2.0 metastore client: its full version string plus the extra
// dependencies and the exclusions declared for it.
//
// Hive 4.2 ships with datanucleus-api-jdo:6.0.3 (down from 6.0.5), datanucleus-core:6.0.10
// (down from 6.0.11), and javax.jdo:3.2.0-release (down from 3.2.1) relative to Hive 4.1.
// These versions reflect the actual Hive 4.2 POM and must not be "upgraded" to the v4_1
// values. Derby was bumped to 10.17.1.0 (from 10.14.1.0 in v4_1) for Java 21 compatibility.
case object v4_2 extends HiveVersion("4.2.0",
extraDeps =
"org.antlr:antlr4-runtime:4.9.3" ::
"org.apache.derby:derby:10.17.1.0" ::
"org.apache.hadoop:hadoop-hdfs:3.4.1" ::
"org.datanucleus:datanucleus-api-jdo:6.0.3" ::
"org.datanucleus:datanucleus-core:6.0.10" ::
"org.datanucleus:datanucleus-rdbms:6.0.10" ::
"org.datanucleus:javax.jdo:3.2.0-release" ::
"org.springframework:spring-core:5.3.39" ::
"org.springframework:spring-jdbc:5.3.39" :: Nil,
exclusions =
"org.apache.curator:*" ::
"org.apache.hive:hive-service-rpc" ::
"org.apache.zookeeper:zookeeper" :: Nil ++
{
if (!Utils.isTesting) {
// Outside of tests, tez-api is excluded as well. It is kept only when testing because
// HiveClientImpl#runHive, which is used for testing, indirectly refers to
// `org.apache.hadoop.hive.ql.DriverContext`, and `DriverContext` refers to Tez APIs.
Seq("org.apache.tez:tez-api")
} else {
Seq.empty
}
})

val allSupportedHiveVersions: Set[HiveVersion] =
Set(v2_0, v2_1, v2_2, v2_3, v3_0, v3_1, v4_0, v4_1)
Set(v2_0, v2_1, v2_2, v2_3, v3_0, v3_1, v4_0, v4_1, v4_2)
}
// scalastyle:on

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,21 @@

package org.apache.spark.sql.hive.client

import org.apache.spark.util.Utils

private[client] trait HiveClientVersions {
private val testVersions = sys.env.get("SPARK_TEST_HIVE_CLIENT_VERSIONS")
protected val versions = if (testVersions.nonEmpty) {
private val allVersions = if (testVersions.nonEmpty) {
testVersions.get.split(",").map(_.trim).filter(_.nonEmpty).toIndexedSeq
} else {
IndexedSeq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1", "4.0", "4.1")
IndexedSeq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1", "4.0", "4.1", "4.2")
}

// Hive client versions the suites run against. Hive 4.2 requires Java 21 or later, so its
// entries are dropped from `allVersions` when the running JVM is older than that.
protected val versions: IndexedSeq[String] = {
  // `allVersions` may come from SPARK_TEST_HIVE_CLIENT_VERSIONS, where a 4.2 client can
  // presumably be spelled either as "4.2" or with a patch component such as "4.2.0".
  // Match both forms so neither slips through on an older JVM; "4.20" is not matched.
  def isHive42(v: String): Boolean = v == "4.2" || v.startsWith("4.2.")

  if (Utils.isJavaVersionAtLeast21) {
    allVersions
  } else {
    allVersions.filterNot(isHive42)
  }
}
}