Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 58 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,23 @@ jobs:
matrix:
os: [ubuntu-22.04]
scala: [2.13, 2.12]
java: [temurin@8]
project: [root-spark33, root-spark34, root-spark35]
java: [temurin@8, temurin@17]
project: [root-spark33, root-spark34, root-spark35, root-spark40]
exclude:
- scala: 2.13
project: root-spark33
- scala: 2.13
project: root-spark34
- scala: 2.12
project: root-spark40
- java: temurin@17
project: root-spark33
- java: temurin@17
project: root-spark34
- java: temurin@17
project: root-spark35
- java: temurin@8
project: root-spark40
runs-on: ${{ matrix.os }}
timeout-minutes: 60
steps:
Expand All @@ -62,6 +72,19 @@ jobs:
if: matrix.java == 'temurin@8' && steps.setup-java-temurin-8.outputs.cache-hit == 'false'
run: sbt +update

- name: Setup Java (temurin@17)
id: setup-java-temurin-17
if: matrix.java == 'temurin@17'
uses: actions/setup-java@v5
with:
distribution: temurin
java-version: 17
cache: sbt

- name: sbt update
if: matrix.java == 'temurin@17' && steps.setup-java-temurin-17.outputs.cache-hit == 'false'
run: sbt +update

- name: Check that workflows are up to date
run: sbt githubWorkflowCheck

Expand Down Expand Up @@ -115,6 +138,19 @@ jobs:
if: matrix.java == 'temurin@8' && steps.setup-java-temurin-8.outputs.cache-hit == 'false'
run: sbt +update

- name: Setup Java (temurin@17)
id: setup-java-temurin-17
if: matrix.java == 'temurin@17'
uses: actions/setup-java@v5
with:
distribution: temurin
java-version: 17
cache: sbt

- name: sbt update
if: matrix.java == 'temurin@17' && steps.setup-java-temurin-17.outputs.cache-hit == 'false'
run: sbt +update

- name: Import signing key
if: env.PGP_SECRET != '' && env.PGP_PASSPHRASE == ''
env:
Expand Down Expand Up @@ -169,18 +205,31 @@ jobs:
if: matrix.java == 'temurin@8' && steps.setup-java-temurin-8.outputs.cache-hit == 'false'
run: sbt +update

- name: Setup Java (temurin@17)
id: setup-java-temurin-17
if: matrix.java == 'temurin@17'
uses: actions/setup-java@v5
with:
distribution: temurin
java-version: 17
cache: sbt

- name: sbt update
if: matrix.java == 'temurin@17' && steps.setup-java-temurin-17.outputs.cache-hit == 'false'
run: sbt +update

- name: Submit Dependencies
uses: scalacenter/sbt-dependency-submission@v2
with:
modules-ignore: root-spark33_2.13 root-spark33_2.12 docs_2.13 docs_2.12 root-spark34_2.13 root-spark34_2.12 root-spark35_2.13 root-spark35_2.12
modules-ignore: root-spark33_2.13 root-spark33_2.12 docs_2.13 docs_2.12 root-spark34_2.13 root-spark34_2.12 root-spark35_2.13 root-spark35_2.12 root-spark40_2.13
configs-ignore: test scala-tool scala-doc-tool test-internal

site:
name: Generate Site
strategy:
matrix:
os: [ubuntu-22.04]
java: [temurin@11]
java: [temurin@17]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout current branch (full)
Expand All @@ -204,17 +253,17 @@ jobs:
if: matrix.java == 'temurin@8' && steps.setup-java-temurin-8.outputs.cache-hit == 'false'
run: sbt +update

- name: Setup Java (temurin@11)
id: setup-java-temurin-11
if: matrix.java == 'temurin@11'
- name: Setup Java (temurin@17)
id: setup-java-temurin-17
if: matrix.java == 'temurin@17'
uses: actions/setup-java@v5
with:
distribution: temurin
java-version: 11
java-version: 17
cache: sbt

- name: sbt update
if: matrix.java == 'temurin@11' && steps.setup-java-temurin-11.outputs.cache-hit == 'false'
if: matrix.java == 'temurin@17' && steps.setup-java-temurin-17.outputs.cache-hit == 'false'
run: sbt +update

- name: Generate site
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ The compatible versions of [Spark](http://spark.apache.org/) and

| Frameless | Spark | Cats | Cats-Effect | Scala |
|-----------|-----------------------------|----------|-------------|-------------|
| 0.17.0 | 4.0.2† / 3.5.8 / 3.4.4 / 3.3.4 | 2.x | 3.x | 2.12 / 2.13 |
| 0.16.0 | 3.5.0 / 3.4.0 / 3.3.0 | 2.x | 3.x | 2.12 / 2.13 |
| 0.15.0 | 3.4.0 / 3.3.0 / 3.2.2 | 2.x | 3.x | 2.12 / 2.13 |
| 0.14.1 | 3.4.0 / 3.3.0 / 3.2.2 | 2.x | 3.x | 2.12 / 2.13 |
Expand All @@ -46,16 +47,19 @@ The compatible versions of [Spark](http://spark.apache.org/) and

_\* 0.11.0 has broken Spark 3.1.2 and 3.0.1 artifacts published._

_† The Spark 4.0.x artifacts (`-spark40`) are published for **Scala 2.13 only** and require **JDK 17+**, since Spark 4 dropped Scala 2.12 and JDK 8/11. The default (unsuffixed) artifacts still target Spark 3.5._

Starting 0.11 we introduced Spark cross published artifacts:

* By default, the unsuffixed frameless artifacts depend on the default Spark version (currently 3.5.x)
* Suffix `-spark{major}{minor}` is added to artifacts that are released for any other supported Spark version

Artifact names examples:

* `frameless-dataset` (the latest Spark dependency)
* `frameless-dataset` (the default Spark 3.5.x dependency)
* `frameless-dataset-spark40` (Spark 4.0.x dependency; Scala 2.13 + JDK 17 only)
* `frameless-dataset-spark34` (Spark 3.4.x dependency)
* `frameless-dataset-spark33` (Spark 3.3.x dependency)
* `frameless-dataset-spark32` (Spark 3.2.x dependency)

Versions 0.5.x and 0.6.x have identical features. The first is compatible with Spark 2.2.1 and the second with 2.3.0.

Expand Down
155 changes: 127 additions & 28 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
val sparkVersion = "3.5.8"
val spark40Version = "4.0.2"
val spark34Version = "3.4.4"
val spark33Version = "3.3.4"
val catsCoreVersion = "2.13.0"
Expand Down Expand Up @@ -26,12 +27,25 @@ lazy val root = project
.enablePlugins(NoPublishPlugin)
.settings(crossScalaVersions := Nil)
.aggregate(
`root-spark40`,
`root-spark35`,
`root-spark34`,
`root-spark33`,
docs
)

// Aggregate-only root for the Spark 4.0 variant of the build. It lives in `.spark40`, is
// never published (NoPublishPlugin) and is restricted to Scala213 via crossScalaVersions.
// It aggregates `core` plus the `-spark40` flavours of cats, dataset, refined and ml.
lazy val `root-spark40` = project
.in(file(".spark40"))
.enablePlugins(NoPublishPlugin)
.settings(crossScalaVersions := Seq(Scala213))
.aggregate(
core,
`cats-spark40`,
`dataset-spark40`,
`refined-spark40`,
`ml-spark40`
)

lazy val `root-spark35` = project
.in(file(".spark35"))
.enablePlugins(NoPublishPlugin)
Expand Down Expand Up @@ -76,6 +90,15 @@ lazy val `cats-spark34` = project
`dataset-spark34` % "test->test;compile->compile;provided->provided"
)

// Spark 4.0 flavour of the cats module, named "frameless-cats-spark40". It has no source
// tree of its own: sourceDirectory is pointed at the `cats` project's sources. catsSettings
// are applied first and spark40Settings afterwards, so the latter win on any shared key.
// Depends on `dataset-spark40` for the test, compile and provided configurations.
lazy val `cats-spark40` = project
.settings(name := "frameless-cats-spark40")
.settings(sourceDirectory := (cats / sourceDirectory).value)
.settings(catsSettings)
.settings(spark40Settings)
.dependsOn(
`dataset-spark40` % "test->test;compile->compile;provided->provided"
)

lazy val `cats-spark33` = project
.settings(name := "frameless-cats-spark33")
.settings(sourceDirectory := (cats / sourceDirectory).value)
Expand Down Expand Up @@ -111,6 +134,20 @@ lazy val `dataset-spark34` = project
.settings(spark34Settings)
.dependsOn(core % "test->test;compile->compile")

// Spark 4.0 flavour of the dataset module, named "frameless-dataset-spark40". It reuses the
// `dataset` project's sourceDirectory and adds two extra unmanaged source roots, both
// resolved under the `dataset` base directory:
//   - src/main/spark-4     (Compile) Spark-4-specific main sources
//   - src/test/spark-3.3+  (Test)    NOTE(review): presumably tests shared by every
//                                    Spark >= 3.3 variant; confirm they apply to 4.0
// Spark itself is pulled in through sparkDependencies(spark40Version). spark40Settings is
// applied last, after datasetSettings, so it wins on any shared key.
lazy val `dataset-spark40` = project
.settings(name := "frameless-dataset-spark40")
.settings(sourceDirectory := (dataset / sourceDirectory).value)
.settings(
Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "spark-4"
)
.settings(
Test / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "test" / "spark-3.3+"
)
.settings(datasetSettings)
.settings(sparkDependencies(spark40Version))
.settings(spark40Settings)
.dependsOn(core % "test->test;compile->compile")

lazy val `dataset-spark33` = project
.settings(name := "frameless-dataset-spark33")
.settings(sourceDirectory := (dataset / sourceDirectory).value)
Expand Down Expand Up @@ -139,6 +176,15 @@ lazy val `refined-spark34` = project
`dataset-spark34` % "test->test;compile->compile;provided->provided"
)

// Spark 4.0 flavour of the refined module, named "frameless-refined-spark40". It compiles
// the `refined` project's sources (shared sourceDirectory), applies refinedSettings and then
// spark40Settings, and depends on `dataset-spark40` for the test, compile and provided
// configurations.
lazy val `refined-spark40` = project
.settings(name := "frameless-refined-spark40")
.settings(sourceDirectory := (refined / sourceDirectory).value)
.settings(refinedSettings)
.settings(spark40Settings)
.dependsOn(
`dataset-spark40` % "test->test;compile->compile;provided->provided"
)

lazy val `refined-spark33` = project
.settings(name := "frameless-refined-spark33")
.settings(sourceDirectory := (refined / sourceDirectory).value)
Expand Down Expand Up @@ -168,6 +214,17 @@ lazy val `ml-spark34` = project
`dataset-spark34` % "test->test;compile->compile;provided->provided"
)

// Spark 4.0 flavour of the ml module, named "frameless-ml-spark40". It compiles the `ml`
// project's sources (shared sourceDirectory), applies mlSettings, adds the Spark ML
// dependencies for spark40Version and then applies spark40Settings last. Depends on `core`
// (test and compile) and on `dataset-spark40` (test, compile and provided).
lazy val `ml-spark40` = project
.settings(name := "frameless-ml-spark40")
.settings(sourceDirectory := (ml / sourceDirectory).value)
.settings(mlSettings)
.settings(sparkMlDependencies(spark40Version))
.settings(spark40Settings)
.dependsOn(
core % "test->test;compile->compile",
`dataset-spark40` % "test->test;compile->compile;provided->provided"
)

lazy val `ml-spark33` = project
.settings(name := "frameless-ml-spark33")
.settings(sourceDirectory := (ml / sourceDirectory).value)
Expand All @@ -191,7 +248,14 @@ lazy val docs = project
"org.typelevel" % "kind-projector" % "0.13.4" cross CrossVersion.full
),
scalacOptions += "-Ydelambdafy:inline",
libraryDependencies += "org.typelevel" %% "mouse" % "1.3.2"
libraryDependencies += "org.typelevel" %% "mouse" % "1.3.2",
// mdoc executes Spark code via `Compile / runMain`; on JDK 17 (the site CI job) Spark
// needs the module --add-opens flags, so fork the run and pass them through. Forking
// changes the working directory, so pin it to the repo root where the docs read their
// relative data files (e.g. docs/iris.data).
Compile / run / fork := true,
Compile / run / javaOptions ++= sparkJava17Options,
Compile / run / baseDirectory := (LocalRootProject / baseDirectory).value
)
.dependsOn(dataset, cats, ml)

Expand Down Expand Up @@ -241,7 +305,13 @@ lazy val datasetSettings =
mc("frameless.functions.FramelessLit"),
mc(f"frameless.functions.FramelessLit$$"),
dmm("frameless.functions.package.litAggr"),
dmm("org.apache.spark.sql.FramelessInternals.column")
dmm("org.apache.spark.sql.FramelessInternals.column"),
// FramelessInternals is internal plumbing (Spark-version compat seam), not part of
// the intended public API. Spark 4 required reworking it: `column` is now the
// Expression->Column bridge and `mkDataset` derives the session from the source
// Dataset instead of taking a SQLContext.
imt("org.apache.spark.sql.FramelessInternals.column"),
imt("org.apache.spark.sql.FramelessInternals.mkDataset")
)
},
coverageExcludedPackages := "org.apache.spark.sql.reflection",
Expand Down Expand Up @@ -304,6 +374,27 @@ lazy val scalacOptionSettings = Def.setting {
baseScalacOptions(scalaVersion.value)
}

// Extra JVM flags for forked Spark processes. From JDK 17 on, the module system blocks
// Spark's reflective access to java.base internals unless these packages are opened to the
// unnamed module; on older JDKs no flags are needed and the list is empty.
// Shared by the test options and the docs run.
lazy val sparkJava17Options: Seq[String] = {
  // Decided from the JVM that is running sbt itself.
  val runningOnJava17Plus =
    sys.props("java.specification.version").toDouble >= 17.0

  // java.base packages to open, in the order the flags are emitted.
  val openedPackages = Seq(
    "java.lang",
    "java.lang.invoke",
    "java.lang.reflect",
    "java.io",
    "java.net",
    "java.nio",
    "java.util",
    "java.util.concurrent",
    "java.util.concurrent.atomic",
    "sun.nio.ch",
    "sun.nio.cs",
    "sun.security.action",
    "sun.util.calendar"
  )

  if (runningOnJava17Plus)
    openedPackages.map(pkg => s"--add-opens=java.base/$pkg=ALL-UNNAMED")
  else Seq.empty
}

lazy val framelessSettings = Seq(
scalacOptions ++= scalacOptionSettings.value,
Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-oDF"),
Expand All @@ -313,28 +404,7 @@ lazy val framelessSettings = Seq(
"org.scalatestplus" %% "scalatestplus-scalacheck" % scalatestplus % Test,
"org.scalacheck" %% "scalacheck" % scalacheck % Test
),
Test / javaOptions ++= {
val baseOptions = Seq("-Xmx1G", "-ea")
val java17Options =
if (sys.props("java.specification.version").toDouble >= 17.0) {
Seq(
"--add-opens=java.base/java.lang=ALL-UNNAMED",
"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED",
"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED",
"--add-opens=java.base/java.io=ALL-UNNAMED",
"--add-opens=java.base/java.net=ALL-UNNAMED",
"--add-opens=java.base/java.nio=ALL-UNNAMED",
"--add-opens=java.base/java.util=ALL-UNNAMED",
"--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
"--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED",
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED",
"--add-opens=java.base/sun.nio.cs=ALL-UNNAMED",
"--add-opens=java.base/sun.security.action=ALL-UNNAMED",
"--add-opens=java.base/sun.util.calendar=ALL-UNNAMED"
)
} else Seq.empty
baseOptions ++ java17Options
},
Test / javaOptions ++= Seq("-Xmx1G", "-ea") ++ sparkJava17Options,
Test / fork := true,
Test / parallelExecution := false,
mimaPreviousArtifacts ~= {
Expand All @@ -352,6 +422,15 @@ lazy val framelessSettings = Seq(
libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always
) ++ consoleSettings

// Settings common to the `-spark40` modules (cats, dataset, refined, ml). Each module applies
// them after its own settings so that the Scala-version pinning and the MiMa override below
// take precedence.
lazy val spark40Settings = Seq[Setting[_]](
// Spark 4 dropped Scala 2.12 support; this module is 2.13-only.
crossScalaVersions := Seq(Scala213),
scalaVersion := Scala213,
// Only a 2.13 entry is declared, matching the 2.13-only cross build above.
tlVersionIntroduced := Map("2.13" -> "0.17.0"),
// Brand-new artifact: no previously published version to check binary compatibility against.
mimaPreviousArtifacts := Set.empty
)

lazy val spark34Settings = Seq[Setting[_]](
tlVersionIntroduced := Map("2.12" -> "0.14.1", "2.13" -> "0.14.1"),
mimaPreviousArtifacts := Set(
Expand Down Expand Up @@ -427,12 +506,32 @@ ThisBuild / developers := List(
ThisBuild / tlCiReleaseBranches := Seq("master")
ThisBuild / tlSitePublishBranch := Some("master")

val roots = List("root-spark33", "root-spark34", "root-spark35")
// Spark 3.x roots: 3.3/3.4 build on 2.12 only, 3.5 builds on both 2.12 and 2.13.
val spark3Roots = List("root-spark33", "root-spark34", "root-spark35")
// Spark 4.x roots: Scala 2.13 only (Spark 4 dropped 2.12).
val spark4Roots = List("root-spark40")
val roots = spark3Roots ++ spark4Roots

// Spark 3.x builds/tests on JDK 8; Spark 4 requires JDK 17+.
val spark3Java = JavaSpec.temurin("8")
val spark4Java = JavaSpec.temurin("17")

ThisBuild / githubWorkflowJavaVersions := Seq(spark3Java, spark4Java)

ThisBuild / githubWorkflowBuildMatrixAdditions += "project" -> roots

ThisBuild / githubWorkflowBuildMatrixExclusions ++= roots.init.map { project =>
MatrixExclude(Map("scala" -> "2.13", "project" -> project))
}
ThisBuild / githubWorkflowBuildMatrixExclusions ++=
// 3.3/3.4 are 2.12-only; 3.5 builds both. Spark 4 is 2.13-only.
spark3Roots.init.map { project =>
MatrixExclude(Map("scala" -> "2.13", "project" -> project))
} ++ spark4Roots.map { project =>
MatrixExclude(Map("scala" -> "2.12", "project" -> project))
} ++
// Pin each Spark line to its JDK: 3.x on JDK 8, 4.x on JDK 17.
spark3Roots.map { project =>
MatrixExclude(Map("java" -> spark4Java.render, "project" -> project))
} ++ spark4Roots.map { project =>
MatrixExclude(Map("java" -> spark3Java.render, "project" -> project))
}

ThisBuild / githubWorkflowEnv += "SBT_OPTS" -> "-Xms1g -Xmx4g"
Loading
Loading