Skip to content

Commit 425f9c9

Browse files
authored
build: Enable Spark SQL tests for Spark 4.1.1 (#4093)
1 parent 3ed80e9 commit 425f9c9

4 files changed

Lines changed: 4421 additions & 3 deletions

File tree

.github/actions/setup-spark-builder/action.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,23 @@ runs:
6767
run: |
6868
# Native library should already be in native/target/release/
6969
./mvnw install -Prelease -DskipTests -Pspark-${{inputs.spark-short-version}}
70+
71+
- name: Purge partial Maven cache entries
72+
shell: bash
73+
run: |
74+
# Comet's Maven phase resolves the dependency graph and downloads POMs
75+
# for transitive artifacts whose JARs it never actually needs. When sbt
76+
# then resolves Spark's deps, Coursier sees the POM in mavenLocal,
77+
# declares the artifact "found locally", and fails on the missing JAR
78+
# without falling back to Maven Central. Delete those partial entries
79+
# so sbt re-fetches the full artifact remotely.
80+
for repo in "$HOME/.m2/repository" /root/.m2/repository; do
81+
[ -d "$repo" ] || continue
82+
find "$repo" -name '*.pom' | while read -r pom; do
83+
jar="${pom%.pom}.jar"
84+
[ -f "$jar" ] && continue
85+
grep -q '<packaging>jar</packaging>\|<packaging>bundle</packaging>' "$pom" 2>/dev/null || continue
86+
rm -f "$pom" "${pom}.sha1" "${pom%.pom}.pom.lastUpdated" \
87+
"$(dirname "$pom")/_remote.repositories"
88+
done
89+
done

.github/workflows/spark_sql_test.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ jobs:
126126
needs: build-native
127127
strategy:
128128
matrix:
129-
os: [ubuntu-24.04]
130129
module:
131130
- {name: "catalyst", args1: "catalyst/test", args2: ""}
132131
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -142,12 +141,17 @@ jobs:
142141
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
143142
- {spark-short: '4.0', spark-full: '4.0.2', java: 17, scan-impl: 'auto'}
144143
- {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'}
144+
- {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'}
145145
fail-fast: false
146146
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
147-
runs-on: ${{ matrix.os }}
147+
# Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite
148+
# relies on an Ivy 'local-m2-cache' resolver that the runs-on.com
149+
# ubuntu24-full-x64 image does not provide, so spark-submit fails there.
150+
runs-on: ${{ startsWith(matrix.module.name, 'sql_hive') && 'ubuntu-24.04' || (github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest') }}
148151
container:
149152
image: amd64/rust
150153
steps:
154+
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
151155
- uses: actions/checkout@v6
152156
- name: Setup Rust & Java toolchain
153157
uses: ./.github/actions/setup-builder
@@ -170,7 +174,7 @@ jobs:
170174
cd apache-spark
171175
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
172176
NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
173-
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
177+
build/sbt -Dsbt.log.noformat=true -mem 6144 ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
174178
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
175179
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
176180
fi

0 commit comments

Comments (0)