Skip to content

Commit 6f40a15

Browse files
committed
build: compare spark_version numerically via isSparkAtLeast helper
Per /ultrareview feedback: the five `"$spark_version" >= "3.5.0"` checks were lexicographic string comparisons. They happened to work for 3.5.0 and 4.0.2 only because '4' > '3' as chars — a future "3.10.0" release would compare less than "3.5.0" and silently drop the Spark 3.5+ dependencies and exclusions. Introduce an `isSparkAtLeast` closure that tokenizes on `.` and `-`, keeps numeric parts, and compares component-by-component. Replace all five call sites.
1 parent 95c0af5 commit 6f40a15

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

runners/spark/spark_runner.gradle

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,16 @@ applyJavaNature(
3636

3737
description = "Apache Beam :: Runners :: Spark $spark_version"
3838

39+
// Numeric version comparison (lexicographic string compare was fragile — e.g. "3.10.0" < "3.5.0").
40+
// Numeric version comparison (lexicographic string compare was fragile — e.g. "3.10.0" < "3.5.0").
// Returns true when spark_version is >= minVersion, comparing component-by-component.
// Missing trailing components are treated as 0, so "3.5" satisfies a "3.5.0" minimum
// (the previous `parts.size() >= minParts.size()` check wrongly rejected that case).
def isSparkAtLeast = { String minVersion ->
  // Keep only numeric tokens so pre-release suffixes like "3.5.0-rc1" compare on the numbers.
  def parts = spark_version.tokenize('.-').findAll { it.isInteger() }*.toInteger()
  def minParts = minVersion.tokenize('.')*.toInteger()
  // Pad the shorter list with zeros and compare left-to-right.
  int width = Math.max(parts.size(), minParts.size())
  for (int i = 0; i < width; i++) {
    int actual = i < parts.size() ? parts[i] : 0
    int minimum = i < minParts.size() ? minParts[i] : 0
    if (actual != minimum) return actual > minimum
  }
  // All components equal: the version meets the minimum exactly.
  return true
}
48+
3949
/*
4050
* We need to rely on manually specifying these evaluationDependsOn to ensure that
4151
* the following projects are evaluated before we evaluate this project. This is because
@@ -177,7 +187,7 @@ dependencies {
177187
spark.components.each { component ->
178188
provided "$component:$spark_version"
179189
}
180-
if ("$spark_version" >= "3.5.0") {
190+
if (isSparkAtLeast("3.5.0")) {
181191
implementation "org.apache.spark:spark-common-utils_$spark_scala_version:$spark_version"
182192
implementation "org.apache.spark:spark-sql-api_$spark_scala_version:$spark_version"
183193
}
@@ -214,7 +224,7 @@ dependencies {
214224
testImplementation library.java.mockito_core
215225
testImplementation "org.assertj:assertj-core:3.11.1"
216226
testImplementation "org.apache.zookeeper:zookeeper:3.4.11"
217-
if ("$spark_version" >= "3.5.0") {
227+
if (isSparkAtLeast("3.5.0")) {
218228
testImplementation "org.apache.spark:spark-common-utils_$spark_scala_version:$spark_version"
219229
testImplementation "org.apache.spark:spark-sql-api_$spark_scala_version:$spark_version"
220230
}
@@ -228,7 +238,7 @@ dependencies {
228238
"hadoopVersion$kv.key" "org.apache.hadoop:hadoop-common:$kv.value"
229239
// Force paranamer 2.8 to avoid issues when using Scala 2.12
230240
"hadoopVersion$kv.key" "com.thoughtworks.paranamer:paranamer:2.8"
231-
if ("$spark_version" >= "3.5.0") {
241+
if (isSparkAtLeast("3.5.0")) {
232242
// Add log4j 2.x dependencies as Spark 3.5+ uses slf4j with log4j 2.x backend
233243
"hadoopVersion$kv.key" library.java.log4j2_api
234244
"hadoopVersion$kv.key" library.java.log4j2_core
@@ -254,7 +264,7 @@ configurations.validatesRunner {
254264
// Exclude to make sure log4j binding is used
255265
exclude group: "org.slf4j", module: "slf4j-simple"
256266

257-
if ("$spark_version" >= "3.5.0") {
267+
if (isSparkAtLeast("3.5.0")) {
258268
// Exclude log4j 1.x dependencies to prevent conflict with log4j 2.x used by spark 3.5+
259269
exclude group: "log4j", module: "log4j"
260270
}
@@ -265,7 +275,7 @@ hadoopVersions.each { kv ->
265275
resolutionStrategy {
266276
force "org.apache.hadoop:hadoop-common:$kv.value"
267277
}
268-
if ("$spark_version" >= "3.5.0") {
278+
if (isSparkAtLeast("3.5.0")) {
269279
// Exclude log4j 1.x dependencies to prevent conflict with log4j 2.x used by spark 3.5+
270280
exclude group: "log4j", module: "log4j"
271281
}

0 commit comments

Comments
 (0)