Skip to content

Commit bb9be66

Browse files
Tobias Kaymakclaude
andcommitted
build: add Spark 4 job-server and container modules
Add job-server and container build configurations for Spark 4, mirroring the existing Spark 3 job-server setup. The container uses eclipse-temurin:17 (Spark 4 requires Java 17). The shared spark_job_server.gradle gains a requireJavaVersion conditional for Spark 4 parent projects. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d226b0d commit bb9be66

6 files changed

Lines changed: 117 additions & 2 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
def basePath = '../../job-server'
20+
21+
project.ext {
22+
// Look for the source code in the parent module
23+
main_source_dirs = ["$basePath/src/main/java"]
24+
test_source_dirs = ["$basePath/src/test/java"]
25+
main_resources_dirs = ["$basePath/src/main/resources"]
26+
test_resources_dirs = ["$basePath/src/test/resources"]
27+
archives_base_name = 'beam-runners-spark-4-job-server'
28+
}
29+
30+
// Load the main build script which contains all build logic.
31+
apply from: "$basePath/spark_job_server.gradle"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
###############################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
###############################################################################
18+
19+
FROM eclipse-temurin:17
20+
MAINTAINER "Apache Beam <dev@beam.apache.org>"
21+
22+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y libltdl7
23+
24+
ADD beam-runners-spark-job-server.jar /opt/apache/beam/jars/
25+
ADD spark-job-server.sh /opt/apache/beam/
26+
27+
WORKDIR /opt/apache/beam
28+
29+
COPY target/LICENSE /opt/apache/beam/
30+
COPY target/NOTICE /opt/apache/beam/
31+
32+
ENTRYPOINT ["./spark-job-server.sh"]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
def basePath = '../../../job-server/container'
20+
21+
project.ext {
22+
resource_path = basePath
23+
spark_job_server_image = 'spark4_job_server'
24+
}
25+
26+
// Load the main build script which contains all build logic.
27+
apply from: "$basePath/spark_job_server_container.gradle"
28+
29+
// Override the Dockerfile copy to use the Java 17 Dockerfile for Spark 4.
30+
copyDockerfileDependencies {
31+
// Remove the shared Dockerfile added by the shared gradle script and use the local one instead.
32+
// The shared Dockerfile uses eclipse-temurin:11 which is incompatible with Spark 4 (requires Java 17).
33+
exclude 'Dockerfile'
34+
}
35+
36+
task copySpark4Dockerfile(type: Copy) {
37+
from "Dockerfile"
38+
into "build"
39+
}
40+
41+
dockerPrepare.dependsOn copySpark4Dockerfile

runners/spark/4/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/EvaluationContext.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ public static <T> void evaluate(String name, Dataset<T> ds) {
8686
ds.write().mode("overwrite").format("noop").save();
8787
LOG.info("Evaluated dataset {} in {}", name, durationSince(startMs));
8888
} catch (RuntimeException e) {
89-
LOG.error("Failed to evaluate dataset {}: {}", name, Throwables.getRootCause(e).getMessage());
89+
LOG.error(
90+
"Failed to evaluate dataset {}: {}",
91+
name,
92+
String.valueOf(Throwables.getRootCause(e).getMessage()));
9093
throw new RuntimeException(e);
9194
}
9295
}
@@ -102,7 +105,10 @@ public static <T> void evaluate(String name, Dataset<T> ds) {
102105
LOG.info("Collected dataset {} in {} [size: {}]", name, durationSince(startMs), res.length);
103106
return res;
104107
} catch (Exception e) {
105-
LOG.error("Failed to collect dataset {}: {}", name, Throwables.getRootCause(e).getMessage());
108+
LOG.error(
109+
"Failed to collect dataset {}: {}",
110+
name,
111+
String.valueOf(Throwables.getRootCause(e).getMessage()));
106112
throw new RuntimeException(e);
107113
}
108114
}

runners/spark/job-server/spark_job_server.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ apply plugin: 'application'
2828
// we need to set mainClassName before applying shadow plugin
2929
mainClassName = "org.apache.beam.runners.spark.SparkJobServerDriver"
3030

31+
def parentSparkVersion = project.parent.findProperty('spark_version') ?: ''
32+
3133
applyJavaNature(
34+
requireJavaVersion: (parentSparkVersion.startsWith("4") ? org.gradle.api.JavaVersion.VERSION_17 : null),
3235
automaticModuleName: 'org.apache.beam.runners.spark.jobserver',
3336
archivesBaseName: project.hasProperty('archives_base_name') ? archives_base_name : archivesBaseName,
3437
validateShadowJar: false,

settings.gradle.kts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ include(":runners:spark:3")
151151
include(":runners:spark:3:job-server")
152152
include(":runners:spark:3:job-server:container")
153153
include(":runners:spark:4")
154+
include(":runners:spark:4:job-server")
155+
include(":runners:spark:4:job-server:container")
154156
include(":runners:samza")
155157
include(":runners:samza:job-server")
156158
include(":sdks:go")

0 commit comments

Comments
 (0)