Skip to content

Commit 51b88cb

Browse files
committed
Add MLflowEnabled config option.
Signed-off-by: Alyssa Goins <agoins@redhat.com>
1 parent a09c0f4 commit 51b88cb

10 files changed

Lines changed: 230 additions & 3 deletions

File tree

api/v1/dspipeline_types.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ type DSPASpec struct {
5555
// Proxy configuration for all DSPA components to enable usage in environments requiring proxy access
5656
// +kubebuilder:validation:Optional
5757
Proxy *ProxyConfig `json:"proxy,omitempty"`
58+
59+
MLflow *MLflowConfig `json:"mlflowConfig,omitEmpty"`
5860
}
5961

6062
// +kubebuilder:validation:Pattern=`^(Managed|Removed)$`
@@ -466,6 +468,18 @@ type ComponentDetailStatus struct {
466468
ExternalUrl string `json:"externalUrl,omitempty"`
467469
}
468470

471+
type IntegrationMode int
472+
473+
const (
474+
AutoDetect IntegrationMode = iota
475+
Disabled
476+
)
477+
478+
type MLflowConfig struct {
479+
IntegrationMode
480+
InjectUserEnvVars bool
481+
}
482+
469483
//+kubebuilder:object:root=true
470484
//+kubebuilder:subresource:status
471485
//+kubebuilder:resource:shortName=dspa

config/crd/bases/datasciencepipelinesapplications.opendatahub.io_datasciencepipelinesapplications.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,21 @@ spec:
914914
type: object
915915
type: object
916916
type: object
917+
mlflow:
918+
description: |-
919+
MLflow can be optionally enabled as an API server plugin for tracking pipeline runs
920+
in MLflow experiments.
921+
properties:
922+
IntegrationMode:
923+
description: |-
924+
Indicates whether to enable MLflow plugin on API server if plugin installed on cluster,
925+
or disable entirely.
926+
enum:
927+
- "AUTODETECT"
928+
- "DISABLED"
929+
InjectUserEnvVars:
930+
description: Indicates if user container env variable should be updated by KFP.
931+
type: boolean
917932
podToPodTLS:
918933
default: true
919934
description: PodToPodTLS Set to "true" or "false" to enable or disable

config/internal/apiserver/default/rolebinding_ds-pipeline.yaml.tmpl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,21 @@ roleRef:
1313
subjects:
1414
- kind: ServiceAccount
1515
name: {{.APIServerDefaultResourceName}}
16+
17+
---
18+
# mlflow-integration binding
19+
apiVersion: rbac.authorization.k8s.io/v1
20+
kind: RoleBinding
21+
metadata:
22+
name: mlflow-integration
23+
namespace: {{.Namespace}}
24+
labels:
25+
app: {{.APIServerDefaultResourceName}}
26+
component: data-science-pipelines
27+
roleRef:
28+
apiGroup: rbac.authorization.k8s.io
29+
kind: ClusterRole
30+
name: mlflow-integration
31+
subjects:
32+
- kind: ServiceAccount
33+
name: {{.APIServerDefaultResourceName}}

config/internal/apiserver/default/rolebinding_pipeline-runner.yaml.tmpl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,21 @@ roleRef:
1313
subjects:
1414
- kind: ServiceAccount
1515
name: pipeline-runner-{{.Name}}
16+
17+
---
18+
# mlflow-integration binding
19+
apiVersion: rbac.authorization.k8s.io/v1
20+
kind: RoleBinding
21+
metadata:
22+
name: mlflow-integration
23+
namespace: {{.Namespace}}
24+
labels:
25+
app: {{.APIServerDefaultResourceName}}
26+
component: data-science-pipelines
27+
roleRef:
28+
apiGroup: rbac.authorization.k8s.io
29+
kind: ClusterRole
30+
name: mlflow-integration
31+
subjects:
32+
- kind: ServiceAccount
33+
name: pipeline-runner-{{.Name}}

controllers/dspipeline_params.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ import (
4141
"k8s.io/apimachinery/pkg/api/resource"
4242
"k8s.io/apimachinery/pkg/types"
4343
"sigs.k8s.io/controller-runtime/pkg/client"
44+
45+
mlflowv1 "github.com/opendatahub-io/mlflow-operator/api/v1"
4446
)
4547

4648
const MlmdIsRequired = "MLMD explicitly disabled in DSPA, but is a required component for DSP"
@@ -101,6 +103,7 @@ type DSPAParams struct {
101103

102104
APIServerServiceDNSName string
103105
FIPSEnabled bool
106+
MLflow *dspa.MLflowConfig
104107

105108
// Proxy configuration for all DSPA components
106109
ProxyConfig *dspa.ProxyConfig
@@ -141,6 +144,29 @@ type ObjectStorageConnection struct {
141144
ExternalRouteURL string
142145
}
143146

147+
type PluginConfig struct {
148+
Endpoint string `json:"endpoint,omitempty" mapstructure:"endpoint"`
149+
Timeout string `json:"timeout,omitempty" mapstructure:"timeout"`
150+
TLS TLSConfig `json:"tls,omitempty" mapstructure:"tls"`
151+
Settings MLflowPluginSettings `json:"settings,omitempty" mapstructure:"settings"`
152+
}
153+
154+
type TLSConfig struct {
155+
InsecureSkipVerify bool `json:"insecureSkipVerify,omitempty" mapstructure:"insecureSkipVerify"`
156+
CABundlePath string `json:"caBundlePath,omitempty" mapstructure:"caBundlePath"`
157+
}
158+
159+
// MLflowPluginSettings contains MLflow-specific settings to be marshalled into PluginConfig.Settings
160+
type MLflowPluginSettings struct {
161+
WorkspacesEnabled bool `json:"workspacesEnabled,omitempty"`
162+
ExperimentDescription string `json:"experimentDescription,omitempty"`
163+
//todo: add default experiment name?
164+
DefaultExperimentName string `json:"defaultExperimentName"`
165+
//todo: KFP base URL?
166+
KFPBaseURL string `json:"kfpBaseURL,omitempty"`
167+
InjectUserEnvVars bool `json:"injectUserEnvVars,omitempty"`
168+
}
169+
144170
// UsingExternalDB will return true if an external Database is specified in the CR, otherwise false.
145171
func (p *DSPAParams) UsingExternalDB(dsp *dspa.DataSciencePipelinesApplication) bool {
146172
if dsp.Spec.Database != nil && dsp.Spec.Database.ExternalDB != nil {
@@ -217,6 +243,25 @@ func (p *DSPAParams) RetrieveSecret(ctx context.Context, client client.Client, s
217243
return base64.StdEncoding.EncodeToString(secret.Data[secretKey]), nil
218244
}
219245

246+
func (p *DSPAParams) RetrieveMLflowEndpoint(ctx context.Context, client client.Client, log logr.Logger) (string, error) {
247+
mlflowName := "mlflow"
248+
mlflow := &mlflowv1.MLflow{}
249+
namespacedName := types.NamespacedName{
250+
Name: mlflowName,
251+
Namespace: p.Namespace,
252+
}
253+
err := client.Get(ctx, namespacedName, mlflow)
254+
if err != nil {
255+
log.V(1).Info(fmt.Sprintf("Unable to retrieve mlflow resource [%s].", mlflowName))
256+
return "", err
257+
}
258+
status := mlflow.Status
259+
if status.Address != nil && status.Address.URL != "" {
260+
return mlflow.Status.Address.URL, nil
261+
}
262+
return "", errors.New("MLflow resource missing Status.Address.URL field. Unable to resolve endpoint")
263+
}
264+
220265
func (p *DSPAParams) RetrieveOrCreateSecret(ctx context.Context, client client.Client, secretName, secretKey string, generatedPasswordLength int, log logr.Logger) (string, error) {
221266
val, err := p.RetrieveSecret(ctx, client, secretName, secretKey, log)
222267
if err != nil && apierrs.IsNotFound(err) {
@@ -703,6 +748,23 @@ func (p *DSPAParams) ExtractParams(ctx context.Context, dsp *dspa.DataSciencePip
703748
p.PodToPodTLS = *dsp.Spec.PodToPodTLS
704749
}
705750

751+
if dsp.Spec.MLflow != nil {
752+
mlflowCfg := dspa.MLflowConfig{
753+
IntegrationMode: dspa.AutoDetect,
754+
InjectUserEnvVars: false,
755+
}
756+
757+
// Override default settings if specified.
758+
if dsp.Spec.MLflow.IntegrationMode == dspa.Disabled {
759+
mlflowCfg.IntegrationMode = dspa.Disabled
760+
}
761+
if dsp.Spec.MLflow.InjectUserEnvVars == true {
762+
mlflowCfg.InjectUserEnvVars = true
763+
}
764+
765+
p.MLflow = &mlflowCfg
766+
}
767+
706768
p.ProxyConfig = dsp.Spec.Proxy
707769

708770
log := loggr.WithValues("namespace", p.Namespace).WithValues("dspa_name", p.Name)
@@ -729,6 +791,27 @@ func (p *DSPAParams) ExtractParams(ctx context.Context, dsp *dspa.DataSciencePip
729791

730792
setResourcesDefault(config.APIServerResourceRequirements, &p.APIServer.Resources)
731793

794+
if p.MLflow != nil && p.MLflow.IntegrationMode == dspa.AutoDetect {
795+
// Retrieve internal MLflow service endpoint for use in API Server MLflow plugin config.
796+
mlflowEndpoint, err := p.RetrieveMLflowEndpoint(ctx, client, log)
797+
if err == nil {
798+
pluginCfg, err := BuildMLflowPluginConfigJson(mlflowEndpoint, p.CustomCABundleRootMountPath, p.MLflow.InjectUserEnvVars)
799+
if err != nil {
800+
//todo: do we want to return an error here? or just log an error.
801+
log.Info("Failed to build MLflow plugin config. MLflow API server plugin will not be enabled.")
802+
return err
803+
}
804+
// Append MLflow Plugin config to API server configs.
805+
err = util.UpdateConfigMapByName(ctx, p.APIServer.CustomServerConfig.Name, p.Namespace, client, "config.json", pluginCfg)
806+
if err != nil {
807+
log.Info("Failed to update API server config with MLflow plugin. MLflow API server plugin will not be enabled.")
808+
return err
809+
}
810+
} else {
811+
log.Error(err, "failed to retrieve MLflow internal endpoint. MLflow API server plugin will not be enabled.")
812+
}
813+
}
814+
732815
if p.APIServer.CustomServerConfig == nil {
733816
p.APIServer.CustomServerConfig = &dspa.ScriptConfigMap{
734817
Name: config.CustomServerConfigMapNamePrefix + dsp.Name,
@@ -977,3 +1060,29 @@ func (p *DSPAParams) ExtractParams(ctx context.Context, dsp *dspa.DataSciencePip
9771060

9781061
return nil
9791062
}
1063+
1064+
func BuildMLflowPluginConfigJson(mlflowEndpoint string, caBundlePath string, injectUserEnvVars bool) (string, error) {
1065+
settings := MLflowPluginSettings{
1066+
WorkspacesEnabled: true,
1067+
ExperimentDescription: "Created by AI Pipelines.",
1068+
DefaultExperimentName: "pipelines-exp",
1069+
//todo: this should potentially be passed in.
1070+
KFPBaseURL: "",
1071+
InjectUserEnvVars: injectUserEnvVars,
1072+
}
1073+
1074+
pluginCfgJson, err := json.Marshal(
1075+
PluginConfig{
1076+
Endpoint: mlflowEndpoint,
1077+
Timeout: "30s",
1078+
TLS: TLSConfig{
1079+
InsecureSkipVerify: false,
1080+
CABundlePath: caBundlePath,
1081+
},
1082+
Settings: settings,
1083+
})
1084+
if err != nil {
1085+
return "", fmt.Errorf("failed to marshal MLflow plugin config: %w", err)
1086+
}
1087+
return string(pluginCfgJson), nil
1088+
}

controllers/util/util.go

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ limitations under the License.
1717
package util
1818

1919
import (
20+
"encoding/json"
2021
"fmt"
22+
"os"
23+
"path/filepath"
24+
2125
mf "github.com/manifestival/manifestival"
2226
dspav1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1"
2327
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
24-
"os"
25-
"path/filepath"
2628

2729
"github.com/opendatahub-io/data-science-pipelines-operator/controllers/config"
2830

@@ -101,6 +103,35 @@ func GetConfigMap(ctx context.Context, cfgName, ns string, client client.Client)
101103
return cfgMap, nil
102104
}
103105

106+
func UpdateConfigMap(ctx context.Context, cfgMap *v1.ConfigMap, client client.Client) error {
107+
err := client.Update(ctx, cfgMap)
108+
if err != nil {
109+
return err
110+
}
111+
return nil
112+
}
113+
114+
func UpdateConfigMapByName(ctx context.Context, cfgName, ns string, client client.Client, field, value string) error {
115+
cm, err := GetConfigMap(ctx, cfgName, ns, client)
116+
if err != nil {
117+
return fmt.Errorf("failed to get configmap %s/%s: %w", ns, cfgName, err)
118+
}
119+
cmField := cm.Data[field]
120+
if cmField == "" {
121+
return fmt.Errorf("configmap field %s does not exist", field)
122+
}
123+
updatedField, err := updateJSON(cmField, field, value)
124+
if err != nil {
125+
return fmt.Errorf("failed to update configmap field %s/%s: %w", ns, field, err)
126+
}
127+
cm.Data[field] = updatedField
128+
err = UpdateConfigMap(ctx, cm, client)
129+
if err != nil {
130+
return fmt.Errorf("failed to update configmap %s/%s: %w", ns, cfgName, err)
131+
}
132+
return nil
133+
}
134+
104135
// GetConfigMapValue fetches the value for the provided configmap mapped to a given key
105136
func GetConfigMapValue(cfgKey string, cfgMap *v1.ConfigMap) string {
106137
if val, ok := cfgMap.Data[cfgKey]; ok {
@@ -280,3 +311,17 @@ func AddDeploymentPodLabelTransformer(labelKey, labelValue string) mf.Transforme
280311
return nil
281312
}
282313
}
314+
315+
func updateJSON(jsonStr string, field string, value any) (string, error) {
316+
var obj map[string]any
317+
if err := json.Unmarshal([]byte(jsonStr), &obj); err != nil {
318+
return "", fmt.Errorf("invalid JSON input: %w", err)
319+
}
320+
321+
obj[field] = value
322+
out, err := json.Marshal(obj)
323+
if err != nil {
324+
return "", fmt.Errorf("marshal updated JSON: %w", err)
325+
}
326+
return string(out), nil
327+
}

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ require (
1212
github.com/manifestival/controller-runtime-client v0.4.0
1313
github.com/manifestival/manifestival v0.7.2
1414
github.com/minio/minio-go/v7 v7.0.99
15+
github.com/opendatahub-io/mlflow-operator/api v0.0.0-20260331183147-5f8991ebee1a
1516
github.com/openshift/api v0.0.0-20260331162130-f7b3bd900c75
1617
github.com/prometheus/client_golang v1.23.2
1718
github.com/spf13/viper v1.21.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,8 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y
12181218
github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
12191219
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
12201220
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
1221+
github.com/opendatahub-io/mlflow-operator/api v0.0.0-20260331183147-5f8991ebee1a h1:mWIyP//5+UMioydwJa1RLmFt2+YnSJHpGwL+4S9xsQY=
1222+
github.com/opendatahub-io/mlflow-operator/api v0.0.0-20260331183147-5f8991ebee1a/go.mod h1:4cBD//CLye8UPT2d2gDLrwLOVGiOv7FtAYNvMYnzLj0=
12211223
github.com/openshift/api v0.0.0-20260331162130-f7b3bd900c75 h1:BcNL7bI9rxRQdDVsbtW0kPw23I6GtNwvBYg9rjoWenE=
12221224
github.com/openshift/api v0.0.0-20260331162130-f7b3bd900c75/go.mod h1:pyVjK0nZ4sRs4fuQVQ4rubsJdahI1PB94LnQ8sGdvxo=
12231225
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=

main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import (
5656
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
5757
"sigs.k8s.io/controller-runtime/pkg/webhook"
5858
//+kubebuilder:scaffold:imports
59+
mlflowv1 "github.com/opendatahub-io/mlflow-operator/api/v1"
5960
)
6061

6162
var (
@@ -247,6 +248,9 @@ func main() {
247248
// informer still detects changes to those external resources.
248249
&corev1.ConfigMap{}: {Transform: stripConfigMapData},
249250
&corev1.Secret{}: {Transform: stripSecretData},
251+
//todo: possible to add cache ttl here?
252+
//todo: can we potentially add a specific sync period for this one watched resource?
253+
&mlflowv1.MLflow{}: {},
250254
},
251255
},
252256
// ConfigMap and Secret client reads bypass the cache entirely for

scripts/release/params.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646
"MANAGEDPIPELINES": "\"{}\"",
4747
"PLATFORMVERSION": "\"v0.0.0\"",
4848
"FIPSENABLED": "false",
49-
"WEBHOOK_ANNOTATIONS": ""
49+
"WEBHOOK_ANNOTATIONS": "",
50+
"MLFLOW": "{\"}\"",
5051
}
5152

5253

0 commit comments

Comments
 (0)