diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000000..9dbb36766b --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,11 @@ +* The generation of helm charts and yaml manifests relies on kubebuilder-generated manifests in + `../operator/config/[files]`. + +* If you have modified CRD definitions in the operator (`../operator/apis/mlops/v1alpha1/*` and + dependencies), you must first run `make manifests` in the operator directory to update the manifests. + Only then should you run `make create` in this directory. + +* Never modify files in `../operator/config/[files]` manually. These files are generated by + kubebuilder. Instead, modify the CRD definitions in the operator code. + + diff --git a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml index c2163fbbcb..2c5a66607e 100644 --- a/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml +++ b/k8s/helm-charts/seldon-core-v2-crds/templates/seldon-v2-crds.yaml @@ -177,9 +177,13 @@ spec: jsonPath: .status.conditions[?(@.type=="ModelReady")].status name: Ready type: string - - description: Number of replicas - jsonPath: .status.replicas - name: Replicas + - description: Number of desired replicas + jsonPath: .spec.replicas + name: Desired Replicas + type: integer + - description: Number of replicas available to receive inference requests + jsonPath: .status.availableReplicas + name: Available Replicas type: integer - jsonPath: .metadata.creationTimestamp name: Age @@ -327,6 +331,10 @@ spec: roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. type: object + availableReplicas: + description: Number of available replicas + format: int32 + type: integer conditions: description: Conditions the latest available observations of a resource's current state. 
diff --git a/k8s/yaml/crds.yaml b/k8s/yaml/crds.yaml index 20e3a82c2f..01ac210d20 100644 --- a/k8s/yaml/crds.yaml +++ b/k8s/yaml/crds.yaml @@ -180,9 +180,13 @@ spec: jsonPath: .status.conditions[?(@.type=="ModelReady")].status name: Ready type: string - - description: Number of replicas - jsonPath: .status.replicas - name: Replicas + - description: Number of desired replicas + jsonPath: .spec.replicas + name: Desired Replicas + type: integer + - description: Number of replicas available to receive inference requests + jsonPath: .status.availableReplicas + name: Available Replicas type: integer - jsonPath: .metadata.creationTimestamp name: Age @@ -330,6 +334,10 @@ spec: roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. type: object + availableReplicas: + description: Number of available replicas + format: int32 + type: integer conditions: description: Conditions the latest available observations of a resource's current state. diff --git a/operator/apis/mlops/v1alpha1/model_types.go b/operator/apis/mlops/v1alpha1/model_types.go index 904d6dd4d1..7828d0d8e0 100644 --- a/operator/apis/mlops/v1alpha1/model_types.go +++ b/operator/apis/mlops/v1alpha1/model_types.go @@ -126,9 +126,11 @@ type InferenceArtifactSpec struct { // ModelStatus defines the observed state of Model type ModelStatus struct { // Total number of replicas targeted by this model - Replicas int32 `json:"replicas,omitempty"` - Selector string `json:"selector,omitempty"` - duckv1.Status `json:",inline"` + Replicas int32 `json:"replicas,omitempty"` + Selector string `json:"selector,omitempty"` + // Number of available replicas + AvailableReplicas int32 `json:"availableReplicas,omitempty"` + duckv1.Status `json:",inline"` } //+kubebuilder:object:root=true @@ -136,7 +138,8 @@ type ModelStatus struct { //+kubebuilder:resource:shortName=mlm //+kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector 
//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="ModelReady")].status`,description="Model ready status" -//+kubebuilder:printcolumn:name="Replicas",type=integer,JSONPath=`.status.replicas`, description="Number of replicas" +//+kubebuilder:printcolumn:name="Desired Replicas",type=integer,JSONPath=`.spec.replicas`,description="Number of desired replicas" +//+kubebuilder:printcolumn:name="Available Replicas",type=integer,JSONPath=`.status.availableReplicas`,description="Number of replicas available to receive inference requests" //+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // Model is the Schema for the models API diff --git a/operator/config/crd/bases/mlops.seldon.io_models.yaml b/operator/config/crd/bases/mlops.seldon.io_models.yaml index 270374db34..c7cb739a2b 100644 --- a/operator/config/crd/bases/mlops.seldon.io_models.yaml +++ b/operator/config/crd/bases/mlops.seldon.io_models.yaml @@ -21,9 +21,13 @@ spec: jsonPath: .status.conditions[?(@.type=="ModelReady")].status name: Ready type: string - - description: Number of replicas - jsonPath: .status.replicas - name: Replicas + - description: Number of desired replicas + jsonPath: .spec.replicas + name: Desired Replicas + type: integer + - description: Number of replicas available to receive inference requests + jsonPath: .status.availableReplicas + name: Available Replicas type: integer - jsonPath: .metadata.creationTimestamp name: Age @@ -171,6 +175,10 @@ spec: roughly akin to Annotations on any k8s resource, just the reconciler conveying richer information outwards. type: object + availableReplicas: + description: Number of available replicas + format: int32 + type: integer conditions: description: Conditions the latest available observations of a resource's current state. 
diff --git a/operator/scheduler/model.go b/operator/scheduler/model.go index 9c5a18aad5..f2b725df38 100644 --- a/operator/scheduler/model.go +++ b/operator/scheduler/model.go @@ -260,6 +260,9 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context, grpcClient s modelStatus.GetAvailableReplicas() + modelStatus.GetUnavailableReplicas(), ) + latestModel.Status.AvailableReplicas = int32( + modelStatus.GetAvailableReplicas(), + ) latestModel.Status.Selector = "server=" + latestVersionStatus.ServerName return s.updateModelStatus(ctxWithTimeout, latestModel) }) diff --git a/scheduler/pkg/scheduler/scheduler.go b/scheduler/pkg/scheduler/scheduler.go index 8b4132c813..ecba18ed2f 100644 --- a/scheduler/pkg/scheduler/scheduler.go +++ b/scheduler/pkg/scheduler/scheduler.go @@ -173,6 +173,7 @@ func (s *SimpleScheduler) scheduleToServer(modelName string) error { logger. WithField("candidate_servers", filteredServers). WithField("desired_replicas", desiredReplicas). + WithField("min_replicas", minReplicas). Debug("Identified candidate servers for model") // The main logic of trying to find a server for the model is as follows: diff --git a/scheduler/pkg/store/memory_status.go b/scheduler/pkg/store/memory_status.go index 0c0187782d..558673150d 100644 --- a/scheduler/pkg/store/memory_status.go +++ b/scheduler/pkg/store/memory_status.go @@ -106,7 +106,8 @@ func updateModelState(isLatest bool, modelVersion *ModelVersion, prevModelVersio } func (m *MemoryStore) FailedScheduling(modelVersion *ModelVersion, reason string, reset bool) { - availableReplicas := modelVersion.state.AvailableReplicas + // we use len of GetAssignment instead of .state.AvailableReplicas as it is more accurate in this context + availableReplicas := uint32(len(modelVersion.GetAssignment())) modelVersion.state = ModelStatus{ State: ScheduleFailed,