From 184cbc56dcd5c36acf575ea4649861f8f3230b7c Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 3 Oct 2024 13:50:53 +0200 Subject: [PATCH] WIP: consume kloglevel at runtime Signed-off-by: Francesco Romani --- .../v1/numaresourcesoperator_normalize.go | 9 +++++ .../v1/numaresourcesoperator_types.go | 9 ++++- .../v1/zz_generated.deepcopy.go | 5 +++ ...y.openshift.io_numaresourcesoperators.yaml | 10 +++++- ...ources-operator.clusterserviceversion.yaml | 10 ++++-- ...y.openshift.io_numaresourcesoperators.yaml | 10 +++++- ...ources-operator.clusterserviceversion.yaml | 8 ++++- .../numaresourcesoperator_controller.go | 33 +++++++++++++++++++ internal/api/features/_topics.json | 3 +- main.go | 4 ++- pkg/status/status.go | 1 + 11 files changed, 94 insertions(+), 8 deletions(-) diff --git a/api/numaresourcesoperator/v1/numaresourcesoperator_normalize.go b/api/numaresourcesoperator/v1/numaresourcesoperator_normalize.go index 25599b926..1f4955687 100644 --- a/api/numaresourcesoperator/v1/numaresourcesoperator_normalize.go +++ b/api/numaresourcesoperator/v1/numaresourcesoperator_normalize.go @@ -20,8 +20,17 @@ import ( "sort" corev1 "k8s.io/api/core/v1" + + operatorv1 "github.com/openshift/api/operator/v1" ) +func NormalizeSpec(spec *NUMAResourcesOperatorSpec) { + defaultLog := operatorv1.Normal + if spec.LogLevel == "" { + spec.LogLevel = defaultLog + } +} + func (nodeGroup NodeGroup) NormalizeConfig() NodeGroupConfig { conf := DefaultNodeGroupConfig() if nodeGroup.Config == nil { diff --git a/api/numaresourcesoperator/v1/numaresourcesoperator_types.go b/api/numaresourcesoperator/v1/numaresourcesoperator_types.go index 73db5a2f4..7a3d88fbc 100644 --- a/api/numaresourcesoperator/v1/numaresourcesoperator_types.go +++ b/api/numaresourcesoperator/v1/numaresourcesoperator_types.go @@ -35,7 +35,7 @@ type NUMAResourcesOperatorSpec struct { // Optional Resource Topology Exporter image URL //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Optional RTE image URL",xDescriptors={"urn:alm:descriptor:com.tectonic.ui:text"} ExporterImage string `json:"imageSpec,omitempty"` - // Valid values are: "Normal", "Debug", "Trace", "TraceAll". + // RTE log verbosity. Valid values are: "Normal", "Debug", "Trace", "TraceAll". // Defaults to "Normal". // +optional // +kubebuilder:default=Normal @@ -45,6 +45,10 @@ type NUMAResourcesOperatorSpec struct { // +optional //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Optional ignore pod namespace/name glob patterns" PodExcludes []NamespacedName `json:"podExcludes,omitempty"` + // Operator verbosity value, set the same way as the commandline + // +optional + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Operator verbosity" + Verbose *int `json:"verbose,omitempty"` } // +kubebuilder:validation:Enum=Disabled;Enabled;EnabledExclusiveResources @@ -178,6 +182,9 @@ type NUMAResourcesOperatorStatus struct { // RelatedObjects list of objects of interest for this operator //+operator-sdk:csv:customresourcedefinitions:type=status,displayName="Related Objects" RelatedObjects []configv1.ObjectReference `json:"relatedObjects,omitempty"` + // Verbose is the current log verbosity of the operator. + //+operator-sdk:csv:customresourcedefinitions:type=status,displayName="Operator log verbosity" + Verbose int `json:"verbose"` } // MachineConfigPool defines the observed state of each MachineConfigPool selected by node groups diff --git a/api/numaresourcesoperator/v1/zz_generated.deepcopy.go b/api/numaresourcesoperator/v1/zz_generated.deepcopy.go index b704f5342..f695ad512 100644 --- a/api/numaresourcesoperator/v1/zz_generated.deepcopy.go +++ b/api/numaresourcesoperator/v1/zz_generated.deepcopy.go @@ -129,6 +129,11 @@ func (in *NUMAResourcesOperatorSpec) DeepCopyInto(out *NUMAResourcesOperatorSpec *out = make([]NamespacedName, len(*in)) copy(*out, *in) } + if in.Verbose != nil { + in, out := &in.Verbose, &out.Verbose + *out = new(int) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NUMAResourcesOperatorSpec. diff --git a/bundle/manifests/nodetopology.openshift.io_numaresourcesoperators.yaml b/bundle/manifests/nodetopology.openshift.io_numaresourcesoperators.yaml index aedcec63b..146093438 100644 --- a/bundle/manifests/nodetopology.openshift.io_numaresourcesoperators.yaml +++ b/bundle/manifests/nodetopology.openshift.io_numaresourcesoperators.yaml @@ -48,7 +48,7 @@ spec: logLevel: default: Normal description: |- - Valid values are: "Normal", "Debug", "Trace", "TraceAll". + RTE log verbosity. Valid values are: "Normal", "Debug", "Trace", "TraceAll". Defaults to "Normal". enum: - "" @@ -213,6 +213,9 @@ spec: type: string type: object type: array + verbose: + description: Operator verbosity value, set the same way as the commandline + type: integer type: object status: description: NUMAResourcesOperatorStatus defines the observed state of @@ -548,6 +551,11 @@ spec: - resource type: object type: array + verbose: + description: Verbose is the current log verbosity of the operator. + type: integer + required: + - verbose type: object type: object served: true diff --git a/bundle/manifests/numaresources-operator.clusterserviceversion.yaml b/bundle/manifests/numaresources-operator.clusterserviceversion.yaml index 97f0372b4..ab8efc7ad 100644 --- a/bundle/manifests/numaresources-operator.clusterserviceversion.yaml +++ b/bundle/manifests/numaresources-operator.clusterserviceversion.yaml @@ -62,7 +62,7 @@ metadata: } ] capabilities: Basic Install - createdAt: "2025-01-07T13:18:55Z" + createdAt: "2025-01-09T09:47:29Z" olm.skipRange: '>=4.18.0 <4.19.0' operatorframework.io/cluster-monitoring: "true" operators.operatorframework.io/builder: operator-sdk-v1.36.1 @@ -89,7 +89,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:text - description: |- - Valid values are: "Normal", "Debug", "Trace", "TraceAll". + RTE log verbosity. Valid values are: "Normal", "Debug", "Trace", "TraceAll". Defaults to "Normal". displayName: RTE log verbosity path: logLevel @@ -131,6 +131,9 @@ spec: level displayName: Optional ignore pod namespace/name glob patterns path: podExcludes + - description: Operator verbosity value, set the same way as the commandline + displayName: Operator verbosity + path: verbose statusDescriptors: - description: Conditions show the current state of the NUMAResourcesOperator Operator @@ -168,6 +171,9 @@ spec: - description: RelatedObjects list of objects of interest for this operator displayName: Related Objects path: relatedObjects + - description: Verbose is the current log verbosity of the operator. + displayName: Operator log verbosity + path: verbose version: v1 - description: NUMAResourcesOperator is the Schema for the numaresourcesoperators API diff --git a/config/crd/bases/nodetopology.openshift.io_numaresourcesoperators.yaml b/config/crd/bases/nodetopology.openshift.io_numaresourcesoperators.yaml index f5f51c173..a1c26c3c2 100644 --- a/config/crd/bases/nodetopology.openshift.io_numaresourcesoperators.yaml +++ b/config/crd/bases/nodetopology.openshift.io_numaresourcesoperators.yaml @@ -48,7 +48,7 @@ spec: logLevel: default: Normal description: |- - Valid values are: "Normal", "Debug", "Trace", "TraceAll". + RTE log verbosity. Valid values are: "Normal", "Debug", "Trace", "TraceAll". Defaults to "Normal". enum: - "" @@ -213,6 +213,9 @@ spec: type: string type: object type: array + verbose: + description: Operator verbosity value, set the same way as the commandline + type: integer type: object status: description: NUMAResourcesOperatorStatus defines the observed state of @@ -548,6 +551,11 @@ spec: - resource type: object type: array + verbose: + description: Verbose is the current log verbosity of the operator. + type: integer + required: + - verbose type: object type: object served: true diff --git a/config/manifests/bases/numaresources-operator.clusterserviceversion.yaml b/config/manifests/bases/numaresources-operator.clusterserviceversion.yaml index d879e3555..fb6057650 100644 --- a/config/manifests/bases/numaresources-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/numaresources-operator.clusterserviceversion.yaml @@ -28,7 +28,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:text - description: |- - Valid values are: "Normal", "Debug", "Trace", "TraceAll". + RTE log verbosity. Valid values are: "Normal", "Debug", "Trace", "TraceAll". Defaults to "Normal". displayName: RTE log verbosity path: logLevel @@ -70,6 +70,9 @@ spec: level displayName: Optional ignore pod namespace/name glob patterns path: podExcludes + - description: Operator verbosity value, set the same way as the commandline + displayName: Operator verbosity + path: verbose statusDescriptors: - description: Conditions show the current state of the NUMAResourcesOperator Operator @@ -107,6 +110,9 @@ spec: - description: RelatedObjects list of objects of interest for this operator displayName: Related Objects path: relatedObjects + - description: Verbose is the current log verbosity of the operator. + displayName: Operator log verbosity + path: verbose version: v1 - description: NUMAResourcesOperator is the Schema for the numaresourcesoperators API diff --git a/controllers/numaresourcesoperator_controller.go b/controllers/numaresourcesoperator_controller.go index 25454e477..6d5d5aeba 100644 --- a/controllers/numaresourcesoperator_controller.go +++ b/controllers/numaresourcesoperator_controller.go @@ -68,6 +68,7 @@ import ( "github.com/openshift-kni/numaresources-operator/pkg/status/conditioninfo" "github.com/openshift-kni/numaresources-operator/pkg/validation" + intkloglevel "github.com/openshift-kni/numaresources-operator/internal/kloglevel" intreconcile "github.com/openshift-kni/numaresources-operator/internal/reconcile" ) @@ -91,6 +92,7 @@ type NUMAResourcesOperatorReconciler struct { ImagePullPolicy corev1.PullPolicy Recorder record.EventRecorder ForwardMCPConds bool + Verbose int } // TODO: narrow down @@ -145,11 +147,18 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr return r.degradeStatus(ctx, instance, status.ConditionTypeIncorrectNUMAResourcesOperatorResourceName, err) } + nropv1.NormalizeSpec(&instance.Spec) + // can't be API normalization because the value is dynamic + normalizeSpecVerboseFromRuntime(&instance.Spec, r.Verbose) + if annotations.IsPauseReconciliationEnabled(instance.Annotations) { klog.V(2).InfoS("Pause reconciliation enabled", "object", req.NamespacedName) return ctrl.Result{}, nil } + // validation step. This is not expected to fail often, hence log messages, if any, needs to be loud + // regardless by verbosiness value, including the value dynamically set. + if err := validation.NodeGroups(instance.Spec.NodeGroups, r.Platform); err != nil { return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err) } @@ -222,6 +231,20 @@ func (r *NUMAResourcesOperatorReconciler) degradeStatus(ctx context.Context, ins return ctrl.Result{}, nil } +func (r *NUMAResourcesOperatorReconciler) UpdateVerbosity(ctx context.Context, verbose *int) int { + // do as early as possible, but always after validation so we don't miss log messages + if verb := *verbose; verb != r.Verbose { + err := intkloglevel.Set(klog.Level(verb)) + if err != nil { + klog.InfoS("cannot updated log level dynamically", "desiredLevel", verb) + } else { + r.Verbose = verb + } + } + + return r.Verbose +} + func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) intreconcile.Step { applied, err := r.syncNodeResourceTopologyAPI(ctx) if err != nil { @@ -288,6 +311,9 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context } func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) intreconcile.Step { + // this must be the first, and must be done as early as possible. Should not make the reconciliation attempt fail (best-effort) + instance.Status.Verbose = r.UpdateVerbosity(ctx, instance.Spec.Verbose) + if step := r.reconcileResourceAPI(ctx, instance, trees); step.EarlyStop() { updateStatusConditionsIfNeeded(instance, step.ConditionInfo) return step @@ -777,3 +803,10 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr return nil, fmt.Errorf("unsupported platform") } } + +func normalizeSpecVerboseFromRuntime(spec *nropv1.NUMAResourcesOperatorSpec, verb int) { + if spec.Verbose != nil { + return + } + spec.Verbose = &verb +} diff --git a/internal/api/features/_topics.json b/internal/api/features/_topics.json index b6ee43240..43bc1e942 100644 --- a/internal/api/features/_topics.json +++ b/internal/api/features/_topics.json @@ -18,6 +18,7 @@ "tmpol", "schedattrwatch", "ngpoolname", - "nganns" + "nganns", + "opverbctl" ] } diff --git a/main.go b/main.go index 2937438f1..eb3ed479a 100644 --- a/main.go +++ b/main.go @@ -188,7 +188,7 @@ func main() { klogV, err := intkloglevel.Get() if err != nil { - klog.V(1).ErrorS(err, "setting up the logger") + klog.ErrorS(err, "setting up the logger") os.Exit(1) } @@ -282,6 +282,7 @@ func main() { os.Exit(1) } + // the only way this can change from the command line is to restart, so fetching once is the best option if err = (&controllers.NUMAResourcesOperatorReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -296,6 +297,7 @@ func main() { ImagePullPolicy: pullPolicy, Namespace: namespace, ForwardMCPConds: params.enableMCPCondsForward, + Verbose: int(klogV), }).SetupWithManager(mgr); err != nil { klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator") os.Exit(1) diff --git a/pkg/status/status.go b/pkg/status/status.go index 8922ae805..6cf6e384c 100644 --- a/pkg/status/status.go +++ b/pkg/status/status.go @@ -44,6 +44,7 @@ const ( const ( ConditionTypeIncorrectNUMAResourcesOperatorResourceName = "IncorrectNUMAResourcesOperatorResourceName" + ConditionTypeInternalError = "Internal Error" ) const (