From 82fd27a219fb8e24438e2f08f4f4e55f2856764a Mon Sep 17 00:00:00 2001 From: bvolovat Date: Sun, 23 Feb 2025 16:30:58 +0200 Subject: [PATCH] Calculates resource requests and limits based on node size, count, and cluster resources Signed-off-by: bvolovat --- .github/workflows/performance.yaml | 21 +++++- performance.py | 114 ++++++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 7 deletions(-) diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml index f82bb03..1ad35fa 100644 --- a/.github/workflows/performance.yaml +++ b/.github/workflows/performance.yaml @@ -41,11 +41,11 @@ on: # type: string # required: true STORAGE_VERSION: - description: 'storage version' + description: 'Storage version' type: string required: false NODE_AGENT_VERSION: - description: 'node agent version' + description: 'Node agent version' type: string required: false ENABLE_KDR: @@ -107,7 +107,20 @@ jobs: CLUSTER_ID=$(doctl kubernetes cluster get ${{ github.event.inputs.CLUSTER_NAME }} --format ID --no-header) doctl kubernetes cluster kubeconfig save $CLUSTER_ID - - name: Run performance Test + - name: Debug Inputs + run: | + echo "CLUSTER_NAME: ${{ github.event.inputs.CLUSTER_NAME }}" + echo "NODE_SIZE: ${{ github.event.inputs.NODE_SIZE }}" + echo "NODE_COUNT: ${{ github.event.inputs.NODE_COUNT }}" + echo "DURATION_TIME: ${{ github.event.inputs.DURATION_TIME }}" + echo "KUBERNETES_VERSION: ${{ github.event.inputs.KUBERNETES_VERSION }}" + echo "STORAGE_VERSION: ${{ github.event.inputs.STORAGE_VERSION }}" + echo "NODE_AGENT_VERSION: ${{ github.event.inputs.NODE_AGENT_VERSION }}" + echo "ENABLE_KDR: ${{ github.event.inputs.ENABLE_KDR }}" + echo "PRIVATE_NODE_AGENT: ${{ github.event.inputs.PRIVATE_NODE_AGENT }}" + echo "HELM_GIT_BRANCH: ${{ github.event.inputs.HELM_GIT_BRANCH }}" + + - name: Run Performance Test & Deploy Kubescape env: QUAYIO_REGISTRY_PASSWORD: ${{ secrets.QUAYIO_REGISTRY_PASSWORD }} QUAYIO_REGISTRY_USERNAME: ${{ 
secrets.QUAYIO_REGISTRY_USERNAME }} @@ -115,7 +128,7 @@ jobs: ACCOUNT_ID: ${{ secrets.PERFO_ACCOUNT_ID }} ACCESS_KEY: ${{ secrets.PERFO_ACCESS_KEY }} run: | - CMD="python performance.py -skip-cluster -nodes ${{ github.event.inputs.NODE_COUNT }} -account $ACCOUNT_ID -accessKey $ACCESS_KEY" + CMD="python performance.py -skip-cluster -nodes ${{ github.event.inputs.NODE_COUNT }} -node_size ${{ github.event.inputs.NODE_SIZE }} -account $ACCOUNT_ID -accessKey $ACCESS_KEY" if [ ! -z "${{ github.event.inputs.STORAGE_VERSION }}" ]; then CMD="$CMD -storage-version ${{ github.event.inputs.STORAGE_VERSION }}" diff --git a/performance.py b/performance.py index a95ceee..d834a3d 100644 --- a/performance.py +++ b/performance.py @@ -7,6 +7,17 @@ from concurrent.futures import ThreadPoolExecutor, as_completed +NODE_SIZES = { + "s-2vcpu-2gb": {"vcpu": 2, "memory_gb": 2}, + "s-4vcpu-8gb": {"vcpu": 4, "memory_gb": 8}, + "s-4vcpu-16gb": {"vcpu": 4, "memory_gb": 16}, + "s-8vcpu-16gb": {"vcpu": 8, "memory_gb": 16}, + "s-16vcpu-32gb": {"vcpu": 16, "memory_gb": 32} +} + +DEFAULT_NODE_SIZE = "s-4vcpu-16gb" +DEFAULT_NODE_COUNT = 4 + def run_command(command, cwd=None): try: result = subprocess.run(command, check=True, capture_output=True, text=True, shell=True, cwd=cwd) @@ -334,6 +344,97 @@ def get_node_agent_tag_from_git(): exit(1) return None +def calculate_resources(node_size, node_count): + """Calculates resource requests and limits based on node size, count, and cluster resources.""" + + node_size = node_size or DEFAULT_NODE_SIZE + node_count = node_count or DEFAULT_NODE_COUNT + + if node_size not in NODE_SIZES: + print(f"Warning: Unknown NODE_SIZE '{node_size}'. 
Using default '{DEFAULT_NODE_SIZE}'.") + node_size = DEFAULT_NODE_SIZE + + vcpu_per_node = NODE_SIZES[node_size]["vcpu"] + memory_per_node_gb = NODE_SIZES[node_size]["memory_gb"] + + # Cluster-wide capacity + total_vcpu = vcpu_per_node * node_count + total_memory_gb = memory_per_node_gb * node_count + + print(f"Cluster Resources - Nodes: {node_count}, Total vCPU: {total_vcpu}, Total Memory: {total_memory_gb}GB") + + # Get the total number of resources in the cluster + total_resources = len(subprocess.run( + ['kubectl', 'get', 'all', '-A', '--no-headers'], + check=True, capture_output=True, text=True + ).stdout.strip().splitlines()) + + # **Node-agent calculations** + node_agent_cpu_request = int(0.025 * vcpu_per_node * 1000) + node_agent_cpu_limit = int(0.10 * vcpu_per_node * 1000) + node_agent_memory_request = int(0.025 * memory_per_node_gb * 1024) + node_agent_memory_limit = int(0.10 * memory_per_node_gb * 1024) + + # **Storage component calculations** + storage_memory_request = int(0.2 * total_resources) + storage_memory_limit = int(0.8 * total_resources) + + # **KubeVuln calculations** + largest_image_size_mb = 1000 + kubevuln_memory_limit = largest_image_size_mb + 400 + + config = { + "nodeAgent": { + "resources": { + "requests": { + "cpu": f"{node_agent_cpu_request}m", + "memory": f"{node_agent_memory_request}Mi" + }, + "limits": { + "cpu": f"{node_agent_cpu_limit}m", + "memory": f"{node_agent_memory_limit}Mi" + } + } + }, + "storage": { + "resources": { + "requests": { + "memory": f"{storage_memory_request}Mi" + }, + "limits": { + "memory": f"{storage_memory_limit}Mi" + } + } + }, + "kubevuln": { + "resources": { + "limits": { + "memory": f"{kubevuln_memory_limit}Mi" + } + } + } + } + + return config + +def update_kubescape_helm(node_size, node_count): + """Updates the Kubescape deployment using Helm based on cluster specifications.""" + print("Updating Kubescape configuration...") + + config = calculate_resources(node_size, node_count) + + # Save config + 
with open("kubescape-autoscale.yaml", "w") as file: + yaml.dump(config, file, default_flow_style=False) + + # Apply update via Helm + run_command( + "helm upgrade --install kubescape kubescape/kubescape-operator " + "-n kubescape -f kubescape-autoscale.yaml" + ) + print("Kubescape updated with optimized resource allocation.") + + # Step 3: Wait for the cluster to be ready def check_cluster_ready(timeout=300): # Timeout 5 min start_time = time.time() @@ -439,7 +540,8 @@ def main(): # Parse command-line arguments parser = argparse.ArgumentParser(description="Deploy Kubescape with optional Helm parameters") parser.add_argument('-kdr', action='store_true', help="Enable KDR capabilities") - parser.add_argument('-nodes', type=int, default=3, help="Number of nodes (default is 3)") + parser.add_argument('-nodes', type=int, default=DEFAULT_NODE_COUNT, help="Number of nodes (default is 4)") + parser.add_argument('-node_size', type=str, default=DEFAULT_NODE_SIZE, help="Node type (default is s-4vcpu-16gb)") parser.add_argument('-account', type=str, required=True, help="Account ID") parser.add_argument('-accessKey', type=str, required=True, help="Access key") parser.add_argument('-duration', type=int, default=4, help="Duration time in hours (default is 4)") @@ -501,8 +603,13 @@ def main(): # Step 4: Check if the cluster is ready by polling the node readiness check_cluster_ready() - - # Step 5: Check if any pods are in CrashLoopBackOff state + + # Step 5: Update Kubescape Helm chart with optimized resources + update_kubescape_helm(node_size=args.node_size, node_count=node_count) + print("Kubescape Helm chart updated with optimized resources.") + time.sleep(30) # Wait for the operator + + # Step 6: Check if any pods are in CrashLoopBackOff state print("Checking for pods in CrashLoopBackOff state...") check_crashloop_pods(namespace="kubescape")