From c13831b22607064b3c26dbd2588c388fd5723586 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 11:23:58 +0200
Subject: [PATCH 01/11] Add namespace filtering to pod retrieval for consistency and safety

---
 tests_scripts/kubernetes/base_k8s.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests_scripts/kubernetes/base_k8s.py b/tests_scripts/kubernetes/base_k8s.py
index 4acff51d..78831476 100755
--- a/tests_scripts/kubernetes/base_k8s.py
+++ b/tests_scripts/kubernetes/base_k8s.py
@@ -634,6 +634,9 @@ def get_pods(self, namespace: str = None, name: str = None, include_terminating:
         if pods is None:
             return []
 
+        # Safeguard: Explicit namespace filter
+        pods = [pod for pod in pods if pod.metadata.namespace == namespace]
+
         if name:
             if isinstance(name, str):
                 pods = [pod for pod in pods if name in pod.metadata.name]
@@ -702,9 +705,16 @@ def get_ready_pods(self, namespace, name: str = None):
         """
         :return: list of running pods with all containers ready
         """
-        ready_pods = list(
-            filter(lambda pod: not any(container.ready is False for container in pod.status.container_statuses or []),
-                   self.get_pods(namespace=namespace, name=name)))
+
+        pods = self.get_pods(namespace=namespace, name=name)
+
+        # Safeguard: Ensure namespace consistency
+        pods = [pod for pod in pods if pod.metadata.namespace == namespace]
+
+        ready_pods = [
+            pod for pod in pods
+            if all(container.ready for container in (pod.status.container_statuses or []))
+        ]
         return ready_pods
 
     def restart_pods(self, wlid=None, namespace: str = None, name: str = None):

From 13cf8f549e8d64634310974d33e2d1c37c6bc682 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 11:51:34 +0200
Subject: [PATCH 02/11] Disable caching in kubectl pod retrieval for accurate cluster state reporting

---
 tests_scripts/kubernetes/base_k8s.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests_scripts/kubernetes/base_k8s.py b/tests_scripts/kubernetes/base_k8s.py
index 78831476..28565e8b 100755
--- a/tests_scripts/kubernetes/base_k8s.py
+++ b/tests_scripts/kubernetes/base_k8s.py
@@ -777,7 +777,7 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
             running_pods = self.get_ready_pods(namespace=namespace, name=name)
             if comp_operator(len(running_pods), replicas):  # and len(running_pods) == len(total_pods):
                 Logger.logger.info(f"all pods are running after {delta_t} seconds")
-                result = subprocess.run("kubectl get pods -A", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                result = subprocess.run("kubectl get pods -A --cache=false", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                 result = " ".join(result.stdout.splitlines())
                 Logger.logger.info(
                     "cluster state\n"
@@ -793,7 +793,7 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
                        format(timeout,
                               KubectlWrapper.convert_workload_to_dict(non_running_pods, f_json=True, indent=2)))
 
-        result = subprocess.run("kubectl get pods -A", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        result = subprocess.run("kubectl get pods -A --cache=false", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         result = " ".join(result.stdout.splitlines())
         Logger.logger.info(
             "cluster state\n"

From 1a23406403bd08fa3cfe024242e0e44f0b9fbac0 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 12:21:28 +0200
Subject: [PATCH 03/11] Refactor pod state logging in BaseK8S class to
 display detailed pod information for improved cluster state visibility

---
 tests_scripts/kubernetes/base_k8s.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/tests_scripts/kubernetes/base_k8s.py b/tests_scripts/kubernetes/base_k8s.py
index 28565e8b..3685a565 100755
--- a/tests_scripts/kubernetes/base_k8s.py
+++ b/tests_scripts/kubernetes/base_k8s.py
@@ -777,12 +777,11 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
             running_pods = self.get_ready_pods(namespace=namespace, name=name)
             if comp_operator(len(running_pods), replicas):  # and len(running_pods) == len(total_pods):
                 Logger.logger.info(f"all pods are running after {delta_t} seconds")
-                result = subprocess.run("kubectl get pods -A --cache=false", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                result = " ".join(result.stdout.splitlines())
-                Logger.logger.info(
-                    "cluster state\n"
-                    f"{result}"
-                )
+                all_pods = self.get_all_pods()
+                Logger.logger.info("cluster states:")
+                # Print the pod details
+                for pod in all_pods.items:
+                    Logger.logger.info(f"Namespace: {pod.metadata.namespace}, Name: {pod.metadata.name}, Status: {pod.status.phase}")
                 return
             delta_t = (datetime.now() - start).total_seconds()
@@ -793,12 +792,12 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
                        format(timeout,
                               KubectlWrapper.convert_workload_to_dict(non_running_pods, f_json=True, indent=2)))
 
-        result = subprocess.run("kubectl get pods -A --cache=false", timeout=300, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        result = " ".join(result.stdout.splitlines())
-        Logger.logger.info(
-            "cluster state\n"
-            f"{result}"
-        )
+        all_pods = self.get_all_pods()
+        Logger.logger.info("cluster states:")
+        # Print the pod details
+        for pod in all_pods.items:
+            Logger.logger.info(f"Namespace: {pod.metadata.namespace}, Name: {pod.metadata.name}, Status: {pod.status.phase}")
+
         raise Exception("wrong number of pods are running after {} seconds. expected: {}, running: {}, pods:{}"
                         .format(delta_t, replicas, len(running_pods), running_pods))  # , len(total_pods)))

From 9c7b0939b80fa53e9ef5c821fe8b4deecbb58fbb Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 12:39:31 +0200
Subject: [PATCH 04/11] Add method to retrieve detailed pod information for improved logging

---
 tests_scripts/kubernetes/base_k8s.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests_scripts/kubernetes/base_k8s.py b/tests_scripts/kubernetes/base_k8s.py
index 3685a565..0972daef 100755
--- a/tests_scripts/kubernetes/base_k8s.py
+++ b/tests_scripts/kubernetes/base_k8s.py
@@ -620,6 +620,14 @@ def get_nodes(self):
     def get_all_pods(self):
         return self.kubernetes_obj.client_CoreV1Api.list_pod_for_all_namespaces()
 
+    def get_all_pods_printable_details(self):
+        pods = self.get_all_pods()
+        message = ""
+        for pod in pods.items:
+            message += "Pod name: {0}, namespace: {1}, status: {2}\n".format(pod.metadata.name, pod.metadata.namespace,
+                                                                             pod.status.phase)
+        return message
+
     def get_pods(self, namespace: str = None, name: str = None, include_terminating: bool = True, wlid: str = None):
         """
         :return: list of running pods
@@ -777,12 +785,8 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
             running_pods = self.get_ready_pods(namespace=namespace, name=name)
             if comp_operator(len(running_pods), replicas):  # and len(running_pods) == len(total_pods):
                 Logger.logger.info(f"all pods are running after {delta_t} seconds")
-                all_pods = self.get_all_pods()
-                Logger.logger.info("cluster states:")
-                # Print the pod details
-                for pod in all_pods.items:
-                    Logger.logger.info(f"Namespace: {pod.metadata.namespace}, Name: {pod.metadata.name}, Status: {pod.status.phase}")
-
+                all_pods_message = self.get_all_pods_printable_details()
+                Logger.logger.info(f"cluster states:\n{all_pods_message}")
                 return
             delta_t = (datetime.now() - start).total_seconds()
             time.sleep(10)
@@ -793,12 +796,8 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
                        format(timeout,
                               KubectlWrapper.convert_workload_to_dict(non_running_pods, f_json=True, indent=2)))
 
-        all_pods = self.get_all_pods()
-        Logger.logger.info("cluster states:")
-        # Print the pod details
-        for pod in all_pods.items:
-            Logger.logger.info(f"Namespace: {pod.metadata.namespace}, Name: {pod.metadata.name}, Status: {pod.status.phase}")
-
+        all_pods_message = self.get_all_pods_printable_details()
+        Logger.logger.info(f"cluster states:\n{all_pods_message}")
         raise Exception("wrong number of pods are running after {} seconds. expected: {}, running: {}, pods:{}"
                         .format(delta_t, replicas, len(running_pods), running_pods))  # , len(total_pods)))

From 1d7f887eb9272937823ac0fbd97f15a66c71f295 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 12:45:13 +0200
Subject: [PATCH 05/11] Implement wait mechanism for security risk report retrieval in Jira integration

---
 tests_scripts/helm/jira_integration.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests_scripts/helm/jira_integration.py b/tests_scripts/helm/jira_integration.py
index 9450c6c8..f3f16e37 100644
--- a/tests_scripts/helm/jira_integration.py
+++ b/tests_scripts/helm/jira_integration.py
@@ -209,7 +209,15 @@ def get_posture_resource(self):
 
     def create_jira_issue_for_security_risks(self):
         security_risk_id = "R_0011"
-        resource = self.get_security_risks_resource(security_risk_id)
+
+        resource, t = self.wait_for_report(
+            timeout=120,
+            sleep_interval=10,
+            report_type=self.backend.get_security_risks_list,
+            security_risk_id=security_risk_id,
+        )
+
+        # resource = self.get_security_risks_resource(security_risk_id)
         resourceHash = resource['k8sResourceHash']
         Logger.logger.info(f"Create Jira issue for resource {resourceHash} and security_risk_id {security_risk_id}")

From ac1637036180fa69eb3fbb89247a550c0a0b268b Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 12:55:12 +0200
Subject: [PATCH 06/11] Update report type in wait_for_report method for security risk retrieval

---
 tests_scripts/helm/jira_integration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests_scripts/helm/jira_integration.py b/tests_scripts/helm/jira_integration.py
index f3f16e37..61b4a031 100644
--- a/tests_scripts/helm/jira_integration.py
+++ b/tests_scripts/helm/jira_integration.py
@@ -211,9 +211,9 @@ def create_jira_issue_for_security_risks(self):
         security_risk_id = "R_0011"
 
         resource, t = self.wait_for_report(
+            report_type=self.get_security_risks_resource,
            timeout=120,
            sleep_interval=10,
-            report_type=self.backend.get_security_risks_list,
            security_risk_id=security_risk_id,
         )

From bf0e8178b97fa878f579869674806d81a844af1d Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 13:17:32 +0200
Subject: [PATCH 07/11] Increase timeout duration in wait_for_report method for enhanced security risk retrieval

---
 tests_scripts/helm/jira_integration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests_scripts/helm/jira_integration.py b/tests_scripts/helm/jira_integration.py
index 61b4a031..f98264e5 100644
--- a/tests_scripts/helm/jira_integration.py
+++ b/tests_scripts/helm/jira_integration.py
@@ -212,7 +212,7 @@ def create_jira_issue_for_security_risks(self):
 
         resource, t = self.wait_for_report(
             report_type=self.get_security_risks_resource,
-            timeout=120,
+            timeout=220,
             sleep_interval=10,
             security_risk_id=security_risk_id,
         )

From f398533d7078d4daf250e68df78712b1acf28989 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 13:30:57 +0200
Subject: [PATCH 08/11] Add wait mechanism and trigger posture scan in Jira integration for report retrieval

---
 tests_scripts/helm/jira_integration.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests_scripts/helm/jira_integration.py b/tests_scripts/helm/jira_integration.py
index f98264e5..7667b77e 100644
--- a/tests_scripts/helm/jira_integration.py
+++ b/tests_scripts/helm/jira_integration.py
@@ -144,7 +144,17 @@ def setup_cluster_and_run_posture_scan(self):
             cluster_name=cluster, wait_to_result=True, framework_name="AllControls"
         )
         assert report_guid != "", "report guid is empty"
-        self.report_guid = report_guid
+
+
+        # to make sure kubernetes resources are created
+        time.sleep(20)
+        Logger.logger.info(f"Trigger posture scan")
+        self.backend.trigger_posture_scan(cluster)
+
+        report_guid_new = self.get_report_guid(
+            cluster_name=cluster, wait_to_result=True, framework_name="AllControls", old_report_guid=report_guid
+        )
+        self.report_guid = report_guid_new
         self.namespace = namespace
         self.cluster = cluster

From cc0f7cc78c8e9e1c736e282b59fdc23dd81198a8 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 13:57:17 +0200
Subject: [PATCH 09/11] Add rate limiting to Jira API requests and simplify issue creation logic

---
 infrastructure/backend_api.py          | 60 ++++++++++++++++++++------
 tests_scripts/helm/jira_integration.py | 16 +------
 2 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/infrastructure/backend_api.py b/infrastructure/backend_api.py
index 0f8974f9..7f180210 100644
--- a/infrastructure/backend_api.py
+++ b/infrastructure/backend_api.py
@@ -1965,6 +1965,26 @@ def post(self, url, **args):
         if not url.startswith("http://") and not url.startswith("https://"):
             url = self.server + url
         return requests.post(url, **args)
+
+    @deco_cookie
+    def post_with_ratelimit(self, url, **args):
+        rate_limit_retries = 1
+        if "rate_limit_retries" in args:
+            rate_limit_retries = args["rate_limit_retries"]
+            del args["rate_limit_retries"]
+
+        rate_limit_sleep = 45
+        if "rate_limit_sleep" in args:
+            rate_limit_sleep = args["rate_limit_sleep"]
+            del args["rate_limit_sleep"]
+
+        for i in range(rate_limit_retries):
+            r = self.post(url, **args)
+            if r.status_code == 429 or "retryAfter".lower() in r.text.lower():
+                Logger.logger.debug(f"Rate limit reached for url: {url}. Retrying in {rate_limit_sleep} seconds")
+                time.sleep(rate_limit_sleep)
+            else:
+                return r
 
     @deco_cookie
     def get(self, url, **args):
         if not url.startswith("http://") and not url.startswith("https://"):
             url = self.server + url
         return requests.get(url, **args)
 
+    def get_with_rate_limit(self, url, **args):
+        rate_limit_retries = 1
+        if "rate_limit_retries" in args:
+            rate_limit_retries = args["rate_limit_retries"]
+            del args["rate_limit_retries"]
+
+        rate_limit_sleep = 45
+        if "rate_limit_sleep" in args:
+            rate_limit_sleep = args["rate_limit_sleep"]
+            del args["rate_limit_sleep"]
+
+        for i in range(rate_limit_retries):
+            r = self.get(url, **args)
+            if r.status_code == 429 or "retryAfter".lower() in r.text.lower():
+                Logger.logger.debug(f"Rate limit reached for url: {url}. Retrying in {rate_limit_sleep} seconds")
+                time.sleep(rate_limit_sleep)
+            else:
+                return r
 
     @deco_cookie
     def put(self, url, **args):
         return requests.put(self.server + url, **args)
@@ -2886,7 +2925,7 @@ def get_integration_status(self, provider: str):
 
     def get_jira_config(self):
         url = API_INTEGRATIONS + "/jira/configV2"
-        r = self.get(url, params={"customerGUID": self.selected_tenant_id})
+        r = self.get_with_rate_limit(url, params={"customerGUID": self.selected_tenant_id})
         assert 200 <= r.status_code < 300, f"{inspect.currentframe().f_code.co_name}, url: '{url}', customer: '{self.customer}' code: {r.status_code}, message: '{r.text}'"
         return r.json()
@@ -2909,9 +2948,7 @@ def get_jira_collaboration_guid_by_site_name(self, site_name: str):
 
     def update_jira_config(self, body: dict):
         url = API_INTEGRATIONS + "/jira/configV2"
-        r = self.post(url,
-                      params={"customerGUID": self.selected_tenant_id},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing smart remediation. Request: results of posture resources highlights "%s" (code: %d, message: %s)' % (
 
     def search_jira_projects(self, body: dict):
         url = API_INTEGRATIONS + "/jira/projectsV2/search"
-        r = self.post(url, params={"customerGUID": self.customer_guid},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing dashboard. Request to: %s "%s" (code: %d, message: %s)' % (
 
     def search_jira_issue_types(self, body: dict):
         url = API_INTEGRATIONS + "/jira/issueTypesV2/search"
-        r = self.post(url, params={"customerGUID": self.customer_guid},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing dashboard. Request to: %s "%s" (code: %d, message: %s)' % (
 
     def search_jira_schema(self, body: dict):
         url = API_INTEGRATIONS + "/jira/issueTypesV2/schema/search"
-        r = self.post(url, params={"customerGUID": self.customer_guid},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing dashboard. Request to: %s "%s" (code: %d, message: %s)' % (
 
     def search_jira_issue_field(self, body: dict):
         url = API_INTEGRATIONS + "jira/issueTypes/fields/search"
-        r = self.post(url, params={"customerGUID": self.customer_guid},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing dashboard. Request to: %s "%s" (code: %d, message: %s)' % (
 
     def create_jira_issue(self, body: dict):
         url = API_INTEGRATIONS + "/jira/issueV2"
-        r = self.post(url, params={"customerGUID": self.customer_guid},
-                      json=body)
+        r = self.post_with_ratelimit(url, params={"customerGUID": self.selected_tenant_id}, json=body)
         if not 200 <= r.status_code < 300:
             raise Exception(
                 'Error accessing dashboard. Request to: %s "%s" (code: %d, message: %s)' % (
diff --git a/tests_scripts/helm/jira_integration.py b/tests_scripts/helm/jira_integration.py
index 7667b77e..f65ba89e 100644
--- a/tests_scripts/helm/jira_integration.py
+++ b/tests_scripts/helm/jira_integration.py
@@ -159,20 +159,8 @@ def setup_cluster_and_run_posture_scan(self):
         self.cluster = cluster
 
     def create_jira_issue(self, issue, retries=3, sleep=45):
-        for i in range(retries):
-            Logger.logger.info(f"Create Jira issue attempt {i+1}")
-            try:
-                ticket = self.backend.create_jira_issue(issue)
-                assert ticket, "Jira ticket is empty"
-                return ticket
-            except (Exception, AssertionError) as e:
-                # we can get RetryAfter error, so we will retry
-                if "RetryAfter".lower() in str(e).lower():
-                    Logger.logger.info(f"Jira issue creation failed with RetryAfter, retrying in {sleep} seconds")
-                    time.sleep(sleep)
-                else:
-                    raise e
-
+        return self.backend.create_jira_issue(issue)
+
     def create_jira_issue_for_posture(self):
         resource = self.get_posture_resource()

From 267dd63dfb7279d249b1963950aa9c339af2ea42 Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 14:18:09 +0200
Subject: [PATCH 10/11] Increase timeout duration in verify_running_pods method for improved reliability

---
 tests_scripts/kubernetes/base_k8s.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests_scripts/kubernetes/base_k8s.py b/tests_scripts/kubernetes/base_k8s.py
index 0972daef..1a8cd2f0 100755
--- a/tests_scripts/kubernetes/base_k8s.py
+++ b/tests_scripts/kubernetes/base_k8s.py
@@ -627,6 +627,15 @@ def get_all_pods_printable_details(self):
             message += "Pod name: {0}, namespace: {1}, status: {2}\n".format(pod.metadata.name, pod.metadata.namespace,
                                                                              pod.status.phase)
         return message
+
+    def get_all_not_running_pods_describe_details(self):
+        pods = self.get_all_pods()
+        message = ""
+        for pod in pods.items:
+            if pod.status.phase != "Running":
+                message += "Pod name: {0}, namespace: {1}, status: {2}, pod: {3}\n".format(pod.metadata.name, pod.metadata.namespace,
+                                                                                           pod.status.phase, pod)
+        return message
 
     def get_pods(self, namespace: str = None, name: str = None, include_terminating: bool = True, wlid: str = None):
         """
@@ -768,7 +777,7 @@ def verify_all_pods_are_running(self, workload, namespace: str, timeout=180):
                                         timeout=timeout)
         return replicas
 
-    def verify_running_pods(self, namespace: str, replicas: int = None, name: str = None, timeout=180,
+    def verify_running_pods(self, namespace: str, replicas: int = None, name: str = None, timeout=220,
                             comp_operator=operator.eq):
         """
         compare number of expected running pods with actually running pods
@@ -797,9 +806,11 @@ def verify_running_pods(self, namespace: str, replicas: int = None, name: str =
                               KubectlWrapper.convert_workload_to_dict(non_running_pods, f_json=True, indent=2)))
 
         all_pods_message = self.get_all_pods_printable_details()
-        Logger.logger.info(f"cluster states:\n{all_pods_message}")
-        raise Exception("wrong number of pods are running after {} seconds. expected: {}, running: {}, pods:{}"
-                        .format(delta_t, replicas, len(running_pods), running_pods))  # , len(total_pods)))
+        Logger.logger.info(f"cluster states:\n{all_pods_message}")
+        not_running_pods_message = self.get_all_not_running_pods_describe_details()
+        Logger.logger.info(f"not running pods details:\n{not_running_pods_message}")
+        raise Exception("wrong number of pods are running after {} seconds. expected: {}, running: {}"
+                        .format(delta_t, replicas, len(running_pods)))  # , len(total_pods)))
 
     def is_namespace_running(self, namespace):
         for ns in self.kubernetes_obj.client_CoreV1Api.list_namespace().items:

From 70b8144d418d6dae1b01755f56f96f12e8764a8d Mon Sep 17 00:00:00 2001
From: kooomix
Date: Wed, 1 Jan 2025 14:55:40 +0200
Subject: [PATCH 11/11] Refactor rate limiting logic in post_with_ratelimit and get_with_rate_limit methods for improved clarity and reliability

---
 infrastructure/backend_api.py | 69 ++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 29 deletions(-)

diff --git a/infrastructure/backend_api.py b/infrastructure/backend_api.py
index 7f180210..96f51759 100644
--- a/infrastructure/backend_api.py
+++ b/infrastructure/backend_api.py
@@ -1966,26 +1966,32 @@ def post(self, url, **args):
         url = self.server + url
         return requests.post(url, **args)
 
-    @deco_cookie
     def post_with_ratelimit(self, url, **args):
-        rate_limit_retries = 1
-        if "rate_limit_retries" in args:
-            rate_limit_retries = args["rate_limit_retries"]
-            del args["rate_limit_retries"]
-
-        rate_limit_sleep = 45
-        if "rate_limit_sleep" in args:
-            rate_limit_sleep = args["rate_limit_sleep"]
-            del args["rate_limit_sleep"]
-
-        for i in range(rate_limit_retries):
+        # Extract optional parameters with defaults
+        rate_limit_retries = args.pop("rate_limit_retries", 1)
+        rate_limit_sleep = args.pop("rate_limit_sleep", 45)
+
+        for attempt in range(1, rate_limit_retries + 1):
             r = self.post(url, **args)
+
+            # Check for rate limiting in status code or response text
+            if r.status_code == 429 or "retryafter" in r.text.lower():
+                Logger.logger.debug(
+                    f"Rate limit reached for URL: {url}. Attempt {attempt} of {rate_limit_retries}. "
+                    f"Retrying in {rate_limit_sleep} seconds."
+                )
+                if attempt < rate_limit_retries:
+                    time.sleep(rate_limit_sleep)
+                else:
+                    Logger.logger.warning(
+                        f"Rate limit retries exhausted for URL: {url}. Returning last response."
+                    )
             else:
                 return r
+        # Return the last response if retries are exhausted
+        return r
+
     @deco_cookie
     def get(self, url, **args):
         if not url.startswith("http://") and not url.startswith("https://"):
             url = self.server + url
         return requests.get(url, **args)
 
     def get_with_rate_limit(self, url, **args):
-        rate_limit_retries = 1
-        if "rate_limit_retries" in args:
-            rate_limit_retries = args["rate_limit_retries"]
-            del args["rate_limit_retries"]
-
-        rate_limit_sleep = 45
-        if "rate_limit_sleep" in args:
-            rate_limit_sleep = args["rate_limit_sleep"]
-            del args["rate_limit_sleep"]
-
-        for i in range(rate_limit_retries):
+        rate_limit_retries = args.pop("rate_limit_retries", 1)
+        rate_limit_sleep = args.pop("rate_limit_sleep", 45)
+
+        for attempt in range(1, rate_limit_retries + 1):
             r = self.get(url, **args)
-            if r.status_code == 429 or "retryAfter".lower() in r.text.lower():
-                Logger.logger.debug(f"Rate limit reached for url: {url}. Retrying in {rate_limit_sleep} seconds")
-                time.sleep(rate_limit_sleep)
+
+            if r.status_code == 429 or "retryafter" in r.text.lower():
+                Logger.logger.debug(
+                    f"Rate limit reached for URL: {url}. Attempt {attempt} of {rate_limit_retries}. "
+                    f"Retrying in {rate_limit_sleep} seconds."
+                )
+                if attempt < rate_limit_retries:
+                    time.sleep(rate_limit_sleep)
+                else:
+                    Logger.logger.warning(
+                        f"Rate limit retries exhausted for URL: {url}. Returning last response."
+                    )
             else:
                 return r
+
+        # Return the last response if retries are exhausted
+        return r
 
     @deco_cookie
     def put(self, url, **args):