diff --git a/krkn/cerberus/setup.py b/krkn/cerberus/setup.py index 26efc1523..c20f2a132 100644 --- a/krkn/cerberus/setup.py +++ b/krkn/cerberus/setup.py @@ -2,19 +2,32 @@ import requests import sys import json +from krkn_lib.utils.functions import get_yaml_item_value +check_application_routes = "" +cerberus_url = None +exit_on_failure = False +cerberus_enabled = False -def get_status(config, start_time, end_time): +def set_url(config): + global exit_on_failure + exit_on_failure = get_yaml_item_value(config["kraken"], "exit_on_failure", False) + global cerberus_enabled + cerberus_enabled = get_yaml_item_value(config["cerberus"],"cerberus_enabled", False) + if cerberus_enabled: + global cerberus_url + cerberus_url = get_yaml_item_value(config["cerberus"],"cerberus_url", "") + global check_application_routes + check_application_routes = \ + get_yaml_item_value(config["cerberus"],"check_applicaton_routes","") + +def get_status(start_time, end_time): """ Get cerberus status """ cerberus_status = True - check_application_routes = False application_routes_status = True - if config["cerberus"]["cerberus_enabled"]: - cerberus_url = config["cerberus"]["cerberus_url"] - check_application_routes = \ - config["cerberus"]["check_applicaton_routes"] + if cerberus_enabled: if not cerberus_url: logging.error( "url where Cerberus publishes True/False signal " @@ -61,40 +75,38 @@ def get_status(config, start_time, end_time): return cerberus_status -def publish_kraken_status(config, failed_post_scenarios, start_time, end_time): +def publish_kraken_status( start_time, end_time): """ Publish kraken status to cerberus """ - cerberus_status = get_status(config, start_time, end_time) + cerberus_status = get_status(start_time, end_time) if not cerberus_status: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is not healthy and post action scenarios " - "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info( - 
"Cerberus status is not healthy and post action scenarios " - "are still failing" - ) + if exit_on_failure: + logging.info( + "Cerberus status is not healthy and post action scenarios " + "are still failing, exiting kraken run" + ) + sys.exit(1) + else: + logging.info( + "Cerberus status is not healthy and post action scenarios " + "are still failing" + ) else: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is healthy but post action scenarios " - "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info( - "Cerberus status is healthy but post action scenarios " - "are still failing" - ) + if exit_on_failure: + logging.info( + "Cerberus status is healthy but post action scenarios " + "are still failing, exiting kraken run" + ) + sys.exit(1) + else: + logging.info( + "Cerberus status is healthy but post action scenarios " + "are still failing" + ) -def application_status(cerberus_url, start_time, end_time): +def application_status( start_time, end_time): """ Check application availability """ diff --git a/krkn/scenario_plugins/abstract_scenario_plugin.py b/krkn/scenario_plugins/abstract_scenario_plugin.py index 060d9ec36..606ed62a4 100644 --- a/krkn/scenario_plugins/abstract_scenario_plugin.py +++ b/krkn/scenario_plugins/abstract_scenario_plugin.py @@ -4,7 +4,7 @@ from krkn_lib.models.telemetry import ScenarioTelemetry from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift -from krkn import utils +from krkn import utils, cerberus class AbstractScenarioPlugin(ABC): @@ -13,7 +13,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -76,10 +75,10 @@ def run_scenarios( logging.info( f"Running {self.__class__.__name__}: {self.get_scenario_types()} -> {scenario_config}" ) + start_time = int(time.time()) return_value = self.run( run_uuid, scenario_config, - krkn_config, 
telemetry, scenario_telemetry, ) @@ -110,6 +109,9 @@ def run_scenarios( if scenario_telemetry.exit_status != 0: failed_scenarios.append(scenario_config) scenario_telemetries.append(scenario_telemetry) + end_time = int(time.time()) + cerberus.publish_kraken_status(start_time, end_time) logging.info(f"wating {wait_duration} before running the next scenario") time.sleep(wait_duration) + return failed_scenarios, scenario_telemetries diff --git a/krkn/scenario_plugins/application_outage/application_outage_scenario_plugin.py b/krkn/scenario_plugins/application_outage/application_outage_scenario_plugin.py index e016c2dcb..831fe082c 100644 --- a/krkn/scenario_plugins/application_outage/application_outage_scenario_plugin.py +++ b/krkn/scenario_plugins/application_outage/application_outage_scenario_plugin.py @@ -14,11 +14,9 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: - wait_duration = krkn_config["tunings"]["wait_duration"] try: with open(scenario, "r") as f: app_outage_config_yaml = yaml.full_load(f) @@ -68,14 +66,8 @@ def run( "kraken-deny", namespace ) - logging.info( - "End of scenario. 
Waiting for the specified duration: %s" - % wait_duration - ) - time.sleep(wait_duration) - end_time = int(time.time()) - cerberus.publish_kraken_status(krkn_config, [], start_time, end_time) + except Exception as e: logging.error( "ApplicationOutageScenarioPlugin exiting due to Exception %s" % e diff --git a/krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py b/krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py index a61cd167c..6c5c53449 100644 --- a/krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py +++ b/krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py @@ -15,7 +15,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: diff --git a/krkn/scenario_plugins/container/container_scenario_plugin.py b/krkn/scenario_plugins/container/container_scenario_plugin.py index 9da36d11e..e7af11fdd 100644 --- a/krkn/scenario_plugins/container/container_scenario_plugin.py +++ b/krkn/scenario_plugins/container/container_scenario_plugin.py @@ -18,13 +18,10 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: - start_time = int(time.time()) pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes()) - wait_duration = krkn_config["tunings"]["wait_duration"] try: with open(scenario, "r") as f: cont_scenario_config = yaml.full_load(f) @@ -44,14 +41,7 @@ def run( ) return 1 scenario_telemetry.affected_pods = result - logging.info("Waiting for the specified duration: %s" % (wait_duration)) - time.sleep(wait_duration) - # capture end time - end_time = int(time.time()) - - # publish cerberus status - cerberus.publish_kraken_status(krkn_config, [], start_time, end_time) except (RuntimeError, Exception): logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e) return 1 diff --git 
a/krkn/scenario_plugins/managed_cluster/managed_cluster_scenario_plugin.py b/krkn/scenario_plugins/managed_cluster/managed_cluster_scenario_plugin.py index b95238d8b..a895cd6c5 100644 --- a/krkn/scenario_plugins/managed_cluster/managed_cluster_scenario_plugin.py +++ b/krkn/scenario_plugins/managed_cluster/managed_cluster_scenario_plugin.py @@ -7,7 +7,6 @@ from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn_lib.utils import get_yaml_item_value -from krkn import cerberus, utils from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin from krkn.scenario_plugins.managed_cluster.common_functions import get_managedcluster from krkn.scenario_plugins.managed_cluster.scenarios import Scenarios @@ -18,7 +17,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -38,8 +36,6 @@ def run( managedcluster_scenario_object, lib_telemetry.get_lib_kubernetes(), ) - end_time = int(time.time()) - cerberus.get_status(krkn_config, start_time, end_time) except Exception as e: logging.error( "ManagedClusterScenarioPlugin exiting due to Exception %s" diff --git a/krkn/scenario_plugins/native/native_scenario_plugin.py b/krkn/scenario_plugins/native/native_scenario_plugin.py index 4c4605b7b..58e8dd3ab 100644 --- a/krkn/scenario_plugins/native/native_scenario_plugin.py +++ b/krkn/scenario_plugins/native/native_scenario_plugin.py @@ -13,7 +13,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -29,7 +28,6 @@ def run( PLUGINS.run( scenario, lib_telemetry.get_lib_kubernetes().get_kubeconfig_path(), - krkn_config, run_uuid, ) result = pool.join() @@ -48,7 +46,7 @@ def run( def get_scenario_types(self) -> list[str]: return [ "pod_disruption_scenarios", - "pod_network_scenario", + "pod_network_scenarios", 
"vmware_node_scenarios", "ibmcloud_node_scenarios", ] diff --git a/krkn/scenario_plugins/native/network/cerberus.py b/krkn/scenario_plugins/native/network/cerberus.py deleted file mode 100644 index d245b865a..000000000 --- a/krkn/scenario_plugins/native/network/cerberus.py +++ /dev/null @@ -1,141 +0,0 @@ -import logging -import requests -import sys -import json - - -def get_status(config, start_time, end_time): - """ - Function to get Cerberus status - - Args: - config - - Kraken config dictionary - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - - Returns: - Cerberus status - """ - - cerberus_status = True - check_application_routes = False - application_routes_status = True - if config["cerberus"]["cerberus_enabled"]: - cerberus_url = config["cerberus"]["cerberus_url"] - check_application_routes = config["cerberus"]["check_applicaton_routes"] - if not cerberus_url: - logging.error("url where Cerberus publishes True/False signal is not provided.") - sys.exit(1) - cerberus_status = requests.get(cerberus_url, timeout=60).content - cerberus_status = True if cerberus_status == b"True" else False - - # Fail if the application routes monitored by cerberus experience downtime during the chaos - if check_application_routes: - application_routes_status, unavailable_routes = application_status(cerberus_url, start_time, end_time) - if not application_routes_status: - logging.error( - "Application routes: %s monitored by cerberus encountered downtime during the run, failing" - % unavailable_routes - ) - else: - logging.info("Application routes being monitored didn't encounter any downtime during the run!") - - if not cerberus_status: - logging.error( - "Received a no-go signal from Cerberus, looks like " - "the cluster is unhealthy. Please check the Cerberus " - "report for more details. Test failed." 
- ) - - if not application_routes_status or not cerberus_status: - sys.exit(1) - else: - logging.info("Received a go signal from Ceberus, the cluster is healthy. " "Test passed.") - return cerberus_status - - -def publish_kraken_status(config, failed_post_scenarios, start_time, end_time): - """ - Function to publish Kraken status to Cerberus - - Args: - config - - Kraken config dictionary - - failed_post_scenarios - - String containing the failed post scenarios - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - """ - - cerberus_status = get_status(config, start_time, end_time) - if not cerberus_status: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is not healthy and post action scenarios " "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info("Cerberus status is not healthy and post action scenarios " "are still failing") - else: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is healthy but post action scenarios " "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info("Cerberus status is healthy but post action scenarios " "are still failing") - - -def application_status(cerberus_url, start_time, end_time): - """ - Function to check application availability - - Args: - cerberus_url - - url where Cerberus publishes True/False signal - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - - Returns: - Application status and failed routes - """ - - if not cerberus_url: - logging.error("url where Cerberus publishes True/False signal is not provided.") - sys.exit(1) - else: - duration = (end_time - start_time) / 60 - url = cerberus_url + "/" + "history" + "?" 
+ "loopback=" + str(duration) - logging.info("Scraping the metrics for the test duration from cerberus url: %s" % url) - try: - failed_routes = [] - status = True - metrics = requests.get(url, timeout=60).content - metrics_json = json.loads(metrics) - for entry in metrics_json["history"]["failures"]: - if entry["component"] == "route": - name = entry["name"] - failed_routes.append(name) - status = False - else: - continue - except Exception as e: - logging.error("Failed to scrape metrics from cerberus API at %s: %s" % (url, e)) - sys.exit(1) - return status, set(failed_routes) diff --git a/krkn/scenario_plugins/native/network/ingress_shaping.py b/krkn/scenario_plugins/native/network/ingress_shaping.py index cf74828f5..186ae1554 100644 --- a/krkn/scenario_plugins/native/network/ingress_shaping.py +++ b/krkn/scenario_plugins/native/network/ingress_shaping.py @@ -8,7 +8,6 @@ from traceback import format_exc from jinja2 import Environment, FileSystemLoader from . import kubernetes_functions as kube_helper -from . import cerberus import typing from arcaflow_plugin_sdk import validation, plugin from kubernetes.client.api.core_v1_api import CoreV1Api as CoreV1Api @@ -116,17 +115,6 @@ class NetworkScenarioConfig: } ) - kraken_config: typing.Optional[str] = field( - default='', - metadata={ - "name": "Kraken Config", - "description": - "Path to the config file of Kraken. 
" - "Set this field if you wish to publish status onto Cerberus" - } - ) - - @dataclass class NetworkScenarioSuccessOutput: filter_direction: str = field( @@ -833,20 +821,6 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[ format_exc() ) job_list = [] - publish = False - if cfg.kraken_config: - failed_post_scenarios = "" - try: - with open(cfg.kraken_config, "r") as f: - config = yaml.full_load(f) - except Exception: - logging.error( - "Error reading Kraken config from %s" % cfg.kraken_config - ) - return "error", NetworkScenarioErrorOutput( - format_exc() - ) - publish = True try: if cfg.execution_type == 'parallel': @@ -866,13 +840,6 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[ start_time = int(time.time()) wait_for_job(batch_cli, job_list[:], cfg.test_duration+100) end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, - failed_post_scenarios, - start_time, - end_time - ) elif cfg.execution_type == 'serial': create_interfaces = True @@ -897,18 +864,7 @@ def network_chaos(cfg: NetworkScenarioConfig) -> typing.Tuple[ logging.info("Deleting jobs") delete_jobs(cli, batch_cli, job_list[:]) job_list = [] - logging.info( - "Waiting for wait_duration : %ss" % cfg.wait_duration - ) - time.sleep(cfg.wait_duration) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, - failed_post_scenarios, - start_time, - end_time - ) + create_interfaces = False else: diff --git a/krkn/scenario_plugins/native/plugins.py b/krkn/scenario_plugins/native/plugins.py index 34347c0dc..715c5d6a2 100644 --- a/krkn/scenario_plugins/native/plugins.py +++ b/krkn/scenario_plugins/native/plugins.py @@ -54,7 +54,7 @@ def __init__(self, steps: List[PluginStep]): def unserialize_scenario(self, file: str) -> Any: return serialization.load_from_file(abspath(file)) - def run(self, file: str, kubeconfig_path: str, kraken_config: str, run_uuid: str): + def run(self, file: str, kubeconfig_path: str, run_uuid: 
str): """ Run executes a series of steps """ @@ -98,8 +98,6 @@ def run(self, file: str, kubeconfig_path: str, kraken_config: str, run_uuid: str unserialized_input = step.schema.input.unserialize(entry["config"]) if "kubeconfig_path" in step.schema.input.properties: unserialized_input.kubeconfig_path = kubeconfig_path - if "kraken_config" in step.schema.input.properties: - unserialized_input.kraken_config = kraken_config output_id, output_data = step.schema( params=unserialized_input, run_id=run_uuid ) diff --git a/krkn/scenario_plugins/native/pod_network_outage/cerberus.py b/krkn/scenario_plugins/native/pod_network_outage/cerberus.py deleted file mode 100644 index 1122945e2..000000000 --- a/krkn/scenario_plugins/native/pod_network_outage/cerberus.py +++ /dev/null @@ -1,157 +0,0 @@ -import logging -import requests -import sys -import json - - -def get_status(config, start_time, end_time): - """ - Function to get Cerberus status - - Args: - config - - Kraken config dictionary - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - - Returns: - Cerberus status - """ - - cerberus_status = True - check_application_routes = False - application_routes_status = True - if config["cerberus"]["cerberus_enabled"]: - cerberus_url = config["cerberus"]["cerberus_url"] - check_application_routes = config["cerberus"]["check_applicaton_routes"] - if not cerberus_url: - logging.error( - "url where Cerberus publishes True/False signal is not provided.") - sys.exit(1) - cerberus_status = requests.get(cerberus_url, timeout=60).content - cerberus_status = True if cerberus_status == b"True" else False - - # Fail if the application routes monitored by cerberus experience - # downtime during the chaos - if check_application_routes: - application_routes_status, unavailable_routes = application_status( - cerberus_url, start_time, end_time) - if not application_routes_status: - logging.error( - "Application routes: %s monitored by cerberus 
encountered downtime during the run, failing" - % unavailable_routes - ) - else: - logging.info( - "Application routes being monitored didn't encounter any downtime during the run!") - - if not cerberus_status: - logging.error( - "Received a no-go signal from Cerberus, looks like " - "the cluster is unhealthy. Please check the Cerberus " - "report for more details. Test failed." - ) - - if not application_routes_status or not cerberus_status: - sys.exit(1) - else: - logging.info( - "Received a go signal from Ceberus, the cluster is healthy. " - "Test passed.") - return cerberus_status - - -def publish_kraken_status(config, failed_post_scenarios, start_time, end_time): - """ - Function to publish Kraken status to Cerberus - - Args: - config - - Kraken config dictionary - - failed_post_scenarios - - String containing the failed post scenarios - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - """ - - cerberus_status = get_status(config, start_time, end_time) - if not cerberus_status: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is not healthy and post action scenarios " "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info( - "Cerberus status is not healthy and post action scenarios " - "are still failing") - else: - if failed_post_scenarios: - if config["kraken"]["exit_on_failure"]: - logging.info( - "Cerberus status is healthy but post action scenarios " "are still failing, exiting kraken run" - ) - sys.exit(1) - else: - logging.info( - "Cerberus status is healthy but post action scenarios " - "are still failing") - - -def application_status(cerberus_url, start_time, end_time): - """ - Function to check application availability - - Args: - cerberus_url - - url where Cerberus publishes True/False signal - - start_time - - The time when chaos is injected - - end_time - - The time when chaos is removed - - Returns: - Application 
status and failed routes - """ - - if not cerberus_url: - logging.error( - "url where Cerberus publishes True/False signal is not provided.") - sys.exit(1) - else: - duration = (end_time - start_time) / 60 - url = cerberus_url + "/" + "history" + \ - "?" + "loopback=" + str(duration) - logging.info( - "Scraping the metrics for the test duration from cerberus url: %s" % - url) - try: - failed_routes = [] - status = True - metrics = requests.get(url, timeout=60).content - metrics_json = json.loads(metrics) - for entry in metrics_json["history"]["failures"]: - if entry["component"] == "route": - name = entry["name"] - failed_routes.append(name) - status = False - else: - continue - except Exception as e: - logging.error( - "Failed to scrape metrics from cerberus API at %s: %s" % - (url, e)) - sys.exit(1) - return status, set(failed_routes) diff --git a/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py b/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py index 410a58b7d..a4bd1f8c4 100755 --- a/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py +++ b/krkn/scenario_plugins/native/pod_network_outage/pod_network_outage_plugin.py @@ -15,7 +15,6 @@ from kubernetes import client from kubernetes.client.api.apiextensions_v1_api import ApiextensionsV1Api from kubernetes.client.api.custom_objects_api import CustomObjectsApi -from . import cerberus def get_test_pods( @@ -927,15 +926,6 @@ class InputParams: }, ) - kraken_config: typing.Optional[str] = field( - default=None, - metadata={ - "name": "Kraken Config", - "description": "Path to the config file of Kraken. 
" - "Set this field if you wish to publish status onto Cerberus", - }, - ) - test_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field( default=120, metadata={ @@ -1042,17 +1032,6 @@ def pod_outage( job_list = [] publish = False - if params.kraken_config: - failed_post_scenarios = "" - try: - with open(params.kraken_config, "r") as f: - config = yaml.full_load(f) - except Exception: - logging.error("Error reading Kraken config from %s" % - params.kraken_config) - return "error", PodOutageErrorOutput(format_exc()) - publish = True - for i in params.direction: filter_dict[i] = eval(f"params.{i}_ports") @@ -1103,11 +1082,6 @@ def pod_outage( start_time = int(time.time()) logging.info("Waiting for job to finish") wait_for_job(job_list[:], kubecli, params.test_duration + 300) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, failed_post_scenarios, start_time, end_time - ) return "success", PodOutageSuccessOutput( test_pods=pods_list, @@ -1179,15 +1153,6 @@ class EgressParams: }, ) - kraken_config: typing.Optional[str] = field( - default=None, - metadata={ - "name": "Kraken Config", - "description": "Path to the config file of Kraken. 
" - "Set this field if you wish to publish status onto Cerberus", - }, - ) - test_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field( default=90, metadata={ @@ -1364,24 +1329,13 @@ def pod_egress_shaping( logging.info("Waiting for wait_duration %s" % params.test_duration) time.sleep(params.test_duration) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, failed_post_scenarios, start_time, end_time - ) if params.execution_type == "parallel": break if params.execution_type == "parallel": logging.info("Waiting for parallel job to finish") - start_time = int(time.time()) wait_for_job(job_list[:], kubecli, params.test_duration + 300) logging.info("Waiting for wait_duration %s" % params.test_duration) time.sleep(params.test_duration) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, failed_post_scenarios, start_time, end_time - ) return "success", PodEgressNetShapingSuccessOutput( test_pods=pods_list, @@ -1452,15 +1406,6 @@ class IngressParams: }, ) - kraken_config: typing.Optional[str] = field( - default=None, - metadata={ - "name": "Kraken Config", - "description": "Path to the config file of Kraken. 
" - "Set this field if you wish to publish status onto Cerberus", - }, - ) - test_duration: typing.Annotated[typing.Optional[int], validation.min(1)] = field( default=90, metadata={ @@ -1631,17 +1576,12 @@ def pod_ingress_shaping( )) if params.execution_type == "serial": logging.info("Waiting for serial job to finish") - start_time = int(time.time()) wait_for_job(job_list[:], kubecli, params.test_duration + 20) logging.info("Waiting for wait_duration %s" % params.test_duration) time.sleep(params.test_duration) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, failed_post_scenarios, start_time, end_time - ) + if params.execution_type == "parallel": break if params.execution_type == "parallel": @@ -1650,11 +1590,6 @@ def pod_ingress_shaping( wait_for_job(job_list[:], kubecli, params.test_duration + 300) logging.info("Waiting for wait_duration %s" % params.test_duration) time.sleep(params.test_duration) - end_time = int(time.time()) - if publish: - cerberus.publish_kraken_status( - config, failed_post_scenarios, start_time, end_time - ) return "success", PodIngressNetShapingSuccessOutput( test_pods=pods_list, diff --git a/krkn/scenario_plugins/network_chaos/network_chaos_scenario_plugin.py b/krkn/scenario_plugins/network_chaos/network_chaos_scenario_plugin.py index eaa0719fd..48f1f9369 100644 --- a/krkn/scenario_plugins/network_chaos/network_chaos_scenario_plugin.py +++ b/krkn/scenario_plugins/network_chaos/network_chaos_scenario_plugin.py @@ -10,7 +10,7 @@ from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn_lib.utils import get_yaml_item_value, log_exception -from krkn import cerberus, utils +from krkn import cerberus from krkn.scenario_plugins.node_actions import common_node_functions from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin @@ -20,7 +20,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, 
scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -122,7 +121,5 @@ def run( end_time = int(time.time()) cerberus.publish_kraken_status( - krkn_config, - None, start_time, end_time, @@ -139,7 +137,7 @@ ) end_time = int(time.time()) cerberus.publish_kraken_status( - krkn_config, [], start_time, end_time + start_time, end_time ) except Exception as e: logging.error( ) diff --git a/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py b/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py index c49afdaff..240eb0cae 100644 --- a/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py +++ b/krkn/scenario_plugins/node_actions/node_actions_scenario_plugin.py @@ -28,7 +28,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -49,7 +48,7 @@ def run( lib_telemetry.get_lib_kubernetes(), ) end_time = int(time.time()) - cerberus.get_status(krkn_config, start_time, end_time) + cerberus.get_status(start_time, end_time) except (RuntimeError, Exception) as e: logging.error("Node Actions exiting due to Exception %s" % e) return 1 diff --git a/krkn/scenario_plugins/pvc/pvc_scenario_plugin.py b/krkn/scenario_plugins/pvc/pvc_scenario_plugin.py index d842e955f..efb530170 100644 --- a/krkn/scenario_plugins/pvc/pvc_scenario_plugin.py +++ b/krkn/scenario_plugins/pvc/pvc_scenario_plugin.py @@ -7,9 +7,8 @@ from krkn_lib.k8s import KrknKubernetes from krkn_lib.models.telemetry import ScenarioTelemetry from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift -from krkn_lib.utils import get_yaml_item_value, log_exception +from krkn_lib.utils import get_yaml_item_value -from krkn import cerberus, utils from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin @@ -18,7 +17,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, 
scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -173,7 +171,6 @@ def run( ) ) - start_time = int(time.time()) # Create temp file in the PVC full_path = "%s/%s" % (str(mount_path), str(file_name)) command = "fallocate -l $((%s*1024)) %s" % ( @@ -233,7 +230,6 @@ def run( ) ) - start_time = int(time.time()) # Create temp file in the PVC full_path = "%s/%s" % (str(mount_path), str(file_name)) command = "fallocate -l $((%s*1024)) %s" % ( @@ -269,8 +265,6 @@ def run( file_size_kb, lib_telemetry.get_lib_kubernetes(), ) - end_time = int(time.time()) - cerberus.publish_kraken_status(krkn_config, [], start_time, end_time) except (RuntimeError, Exception) as e: logging.error("PvcScenarioPlugin exiting due to Exception %s" % e) return 1 diff --git a/krkn/scenario_plugins/service_disruption/service_disruption_scenario_plugin.py b/krkn/scenario_plugins/service_disruption/service_disruption_scenario_plugin.py index 710d0a0c4..f0069677f 100644 --- a/krkn/scenario_plugins/service_disruption/service_disruption_scenario_plugin.py +++ b/krkn/scenario_plugins/service_disruption/service_disruption_scenario_plugin.py @@ -6,9 +6,9 @@ from krkn_lib.k8s import KrknKubernetes from krkn_lib.models.telemetry import ScenarioTelemetry from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift -from krkn_lib.utils import get_yaml_item_value, log_exception +from krkn_lib.utils import get_yaml_item_value -from krkn import cerberus, utils +from krkn import cerberus from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin @@ -17,7 +17,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -116,7 +115,7 @@ def run( end_time = int(time.time()) cerberus.publish_kraken_status( - krkn_config, [], start_time, end_time + start_time, end_time ) except (Exception, RuntimeError) as e: logging.error( ) diff --git 
a/krkn/scenario_plugins/service_hijacking/service_hijacking_scenario_plugin.py b/krkn/scenario_plugins/service_hijacking/service_hijacking_scenario_plugin.py index 781d36020..98c94ea26 100644 --- a/krkn/scenario_plugins/service_hijacking/service_hijacking_scenario_plugin.py +++ b/krkn/scenario_plugins/service_hijacking/service_hijacking_scenario_plugin.py @@ -12,7 +12,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: diff --git a/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py b/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py index ea915e32c..1d95f0303 100644 --- a/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py +++ b/krkn/scenario_plugins/shut_down/shut_down_scenario_plugin.py @@ -7,7 +7,6 @@ from krkn_lib.models.telemetry import ScenarioTelemetry from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift -from krkn import cerberus from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin from krkn.scenario_plugins.node_actions.aws_node_scenarios import AWS from krkn.scenario_plugins.node_actions.az_node_scenarios import Azure @@ -20,7 +19,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -30,12 +28,9 @@ def run( shut_down_config_scenario = shut_down_config_yaml[ "cluster_shut_down_scenario" ] - start_time = int(time.time()) self.cluster_shut_down( shut_down_config_scenario, lib_telemetry.get_lib_kubernetes() ) - end_time = int(time.time()) - cerberus.publish_kraken_status(krkn_config, [], start_time, end_time) return 0 except Exception as e: logging.error( diff --git a/krkn/scenario_plugins/syn_flood/syn_flood_scenario_plugin.py b/krkn/scenario_plugins/syn_flood/syn_flood_scenario_plugin.py index 17e970238..611656165 100644 --- 
a/krkn/scenario_plugins/syn_flood/syn_flood_scenario_plugin.py +++ b/krkn/scenario_plugins/syn_flood/syn_flood_scenario_plugin.py @@ -14,7 +14,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: diff --git a/krkn/scenario_plugins/time_actions/time_actions_scenario_plugin.py b/krkn/scenario_plugins/time_actions/time_actions_scenario_plugin.py index 0ba97d2a8..052873add 100644 --- a/krkn/scenario_plugins/time_actions/time_actions_scenario_plugin.py +++ b/krkn/scenario_plugins/time_actions/time_actions_scenario_plugin.py @@ -11,7 +11,6 @@ from krkn_lib.utils import get_random_string, get_yaml_item_value, log_exception from kubernetes.client import ApiException -from krkn import cerberus, utils from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin @@ -20,7 +19,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -28,7 +26,6 @@ def run( with open(scenario, "r") as f: scenario_config = yaml.full_load(f) for time_scenario in scenario_config["time_scenarios"]: - start_time = int(time.time()) object_type, object_names = self.skew_time( time_scenario, lib_telemetry.get_lib_kubernetes() ) @@ -39,10 +36,6 @@ def run( ) if len(not_reset) > 0: logging.info("Object times were not reset") - end_time = int(time.time()) - cerberus.publish_kraken_status( - krkn_config, not_reset, start_time, end_time - ) except (RuntimeError, Exception): logging.error( f"TimeActionsScenarioPlugin scenario {scenario} failed with exception: {e}" diff --git a/krkn/scenario_plugins/zone_outage/zone_outage_scenario_plugin.py b/krkn/scenario_plugins/zone_outage/zone_outage_scenario_plugin.py index c2a83ee5e..fa9d22952 100644 --- a/krkn/scenario_plugins/zone_outage/zone_outage_scenario_plugin.py +++ 
b/krkn/scenario_plugins/zone_outage/zone_outage_scenario_plugin.py @@ -6,9 +6,7 @@ from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift from krkn_lib.utils import log_exception -from krkn import utils from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin -from krkn.scenario_plugins.native.network import cerberus from krkn.scenario_plugins.node_actions.aws_node_scenarios import AWS @@ -17,7 +15,6 @@ def run( self, run_uuid: str, scenario: str, - krkn_config: dict[str, any], lib_telemetry: KrknTelemetryOpenshift, scenario_telemetry: ScenarioTelemetry, ) -> int: @@ -41,8 +38,6 @@ def run( ) return 1 - start_time = int(time.time()) - for subnet_id in subnet_ids: logging.info("Targeting subnet_id") network_association_ids = [] @@ -88,8 +83,6 @@ def run( for acl_id in acl_ids_created: cloud_object.delete_network_acl(acl_id) - end_time = int(time.time()) - cerberus.publish_kraken_status(krkn_config, [], start_time, end_time) except (RuntimeError, Exception): logging.error( f"ZoneOutageScenarioPlugin scenario {scenario} failed with exception: {e}" diff --git a/run_kraken.py b/run_kraken.py index ea6ec6988..990d76e60 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -10,6 +10,7 @@ import uuid import time +from krkn import cerberus from krkn_lib.elastic.krkn_elastic import KrknElastic from krkn_lib.models.elastic import ElasticChaosRunTelemetry from krkn_lib.models.krkn import ChaosRunOutput, ChaosRunAlertSummary @@ -134,6 +135,9 @@ def main(cfg) -> int: return 1 logging.info("Initializing client to talk to the Kubernetes cluster") + # Set Cerberus url if enabled + cerberus.set_url(config) + # Generate uuid for the run if run_uuid: logging.info(