Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Common cerberus #710

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 44 additions & 32 deletions krkn/cerberus/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,33 @@
import requests
import sys
import json
from krkn_lib.utils.functions import get_yaml_item_value

check_application_routes = ""
cerberus_url = None
exit_on_failure = False
cerberus_enabled = False

def get_status(config, start_time, end_time):
def set_url(config):
global exit_on_failure
exit_on_failure = get_yaml_item_value(config["kraken"], "exit_on_failure", False)
global cerberus_enabled
cerberus_enabled = get_yaml_item_value(config["cerberus"],"cerberus_enabled", False)
if cerberus_enabled:
global cerberus_url
cerberus_url = get_yaml_item_value(config["cerberus"],"cerberus_url", "")
global check_application_routes
check_application_routes = \
get_yaml_item_value(config["cerberus"],"check_applicaton_routes","")

def get_status(start_time, end_time):
"""
Get cerberus status
"""
cerberus_status = True
check_application_routes = False
application_routes_status = True
if config["cerberus"]["cerberus_enabled"]:
cerberus_url = config["cerberus"]["cerberus_url"]
check_application_routes = \
config["cerberus"]["check_applicaton_routes"]
if cerberus_enabled:
if not cerberus_url:
logging.error(
"url where Cerberus publishes True/False signal "
Expand Down Expand Up @@ -61,40 +75,38 @@ def get_status(config, start_time, end_time):
return cerberus_status


def publish_kraken_status(config, failed_post_scenarios, start_time, end_time):
def publish_kraken_status( start_time, end_time):
"""
Publish kraken status to cerberus
"""
cerberus_status = get_status(config, start_time, end_time)
cerberus_status = get_status(start_time, end_time)
if not cerberus_status:
if failed_post_scenarios:
if config["kraken"]["exit_on_failure"]:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing"
)
if exit_on_failure:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing"
)
else:
if failed_post_scenarios:
if config["kraken"]["exit_on_failure"]:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing"
)
if exit_on_failure:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing"
)


def application_status(cerberus_url, start_time, end_time):
def application_status( start_time, end_time):
"""
Check application availability
"""
Expand Down
8 changes: 5 additions & 3 deletions krkn/scenario_plugins/abstract_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift

from krkn import utils
from krkn import utils, cerberus


class AbstractScenarioPlugin(ABC):
Expand All @@ -13,7 +13,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand Down Expand Up @@ -76,10 +75,10 @@ def run_scenarios(
logging.info(
f"Running {self.__class__.__name__}: {self.get_scenario_types()} -> {scenario_config}"
)
start_time = int(time.time())
return_value = self.run(
run_uuid,
scenario_config,
krkn_config,
telemetry,
scenario_telemetry,
)
Expand Down Expand Up @@ -110,6 +109,9 @@ def run_scenarios(
if scenario_telemetry.exit_status != 0:
failed_scenarios.append(scenario_config)
scenario_telemetries.append(scenario_telemetry)
end_time = int(time.time())
cerberus.publish_kraken_status(start_time, end_time)
logging.info(f"wating {wait_duration} before running the next scenario")
time.sleep(wait_duration)

return failed_scenarios, scenario_telemetries
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
wait_duration = krkn_config["tunings"]["wait_duration"]
try:
with open(scenario, "r") as f:
app_outage_config_yaml = yaml.full_load(f)
Expand Down Expand Up @@ -68,14 +66,8 @@ def run(
"kraken-deny", namespace
)

logging.info(
"End of scenario. Waiting for the specified duration: %s"
% wait_duration
)
time.sleep(wait_duration)

end_time = int(time.time())
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)

except Exception as e:
logging.error(
"ApplicationOutageScenarioPlugin exiting due to Exception %s" % e
Expand Down
1 change: 0 additions & 1 deletion krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand Down
10 changes: 0 additions & 10 deletions krkn/scenario_plugins/container/container_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
start_time = int(time.time())
pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
wait_duration = krkn_config["tunings"]["wait_duration"]
try:
with open(scenario, "r") as f:
cont_scenario_config = yaml.full_load(f)
Expand All @@ -44,14 +41,7 @@ def run(
)
return 1
scenario_telemetry.affected_pods = result
logging.info("Waiting for the specified duration: %s" % (wait_duration))
time.sleep(wait_duration)

# capture end time
end_time = int(time.time())

# publish cerberus status
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)
except (RuntimeError, Exception):
logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
return 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
from krkn_lib.utils import get_yaml_item_value

from krkn import cerberus, utils
from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
from krkn.scenario_plugins.managed_cluster.common_functions import get_managedcluster
from krkn.scenario_plugins.managed_cluster.scenarios import Scenarios
Expand All @@ -18,7 +17,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand All @@ -38,8 +36,6 @@ def run(
managedcluster_scenario_object,
lib_telemetry.get_lib_kubernetes(),
)
end_time = int(time.time())
cerberus.get_status(krkn_config, start_time, end_time)
except Exception as e:
logging.error(
"ManagedClusterScenarioPlugin exiting due to Exception %s"
Expand Down
4 changes: 1 addition & 3 deletions krkn/scenario_plugins/native/native_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand All @@ -29,7 +28,6 @@ def run(
PLUGINS.run(
scenario,
lib_telemetry.get_lib_kubernetes().get_kubeconfig_path(),
krkn_config,
run_uuid,
)
result = pool.join()
Expand All @@ -48,7 +46,7 @@ def run(
def get_scenario_types(self) -> list[str]:
return [
"pod_disruption_scenarios",
"pod_network_scenario",
"pod_network_scenarios",
"vmware_node_scenarios",
"ibmcloud_node_scenarios",
]
Expand Down
Loading
Loading