Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chaos AI updates #652

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion utils/chaos_ai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ Enhancing Chaos Engineering with AI-assisted fault injection for better resilien

## Generate python package wheel file
```
$ rm -rf build/ constraints.egg-info
$ python3.9 generate_wheel_package.py sdist bdist_wheel
$ cp dist/aichaos-0.0.1-py3-none-any.whl docker/
```
This creates a python package file aichaos-0.0.1-py3-none-any.whl in the dist folder.
This creates a python package file aichaos-0.0.1-py3-none-any.whl in the dist folder.

## Build Image
```
Expand Down
4 changes: 2 additions & 2 deletions utils/chaos_ai/docker/config/yml/chaosGen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ Generate chaos on an application deployed on a cluster.
type: string
default: robot-shop
required: true
description: Namespace to test
description: Comma separated namespaces to test
- name: podlabels
in: formData
type: string
default: service=cart,service=payment
required: true
required: false
description: Pod labels to test
- name: nodelabels
in: formData
Expand Down
8 changes: 5 additions & 3 deletions utils/chaos_ai/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
numpy
pandas
requests
numpy==2.0.0
pandas==2.2.2
requests==2.32.3
Flask==2.2.5
Werkzeug==3.0.3
flasgger==0.9.5
kubernetes==30.1.0
urllib3==2.2.3
7 changes: 7 additions & 0 deletions utils/chaos_ai/docker/swagger_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

# sys.path.append("..")
from src.aichaos_main import AIChaos
import src.utils as utils

app = Flask(__name__)
Swagger(app)
Expand Down Expand Up @@ -54,6 +55,8 @@ def startchaos(self, kubeconfigfile, file_id, params):
params['iterations'] = config_params['iterations']
params['maxfaults'] = config_params['maxfaults']
# faults = [f + ':' + p for f in params['faults'].split(',') for p in params['podlabels'].split(',')]
if params['podlabels'] is None or params['podlabels'] == '':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @sandeephans pod labels are not being automatically picked up when not defined. Thoughts?

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/threading.py", line 980, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.9/threading.py", line 917, in run
    self._target(*self._args, **self._kwargs)
  File "/app/swagger_api.py", line 58, in startchaos
    if params['podlabels'] is None or params['podlabels'] == '':
KeyError: 'podlabels'

params['podlabels'] = ','.join(utils.get_pods(kubeconfigfile))
faults = []
for f in params['faults'].split(','):
if f in ['pod-delete']:
Expand Down Expand Up @@ -119,6 +122,10 @@ def chaos_gen():
# print('HEADER:', f.headers)
print('[GenerateChaos] reqs:', request.form.to_dict())
# print('[GenerateChaos]', f.filename, datetime.now())
if utils.is_cluster_accessible(kubeconfigfile):
print("Cluster is accessible!")
else:
return 'Cluster not accessible !!!'
thread = threading.Thread(target=sw.startchaos, args=(kubeconfigfile, str(i), request.form.to_dict()))
thread.daemon = True
print(thread.getName())
Expand Down
10 changes: 0 additions & 10 deletions utils/chaos_ai/requirements.txt

This file was deleted.

6 changes: 4 additions & 2 deletions utils/chaos_ai/src/kraken_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(self, namespace='robot-shop', chaos_dir='../config/',
self.engines = []
self.wait_checks = wait_checks
self.command = command
self.ns_pods = utils.get_namespace_pods(namespace, kubeconfig)

def exp_status(self, engine='engine-cartns3'):
substring_list = ['Waiting for the specified duration','Waiting for wait_duration', 'Step workload started, waiting for response']
Expand Down Expand Up @@ -83,15 +84,16 @@ def inject_faults(self, fault, pod_name):
self.logger.debug('[KRAKEN][INJECT_FAULT] ' + fault + ':' + pod_name)
fault, load = utils.get_load(fault)
engine = 'engine-' + pod_name.replace('=', '-').replace('/','-') + '-' + fault
ns = utils.get_ns_from_pod(self.ns_pods, pod_name)
if fault == 'pod-delete':
cmd = self.command+' run -d -e NAMESPACE='+self.namespace+' -e POD_LABEL='+pod_name+' --name='+engine+' --net=host -v '+self.kubeconfig+':/root/.kube/config:Z quay.io/redhat-chaos/krkn-hub:pod-scenarios >> temp'
cmd = self.command+' run -d -e NAMESPACE='+ns+' -e POD_LABEL='+pod_name+' --name='+engine+' --net=host -v '+self.kubeconfig+':/root/.kube/config:Z quay.io/redhat-chaos/krkn-hub:pod-scenarios >> temp'
elif fault == 'network-chaos':
# 'docker run -e NODE_NAME=minikube-m03 -e DURATION=10 --name=knetwork --net=host -v /home/chaos/.kube/kube-config-raw:/root/.kube/config:Z -d quay.io/redhat-chaos/krkn-hub:network-chaos >> temp'
cmd = self.command+' run -d -e NODE_NAME='+pod_name+' -e DURATION=120 --name='+engine+' --net=host -v '+self.kubeconfig+':/root/.kube/config:Z -d quay.io/redhat-chaos/krkn-hub:network-chaos >> temp'
elif fault == 'node-memory-hog':
cmd = self.command+' run -d -e NODE_NAME='+pod_name+' -e DURATION=120 -e NODES_AFFECTED_PERC=100 --name='+engine+' --net=host -v '+self.kubeconfig+':/root/.kube/config:Z -d quay.io/redhat-chaos/krkn-hub:node-memory-hog >> temp'
elif fault == 'node-cpu-hog':
cmd = self.command+' run -e NODE_SELECTORS='+pod_name+' -e NODE_CPU_PERCENTAGE=100 -e NAMESPACE='+self.namespace+' -e TOTAL_CHAOS_DURATION=120 -e NODE_CPU_CORE=100 --name='+engine+' --net=host -env-host=true -v '+self.kubeconfig+':/root/.kube/config:Z -d quay.io/redhat-chaos/krkn-hub:node-cpu-hog'
cmd = self.command+' run -e NODE_SELECTORS='+pod_name+' -e NODE_CPU_PERCENTAGE=100 -e NAMESPACE='+ns+' -e TOTAL_CHAOS_DURATION=120 -e NODE_CPU_CORE=100 --name='+engine+' --net=host -env-host=true -v '+self.kubeconfig+':/root/.kube/config:Z -d quay.io/redhat-chaos/krkn-hub:node-cpu-hog'
else:
cmd = 'echo'
self.logger.debug('[KRAKEN][INJECT_FAULT] ' + cmd)
Expand Down
91 changes: 91 additions & 0 deletions utils/chaos_ai/src/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import re

import urllib3
from kubernetes import client, config
from kubernetes.client.rest import ApiException


def get_load(fault):
params = re.findall(r'\(.*?\)', fault)
Expand All @@ -8,3 +12,90 @@ def get_load(fault):
load = params[0].strip('()')
fault = fault.strip(params[0])
return fault, load


def is_cluster_accessible(kubeconfig_path: str) -> bool:
    """Report whether the cluster described by a kubeconfig can be reached.

    Loads the kubeconfig at ``kubeconfig_path`` and lists the cluster's
    nodes as a connectivity probe, printing the node count on success.

    Args:
        kubeconfig_path: Path of the kubeconfig file to load.

    Returns:
        True when the node listing succeeds; False when loading the
        configuration or querying the API raises any exception (the error
        is printed, never propagated).
    """
    try:
        config.load_kube_config(config_file=kubeconfig_path)
        core_api = client.CoreV1Api()

        # Listing nodes is the probe: it only succeeds with a reachable API.
        nodes = core_api.list_node()
        print("#Nodes in Cluster: ", len(nodes.items))
    except Exception as e:
        # A single handler is enough: the previous tuple already contained
        # Exception, so the narrower classes listed beside it added nothing.
        print(f"Cluster is not accessible: {e}")
        return False
    return True

def _pod_label_selectors(pods):
    """Flatten a pod listing into unique ``key=value`` label strings.

    Accepts either a pod-list object exposing an ``items`` list of pods
    (each with ``metadata.labels``) or a plain iterable of label strings.
    """
    entries = getattr(pods, "items", pods)
    if callable(entries):
        # ``items`` was a bound method (e.g. on a dict), not a pod list.
        entries = pods

    selectors = {}  # dict keeps first-seen order while de-duplicating
    for entry in entries or ():
        if isinstance(entry, str):
            selectors.setdefault(entry)
            continue
        metadata = getattr(entry, "metadata", None)
        labels = getattr(metadata, "labels", None) or {}
        for key, value in labels.items():
            selectors.setdefault(f"{key}={value}")
    return list(selectors)


def get_namespace_pods(namespaces: str, kubeconfig_path: str):
    """Map each requested namespace to the pod labels found in it.

    Args:
        namespaces: Comma separated namespace names; surrounding whitespace
            is ignored and empty names are skipped.
        kubeconfig_path: Kubeconfig path forwarded to ``get_pod_labels``.

    Returns:
        A dict from namespace name to a list of ``key=value`` label
        strings. Namespaces with no labels (or whose lookup failed) are
        left out.
    """
    ns_pods = {}
    for raw_name in namespaces.split(","):
        ns = raw_name.strip()
        if not ns:
            continue
        # get_pod_labels returns [] on failure and the API client's pod
        # listing on success; the listing is not a sized list, so it is
        # reduced to label strings here instead of being passed to len().
        labels = _pod_label_selectors(get_pod_labels(ns, kubeconfig_path))
        if labels:
            ns_pods[ns] = labels
    return ns_pods

def get_ns_from_pod(ns_pods, podlabel):
    """Return the namespace whose pods carry ``podlabel``.

    Args:
        ns_pods: Mapping from namespace name to that namespace's pods,
            given either as ``key=value`` label strings or as a pod-list
            object exposing ``items`` with ``metadata.labels`` per pod.
        podlabel: Label selector to look for, e.g. ``service=cart``.

    Returns:
        The first namespace (in mapping order) containing the label, or an
        empty string when no namespace matches.
    """
    key, sep, value = podlabel.partition("=")
    for ns, pods in ns_pods.items():
        entries = getattr(pods, "items", pods)
        if callable(entries):
            # ``items`` was a bound method (e.g. on a dict), not a pod list.
            entries = pods
        for entry in entries or ():
            # Match against the namespace's labels, not the namespace name:
            # a substring test on the name never matches a real selector.
            if isinstance(entry, str):
                if entry == podlabel:
                    return ns
                continue
            metadata = getattr(entry, "metadata", None)
            labels = getattr(metadata, "labels", None) or {}
            if sep and labels.get(key) == value:
                return ns
    return ''

# get all pod labels from a namespace
def get_pod_labels(namespace: str, kubeconfig_path: str = None):
    """List the pods of a namespace and print each pod's labels.

    Args:
        namespace: Namespace whose pods are listed.
        kubeconfig_path: Kubeconfig file to load; when empty or None the
            client's default kubeconfig is loaded instead.

    Returns:
        The object returned by ``list_namespaced_pod`` when the call
        succeeds (its ``items`` attribute holds the pods), otherwise an
        empty list. Errors are printed, never raised.
    """
    pods = []
    try:
        if kubeconfig_path:
            config.load_kube_config(config_file=kubeconfig_path)
        else:
            config.load_kube_config()  # Load default kubeconfig file

        core_api = client.CoreV1Api()
        pods = core_api.list_namespaced_pod(namespace)

        print(f"Pod labels in namespace '{namespace}':")
        for pod in pods.items:
            print(f"Pod Name: {pod.metadata.name}, Labels: {pod.metadata.labels}")

    except FileNotFoundError:
        print(f"Kubeconfig file not found at {kubeconfig_path}")
    except ApiException as e:
        print(f"API exception occurred: {e}")
    except urllib3.exceptions.MaxRetryError as e:
        print(f"Max retries exceeded: {e}")
    except urllib3.exceptions.NameResolutionError as e:
        # Must precede NewConnectionError: NameResolutionError is a subclass
        # of it, so the reverse order made this handler unreachable.
        print(f"Name resolution error: {e}")
    except urllib3.exceptions.NewConnectionError as e:
        print(f"New connection error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return pods


def get_pods(kubeconfig_path: str = None):
    """Fetch the pod listing across every namespace of the cluster.

    Args:
        kubeconfig_path: Kubeconfig file to load; when empty or None the
            client's default kubeconfig is loaded instead.

    Returns:
        Whatever ``list_pod_for_all_namespaces`` returns on success, or an
        empty list when anything goes wrong (the error is printed).

    NOTE(review): the success value is the client's listing object, not a
    list of strings -- callers needing label strings presumably have to
    extract them; confirm against the call sites.
    """
    listing = []
    try:
        # Only pass config_file when a path was actually supplied.
        loader_kwargs = {"config_file": kubeconfig_path} if kubeconfig_path else {}
        config.load_kube_config(**loader_kwargs)
        listing = client.CoreV1Api().list_pod_for_all_namespaces()
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
    return listing

if __name__ == '__main__':
    # Manual smoke checks. They sit under the main guard so that importing
    # this module never loads a kubeconfig or prints as a side effect.

    # Example usage
    namespace = 'default'
    kubeconfig_path = '/path/to/your/kubeconfig'  # Provide the path to your kubeconfig file, or set to None to use the default
    get_pod_labels(namespace, kubeconfig_path)

    print(is_cluster_accessible("~/Downloads/kube-config-raw"))

Loading