Skip to content

Commit

Permalink
Arcaflow scenarios porting (#142)
Browse files Browse the repository at this point in the history
* get_node_cpu_count

get_node_cpu_count

* test_fix

* hog deployment + tests

* lowered threshold

* test_fix

* test fix

* sleep

* removed removal test not reliable

* added list_schedulable_nodes + tests

* added get_node_resources infos

* deploy_hog refactoring

* removed mandatory selector

* added optional worker in model

* removed useless log

* changed node selector type from dict to str

* changed node selector input

* changed deploy_hog tests

* changed regex match

regex fix

regex fix

regex fix

match fix

* number of nodes option

number of nodes

* linting
  • Loading branch information
tsebastiani authored Jan 31, 2025
1 parent a07496b commit 045db43
Show file tree
Hide file tree
Showing 8 changed files with 503 additions and 2 deletions.
124 changes: 123 additions & 1 deletion src/krkn_lib/k8s/krkn_kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
ServiceHijacking,
Volume,
VolumeMount,
NodeResources,
)
from krkn_lib.models.krkn import HogConfig, HogType
from krkn_lib.models.telemetry import ClusterEvent, NodeInfo, Taint
from krkn_lib.utils import filter_dictionary, get_random_string
from krkn_lib.utils.safe_logger import SafeLogger
Expand Down Expand Up @@ -1181,7 +1183,7 @@ def delete_pod(self, name: str, namespace: str = "default"):
time.sleep(1)
except ApiException as e:
if e.status == 404:
logging.info("Pod already deleted")
return
else:
logging.error("Failed to delete pod %s", str(e))
raise e
Expand Down Expand Up @@ -1860,6 +1862,37 @@ def list_ready_nodes(self, label_selector: str = None) -> list[str]:

return nodes

def list_schedulable_nodes(self, label_selector: str = None) -> list[str]:
"""
Lists all the nodes that do not have `NoSchedule` or `NoExecute` taints
and where pods can be scheduled
:param label_selector: a label selector to filter the nodes
:return: a list of node names
"""
nodes = []
try:
if label_selector:
ret = self.cli.list_node(
pretty=True, label_selector=label_selector
)
else:
ret = self.cli.list_node(pretty=True)
except ApiException as e:
logging.error(
"Exception when calling CoreV1Api->list_node: %s\n", str(e)
)
raise e
for node in ret.items:
if node.spec.taints:
try:
for taint in node.spec.taints:
if taint.effect in ["NoSchedule", "NoExecute"]:
raise Exception
except Exception:
continue
nodes.append(node.metadata.name)
return nodes

# TODO: is the signature correct? the method
# returns a list of nodes and the signature name is `get_node`
def get_node(
Expand Down Expand Up @@ -2020,6 +2053,24 @@ def get_api_resources_by_group(self, group, version):

return None

def get_node_cpu_count(self, node_name: str) -> int:
"""
Returns the number of cpus of a specified node
:param node_name: the name of the node
:return: the number of cpus or 0 if any exception is raised
"""
api_client = self.api_client

if api_client:
try:
v1 = self.cli
node = v1.read_node(node_name)
cpu_capacity = node.status.capacity.get("cpu")
return int(cpu_capacity)
except Exception:
return 0

def get_nodes_infos(self) -> (list[NodeInfo], list[Taint]):
"""
Returns a list of NodeInfo objects
Expand Down Expand Up @@ -3258,3 +3309,74 @@ def deploy_syn_flood(
)

self.create_pod(namespace=namespace, body=pod_body)

def deploy_hog(self, pod_name: str, hog_config: HogConfig):
"""
Deploys a Pod to run the Syn Flood scenario
:param pod_name: The name of the pod that will be deployed
:param hog_config: Hog Configuration
"""
compiled_regex = re.compile(r"^.+=.*$")
has_selector = hog_config.node_selector is not None and bool(
compiled_regex.match(hog_config.node_selector)
)
if has_selector:
node_selector = hog_config.node_selector.split("=")
else:
node_selector = {"", ""}
file_loader = PackageLoader("krkn_lib.k8s", "templates")
env = Environment(loader=file_loader, autoescape=True)
io_volume = {"volumes": [hog_config.io_target_pod_volume]}
yaml_data = yaml.dump(io_volume, default_flow_style=False, indent=2)
pod_template = env.get_template("hog_pod.j2")
pod_body = yaml.safe_load(
pod_template.render(
name=pod_name,
namespace=hog_config.namespace,
hog_type=hog_config.type.value,
hog_type_io=HogType.io.value,
has_selector=has_selector,
node_selector_key=node_selector[0],
node_selector_value=node_selector[1],
image=hog_config.image,
duration=hog_config.duration,
cpu_load_percentage=hog_config.cpu_load_percentage,
cpu_method=hog_config.cpu_method,
io_block_size=hog_config.io_block_size,
io_write_bytes=hog_config.io_write_bytes,
io_target_pod_folder=hog_config.io_target_pod_folder,
io_volume_mount=yaml_data,
memory_vm_bytes=hog_config.memory_vm_bytes,
workers=hog_config.workers,
target_pod_folder=hog_config.io_target_pod_folder,
)
)

self.create_pod(namespace=hog_config.namespace, body=pod_body)

def get_node_resources_info(self, node_name: str) -> NodeResources:
resources = NodeResources()
path_params: dict[str, str] = {}
query_params: list[str] = []
header_params: dict[str, str] = {}
auth_settings = ["BearerToken"]
header_params["Accept"] = self.api_client.select_header_accept(
["application/json"]
)
path = f"/api/v1/nodes/{node_name}/proxy/stats/summary"
(data) = self.api_client.call_api(
path,
"GET",
path_params,
query_params,
header_params,
response_type="str",
auth_settings=auth_settings,
)

json_obj = ast.literal_eval(data[0])
resources.cpu = json_obj["node"]["cpu"]["usageNanoCores"]
resources.memory = json_obj["node"]["memory"]["availableBytes"]
resources.disk_space = json_obj["node"]["fs"]["availableBytes"]
return resources
44 changes: 44 additions & 0 deletions src/krkn_lib/k8s/templates/hog_pod.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
apiVersion: v1
kind: Pod
metadata:
name: {{ name }}
namespace: {{ namespace }}
spec:
{% if has_selector == true %}
nodeSelector:
{{ node_selector_key }}: {{ node_selector_value }}
{% endif %}
{% if hog_type == hog_type_io %}
{{ io_volume_mount | safe | indent(4)}}
{% endif %}
restartPolicy: Never
imagePullPolicy: IfNotPresent
containers:
- name: krkn-hog
image: {{ image }}
securityContext:
privileged: true
{% if hog_type == hog_type_io %}
volumeMounts:
- name: node-volume
mountPath: {{ target_pod_folder }}
{% endif %}
env:
- name: HOG_TYPE
value: '{{ hog_type }}'
- name: LOAD_PERCENTAGE
value: '{{ cpu_load_percentage }}'
- name: CPU_METHOD
value: '{{ cpu_method }}'
- name: HDD_WRITE_SIZE
value: '{{ io_block_size }}'
- name: HDD_BYTES
value: '{{ io_write_bytes }}'
- name: STRESS_PATH
value: '{{ target_pod_folder }}'
- name: VM_BYTES
value: '{{ memory_vm_bytes }}'
- name: WORKERS
value: '{{ workers }}'
- name: DURATION
value: '{{ duration }}'
11 changes: 11 additions & 0 deletions src/krkn_lib/models/k8s/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,14 @@ def __init__(
self.namespace = namespace
self.selector = selector
self.config_map_name = config_map_name


class NodeResources:
memory: int
cpu: int
disk_space: int

def __init__(self):
self.memory = 0
self.cpu = 0
self.disk_space = 0
118 changes: 118 additions & 0 deletions src/krkn_lib/models/krkn/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import json
from dataclasses import dataclass
from enum import Enum
from typing import Optional

from krkn_lib.models.telemetry import ChaosRunTelemetry

Expand Down Expand Up @@ -95,3 +97,119 @@ def __init__(self):

def to_json(self) -> str:
return json.dumps(self, default=lambda o: o.__dict__, indent=4)


class HogType(str, Enum):
cpu = "cpu"
memory = "memory"
io = "io"


class HogConfig:
type: HogType
image: str
# cpu hog
cpu_load_percentage: int
cpu_method: str

# io hog
io_block_size: str
io_write_bytes: str
io_target_pod_folder: str
io_target_pod_volume: dict[str, any]

# memory hog
memory_vm_bytes: str

workers: Optional[int]
number_of_nodes: Optional[int]
duration: int
namespace: str
node_selector: str

def __init__(self):
self.type = HogType.cpu
self.image = "quay.io/krkn-chaos/krkn-hog"
self.cpu_load_percentage = 80
self.cpu_method = "all"
self.io_block_size = "1m"
self.io_write_bytes = "10m"
self.io_target_pod_folder = "/hog-data"
self.io_target_pod_volume = {
"hostPath": {"path": "/tmp"},
"name": "node-volume",
}
self.memory_vm_bytes = "10%"
self.workers = None
self.number_of_nodes = None
self.duration = 30
self.namespace = "default"
self.node_selector = ""

@staticmethod
def from_yaml_dict(yaml_dict: dict[str, str]) -> HogConfig:
config = HogConfig()
missing_fields = []
if "hog-type" not in yaml_dict.keys() or not yaml_dict["hog-type"]:
missing_fields.append("hog-type")

if len(missing_fields) > 0:
missing = ",".join(missing_fields)
raise Exception(
f"missing mandatory fields on hog config file: {missing}"
)

config.type = HogType[yaml_dict["hog-type"]]
config.node_selector = yaml_dict["node-selector"]

if "duration" in yaml_dict.keys() and yaml_dict["duration"]:
config.duration = yaml_dict["duration"]
if "namespace" in yaml_dict.keys() and yaml_dict["namespace"]:
config.namespace = yaml_dict["namespace"]
if "workers" in yaml_dict.keys() and yaml_dict["workers"]:
config.workers = yaml_dict["workers"]
if (
"number-of-nodes" in yaml_dict.keys()
and yaml_dict["number-of-nodes"]
):
config.number_of_nodes = yaml_dict["number-of-nodes"]
if "image" in yaml_dict.keys() and yaml_dict["image"]:
config.image = yaml_dict["image"]

if config.type == HogType.cpu:
if (
"cpu-load-percentage" in yaml_dict.keys()
and yaml_dict["cpu-load-percentage"]
):
config.cpu_load_percentage = yaml_dict["cpu-load-percentage"]
if "cpu-method" in yaml_dict.keys() and yaml_dict["cpu-method"]:
config.cpu_method = yaml_dict["cpu-method"]
elif config.type == HogType.io:
if (
"io-block-size" in yaml_dict.keys()
and yaml_dict["io-block-size"]
):
config.io_block_size = yaml_dict["io-block-size"]
if (
"io-write-bytes" in yaml_dict.keys()
and yaml_dict["io-write-bytes"]
):
config.io_write_bytes = yaml_dict["io-write-bytes"]
if (
"io-target-pod-folder" in yaml_dict.keys()
and yaml_dict["io-target-pod-folder"]
):
config.io_target_pod_folder = yaml_dict["io-target-pod-folder"]
if (
"io-target-pod-volume" in yaml_dict.keys()
and yaml_dict["io-target-pod-volume"]
):
config.io_target_pod_volume = yaml_dict["io-target-pod-volume"]
elif config.type == HogType.memory:
if (
"memory-vm-bytes" in yaml_dict.keys()
and yaml_dict["memory-vm-bytes"]
):
config.memory_vm_bytes = yaml_dict["memory-vm-bytes"]

return config
15 changes: 15 additions & 0 deletions src/krkn_lib/tests/test_krkn_kubernetes_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,21 @@ def test_get_nodes_infos(self):
except Exception:
self.fail("failed to deserialize NodeInfo")

def test_get_node_cpu_count(self):
nodes = self.lib_k8s.list_nodes()
node_cpus = self.lib_k8s.get_node_cpu_count(nodes[0])
self.assertGreater(node_cpus, 0)

node_cpus = self.lib_k8s.get_node_cpu_count("does_not_exist")
self.assertEqual(node_cpus, 0)

def test_get_node_resources_infos(self):
nodes = self.lib_k8s.list_nodes()
infos = self.lib_k8s.get_node_resources_info(nodes[0])
self.assertGreater(infos.disk_space, 0)
self.assertGreater(infos.memory, 0)
self.assertGreater(infos.cpu, 0)


if __name__ == "__main__":
unittest.main()
10 changes: 10 additions & 0 deletions src/krkn_lib/tests/test_krkn_kubernetes_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ def test_list_namespaces_by_regex(self):
self.assertEqual(len(filtered_ns_ok), 2)
self.assertEqual(len(filtered_ns_fail), 0)

def test_list_schedulable_nodes(self):
schedulable_nodes = self.lib_k8s.list_schedulable_nodes()
self.assertGreater(len(schedulable_nodes), 0)
schedulable_nodes_empty_selector = self.lib_k8s.list_schedulable_nodes(
label_selector=""
)
self.assertEqual(
len(schedulable_nodes), len(schedulable_nodes_empty_selector)
)


if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit 045db43

Please sign in to comment.