From d00d6ec69ea0ceac9deebb09815b2c5e526b974f Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Wed, 5 May 2021 19:23:45 +0530 Subject: [PATCH] Install pre-commit and use GitHub Actions (#94) * added pre-commit and code-cleaning * removed tox and TravisCI --- .github/workflows/ci.yml | 22 ++ .gitignore | 1 - .pre-commit-config.yaml | 30 +++ .travis.yml | 15 -- CI/scenarios/post_action_etcd.yml | 2 +- CI/scenarios/post_action_etcd_example_py.py | 6 +- CI/scenarios/post_action_prometheus.yml | 1 - CI/scenarios/post_action_regex.py | 13 +- README.md | 2 +- ansible/vars/kraken_vars.yml | 2 +- containers/README.md | 6 +- containers/build_own_image-README.md | 2 +- docs/contribute.md | 17 +- docs/litmus_scenarios.md | 19 +- docs/node_scenarios.md | 16 +- docs/pod_scenarios.md | 2 +- docs/time_scenarios.md | 2 +- kraken/invoke/command.py | 6 +- kraken/kubernetes/client.py | 63 +++--- kraken/litmus/common_litmus.py | 60 +++--- .../node_actions/abstract_node_scenarios.py | 13 +- kraken/node_actions/aws_node_scenarios.py | 56 ++--- kraken/node_actions/az_node_scenarios.py | 38 ++-- kraken/node_actions/common_node_functions.py | 24 +-- kraken/node_actions/gcp_node_scenarios.py | 82 ++++---- .../general_cloud_node_scenarios.py | 12 +- .../node_actions/openstack_node_scenarios.py | 36 ++-- kraken/performance_dashboards/setup.py | 2 +- kraken/time_actions/common_time_functions.py | 51 +++-- requirements.txt | 1 - run_kraken.py | 194 +++++++++--------- scenarios/etcd.yml | 2 +- scenarios/node_hog_engine.yaml | 2 +- scenarios/openshift-apiserver.yml | 2 +- scenarios/openshift-kube-apiserver.yml | 2 +- scenarios/post_action_etcd_example_py.py | 6 +- scenarios/post_action_prometheus.yml | 1 - scenarios/post_action_regex.py | 13 +- scenarios/prometheus.yml | 2 +- setup.cfg | 8 - setup.py | 2 +- test-requirements.txt | 2 - tox.ini | 24 --- 43 files changed, 424 insertions(+), 438 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 .travis.yml delete mode 100644 test-requirements.txt delete mode 100644 tox.ini diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..08d1bc68 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,22 @@ +name: Code Quality Check + +on: + - push + - pull_request + +jobs: + lint-ci: + runs-on: ubuntu-latest + name: Run pre-commit and install test + steps: + - name: Check out source repository + uses: actions/checkout@v2 + - name: Set up Python environment + uses: actions/setup-python@v1 + with: + python-version: "3.8" + - name: Run pre-commit + uses: pre-commit/action@v2.0.3 + - name: Install Kraken + run: | + python setup.py develop diff --git a/.gitignore b/.gitignore index 2be14f00..08ce389c 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,6 @@ tags # Unittest and coverage htmlcov/* .coverage -.tox junit.xml coverage.xml .pytest_cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..cc6997cf --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +--- +repos: + - repo: git://github.com/Lucas-C/pre-commit-hooks + rev: v1.1.1 + hooks: + - id: remove-tabs + + - repo: git://github.com/pre-commit/pre-commit-hooks + rev: v2.0.0 + hooks: + - id: trailing-whitespace + - id: check-merge-conflict + - id: end-of-file-fixer + - id: check-case-conflict + - id: detect-private-key + - id: check-ast + + - repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black + args: ['--line-length', '120'] + + - repo: https://gitlab.com/PyCQA/flake8 + rev: '3.7.8' + hooks: + - id: flake8 + additional_dependencies: ['pep8-naming'] + # Ignore all format-related checks as Black takes care of those. + args: ['--ignore', 'E123,E125', '--select', 'E,W,F', '--max-line-length=120'] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1f947e4f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -dist: xenial - -langauge: python - -python: "3.6.8" - -before_install: - - sudo apt-get update - -install: - - sudo apt install python3-pip - - sudo pip3 install -r test-requirements.txt - - sudo pip3 install tox - -script: tox . diff --git a/CI/scenarios/post_action_etcd.yml b/CI/scenarios/post_action_etcd.yml index 6a472e26..29721ea4 100755 --- a/CI/scenarios/post_action_etcd.yml +++ b/CI/scenarios/post_action_etcd.yml @@ -18,4 +18,4 @@ scenarios: # The actions will be executed in the order specified actions: - checkPodCount: - count: 3 \ No newline at end of file + count: 3 diff --git a/CI/scenarios/post_action_etcd_example_py.py b/CI/scenarios/post_action_etcd_example_py.py index 1d840625..1c7a2cf4 100755 --- a/CI/scenarios/post_action_etcd_example_py.py +++ b/CI/scenarios/post_action_etcd_example_py.py @@ -5,9 +5,9 @@ def run(cmd): try: - output = subprocess.Popen(cmd, shell=True, - universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + output = subprocess.Popen( + cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, err) = output.communicate() logging.info("out " + str(out)) except Exception as e: diff --git a/CI/scenarios/post_action_prometheus.yml b/CI/scenarios/post_action_prometheus.yml index ebae7879..76405fb0 100644 --- a/CI/scenarios/post_action_prometheus.yml +++ b/CI/scenarios/post_action_prometheus.yml @@ -19,4 +19,3 @@ scenarios: actions: - checkPodCount: count: 2 - \ No newline at end of file diff --git a/CI/scenarios/post_action_regex.py b/CI/scenarios/post_action_regex.py index ce12ab8c..f7e2ce2e 100755 --- a/CI/scenarios/post_action_regex.py +++ b/CI/scenarios/post_action_regex.py @@ -15,8 +15,11 @@ def list_namespaces(): cli = client.CoreV1Api() ret = cli.list_namespace(pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->list_namespaced_pod: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->list_namespaced_pod: %s\n" + % e + ) for namespace in ret.items: namespaces.append(namespace.metadata.name) return namespaces @@ -47,9 +50,9 @@ def check_namespaces(namespaces): def run(cmd): try: - output = subprocess.Popen(cmd, shell=True, - universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + output = subprocess.Popen( + cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, err) = output.communicate() except Exception as e: logging.error("Failed to run %s, error: %s" % (cmd, e)) diff --git a/README.md b/README.md index 1c6ff52d..7aeb7baf 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Monitoring the Kubernetes/OpenShift cluster to observe the impact of Kraken chao - Blog post emphasizing the importance of making Chaos part of Performance and Scale runs to mimic the production environments: https://www.openshift.com/blog/making-chaos-part-of-kubernetes/openshift-performance-and-scalability-tests ### Contributions -We are always looking for more enhancements, fixes to make it better, any contributions are most welcome. Feel free to report or work on the issues filed on github. +We are always looking for more enhancements, fixes to make it better, any contributions are most welcome. Feel free to report or work on the issues filed on github. [More information on how to Contribute](docs/contribute.md) diff --git a/ansible/vars/kraken_vars.yml b/ansible/vars/kraken_vars.yml index f35de533..06af0f6b 100644 --- a/ansible/vars/kraken_vars.yml +++ b/ansible/vars/kraken_vars.yml @@ -25,7 +25,7 @@ scenarios: "{{ lookup('env', 'SCENARIOS')|default('[[scenarios/etcd.yml, scenari exit_on_failure: "{{ lookup('env', 'EXIT_ON_FAILURE')|default(false, true) }}" -# Cerberus enabled by user +# Cerberus enabled by user cerberus_enabled: "{{ lookup('env', 'CERBERUS_ENABLED')|default(false, true) }}" cerberus_url: "{{ lookup('env', 'CERBERUS_URL')|default('', true) }}" diff --git a/containers/README.md b/containers/README.md index fb390325..ca556937 100644 --- a/containers/README.md +++ b/containers/README.md @@ -1,12 +1,12 @@ ### Kraken image -Container image gets automatically built by quay.io at [Kraken image](https://quay.io/repository/openshift-scale/kraken). +Container image gets automatically built by quay.io at [Kraken image](https://quay.io/repository/openshift-scale/kraken). ### Run containerized version Refer [instructions](https://github.com/cloud-bulldozer/kraken/blob/master/docs/installation.md#run-containerized-version) for information on how to run the containerized version of kraken. -### Run Custom Kraken Image +### Run Custom Kraken Image Refer to [instructions](https://github.com/cloud-bulldozer/kraken/blob/master/containers/build_own_image-README.md) for information on how to run a custom containerized version of kraken using podman @@ -25,4 +25,4 @@ To run containerized Kraken as a Kubernetes/OpenShift Deployment, follow these s 8. In Openshift, add privileges to service account and execute `oc adm policy add-scc-to-user privileged -z useroot`. 9. Create a Deployment and a NodePort Service using `kubectl apply -f kraken.yml` -NOTE: It is not recommended to run Kraken internal to the cluster as the pod which is running Kraken might get disrupted. \ No newline at end of file +NOTE: It is not recommended to run Kraken internal to the cluster as the pod which is running Kraken might get disrupted. diff --git a/containers/build_own_image-README.md b/containers/build_own_image-README.md index 6e3cf0c7..c8443a31 100644 --- a/containers/build_own_image-README.md +++ b/containers/build_own_image-README.md @@ -10,4 +10,4 @@ 1. Git clone the Kraken repository using `git clone https://github.com/cloud-bulldozer/kraken.git` on an IBM Power Systems server. 2. Modify the python code and yaml files to address your needs. 3. Execute `podman build -t :latest -f Dockerfile-ppc64le` in the containers directory within kraken to build an image from the Dockerfile for Power. -4. Execute `podman run --detach --name :latest` to start a container based on your new image. \ No newline at end of file +4. Execute `podman run --detach --name :latest` to start a container based on your new image. diff --git a/docs/contribute.md b/docs/contribute.md index 7a97a6d5..fcb8f83f 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -22,15 +22,17 @@ $ git push ``` ## Fix Formatting -You can do this before your first commit but please take a look at the formatting outlined using tox. +Kraken uses [pre-commit](https://pre-commit.com) framework to maintain the code linting and python code styling. +The CI would run the pre-commit check on each pull request. +We encourage our contributors to follow the same pattern, while contributing to the code. -To run: +The pre-commit configuration file is present in the repository `.pre-commit-config.yaml` +It contains the different code styling and linting guide which we use for the application. -```pip install tox ```(if not already installed) +Following command can be used to run the pre-commit: +`pre-commit run --all-files` -```tox``` - -Fix all spacing, import issues and other formatting issues +If pre-commit is not installed in your system, it can be install with : `pip install pre-commit` ## Squash Commits If there are mutliple commits, please rebase/squash multiple commits @@ -50,6 +52,3 @@ Push your rebased commits (you may need to force), then issue your PR. ``` $ git push origin --force ``` - - - diff --git a/docs/litmus_scenarios.md b/docs/litmus_scenarios.md index d3b6ed86..499062dd 100644 --- a/docs/litmus_scenarios.md +++ b/docs/litmus_scenarios.md @@ -1,6 +1,6 @@ ### Litmus Scenarios Kraken consumes [Litmus](https://github.com/litmuschaos/litmus) under the hood for some infrastructure, pod, and node scenarios - + Official Litmus documentation and to read more information on specifics of Litmus resources can be found [here](https://docs.litmuschaos.io/docs/next/getstarted/) @@ -10,32 +10,29 @@ There are 3 custom resources that are created during each Litmus scenario. Below * ChaosExperiment: A resource to group the configuration parameters of a chaos experiment. ChaosExperiment CRs are created by the operator when experiments are invoked by ChaosEngine. * ChaosResult : A resource to hold the results of a chaos-experiment. The Chaos-exporter reads the results and exports the metrics into a configured Prometheus server. -### Understanding Litmus Scenarios +### Understanding Litmus Scenarios To run Litmus scenarios we need to apply 3 different resources/yaml files to our cluster 1. **Chaos experiments** contain the actual chaos details of a scenario i. This is installed automatically by Kraken (does not need to be specified in kraken scenario configuration) - -2. **Service Account**: should be created to allow chaosengine to run experiments in your application namespace. Usually sets just enough permissions to a specific namespace to be able to run the experiment properly + +2. **Service Account**: should be created to allow chaosengine to run experiments in your application namespace. Usually sets just enough permissions to a specific namespace to be able to run the experiment properly i. This can be defined using either a link to a yaml file or a downloaded file in the scenarios folder - -3. **Chaos Engine** connects the application instance to a Chaos Experiment. This is where you define the specifics of your scenario; ie: the node or pod name you want to cause chaos within + +3. **Chaos Engine** connects the application instance to a Chaos Experiment. This is where you define the specifics of your scenario; ie: the node or pod name you want to cause chaos within i. This is a downloaded yaml file in the scenarios folder, full list of scenarios can be found [here](https://hub.litmuschaos.io/) -**NOTE**: By default all chaos experiments will be installed based on the version you give in the config file. +**NOTE**: By default all chaos experiments will be installed based on the version you give in the config file. Adding a new Litmus based scenario is as simple as adding references to 2 new yaml files (the Service Account and Chaos engine files for your scenario ) in the Kraken config. ### Current Scenarios -Following are the start of scenarios for which a chaos scenario config exists today. +Following are the start of scenarios for which a chaos scenario config exists today. Component | Description | Working ------------------------ | ---------------------------------------------------------------------------------------------------| ------------------------- | Node CPU Hog | Chaos scenario that hogs up the CPU on a defined node for a specific amount of time | :heavy_check_mark: | - - - diff --git a/docs/node_scenarios.md b/docs/node_scenarios.md index 864970b2..b3b035e0 100644 --- a/docs/node_scenarios.md +++ b/docs/node_scenarios.md @@ -16,7 +16,7 @@ Following node chaos scenarios are supported: **NOTE**: node_start_scenario, node_stop_scenario, node_stop_start_scenario, node_termination_scenario, node_reboot_scenario and stop_start_kubelet_scenario are supported only on AWS and GCP as of now. -#### AWS +#### AWS **NOTE**: For clusters with AWS make sure [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) is installed and properly [configured](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html) using an AWS account @@ -24,9 +24,9 @@ Following node chaos scenarios are supported: **NOTE**: For clusters with GCP make sure [GCP CLI](https://cloud.google.com/sdk/docs/install#linux) is installed. A google service account is required to give proper authentication to GCP for node actions. See [here](https://cloud.google.com/docs/authentication/getting-started) for how to create a service account. - + **NOTE**: A user with 'resourcemanager.projects.setIamPolicy' permission is required to grant project-level permissions to the service account. - + After creating the service account you'll need to enable the account using the following: ```export GOOGLE_APPLICATION_CREDENTIALS=""``` #### OPENSTACK @@ -47,7 +47,7 @@ You will also need to create a service principal and give it the correct access, To properly run the service principal requires “Azure Active Directory Graph/Application.ReadWrite.OwnedBy” api permission granted and “User Access Administrator” -Before running you'll need to set the following: +Before running you'll need to set the following: 1. Login using ```az login``` 2. ```export AZURE_TENANT_ID=``` @@ -87,15 +87,15 @@ node_scenarios: label_selector: node-role.kubernetes.io/infra instance_kill_count: 1 timeout: 120 - - actions: + - actions: - stop_start_helper_node_scenario # node chaos scenario for helper node - instance_kill_count: 1 - timeout: 120 + instance_kill_count: 1 + timeout: 120 helper_node_ip: # ip address of the helper node service: # check status of the services on the helper node - haproxy - dhcpd - named ssh_private_key: /root/.ssh/id_rsa # ssh key to access the helper node - cloud_type: openstack + cloud_type: openstack ``` diff --git a/docs/pod_scenarios.md b/docs/pod_scenarios.md index 84aa0a05..24e4c5e4 100644 --- a/docs/pod_scenarios.md +++ b/docs/pod_scenarios.md @@ -1,5 +1,5 @@ ### Pod Scenarios -Kraken consumes [Powerfulseal](https://github.com/powerfulseal/powerfulseal) under the hood to run the pod scenarios. +Kraken consumes [Powerfulseal](https://github.com/powerfulseal/powerfulseal) under the hood to run the pod scenarios. #### Pod chaos scenarios diff --git a/docs/time_scenarios.md b/docs/time_scenarios.md index b407f863..a6d80a8d 100644 --- a/docs/time_scenarios.md +++ b/docs/time_scenarios.md @@ -27,4 +27,4 @@ time_scenarios: - action: skew_date object_type: node label_selector: node-role.kubernetes.io/worker -``` \ No newline at end of file +``` diff --git a/kraken/invoke/command.py b/kraken/invoke/command.py index b9d806cd..0803f4a3 100644 --- a/kraken/invoke/command.py +++ b/kraken/invoke/command.py @@ -5,9 +5,9 @@ # Invokes a given command and returns the stdout def invoke(command): try: - output = subprocess.Popen(command, shell=True, - universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + output = subprocess.Popen( + command, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, err) = output.communicate() except Exception as e: logging.error("Failed to run %s, error: %s" % (command, e)) diff --git a/kraken/kubernetes/client.py b/kraken/kubernetes/client.py index fe1d4bdf..e5e7dac1 100644 --- a/kraken/kubernetes/client.py +++ b/kraken/kubernetes/client.py @@ -55,8 +55,11 @@ def list_pods(namespace): try: ret = cli.list_namespaced_pod(namespace, pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->list_namespaced_pod: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->list_namespaced_pod: %s\n" + % e + ) for pod in ret.items: pods.append(pod.metadata.name) return pods @@ -76,11 +79,18 @@ def get_all_pods(label_selector=None): # Execute command in pod def exec_cmd_in_pod(command, pod_name, namespace): - exec_command = ['bash', '-c', command] + exec_command = ["bash", "-c", command] try: - ret = stream(cli.connect_get_namespaced_pod_exec, pod_name, namespace, - command=exec_command, stderr=True, stdin=False, - stdout=True, tty=False) + ret = stream( + cli.connect_get_namespaced_pod_exec, + pod_name, + namespace, + command=exec_command, + stderr=True, + stdin=False, + stdout=True, + tty=False, + ) except Exception: return False return ret @@ -91,8 +101,11 @@ def get_node_status(node): try: node_info = cli.read_node_status(node, pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->read_node_status: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->read_node_status: %s\n" + % e + ) for condition in node_info.status.conditions: if condition.type == "Ready": return condition.status @@ -107,8 +120,11 @@ def monitor_nodes(): try: node_info = cli.read_node_status(node, pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->read_node_status: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->read_node_status: %s\n" + % e + ) for condition in node_info.status.conditions: if condition.type == "KernelDeadlock": node_kerneldeadlock_status = condition.status @@ -116,10 +132,7 @@ def monitor_nodes(): node_ready_status = condition.status else: continue - if ( - node_kerneldeadlock_status != "False" # noqa - or node_ready_status != "True" # noqa - ): + if node_kerneldeadlock_status != "False" or node_ready_status != "True": # noqa # noqa notready_nodes.append(node) if len(notready_nodes) != 0: status = False @@ -135,15 +148,15 @@ def monitor_namespace(namespace): notready_pods = [] for pod in pods: try: - pod_info = cli.read_namespaced_pod_status(pod, namespace, - pretty=True) + pod_info = cli.read_namespaced_pod_status(pod, namespace, pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->read_namespaced_pod_status: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->read_namespaced_pod_status: %s\n" + % e + ) pod_status = pod_info.status.phase - if pod_status != "Running" \ - and pod_status != "Completed" \ - and pod_status != "Succeeded": + if pod_status != "Running" and pod_status != "Completed" and pod_status != "Succeeded": notready_pods.append(pod) if len(notready_pods) != 0: status = False @@ -154,10 +167,8 @@ def monitor_namespace(namespace): # Monitor component namespace def monitor_component(iteration, component_namespace): - watch_component_status, failed_component_pods = \ - monitor_namespace(component_namespace) - logging.info("Iteration %s: %s: %s" - % (iteration, component_namespace, watch_component_status)) + watch_component_status, failed_component_pods = monitor_namespace(component_namespace) + logging.info("Iteration %s: %s: %s" % (iteration, component_namespace, watch_component_status)) return watch_component_status, failed_component_pods @@ -178,7 +189,7 @@ def find_kraken_node(): runcommand.invoke("kubectl config set-context --current --namespace=" + str(kraken_project)) pod_json_str = runcommand.invoke("kubectl get pods/" + str(kraken_pod_name) + " -o json") pod_json = json.loads(pod_json_str) - node_name = pod_json['spec']['nodeName'] + node_name = pod_json["spec"]["nodeName"] # Reset to the default project runcommand.invoke("kubectl config set-context --current --namespace=default") diff --git a/kraken/litmus/common_litmus.py b/kraken/litmus/common_litmus.py index 155d8ef6..1b2385e2 100644 --- a/kraken/litmus/common_litmus.py +++ b/kraken/litmus/common_litmus.py @@ -6,12 +6,13 @@ # Install litmus and wait until pod is running def install_litmus(version): - runcommand.invoke("kubectl apply -f " - "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version) + runcommand.invoke("kubectl apply -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version) - runcommand.invoke("oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' " - "[ { \"op\": \"add\", \"path\": \"/spec/template/spec/containers/0/env/-\", " - "\"value\": { \"name\": \"ANALYTICS\", \"value\": \"FALSE\" } } ]'") + runcommand.invoke( + "oc patch -n litmus deployment.apps/chaos-operator-ce --type=json --patch ' " + '[ { "op": "add", "path": "/spec/template/spec/containers/0/env/-", ' + '"value": { "name": "ANALYTICS", "value": "FALSE" } } ]\'' + ) runcommand.invoke("oc wait deploy -n litmus chaos-operator-ce --for=condition=Available") @@ -19,14 +20,13 @@ def install_litmus(version): def deploy_all_experiments(version_string): if not version_string.startswith("v"): - logging.error("Incorrect version string for litmus, needs to start with 'v' " - "followed by a number") + logging.error("Incorrect version string for litmus, needs to start with 'v' " "followed by a number") sys.exit(1) version = version_string[1:] - runcommand.invoke("kubectl apply -f " - "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" - % version) + runcommand.invoke( + "kubectl apply -f " "https://hub.litmuschaos.io/api/chaos/%s?file=charts/generic/experiments.yaml" % version + ) def delete_experiments(): @@ -35,16 +35,18 @@ def delete_experiments(): # Check status of experiment def check_experiment(engine_name, experiment_name, namespace): - chaos_engine = runcommand.invoke("kubectl get chaosengines/%s -n %s -o jsonpath=" - "'{.status.engineStatus}'" % (engine_name, namespace)) + chaos_engine = runcommand.invoke( + "kubectl get chaosengines/%s -n %s -o jsonpath=" "'{.status.engineStatus}'" % (engine_name, namespace) + ) engine_status = chaos_engine.strip() max_tries = 30 engine_counter = 0 while engine_status.lower() != "running" and engine_status.lower() != "completed": time.sleep(10) logging.info("Waiting for engine to start running.") - chaos_engine = runcommand.invoke("kubectl get chaosengines/%s -n %s -o jsonpath=" - "'{.status.engineStatus}'" % (engine_name, namespace)) + chaos_engine = runcommand.invoke( + "kubectl get chaosengines/%s -n %s -o jsonpath=" "'{.status.engineStatus}'" % (engine_name, namespace) + ) engine_status = chaos_engine.strip() if engine_counter >= max_tries: logging.error("Chaos engine took longer than 5 minutes to be running or complete") @@ -57,19 +59,21 @@ def check_experiment(engine_name, experiment_name, namespace): if not chaos_engine: return False - chaos_result = runcommand.invoke("kubectl get chaosresult %s" - "-%s -n %s -o " - "jsonpath='{.status.experimentstatus.verdict}'" - % (engine_name, experiment_name, namespace)) + chaos_result = runcommand.invoke( + "kubectl get chaosresult %s" + "-%s -n %s -o " + "jsonpath='{.status.experimentstatus.verdict}'" % (engine_name, experiment_name, namespace) + ) result_counter = 0 status = chaos_result.strip() while status == "Awaited": logging.info("Waiting for chaos result to finish, sleeping 10 seconds") time.sleep(10) - chaos_result = runcommand.invoke("kubectl get chaosresult %s" - "-%s -n %s -o " - "jsonpath='{.status.experimentstatus.verdict}'" - % (engine_name, experiment_name, namespace)) + chaos_result = runcommand.invoke( + "kubectl get chaosresult %s" + "-%s -n %s -o " + "jsonpath='{.status.experimentstatus.verdict}'" % (engine_name, experiment_name, namespace) + ) status = chaos_result.strip() if result_counter >= max_tries: logging.error("Chaos results took longer than 5 minutes to get a final result") @@ -82,10 +86,11 @@ def check_experiment(engine_name, experiment_name, namespace): if status == "Pass": return True else: - chaos_result = runcommand.invoke("kubectl get chaosresult %s" - "-%s -n %s -o jsonpath=" - "'{.status.experimentstatus.failStep}'" % - (engine_name, experiment_name, namespace)) + chaos_result = runcommand.invoke( + "kubectl get chaosresult %s" + "-%s -n %s -o jsonpath=" + "'{.status.experimentstatus.failStep}'" % (engine_name, experiment_name, namespace) + ) logging.info("Chaos result failed information: " + str(chaos_result)) return False @@ -99,5 +104,4 @@ def delete_chaos(namespace): # Uninstall litmus operator def uninstall_litmus(version): - runcommand.invoke("kubectl delete -f " - "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version) + runcommand.invoke("kubectl delete -f " "https://litmuschaos.github.io/litmus/litmus-operator-%s.yaml" % version) diff --git a/kraken/node_actions/abstract_node_scenarios.py b/kraken/node_actions/abstract_node_scenarios.py index eb645283..8aee5756 100644 --- a/kraken/node_actions/abstract_node_scenarios.py +++ b/kraken/node_actions/abstract_node_scenarios.py @@ -46,8 +46,9 @@ def stop_kubelet_scenario(self, instance_kill_count, node, timeout): logging.info("The kubelet of the node %s has been stopped" % (node)) logging.info("stop_kubelet_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to stop the kubelet of the node. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to stop the kubelet of the node. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("stop_kubelet_scenario injection failed!") sys.exit(1) @@ -64,12 +65,12 @@ def node_crash_scenario(self, instance_kill_count, node, timeout): try: logging.info("Starting node_crash_scenario injection") logging.info("Crashing the node %s" % (node)) - runcommand.invoke("oc debug node/" + node + " -- chroot /host " - "dd if=/dev/urandom of=/proc/sysrq-trigger") + runcommand.invoke( + "oc debug node/" + node + " -- chroot /host " "dd if=/dev/urandom of=/proc/sysrq-trigger" + ) logging.info("node_crash_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to crash the node. Encountered following exception: %s. " - "Test Failed" % (e)) + logging.error("Failed to crash the node. Encountered following exception: %s. " "Test Failed" % (e)) logging.error("node_crash_scenario injection failed!") sys.exit(1) diff --git a/kraken/node_actions/aws_node_scenarios.py b/kraken/node_actions/aws_node_scenarios.py index 9a4b23c7..51de2036 100644 --- a/kraken/node_actions/aws_node_scenarios.py +++ b/kraken/node_actions/aws_node_scenarios.py @@ -9,56 +9,42 @@ class AWS: def __init__(self): - self.boto_client = boto3.client('ec2') - self.boto_instance = boto3.resource('ec2').Instance('id') + self.boto_client = boto3.client("ec2") + self.boto_instance = boto3.resource("ec2").Instance("id") # Get the instance ID of the node def get_instance_id(self, node): - return self.boto_client.describe_instances( - Filters=[{'Name': 'private-dns-name', 'Values': [node]}] - )['Reservations'][0]['Instances'][0]['InstanceId'] + return self.boto_client.describe_instances(Filters=[{"Name": "private-dns-name", "Values": [node]}])[ + "Reservations" + ][0]["Instances"][0]["InstanceId"] # Start the node instance def start_instances(self, instance_id): - self.boto_client.start_instances( - InstanceIds=[instance_id] - ) + self.boto_client.start_instances(InstanceIds=[instance_id]) # Stop the node instance def stop_instances(self, instance_id): - self.boto_client.stop_instances( - InstanceIds=[instance_id] - ) + self.boto_client.stop_instances(InstanceIds=[instance_id]) # Terminate the node instance def terminate_instances(self, instance_id): - self.boto_client.terminate_instances( - InstanceIds=[instance_id] - ) + self.boto_client.terminate_instances(InstanceIds=[instance_id]) # Reboot the node instance def reboot_instances(self, instance_id): - self.boto_client.reboot_instances( - InstanceIds=[instance_id] - ) + self.boto_client.reboot_instances(InstanceIds=[instance_id]) # Wait until the node instance is running def wait_until_running(self, instance_id): - self.boto_instance.wait_until_running( - InstanceIds=[instance_id] - ) + self.boto_instance.wait_until_running(InstanceIds=[instance_id]) # Wait until the node instance is stopped def wait_until_stopped(self, instance_id): - self.boto_instance.wait_until_stopped( - InstanceIds=[instance_id] - ) + self.boto_instance.wait_until_stopped(InstanceIds=[instance_id]) # Wait until the node instance is terminated def wait_until_terminated(self, instance_id): - self.boto_instance.wait_until_terminated( - InstanceIds=[instance_id] - ) + self.boto_instance.wait_until_terminated(InstanceIds=[instance_id]) class aws_node_scenarios(abstract_node_scenarios): @@ -78,8 +64,9 @@ def node_start_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in running state" % (instance_id)) logging.info("node_start_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to start node instance. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("node_start_scenario injection failed!") sys.exit(1) @@ -95,8 +82,7 @@ def node_stop_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in stopped state" % (instance_id)) nodeaction.wait_for_unknown_status(node, timeout) except Exception as e: - logging.error("Failed to stop node instance. Encountered following exception: %s. " - "Test Failed" % (e)) + logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e)) logging.error("node_stop_scenario injection failed!") sys.exit(1) @@ -118,8 +104,9 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been terminated" % (instance_id)) logging.info("node_termination_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to terminate node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_termination_scenario injection failed!") sys.exit(1) @@ -136,7 +123,8 @@ def node_reboot_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been rebooted" % (instance_id)) logging.info("node_reboot_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to reboot node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_reboot_scenario injection failed!") sys.exit(1) diff --git a/kraken/node_actions/az_node_scenarios.py b/kraken/node_actions/az_node_scenarios.py index f873d48e..e5c1551b 100644 --- a/kraken/node_actions/az_node_scenarios.py +++ b/kraken/node_actions/az_node_scenarios.py @@ -12,13 +12,13 @@ class Azure: def __init__(self): - logging.info('azure ' + str(self)) + logging.info("azure " + str(self)) # Acquire a credential object using CLI-based authentication. credentials = DefaultAzureCredential() logging.info("credential " + str(credentials)) az_account = runcommand.invoke("az account list -o yaml") az_account_yaml = yaml.load(az_account, Loader=yaml.FullLoader) - subscription_id = az_account_yaml[0]['id'] + subscription_id = az_account_yaml[0]["id"] self.compute_client = ComputeManagementClient(credentials, subscription_id) # Get the instance ID of the node @@ -49,8 +49,7 @@ def reboot_instances(self, group_name, vm_name): self.compute_client.virtual_machines.begin_restart(group_name, vm_name) def get_vm_status(self, resource_group, vm_name): - statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name)\ - .statuses + statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses status = len(statuses) >= 2 and statuses[1] return status @@ -58,7 +57,7 @@ def get_vm_status(self, resource_group, vm_name): def wait_until_running(self, resource_group, vm_name, timeout): time_counter = 0 status = self.get_vm_status(resource_group, vm_name) - while status and status.code != 'PowerState/running': + while status and status.code != "PowerState/running": status = self.get_vm_status(resource_group, vm_name) logging.info("Vm %s is still not running, sleeping for 5 seconds" % vm_name) time.sleep(5) @@ -71,7 +70,7 @@ def wait_until_running(self, resource_group, vm_name, timeout): def wait_until_stopped(self, resource_group, vm_name, timeout): time_counter = 0 status = self.get_vm_status(resource_group, vm_name) - while status and status.code != 'PowerState/stopped': + while status and status.code != "PowerState/stopped": status = self.get_vm_status(resource_group, vm_name) logging.info("Vm %s is still stopping, sleeping for 5 seconds" % vm_name) time.sleep(5) @@ -82,13 +81,11 @@ def wait_until_stopped(self, resource_group, vm_name, timeout): # Wait until the node instance is terminated def wait_until_terminated(self, resource_group, vm_name): - statuses = self.compute_client.virtual_machines.instance_view(resource_group, - vm_name).statuses[0] + statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses[0] logging.info("vm status " + str(statuses)) while statuses.code == "ProvisioningState/deleting": try: - statuses = self.compute_client.virtual_machines.instance_view(resource_group, - vm_name).statuses[0] + statuses = self.compute_client.virtual_machines.instance_view(resource_group, vm_name).statuses[0] logging.info("Vm %s is still deleting, waiting 10 seconds" % vm_name) time.sleep(10) except Exception: @@ -114,8 +111,9 @@ def node_start_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in running state" % node) logging.info("node_start_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to start node instance. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("node_start_scenario injection failed!") sys.exit(1) @@ -131,8 +129,7 @@ def node_stop_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in stopped state" % node) nodeaction.wait_for_unknown_status(node, timeout) except Exception as e: - logging.error("Failed to stop node instance. Encountered following exception: %s. " - "Test Failed" % e) + logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % e) logging.error("node_stop_scenario injection failed!") sys.exit(1) @@ -142,8 +139,7 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): try: logging.info("Starting node_termination_scenario injection") resource_group = self.azure.get_instance_id(node) - logging.info("Terminating the node %s with instance ID: %s " - % (node, resource_group)) + logging.info("Terminating the node %s with instance ID: %s " % (node, resource_group)) self.azure.terminate_instances(resource_group, node) self.azure.wait_until_terminated(resource_group, node) for _ in range(timeout): @@ -155,8 +151,9 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been terminated" % node) logging.info("node_termination_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to terminate node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_termination_scenario injection failed!") sys.exit(1) @@ -173,7 +170,8 @@ def node_reboot_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been rebooted" % (node)) logging.info("node_reboot_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to reboot node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_reboot_scenario injection failed!") sys.exit(1) diff --git a/kraken/node_actions/common_node_functions.py b/kraken/node_actions/common_node_functions.py index 37bd573a..21da8d48 100644 --- a/kraken/node_actions/common_node_functions.py +++ b/kraken/node_actions/common_node_functions.py @@ -11,8 +11,7 @@ def get_node(node_name, label_selector): if node_name in kubecli.list_killable_nodes(): return node_name else: - logging.info("Node with provided node_name does not exist or the node might " - "be in NotReady state.") + logging.info("Node with provided node_name does not exist or the node might " "be in NotReady state.") nodes = kubecli.list_killable_nodes(label_selector) if not nodes: raise Exception("Ready nodes with the provided label selector do not exist") @@ -24,8 +23,7 @@ def get_node(node_name, label_selector): # Wait till node status becomes Ready def wait_for_ready_status(node, timeout): - runcommand.invoke("kubectl wait --for=condition=Ready " - "node/" + node + " --timeout=" + str(timeout) + "s") + runcommand.invoke("kubectl wait --for=condition=Ready " "node/" + node + " --timeout=" + str(timeout) + "s") # Wait till node status becomes NotReady @@ -40,9 +38,9 @@ def wait_for_unknown_status(node, timeout): # Get the ip of the cluster node def get_node_ip(node): - return runcommand.invoke("kubectl get node %s -o " - "jsonpath='{.status.addresses[?(@.type==\"InternalIP\")].address}'" - % (node)) + return runcommand.invoke( + "kubectl get node %s -o " "jsonpath='{.status.addresses[?(@.type==\"InternalIP\")].address}'" % (node) + ) def check_service_status(node, service, ssh_private_key, timeout): @@ -55,18 +53,20 @@ def check_service_status(node, service, ssh_private_key, timeout): time.sleep(sleeper) i += sleeper logging.info("Trying to ssh to instance: %s" % (node)) - connection = ssh.connect(node, username='root', key_filename=ssh_private_key, - timeout=800, banner_timeout=400) + connection = ssh.connect( + node, username="root", key_filename=ssh_private_key, timeout=800, banner_timeout=400 + ) if connection is None: break except Exception: pass for service_name in service: logging.info("Checking status of Service: %s" % (service_name)) - stdin, stdout, stderr = ssh.exec_command("systemctl status %s | grep '^ Active' " - "| awk '{print $2}'" % (service_name)) + stdin, stdout, stderr = ssh.exec_command( + "systemctl status %s | grep '^ Active' " "| awk '{print $2}'" % (service_name) + ) service_status = stdout.readlines()[0] logging.info("Status of service %s is %s \n" % (service_name, service_status.strip())) - if(service_status.strip() != "active"): + if service_status.strip() != "active": logging.error("Service %s is in %s state" % (service_name, service_status.strip())) ssh.close() diff --git a/kraken/node_actions/gcp_node_scenarios.py b/kraken/node_actions/gcp_node_scenarios.py index 9bc2b432..14a9661a 100644 --- a/kraken/node_actions/gcp_node_scenarios.py +++ b/kraken/node_actions/gcp_node_scenarios.py @@ -12,58 +12,50 @@ class GCP: def __init__(self): - self.project = runcommand.invoke('gcloud config get-value project').split('/n')[0].strip() + self.project = runcommand.invoke("gcloud config get-value project").split("/n")[0].strip() logging.info("project " + str(self.project) + "!") credentials = GoogleCredentials.get_application_default() - self.client = discovery.build('compute', 'v1', credentials=credentials, - cache_discovery=False) + self.client = discovery.build("compute", "v1", credentials=credentials, cache_discovery=False) # Get the instance ID of the node def get_instance_id(self, node): zone_request = self.client.zones().list(project=self.project) while zone_request is not None: zone_response = zone_request.execute() - for zone in zone_response['items']: - instances_request = self.client.instances().list(project=self.project, - zone=zone['name']) + for zone in zone_response["items"]: + instances_request = self.client.instances().list(project=self.project, zone=zone["name"]) while instances_request is not None: instance_response = instances_request.execute() if "items" in instance_response.keys(): - for instance in instance_response['items']: - if instance['name'] in node: - return instance['name'], zone['name'] + for instance in instance_response["items"]: + if instance["name"] in node: + return instance["name"], zone["name"] instances_request = self.client.zones().list_next( - previous_request=instances_request, - previous_response=instance_response) - zone_request = self.client.zones().list_next(previous_request=zone_request, - previous_response=zone_response) - logging.info('no instances ') + previous_request=instances_request, previous_response=instance_response + ) + zone_request = self.client.zones().list_next(previous_request=zone_request, previous_response=zone_response) + logging.info("no instances ") # Start the node instance def start_instances(self, zone, instance_id): - self.client.instances().start(project=self.project, zone=zone, instance=instance_id)\ - .execute() + self.client.instances().start(project=self.project, zone=zone, instance=instance_id).execute() # Stop the node instance def stop_instances(self, zone, instance_id): - self.client.instances().stop(project=self.project, zone=zone, instance=instance_id)\ - .execute() + self.client.instances().stop(project=self.project, zone=zone, instance=instance_id).execute() # Start the node instance def suspend_instances(self, zone, instance_id): - self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id)\ - .execute() + self.client.instances().suspend(project=self.project, zone=zone, instance=instance_id).execute() # Terminate the node instance def terminate_instances(self, zone, instance_id): - self.client.instances().delete(project=self.project, zone=zone, instance=instance_id)\ - .execute() + self.client.instances().delete(project=self.project, zone=zone, instance=instance_id).execute() # Reboot the node instance def reboot_instances(self, zone, instance_id): - response = self.client.instances().reset(project=self.project, zone=zone, - instance=instance_id).execute() - logging.info('response reboot ' + str(response)) + response = self.client.instances().reset(project=self.project, zone=zone, instance=instance_id).execute() + logging.info("response reboot " + str(response)) # Get instance status def get_instance_status(self, zone, instance_id, expected_status, timeout): @@ -72,10 +64,9 @@ def get_instance_status(self, zone, instance_id, expected_status, timeout): i = 0 sleeper = 5 while i <= timeout: - instStatus = self.client.instances().get(project=self.project, zone=zone, - instance=instance_id).execute() - logging.info("Status of vm " + str(instStatus['status'])) - if instStatus['status'] == expected_status: + instStatus = self.client.instances().get(project=self.project, zone=zone, instance=instance_id).execute() + logging.info("Status of vm " + str(instStatus["status"])) + if instStatus["status"] == expected_status: return True time.sleep(sleeper) i += sleeper @@ -83,15 +74,15 @@ def get_instance_status(self, zone, instance_id, expected_status, timeout): # Wait until the node instance is suspended def wait_until_suspended(self, zone, instance_id, timeout): - self.get_instance_status(zone, instance_id, 'SUSPENDED', timeout) + self.get_instance_status(zone, instance_id, "SUSPENDED", timeout) # Wait until the node instance is running def wait_until_running(self, zone, instance_id, timeout): - self.get_instance_status(zone, instance_id, 'RUNNING', timeout) + self.get_instance_status(zone, instance_id, "RUNNING", timeout) # Wait until the node instance is stopped def wait_until_stopped(self, zone, instance_id, timeout): - self.get_instance_status(zone, instance_id, 'TERMINATED', timeout) + self.get_instance_status(zone, instance_id, "TERMINATED", timeout) # Wait until the node instance is terminated def wait_until_terminated(self, zone, instance_id, timeout): @@ -99,9 +90,10 @@ def wait_until_terminated(self, zone, instance_id, timeout): i = 0 sleeper = 5 while i <= timeout: - instStatus = self.client.instances().get(project=self.project, zone=zone, - instance=instance_id).execute() - logging.info("Status of vm " + str(instStatus['status'])) + instStatus = ( + self.client.instances().get(project=self.project, zone=zone, instance=instance_id).execute() + ) + logging.info("Status of vm " + str(instStatus["status"])) time.sleep(sleeper) except Exception as e: logging.info("here " + str(e)) @@ -125,14 +117,15 @@ def node_start_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in running state" % instance_id) logging.info("node_start_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to start node instance. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("node_start_scenario injection failed!") sys.exit(1) # Node scenario to stop the node def node_stop_scenario(self, instance_kill_count, node, timeout): - logging.info('stop scenario') + logging.info("stop scenario") for _ in range(instance_kill_count): try: logging.info("Starting node_stop_scenario injection") @@ -143,8 +136,7 @@ def node_stop_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in stopped state" % instance_id) nodeaction.wait_for_unknown_status(node, timeout) except Exception as e: - logging.error("Failed to stop node instance. Encountered following exception: %s. " - "Test Failed" % (e)) + logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e)) logging.error("node_stop_scenario injection failed!") sys.exit(1) @@ -166,8 +158,9 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been terminated" % instance_id) logging.info("node_termination_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to terminate node instance. Encountered following exception:" - " %s. Test Failed" % e) + logging.error( + "Failed to terminate node instance. Encountered following exception:" " %s. Test Failed" % e + ) logging.error("node_termination_scenario injection failed!") sys.exit(1) @@ -183,7 +176,8 @@ def node_reboot_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s has been rebooted" % instance_id) logging.info("node_reboot_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to reboot node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_reboot_scenario injection failed!") sys.exit(1) diff --git a/kraken/node_actions/general_cloud_node_scenarios.py b/kraken/node_actions/general_cloud_node_scenarios.py index cb4399f3..248ffc6a 100644 --- a/kraken/node_actions/general_cloud_node_scenarios.py +++ b/kraken/node_actions/general_cloud_node_scenarios.py @@ -13,20 +13,16 @@ def __init__(self): # Node scenario to start the node def node_start_scenario(self, instance_kill_count, node, timeout): - logging.info("Node start is not set up yet for this cloud type, " - "no action is going to be taken") + logging.info("Node start is not set up yet for this cloud type, " "no action is going to be taken") # Node scenario to stop the node def node_stop_scenario(self, instance_kill_count, node, timeout): - logging.info("Node stop is not set up yet for this cloud type," - " no action is going to be taken") + logging.info("Node stop is not set up yet for this cloud type," " no action is going to be taken") # Node scenario to terminate the node def node_termination_scenario(self, instance_kill_count, node, timeout): - logging.info("Node termination is not set up yet for this cloud type, " - "no action is going to be taken") + logging.info("Node termination is not set up yet for this cloud type, " "no action is going to be taken") # Node scenario to reboot the node def node_reboot_scenario(self, instance_kill_count, node, timeout): - logging.info("Node reboot is not set up yet for this cloud type," - " no action is going to be taken") + logging.info("Node reboot is not set up yet for this cloud type," " no action is going to be taken") diff --git a/kraken/node_actions/openstack_node_scenarios.py b/kraken/node_actions/openstack_node_scenarios.py index c7f664df..d7a3b583 100644 --- a/kraken/node_actions/openstack_node_scenarios.py +++ b/kraken/node_actions/openstack_node_scenarios.py @@ -39,12 +39,12 @@ def get_instance_status(self, node, expected_status, timeout): i = 0 sleeper = 1 while i <= timeout: - instStatus = runcommand.invoke("openstack server show %s | tr -d ' ' |" - "grep '^|status' |" - "cut -d '|' -f3 | tr -d '\n'" % (node)) + instStatus = runcommand.invoke( + "openstack server show %s | tr -d ' ' |" "grep '^|status' |" "cut -d '|' -f3 | tr -d '\n'" % (node) + ) logging.info("instance status is %s" % (instStatus)) logging.info("expected status is %s" % (expected_status)) - if (instStatus.strip() == expected_status): + if instStatus.strip() == expected_status: logging.info("instance status has reached desired status %s" % (instStatus)) return True time.sleep(sleeper) @@ -53,7 +53,7 @@ def get_instance_status(self, node, expected_status, timeout): # Get the openstack instance name def get_openstack_nodename(self, os_node_ip): server_list = runcommand.invoke("openstack server list | grep %s" % (os_node_ip)) - list_of_servers = server_list.split('\n') + list_of_servers = server_list.split("\n") for item in list_of_servers: items = item.split("|") counter = 0 @@ -63,7 +63,7 @@ def get_openstack_nodename(self, os_node_ip): logging.info("Openstack node name is %s " % (node_name)) counter += 1 continue - item_list = i.split('=') + item_list = i.split("=") if len(item_list) == 2 and item_list[-1].strip() == os_node_ip: return node_name counter += 1 @@ -87,8 +87,9 @@ def node_start_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance ID: %s is in running state" % (node)) logging.info("node_start_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to start node instance. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("node_start_scenario injection failed!") sys.exit(1) @@ -105,8 +106,7 @@ def node_stop_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance name: %s is in stopped state" % (node)) nodeaction.wait_for_ready_status(node, timeout) except Exception as e: - logging.error("Failed to stop node instance. Encountered following exception: %s. " - "Test Failed" % (e)) + logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e)) logging.error("node_stop_scenario injection failed!") sys.exit(1) @@ -124,8 +124,9 @@ def node_reboot_scenario(self, instance_kill_count, node, timeout): logging.info("Node with instance name: %s has been rebooted" % (node)) logging.info("node_reboot_scenario has been successfuly injected!") except Exception as e: - logging.error("Failed to reboot node instance. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error( + "Failed to reboot node instance. Encountered following exception:" " %s. Test Failed" % (e) + ) logging.error("node_reboot_scenario injection failed!") sys.exit(1) @@ -141,8 +142,9 @@ def helper_node_start_scenario(self, instance_kill_count, node_ip, timeout): logging.info("Helper node with IP: %s is in running state" % (node_ip)) logging.info("node_start_scenario has been successfully injected!") except Exception as e: - logging.error("Failed to start node instance. Encountered following " - "exception: %s. Test Failed" % (e)) + logging.error( + "Failed to start node instance. Encountered following " "exception: %s. Test Failed" % (e) + ) logging.error("helper_node_start_scenario injection failed!") sys.exit(1) @@ -157,8 +159,7 @@ def helper_node_stop_scenario(self, instance_kill_count, node_ip, timeout): self.openstackcloud.wait_until_stopped(openstack_node_name) logging.info("Helper node with IP: %s is in stopped state" % (node_ip)) except Exception as e: - logging.error("Failed to stop node instance. Encountered following exception: %s. " - "Test Failed" % (e)) + logging.error("Failed to stop node instance. Encountered following exception: %s. " "Test Failed" % (e)) logging.error("helper_node_stop_scenario injection failed!") sys.exit(1) @@ -169,7 +170,6 @@ def helper_node_service_status(self, node_ip, service, ssh_private_key, timeout) logging.info("Service status checked on %s" % (node_ip)) logging.info("Check service status is successfuly injected!") except Exception as e: - logging.error("Failed to check service status. Encountered following exception:" - " %s. Test Failed" % (e)) + logging.error("Failed to check service status. Encountered following exception:" " %s. Test Failed" % (e)) logging.error("helper_node_service_status injection failed!") sys.exit(1) diff --git a/kraken/performance_dashboards/setup.py b/kraken/performance_dashboards/setup.py index 57ebee47..8e63dd62 100644 --- a/kraken/performance_dashboards/setup.py +++ b/kraken/performance_dashboards/setup.py @@ -12,7 +12,7 @@ def setup(repo): # delete repo to clone the latest copy if exists subprocess.run(delete_repo, shell=True, universal_newlines=True, timeout=45) # clone the repo - git.Repo.clone_from(repo, '/tmp/performance-dashboards') + git.Repo.clone_from(repo, "/tmp/performance-dashboards") # deploy performance dashboards subprocess.run(command, shell=True, universal_newlines=True) except Exception as e: diff --git a/kraken/time_actions/common_time_functions.py b/kraken/time_actions/common_time_functions.py index d9d9c758..08a8037c 100644 --- a/kraken/time_actions/common_time_functions.py +++ b/kraken/time_actions/common_time_functions.py @@ -23,52 +23,52 @@ def pod_exec(pod_name, command, namespace): def node_debug(node_name, command): - response = runcommand.invoke("oc debug node/" + node_name + ' -- chroot /host ' + command) + response = runcommand.invoke("oc debug node/" + node_name + " -- chroot /host " + command) return response def skew_time(scenario): skew_command = "date --set " - if scenario['action'] == "skew_date": + if scenario["action"] == "skew_date": skewed_date = "00-01-01" skew_command += skewed_date - elif scenario['action'] == "skew_time": + elif scenario["action"] == "skew_time": skewed_time = "01:01:01" skew_command += skewed_time if "node" in scenario["object_type"]: node_names = [] - if "object_name" in scenario.keys() and scenario['object_name']: - node_names = scenario['object_name'] - elif "label_selector" in scenario.keys() and scenario['label_selector']: - node_names = kubecli.list_nodes(scenario['label_selector']) + if "object_name" in scenario.keys() and scenario["object_name"]: + node_names = scenario["object_name"] + elif "label_selector" in scenario.keys() and scenario["label_selector"]: + node_names = kubecli.list_nodes(scenario["label_selector"]) for node in node_names: node_debug(node, skew_command) logging.info("Reset date/time on node " + str(node)) return "node", node_names - elif "pod" in scenario['object_type']: + elif "pod" in scenario["object_type"]: pod_names = [] - if "object_name" in scenario.keys() and scenario['object_name']: - for name in scenario['object_name']: + if "object_name" in scenario.keys() and scenario["object_name"]: + for name in scenario["object_name"]: if "namespace" not in scenario.keys(): logging.error("Need to set namespace when using pod name") sys.exit(1) - pod_names.append([name, scenario['namespace']]) - elif "label_selector" in scenario.keys() and scenario['label_selector']: - pod_names = kubecli.get_all_pods(scenario['label_selector']) - elif "namespace" in scenario.keys() and scenario['namespace']: - pod_names = kubecli.list_pods(scenario['namespace']) + pod_names.append([name, scenario["namespace"]]) + elif "label_selector" in scenario.keys() and scenario["label_selector"]: + pod_names = kubecli.get_all_pods(scenario["label_selector"]) + elif "namespace" in scenario.keys() and scenario["namespace"]: + pod_names = kubecli.list_pods(scenario["namespace"]) counter = 0 for pod_name in pod_names: - pod_names[counter] = [pod_name, scenario['namespace']] + pod_names[counter] = [pod_name, scenario["namespace"]] counter += 1 for pod in pod_names: if len(pod) > 1: pod_exec(pod[0], skew_command, pod[1]) else: - pod_exec(pod, skew_command, scenario['namespace']) + pod_exec(pod, skew_command, scenario["namespace"]) logging.info("Reset date/time on pod " + str(pod[0])) return "pod", pod_names @@ -76,8 +76,7 @@ def skew_time(scenario): # From kubectl/oc command get time output def parse_string_date(obj_datetime): try: - date_line = re.search(r'[a-zA-Z0-9_() .]*\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \w{3} ' - r'\d{4}\W*', obj_datetime) + date_line = re.search(r"[a-zA-Z0-9_() .]*\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \w{3} " r"\d{4}\W*", obj_datetime) return date_line.group().strip() except Exception: return "" @@ -87,7 +86,7 @@ def parse_string_date(obj_datetime): def string_to_date(obj_datetime): obj_datetime = parse_string_date(obj_datetime) try: - date_time_obj = datetime.datetime.strptime(obj_datetime, '%a %b %d %H:%M:%S %Z %Y') + date_time_obj = datetime.datetime.strptime(obj_datetime, "%a %b %d %H:%M:%S %Z %Y") return date_time_obj except Exception: return datetime.datetime(datetime.MINYEAR, 1, 1) @@ -105,14 +104,12 @@ def check_date_time(object_type, names): counter = 0 while not first_date_time < node_datetime < datetime.datetime.utcnow(): time.sleep(10) - logging.info("Date/time on node %s still not reset, waiting 10 seconds and retrying" - % node_name) + logging.info("Date/time on node %s still not reset, waiting 10 seconds and retrying" % node_name) node_datetime_string = node_debug(node_name, skew_command) node_datetime = string_to_date(node_datetime_string) counter += 1 if counter > max_retries: - logging.error("Date and time in node %s didn't reset properly" - % node_name) + logging.error("Date and time in node %s didn't reset properly" % node_name) not_reset.append(node_name) break if counter < max_retries: @@ -125,15 +122,13 @@ def check_date_time(object_type, names): pod_datetime = string_to_date(pod_datetime_string) while not first_date_time < pod_datetime < datetime.datetime.utcnow(): time.sleep(10) - logging.info("Date/time on pod %s still not reset, waiting 10 seconds and retrying" - % pod_name[0]) + logging.info("Date/time on pod %s still not reset, waiting 10 seconds and retrying" % pod_name[0]) first_date_time = datetime.datetime.utcnow() pod_datetime = pod_exec(pod_name[0], skew_command, pod_name[1]) pod_datetime = string_to_date(pod_datetime) counter += 1 if counter > max_retries: - logging.error("Date and time in pod %s didn't reset properly" - % pod_name[0]) + logging.error("Date and time in pod %s didn't reset properly" % pod_name[0]) not_reset.append(pod_name[0]) break if counter < max_retries: diff --git a/requirements.txt b/requirements.txt index e947263a..38224a06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,3 @@ python-openstackclient gitpython paramiko setuptools -tox diff --git a/run_kraken.py b/run_kraken.py index faa83a6b..02041b81 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -25,22 +25,24 @@ # Get the node scenarios object of specfied cloud type def get_node_scenario_object(node_scenario): - if "cloud_type" not in node_scenario.keys() or node_scenario['cloud_type'] == "generic": + if "cloud_type" not in node_scenario.keys() or node_scenario["cloud_type"] == "generic": global node_general node_general = True return general_node_scenarios() - if node_scenario['cloud_type'] == 'aws': + if node_scenario["cloud_type"] == "aws": return aws_node_scenarios() - elif node_scenario['cloud_type'] == 'gcp': + elif node_scenario["cloud_type"] == "gcp": return gcp_node_scenarios() - elif node_scenario['cloud_type'] == 'openstack': + elif node_scenario["cloud_type"] == "openstack": return openstack_node_scenarios() - elif node_scenario['cloud_type'] == 'azure' or node_scenario['cloud_type'] == 'az': + elif node_scenario["cloud_type"] == "azure" or node_scenario["cloud_type"] == "az": return azure_node_scenarios() else: - logging.error("Cloud type " + node_scenario['cloud_type'] + " is not currently supported; " - "try using 'generic' if wanting to stop/start kubelet or fork bomb on any " - "cluster") + logging.error( + "Cloud type " + node_scenario["cloud_type"] + " is not currently supported; " + "try using 'generic' if wanting to stop/start kubelet or fork bomb on any " + "cluster" + ) sys.exit(1) @@ -77,19 +79,23 @@ def inject_node_scenario(action, node_scenario, node_scenario_object): elif action == "node_crash_scenario": node_scenario_object.node_crash_scenario(instance_kill_count, node, timeout) elif action == "stop_start_helper_node_scenario": - if node_scenario['cloud_type'] != "openstack": - logging.error("Scenario: " + action + " is not supported for " - "cloud type " + node_scenario['cloud_type'] + ", skipping action") + if node_scenario["cloud_type"] != "openstack": + logging.error( + "Scenario: " + action + " is not supported for " + "cloud type " + node_scenario["cloud_type"] + ", skipping action" + ) else: - if not node_scenario['helper_node_ip']: + if not node_scenario["helper_node_ip"]: logging.error("Helper node IP address is not provided") sys.exit(1) node_scenario_object.helper_node_stop_start_scenario( - instance_kill_count, node_scenario['helper_node_ip'], timeout) + instance_kill_count, node_scenario["helper_node_ip"], timeout + ) node_scenario_object.helper_node_service_status( - node_scenario['helper_node_ip'], service, ssh_private_key, timeout) + node_scenario["helper_node_ip"], service, ssh_private_key, timeout + ) else: - logging.info('There is no node action that matches %s, skipping scenario' % action) + logging.info("There is no node action that matches %s, skipping scenario" % action) # Get cerberus status @@ -101,15 +107,16 @@ def cerberus_integration(config): logging.error("url where Cerberus publishes True/False signal is not provided.") sys.exit(1) cerberus_status = requests.get(cerberus_url).content - cerberus_status = True if cerberus_status == b'True' else False + cerberus_status = True if cerberus_status == b"True" else False if not cerberus_status: - logging.error("Received a no-go signal from Cerberus, looks like " - "the cluster is unhealthy. Please check the Cerberus " - "report for more details. Test failed.") + logging.error( + "Received a no-go signal from Cerberus, looks like " + "the cluster is unhealthy. Please check the Cerberus " + "report for more details. Test failed." + ) sys.exit(1) else: - logging.info("Received a go signal from Ceberus, the cluster is healthy. " - "Test passed.") + logging.info("Received a go signal from Ceberus, the cluster is healthy. " "Test passed.") return cerberus_status @@ -118,35 +125,36 @@ def publish_kraken_status(config, failed_post_scenarios): cerberus_status = cerberus_integration(config) if not cerberus_status: if failed_post_scenarios: - if config['kraken']['exit_on_failure']: - logging.info("Cerberus status is not healthy and post action scenarios " - "are still failing, exiting kraken run") + if config["kraken"]["exit_on_failure"]: + logging.info( + "Cerberus status is not healthy and post action scenarios " "are still failing, exiting kraken run" + ) sys.exit(1) else: - logging.info("Cerberus status is not healthy and post action scenarios " - "are still failing") + logging.info("Cerberus status is not healthy and post action scenarios " "are still failing") else: if failed_post_scenarios: - if config['kraken']['exit_on_failure']: - logging.info("Cerberus status is healthy but post action scenarios " - "are still failing, exiting kraken run") + if config["kraken"]["exit_on_failure"]: + logging.info( + "Cerberus status is healthy but post action scenarios " "are still failing, exiting kraken run" + ) sys.exit(1) else: - logging.info("Cerberus status is healthy but post action scenarios " - "are still failing") + logging.info("Cerberus status is healthy but post action scenarios " "are still failing") def run_post_action(kubeconfig_path, scenario, pre_action_output=""): if scenario.endswith(".yaml") or scenario.endswith(".yml"): - action_output = runcommand.invoke("powerfulseal autonomous " - "--use-pod-delete-instead-of-ssh-kill" - " --policy-file %s --kubeconfig %s --no-cloud" - " --inventory-kubernetes --headless" - % (scenario, kubeconfig_path)) + action_output = runcommand.invoke( + "powerfulseal autonomous " + "--use-pod-delete-instead-of-ssh-kill" + " --policy-file %s --kubeconfig %s --no-cloud" + " --inventory-kubernetes --headless" % (scenario, kubeconfig_path) + ) # read output to make sure no error if "ERROR" in action_output: - action_output.split("ERROR")[1].split('\n')[0] + action_output.split("ERROR")[1].split("\n")[0] if not pre_action_output: logging.info("Powerful seal pre action check failed for " + str(scenario)) return False @@ -159,7 +167,7 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""): if pre_action_output == action_output: logging.info(scenario + " post action checks passed") else: - logging.info(scenario + ' post action response did not match pre check output') + logging.info(scenario + " post action response did not match pre check output") return False elif scenario != "": # invoke custom bash script @@ -168,7 +176,7 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""): if pre_action_output == action_output: logging.info(scenario + " post action checks passed") else: - logging.info(scenario + ' post action response did not match pre check output') + logging.info(scenario + " post action response did not match pre check output") return False return action_output @@ -178,12 +186,11 @@ def run_post_action(kubeconfig_path, scenario, pre_action_output=""): def post_actions(kubeconfig_path, scenario, failed_post_scenarios, pre_action_output): for failed_scenario in failed_post_scenarios: - post_action_output = run_post_action(kubeconfig_path, - failed_scenario[0], failed_scenario[1]) + post_action_output = run_post_action(kubeconfig_path, failed_scenario[0], failed_scenario[1]) if post_action_output is not False: failed_post_scenarios.remove(failed_scenario) else: - logging.info('Post action scenario ' + str(failed_scenario) + "is still failing") + logging.info("Post action scenario " + str(failed_scenario) + "is still failing") # check post actions if len(scenario) > 1: @@ -201,11 +208,12 @@ def pod_scenarios(scenarios_list, config, failed_post_scenarios): if len(pod_scenario) > 1: pre_action_output = run_post_action(kubeconfig_path, pod_scenario[1]) else: - pre_action_output = '' - scenario_logs = runcommand.invoke("powerfulseal autonomous --use-pod-delete-instead-" - "of-ssh-kill --policy-file %s --kubeconfig %s " - "--no-cloud --inventory-kubernetes --headless" - % (pod_scenario[0], kubeconfig_path)) + pre_action_output = "" + scenario_logs = runcommand.invoke( + "powerfulseal autonomous --use-pod-delete-instead-" + "of-ssh-kill --policy-file %s --kubeconfig %s " + "--no-cloud --inventory-kubernetes --headless" % (pod_scenario[0], kubeconfig_path) + ) # Display pod scenario logs/actions print(scenario_logs) @@ -214,23 +222,23 @@ def pod_scenarios(scenarios_list, config, failed_post_scenarios): logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) - failed_post_scenarios = post_actions(kubeconfig_path, pod_scenario, - failed_post_scenarios, pre_action_output) + failed_post_scenarios = post_actions( + kubeconfig_path, pod_scenario, failed_post_scenarios, pre_action_output + ) publish_kraken_status(config, failed_post_scenarios) except Exception as e: - logging.error("Failed to run scenario: %s. Encountered the following " - "exception: %s" % (pod_scenario[0], e)) + logging.error("Failed to run scenario: %s. Encountered the following " "exception: %s" % (pod_scenario[0], e)) return failed_post_scenarios def node_scenarios(scenarios_list, config): for node_scenario_config in scenarios_list: - with open(node_scenario_config, 'r') as f: + with open(node_scenario_config, "r") as f: node_scenario_config = yaml.full_load(f) - for node_scenario in node_scenario_config['node_scenarios']: + for node_scenario in node_scenario_config["node_scenarios"]: node_scenario_object = get_node_scenario_object(node_scenario) - if node_scenario['actions']: - for action in node_scenario['actions']: + if node_scenario["actions"]: + for action in node_scenario["actions"]: inject_node_scenario(action, node_scenario, node_scenario_object) logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) @@ -240,13 +248,13 @@ def node_scenarios(scenarios_list, config): def time_scenarios(scenarios_list, config): for time_scenario_config in scenarios_list: - with open(time_scenario_config, 'r') as f: + with open(time_scenario_config, "r") as f: scenario_config = yaml.full_load(f) - for time_scenario in scenario_config['time_scenarios']: + for time_scenario in scenario_config["time_scenarios"]: object_type, object_names = time_actions.skew_time(time_scenario) not_reset = time_actions.check_date_time(object_type, object_names) if len(not_reset) > 0: - logging.info('Object times were not reset') + logging.info("Object times were not reset") logging.info("Waiting for the specified duration: %s" % (wait_duration)) time.sleep(wait_duration) publish_kraken_status(config, not_reset) @@ -266,36 +274,31 @@ def litmus_scenarios(scenarios_list, config, litmus_namespaces, litmus_uninstall logging.info("opened yaml" + str(item)) yaml_item = list(yaml.safe_load_all(f))[0] - if yaml_item['kind'] == "ChaosEngine": - engine_name = yaml_item['metadata']['name'] - namespace = yaml_item['metadata']['namespace'] + if yaml_item["kind"] == "ChaosEngine": + engine_name = yaml_item["metadata"]["name"] + namespace = yaml_item["metadata"]["namespace"] litmus_namespaces.append(namespace) - experiment_names = yaml_item['spec']['experiments'] + experiment_names = yaml_item["spec"]["experiments"] for expr in experiment_names: - expr_name = expr['name'] - experiment_result = common_litmus.check_experiment(engine_name, - expr_name, - namespace) + expr_name = expr["name"] + experiment_result = common_litmus.check_experiment(engine_name, expr_name, namespace) if experiment_result: - logging.info("Scenario: %s has been successfully injected!" - % item) + logging.info("Scenario: %s has been successfully injected!" % item) else: - logging.info("Scenario: %s was not successfully injected!" - % item) + logging.info("Scenario: %s was not successfully injected!" % item) if litmus_uninstall: for l_item in l_scenario: - logging.info('item ' + str(l_item)) + logging.info("item " + str(l_item)) runcommand.invoke("kubectl delete -f %s" % l_item) if litmus_uninstall: for item in l_scenario: - logging.info('item ' + str(item)) + logging.info("item " + str(item)) runcommand.invoke("kubectl delete -f %s" % item) logging.info("Waiting for the specified duration: %s" % wait_duration) time.sleep(wait_duration) cerberus_integration(config) except Exception as e: - logging.error("Failed to run litmus scenario: %s. Encountered " - "the following exception: %s" % (item, e)) + logging.error("Failed to run litmus scenario: %s. Encountered " "the following exception: %s" % (item, e)) return litmus_namespaces @@ -307,18 +310,20 @@ def main(cfg): # Parse and read the config if os.path.isfile(cfg): - with open(cfg, 'r') as f: + with open(cfg, "r") as f: config = yaml.full_load(f) global kubeconfig_path, wait_duration kubeconfig_path = config["kraken"].get("kubeconfig_path", "") chaos_scenarios = config["kraken"].get("chaos_scenarios", []) - litmus_version = config['kraken'].get("litmus_version", 'v1.9.1') - litmus_uninstall = config['kraken'].get("litmus_uninstall", False) + litmus_version = config["kraken"].get("litmus_version", "v1.9.1") + litmus_uninstall = config["kraken"].get("litmus_uninstall", False) wait_duration = config["tunings"].get("wait_duration", 60) iterations = config["tunings"].get("iterations", 1) daemon_mode = config["tunings"].get("daemon_mode", False) deploy_performance_dashboards = config["performance_monitoring"].get("deploy_dashboards", False) - dashboard_repo = config["performance_monitoring"].get("repo", "https://github.com/cloud-bulldozer/performance-dashboards.git") # noqa + dashboard_repo = config["performance_monitoring"].get( + "repo", "https://github.com/cloud-bulldozer/performance-dashboards.git" + ) # noqa # Initialize clients if not os.path.isfile(kubeconfig_path): @@ -332,8 +337,9 @@ def main(cfg): # Cluster info logging.info("Fetching cluster info") cluster_version = runcommand.invoke("kubectl get clusterversion") - cluster_info = runcommand.invoke("kubectl cluster-info | awk 'NR==1' | sed -r " - "'s/\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g'") # noqa + cluster_info = runcommand.invoke( + "kubectl cluster-info | awk 'NR==1' | sed -r " "'s/\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g'" + ) # noqa logging.info("\n%s%s" % (cluster_version, cluster_info)) # Deploy performance dashboards @@ -348,17 +354,16 @@ def main(cfg): if daemon_mode: logging.info("Daemon mode enabled, kraken will cause chaos forever\n") logging.info("Ignoring the iterations set") - iterations = float('inf') + iterations = float("inf") else: - logging.info("Daemon mode not enabled, will run through %s iterations\n" - % str(iterations)) + logging.info("Daemon mode not enabled, will run through %s iterations\n" % str(iterations)) iterations = int(iterations) failed_post_scenarios = [] litmus_namespaces = [] litmus_installed = False # Loop to run the chaos starts here - while (int(iteration) < iterations): + while int(iteration) < iterations: # Inject chaos scenarios specified in the config logging.info("Executing scenarios for iteration " + str(iteration)) if chaos_scenarios: @@ -368,8 +373,7 @@ def main(cfg): if scenarios_list: # Inject pod chaos scenarios specified in the config if scenario_type == "pod_scenarios": - failed_post_scenarios = pod_scenarios(scenarios_list, config, - failed_post_scenarios) + failed_post_scenarios = pod_scenarios(scenarios_list, config, failed_post_scenarios) # Inject node chaos scenarios specified in the config elif scenario_type == "node_scenarios": @@ -383,9 +387,9 @@ def main(cfg): common_litmus.install_litmus(litmus_version) common_litmus.deploy_all_experiments(litmus_version) litmus_installed = True - litmus_namespaces = litmus_scenarios(scenarios_list, config, - litmus_namespaces, - litmus_uninstall) + litmus_namespaces = litmus_scenarios( + scenarios_list, config, litmus_namespaces, litmus_uninstall + ) iteration += 1 logging.info("") @@ -407,21 +411,15 @@ def main(cfg): # Initialize the parser to read the config parser = optparse.OptionParser() parser.add_option( - "-c", "--config", - dest="cfg", - help="config location", - default="config/config.yaml", + "-c", "--config", dest="cfg", help="config location", default="config/config.yaml", ) (options, args) = parser.parse_args() logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", - handlers=[ - logging.FileHandler("kraken.report", mode='w'), - logging.StreamHandler() - ] + handlers=[logging.FileHandler("kraken.report", mode="w"), logging.StreamHandler()], ) - if (options.cfg is None): + if options.cfg is None: logging.error("Please check if you have passed the config") sys.exit(1) else: diff --git a/scenarios/etcd.yml b/scenarios/etcd.yml index 6a5f1152..d1386290 100755 --- a/scenarios/etcd.yml +++ b/scenarios/etcd.yml @@ -29,4 +29,4 @@ scenarios: actions: - checkPodCount: - count: 3 \ No newline at end of file + count: 3 diff --git a/scenarios/node_hog_engine.yaml b/scenarios/node_hog_engine.yaml index c505c57b..7d46ee90 100644 --- a/scenarios/node_hog_engine.yaml +++ b/scenarios/node_hog_engine.yaml @@ -22,4 +22,4 @@ spec: value: '60' # ENTER THE COMMA SEPARATED TARGET NODES NAME - name: TARGET_NODES - value: '' \ No newline at end of file + value: '' diff --git a/scenarios/openshift-apiserver.yml b/scenarios/openshift-apiserver.yml index 56ca5c8d..5018c514 100755 --- a/scenarios/openshift-apiserver.yml +++ b/scenarios/openshift-apiserver.yml @@ -32,4 +32,4 @@ scenarios: actions: - checkPodCount: - count: 3 \ No newline at end of file + count: 3 diff --git a/scenarios/openshift-kube-apiserver.yml b/scenarios/openshift-kube-apiserver.yml index 2112557f..ef118b85 100755 --- a/scenarios/openshift-kube-apiserver.yml +++ b/scenarios/openshift-kube-apiserver.yml @@ -30,4 +30,4 @@ scenarios: timeout: 180 actions: - checkPodCount: - count: 3 \ No newline at end of file + count: 3 diff --git a/scenarios/post_action_etcd_example_py.py b/scenarios/post_action_etcd_example_py.py index 1d840625..1c7a2cf4 100755 --- a/scenarios/post_action_etcd_example_py.py +++ b/scenarios/post_action_etcd_example_py.py @@ -5,9 +5,9 @@ def run(cmd): try: - output = subprocess.Popen(cmd, shell=True, - universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + output = subprocess.Popen( + cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, err) = output.communicate() logging.info("out " + str(out)) except Exception as e: diff --git a/scenarios/post_action_prometheus.yml b/scenarios/post_action_prometheus.yml index ebae7879..76405fb0 100644 --- a/scenarios/post_action_prometheus.yml +++ b/scenarios/post_action_prometheus.yml @@ -19,4 +19,3 @@ scenarios: actions: - checkPodCount: count: 2 - \ No newline at end of file diff --git a/scenarios/post_action_regex.py b/scenarios/post_action_regex.py index ce12ab8c..f7e2ce2e 100755 --- a/scenarios/post_action_regex.py +++ b/scenarios/post_action_regex.py @@ -15,8 +15,11 @@ def list_namespaces(): cli = client.CoreV1Api() ret = cli.list_namespace(pretty=True) except ApiException as e: - logging.error("Exception when calling \ - CoreV1Api->list_namespaced_pod: %s\n" % e) + logging.error( + "Exception when calling \ + CoreV1Api->list_namespaced_pod: %s\n" + % e + ) for namespace in ret.items: namespaces.append(namespace.metadata.name) return namespaces @@ -47,9 +50,9 @@ def check_namespaces(namespaces): def run(cmd): try: - output = subprocess.Popen(cmd, shell=True, - universal_newlines=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + output = subprocess.Popen( + cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, err) = output.communicate() except Exception as e: logging.error("Failed to run %s, error: %s" % (cmd, e)) diff --git a/scenarios/prometheus.yml b/scenarios/prometheus.yml index 6fc6daa4..d1401275 100644 --- a/scenarios/prometheus.yml +++ b/scenarios/prometheus.yml @@ -32,4 +32,4 @@ scenarios: actions: - checkPodCount: - count: 2 \ No newline at end of file + count: 2 diff --git a/setup.cfg b/setup.cfg index 5287508e..803914e4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,11 +36,3 @@ dists = bdist_wheel [bdist_wheel] # Use this option if your package is pure-python universal = 1 - -[flake8] -# Some sane defaults for the code style checker flake8 -exclude = - .tox - build - dist - .eggsse diff --git a/setup.py b/setup.py index 6d697d14..5ce99b18 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup try: - require('setuptools>=38.3') + require("setuptools>=38.3") except VersionConflict: print("Error: version of setuptools is too old (<38.3)!") sys.exit(1) diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index 4eead6f8..00000000 --- a/test-requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -setuptools -flake8 diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1aabff4d..00000000 --- a/tox.ini +++ /dev/null @@ -1,24 +0,0 @@ -[tox] -envlist = pep8 - -[testenv] -usedevelop = True -setenv = - VIRTUAL_ENV={envdir} -deps = -r{toxinidir}/test-requirements.txt -commands = python setup.py develop - -[testenv:pep8] -basepython = python -commands = flake8 {posargs} - -[testenv:venv] -commands = {posargs} - -[flake8] -# E123, E125 skipped as they are invalid PEP-8. -show-source = True -max-line-length = 120 -ignore = E123,E125 -builtins = _ -exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build