From 35002316764b0be29a848226d121aa14b000461c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20M=C3=A9ndez=20Hern=C3=A1ndez?= Date: Wed, 23 Oct 2024 20:27:07 +0200 Subject: [PATCH] Change API from 'Google API Client' to 'Google Cloud Python Client' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the 'Google API Client' GH page: ``` This library is considered complete and is in maintenance mode. This means that we will address critical bugs and security issues but will not add any new features. This library is officially supported by Google. However, the maintainers of this repository recommend using Cloud Client Libraries for Python, where possible, for new code development. ``` So change the code accordingly to adapt it to 'Google Cloud Python Client'. Signed-off-by: Pablo Méndez Hernández --- .../node_actions/gcp_node_scenarios.py | 244 ++++++++++-------- requirements.txt | 4 +- 2 files changed, 136 insertions(+), 112 deletions(-) diff --git a/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py b/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py index 437a91818..ca88796e3 100644 --- a/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py +++ b/krkn/scenario_plugins/node_actions/gcp_node_scenarios.py @@ -1,66 +1,78 @@ -import os -import sys import time import logging -import json +import google.auth import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction from krkn.scenario_plugins.node_actions.abstract_node_scenarios import ( abstract_node_scenarios, ) -from googleapiclient import discovery -from oauth2client.client import GoogleCredentials +from google.cloud import compute_v1 from krkn_lib.k8s import KrknKubernetes class GCP: def __init__(self): try: - gapp_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") - with open(gapp_creds, "r") as f: - f_str = f.read() - self.project = json.loads(f_str)["project_id"] - # self.project = runcommand.invoke("gcloud config get-value project").split("/n")[0].strip() - logging.info("project " + str(self.project) + "!") - credentials = GoogleCredentials.get_application_default() - self.client = discovery.build( - "compute", "v1", credentials=credentials, cache_discovery=False - ) - + _, self.project_id = google.auth.default() + self.instance_client = compute_v1.InstancesClient() except Exception as e: logging.error("Error on setting up GCP connection: " + str(e)) + raise e - # Get the instance ID of the node - def get_instance_id(self, node): - zone_request = self.client.zones().list(project=self.project) - while zone_request is not None: - zone_response = zone_request.execute() - for zone in zone_response["items"]: - instances_request = self.client.instances().list( - project=self.project, zone=zone["name"] - ) - while instances_request is not None: - instance_response = instances_request.execute() - if "items" in instance_response.keys(): - for instance in instance_response["items"]: - if instance["name"] in node: - return instance["name"], zone["name"] - instances_request = self.client.zones().list_next( - previous_request=instances_request, - previous_response=instance_response, - ) - zone_request = self.client.zones().list_next( - previous_request=zone_request, previous_response=zone_response + # Get the instance of the node + def get_node_instance(self, node): + try: + request = compute_v1.AggregatedListInstancesRequest( + project = self.project_id ) - logging.info("no instances ") + agg_list = self.instance_client.aggregated_list(request=request) + for _, response in agg_list: + if response.instances: + for instance in response.instances: + if instance.name in node: + return instance + logging.info("no instances ") + except Exception as e: + logging.error("Error getting the instance of the node: " + str(e)) + + raise e + + # Get the instance name + def get_instance_name(self, instance): + if instance.name: + return instance.name + + # Get the instance zone + def get_instance_zone(self, instance): + if instance.zone: + return instance.zone.split("/")[-1] + + # Get the instance zone of the node + def get_node_instance_zone(self, node): + instance = self.get_node_instance(node) + if instance: + return self.get_instance_zone(instance) + + # Get the instance name of the node + def get_node_instance_name(self, node): + instance = self.get_node_instance(node) + if instance: + return self.get_instance_name(instance) + + # Get the instance name of the node + def get_instance_id(self, node): + return self.get_node_instance_name(node) # Start the node instance - def start_instances(self, zone, instance_id): + def start_instances(self, instance_id): try: - self.client.instances().start( - project=self.project, zone=zone, instance=instance_id - ).execute() - logging.info("vm name " + str(instance_id) + " started") + request = compute_v1.StartInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + self.instance_client.start(request=request) + logging.info("Instance: " + str(instance_id) + " started") except Exception as e: logging.error( "Failed to start node instance %s. Encountered following " @@ -70,12 +82,15 @@ def start_instances(self, zone, instance_id): raise RuntimeError() # Stop the node instance - def stop_instances(self, zone, instance_id): + def stop_instances(self, instance_id): try: - self.client.instances().stop( - project=self.project, zone=zone, instance=instance_id - ).execute() - logging.info("vm name " + str(instance_id) + " stopped") + request = compute_v1.StopInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + self.instance_client.stop(request=request) + logging.info("Instance: " + str(instance_id) + " stopped") except Exception as e: logging.error( "Failed to stop node instance %s. Encountered following " @@ -84,13 +99,16 @@ def stop_instances(self, zone, instance_id): raise RuntimeError() - # Start the node instance - def suspend_instances(self, zone, instance_id): + # Suspend the node instance + def suspend_instances(self, instance_id): try: - self.client.instances().suspend( - project=self.project, zone=zone, instance=instance_id - ).execute() - logging.info("vm name " + str(instance_id) + " suspended") + request = compute_v1.SuspendInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + self.instance_client.suspend(request=request) + logging.info("Instance: " + str(instance_id) + " suspended") except Exception as e: logging.error( "Failed to suspend node instance %s. Encountered following " @@ -100,49 +118,65 @@ def suspend_instances(self, zone, instance_id): raise RuntimeError() # Terminate the node instance - def terminate_instances(self, zone, instance_id): + def terminate_instances(self, instance_id): try: - self.client.instances().delete( - project=self.project, zone=zone, instance=instance_id - ).execute() - logging.info("vm name " + str(instance_id) + " terminated") + request = compute_v1.DeleteInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + self.instance_client.delete(request=request) + logging.info("Instance: " + str(instance_id) + " terminated") except Exception as e: logging.error( - "Failed to start node instance %s. Encountered following " + "Failed to terminate node instance %s. Encountered following " "exception: %s." % (instance_id, e) ) raise RuntimeError() # Reboot the node instance - def reboot_instances(self, zone, instance_id): + def reboot_instances(self, instance_id): try: - self.client.instances().reset( - project=self.project, zone=zone, instance=instance_id - ).execute() - logging.info("vm name " + str(instance_id) + " rebooted") + request = compute_v1.ResetInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + self.instance_client.reset(request=request) + logging.info("Instance: " + str(instance_id) + " rebooted") except Exception as e: logging.error( - "Failed to start node instance %s. Encountered following " + "Failed to reboot node instance %s. Encountered following " "exception: %s." % (instance_id, e) ) raise RuntimeError() # Get instance status - def get_instance_status(self, zone, instance_id, expected_status, timeout): - # statuses: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING, + def get_instance_status(self, instance_id, expected_status, timeout): + # states: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING, # and TERMINATED. i = 0 sleeper = 5 while i <= timeout: - instStatus = ( - self.client.instances() - .get(project=self.project, zone=zone, instance=instance_id) - .execute() - ) - logging.info("Status of vm " + str(instStatus["status"])) - if instStatus["status"] == expected_status: + try: + request = compute_v1.GetInstanceRequest( + instance=instance_id, + project=self.project_id, + zone=self.get_node_instance_zone(instance_id), + ) + instance_status = self.instance_client.get(request=request).status + logging.info("Status of instance " + str(instance_id) + ": " + instance_status) + except Exception as e: + logging.error( + "Failed to get status of instance %s. Encountered following " + "exception: %s." % (instance_id, e) + ) + + raise RuntimeError() + + if instance_status == expected_status: return True time.sleep(sleeper) i += sleeper @@ -153,33 +187,21 @@ def get_instance_status(self, zone, instance_id, expected_status, timeout): return False # Wait until the node instance is suspended - def wait_until_suspended(self, zone, instance_id, timeout): - return self.get_instance_status(zone, instance_id, "SUSPENDED", timeout) + def wait_until_suspended(self, instance_id, timeout): + return self.get_instance_status(instance_id, "SUSPENDED", timeout) # Wait until the node instance is running - def wait_until_running(self, zone, instance_id, timeout): - return self.get_instance_status(zone, instance_id, "RUNNING", timeout) + def wait_until_running(self, instance_id, timeout): + return self.get_instance_status(instance_id, "RUNNING", timeout) # Wait until the node instance is stopped - def wait_until_stopped(self, zone, instance_id, timeout): - return self.get_instance_status(zone, instance_id, "TERMINATED", timeout) + def wait_until_stopped(self, instance_id, timeout): + # In GCP, the next state after STOPPING is TERMINATED + return self.get_instance_status(instance_id, "TERMINATED", timeout) # Wait until the node instance is terminated - def wait_until_terminated(self, zone, instance_id, timeout): - try: - i = 0 - sleeper = 5 - while i <= timeout: - instStatus = ( - self.client.instances() - .get(project=self.project, zone=zone, instance=instance_id) - .execute() - ) - logging.info("Status of vm " + str(instStatus["status"])) - time.sleep(sleeper) - except Exception as e: - logging.info("here " + str(e)) - return True + def wait_until_terminated(self, instance_id, timeout): + return self.get_instance_status(instance_id, "TERMINATED", timeout) # krkn_lib @@ -193,12 +215,13 @@ def node_start_scenario(self, instance_kill_count, node, timeout): for _ in range(instance_kill_count): try: logging.info("Starting node_start_scenario injection") - instance_id, zone = self.gcp.get_instance_id(node) + instance = self.gcp.get_node_instance(node) + instance_id = self.gcp.get_instance_name(instance) logging.info( "Starting the node %s with instance ID: %s " % (node, instance_id) ) - self.gcp.start_instances(zone, instance_id) - self.gcp.wait_until_running(zone, instance_id, timeout) + self.gcp.start_instances(instance_id) + self.gcp.wait_until_running(instance_id, timeout) nodeaction.wait_for_ready_status(node, timeout, self.kubecli) logging.info( "Node with instance ID: %s is in running state" % instance_id @@ -215,16 +238,16 @@ def node_start_scenario(self, instance_kill_count, node, timeout): # Node scenario to stop the node def node_stop_scenario(self, instance_kill_count, node, timeout): - logging.info("stop scenario") for _ in range(instance_kill_count): try: logging.info("Starting node_stop_scenario injection") - instance_id, zone = self.gcp.get_instance_id(node) + instance = self.gcp.get_node_instance(node) + instance_id = self.gcp.get_instance_name(instance) logging.info( "Stopping the node %s with instance ID: %s " % (node, instance_id) ) - self.gcp.stop_instances(zone, instance_id) - self.gcp.wait_until_stopped(zone, instance_id, timeout) + self.gcp.stop_instances(instance_id) + self.gcp.wait_until_stopped(instance_id, timeout) logging.info( "Node with instance ID: %s is in stopped state" % instance_id ) @@ -243,13 +266,14 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): for _ in range(instance_kill_count): try: logging.info("Starting node_termination_scenario injection") - instance_id, zone = self.gcp.get_instance_id(node) + instance = self.gcp.get_node_instance(node) + instance_id = self.gcp.get_instance_name(instance) logging.info( "Terminating the node %s with instance ID: %s " % (node, instance_id) ) - self.gcp.terminate_instances(zone, instance_id) - self.gcp.wait_until_terminated(zone, instance_id, timeout) + self.gcp.terminate_instances(instance_id) + self.gcp.wait_until_terminated(instance_id, timeout) for _ in range(timeout): if node not in self.kubecli.list_nodes(): break @@ -267,19 +291,19 @@ def node_termination_scenario(self, instance_kill_count, node, timeout): ) logging.error("node_termination_scenario injection failed!") - - raise e + raise RuntimeError() # Node scenario to reboot the node def node_reboot_scenario(self, instance_kill_count, node, timeout): for _ in range(instance_kill_count): try: logging.info("Starting node_reboot_scenario injection") - instance_id, zone = self.gcp.get_instance_id(node) + instance = self.gcp.get_node_instance(node) + instance_id = self.gcp.get_instance_name(instance) logging.info( "Rebooting the node %s with instance ID: %s " % (node, instance_id) ) - self.gcp.reboot_instances(zone, instance_id) + self.gcp.reboot_instances(instance_id) nodeaction.wait_for_ready_status(node, timeout, self.kubecli) logging.info( "Node with instance ID: %s has been rebooted" % instance_id diff --git a/requirements.txt b/requirements.txt index 3f69aff16..53aa01e89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,8 @@ coverage==7.4.1 datetime==5.4 docker==7.0.0 gitpython==3.1.41 -google-api-python-client==2.116.0 +google-auth==2.35.0 +google-cloud-compute==1.19.2 ibm_cloud_sdk_core==3.18.0 ibm_vpc==0.20.0 jinja2==3.1.4 @@ -19,7 +20,6 @@ krkn-lib==4.0.4 lxml==5.1.0 kubernetes==28.1.0 numpy==1.26.4 -oauth2client==4.1.3 pandas==2.2.0 openshift-client==1.0.21 paramiko==3.4.0