diff --git a/lisa/platform_.py b/lisa/platform_.py index 75b9b4fe2d..c433a5143e 100644 --- a/lisa/platform_.py +++ b/lisa/platform_.py @@ -187,7 +187,12 @@ def deploy_environment(self, environment: Environment) -> None: # initialize features # features may need platform, so create it in platform for node in environment.nodes.list(): - node.features = Features(node, self) + # Baremetal platform needs to initialize SerialConsole feature to + # get serial log from beginning, so the features are created + # already. If recreating the SerialConsole, the service resource + # leaks, and SerialConsole cannot be opened again. + if not hasattr(node, "features"): + node.features = Features(node, self) node.capture_azure_information = platform_runbook.capture_azure_information node.capture_boot_time = platform_runbook.capture_boot_time node.capture_kernel_config = ( diff --git a/lisa/schema.py b/lisa/schema.py index 431dca0dd6..6f9c592d5f 100644 --- a/lisa/schema.py +++ b/lisa/schema.py @@ -1249,6 +1249,50 @@ class LocalNode(Node): type: str = constants.ENVIRONMENTS_NODES_LOCAL +@dataclass_json() +@dataclass +class ConnectionInfo: + address: str = "" + port: int = field( + default=22, + metadata=field_metadata( + field_function=fields.Int, validate=validate.Range(min=1, max=65535) + ), + ) + username: str = constants.DEFAULT_USER_NAME + password: Optional[str] = "" + private_key_file: Optional[str] = "" + + def __post_init__(self, *args: Any, **kwargs: Any) -> None: + add_secret(self.username, PATTERN_HEADTAIL) + add_secret(self.password) + add_secret(self.private_key_file) + + if not self.password and not self.private_key_file: + raise LisaException( + "at least one of password or private_key_file need to be set when " + "connecting" + ) + elif not self.private_key_file: + # use password + # spurplus doesn't process empty string correctly, use None + self.private_key_file = None + elif not self.password: + self.password = None + else: + # Password and private_key_file all exist 
+ # Private key is attempted with high priority for authentication when + # connecting to a remote node using paramiko + if not Path(self.private_key_file).exists(): + raise FileNotFoundError(self.private_key_file) + + if not self.username: + raise LisaException("username must be set") + + def __str__(self) -> str: + return f"{self.username}@{self.address}:{self.port}" + + @dataclass_json() @dataclass class RemoteNode(Node): @@ -1277,6 +1321,17 @@ def __post_init__(self, *args: Any, **kwargs: Any) -> None: add_secret(self.password) add_secret(self.private_key_file) + def get_connection_info(self, is_public: bool = False) -> "ConnectionInfo": + connection = ConnectionInfo( + address=self.public_address if is_public else self.address, + port=self.public_port if is_public else self.port, + username=self.username, + password=self.password, + private_key_file=self.private_key_file, + ) + + return connection + @dataclass_json() @dataclass @@ -1552,50 +1607,6 @@ def type_name(cls) -> str: return constants.TESTCASE_TYPE_LEGACY -@dataclass_json() -@dataclass -class ConnectionInfo: - address: str = "" - port: int = field( - default=22, - metadata=field_metadata( - field_function=fields.Int, validate=validate.Range(min=1, max=65535) - ), - ) - username: str = constants.DEFAULT_USER_NAME - password: Optional[str] = "" - private_key_file: Optional[str] = "" - - def __post_init__(self, *args: Any, **kwargs: Any) -> None: - add_secret(self.username, PATTERN_HEADTAIL) - add_secret(self.password) - add_secret(self.private_key_file) - - if not self.password and not self.private_key_file: - raise LisaException( - "at least one of password or private_key_file need to be set when " - "connecting" - ) - elif not self.private_key_file: - # use password - # spurplus doesn't process empty string correctly, use None - self.private_key_file = None - elif not self.password: - self.password = None - else: - # Password and private_key_file all exist - # Private key is attempted with high 
priority for authentication when - # connecting to a remote node using paramiko - if not Path(self.private_key_file).exists(): - raise FileNotFoundError(self.private_key_file) - - if not self.username: - raise LisaException("username must be set") - - def __str__(self) -> str: - return f"{self.username}@{self.address}:{self.port}" - - @dataclass_json() @dataclass class ProxyConnectionInfo(ConnectionInfo): diff --git a/lisa/sut_orchestrator/baremetal/cluster/cluster.py b/lisa/sut_orchestrator/baremetal/cluster/cluster.py index ecfe2cbe9d..4e78db2b37 100644 --- a/lisa/sut_orchestrator/baremetal/cluster/cluster.py +++ b/lisa/sut_orchestrator/baremetal/cluster/cluster.py @@ -3,22 +3,23 @@ from typing import Any, Type -from lisa import features, schema +from lisa import features, schema, search_space from lisa.environment import Environment from lisa.util import InitializableMixin, subclasses -from lisa.util.logger import get_logger +from lisa.util.logger import Logger, get_logger -from ..schema import ClientCapabilities, ClientSchema, ClusterSchema +from ..schema import ClientSchema, ClusterSchema class Cluster(subclasses.BaseClassWithRunbookMixin, InitializableMixin): def __init__( self, runbook: ClusterSchema, + parent_logger: Logger, ) -> None: super().__init__(runbook=runbook) self.cluster_runbook: ClusterSchema = self.runbook - self._log = get_logger("cluster", self.__class__.__name__) + self._log = get_logger(name=self.__class__.__name__, parent=parent_logger) @classmethod def type_schema(cls) -> Type[schema.TypedSchema]: @@ -27,8 +28,9 @@ def type_schema(cls) -> Type[schema.TypedSchema]: def deploy(self, environment: Environment) -> Any: raise NotImplementedError() - def has_serial_console(self) -> bool: - raise NotImplementedError() + def delete(self, environment: Environment, log: Logger) -> None: + # the delete is not required for all clusters. 
+ pass def get_serial_console(self) -> Type[features.SerialConsole]: raise NotImplementedError() @@ -36,8 +38,60 @@ def get_serial_console(self) -> Type[features.SerialConsole]: def get_start_stop(self) -> Type[features.StartStop]: raise NotImplementedError() - def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities: - raise NotImplementedError() + def get_client_capability(self, client: ClientSchema) -> schema.Capability: + # If the cluster doesn't support detecting capability, return an empty + # capability. + if client.capability: + return client.capability + + capability = schema.Capability() + # Give minimum values to pass basic checks. + capability.core_count = 1 + capability.memory_mb = 512 + return capability def cleanup(self) -> None: - raise NotImplementedError() + pass + + def prepare_clients(self) -> None: + client_runbook = self.runbook.client[0] + client_capability = self.get_client_capability(client_runbook) + + # to be compatible with previous schema, use the whole client as extended + # runbook. 
+ schema_type = self.runbook.type + extended_schema = client_runbook.to_dict() + + if client_capability.extended_schemas is None: + client_capability.extended_schemas = {} + client_capability.extended_schemas[schema_type] = extended_schema + self._fill_capability(client_capability) + + self.client = client_capability + + def _initialize(self, *args: Any, **kwargs: Any) -> None: + self.prepare_clients() + + def _fill_capability(self, node_capability: schema.NodeSpace) -> None: + node_capability.node_count = 1 + node_capability.disk = schema.DiskOptionSettings( + data_disk_count=search_space.IntRange(min=0), + data_disk_size=search_space.IntRange(min=1), + ) + node_capability.network_interface = schema.NetworkInterfaceOptionSettings() + node_capability.network_interface.max_nic_count = 1 + node_capability.network_interface.nic_count = 1 + node_capability.network_interface.data_path = search_space.SetSpace[ + schema.NetworkDataPath + ]( + is_allow_set=True, + items=[schema.NetworkDataPath.Sriov, schema.NetworkDataPath.Synthetic], + ) + node_capability.gpu_count = 0 + node_capability.features = search_space.SetSpace[schema.FeatureSettings]( + is_allow_set=True, + items=[ + schema.FeatureSettings.create(features.SerialConsole.name()), + schema.FeatureSettings.create(features.StartStop.name()), + ], + ) diff --git a/lisa/sut_orchestrator/baremetal/cluster/idrac.py b/lisa/sut_orchestrator/baremetal/cluster/idrac.py index c3a64f79fc..81a3ec7914 100644 --- a/lisa/sut_orchestrator/baremetal/cluster/idrac.py +++ b/lisa/sut_orchestrator/baremetal/cluster/idrac.py @@ -13,11 +13,10 @@ from lisa import features, schema from lisa.environment import Environment from lisa.util import LisaException, check_till_timeout -from lisa.util.logger import get_logger from lisa.util.perf_timer import create_timer from ..platform_ import BareMetalPlatform -from ..schema import ClientCapabilities, ClientSchema, ClusterSchema, IdracSchema +from ..schema import ClientSchema, ClusterSchema, 
IdracClientSchema, IdracSchema from .cluster import Cluster @@ -86,15 +85,13 @@ class Idrac(Cluster): "ForceOff": "Off", } - def __init__(self, runbook: ClusterSchema) -> None: - super().__init__(runbook) + def __init__(self, runbook: ClusterSchema, **kwargs: Any) -> None: + super().__init__(runbook, **kwargs) self.idrac_runbook: IdracSchema = self.runbook - self._log = get_logger("idrac", self.__class__.__name__) assert_that(len(self.idrac_runbook.client)).described_as( "only one client is supported for idrac, don't specify more than one client" ).is_equal_to(1) - self.client = self.idrac_runbook.client[0] self._enable_serial_console() @classmethod @@ -114,10 +111,13 @@ def get_serial_console(self) -> Type[features.SerialConsole]: def deploy(self, environment: Environment) -> Any: self.login() self._eject_virtual_media() - assert self.client.iso_http_url, "iso_http_url is required for idrac client" + client_runbook: IdracClientSchema = self.client.get_extended_runbook( + IdracClientSchema, "idrac" + ) + assert client_runbook.iso_http_url, "iso_http_url is required for idrac client" self._change_boot_order_once("VCD-DVD") self.reset("ForceOff") - self._insert_virtual_media(self.client.iso_http_url) + self._insert_virtual_media(client_runbook.iso_http_url) self.reset("On", force_run=True) self.logout() @@ -126,22 +126,23 @@ def cleanup(self) -> None: self._clear_serial_console_log() self.logout() - def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities: - if client.capabilities: - return client.capabilities + def get_client_capability(self, client: ClientSchema) -> schema.Capability: + if client.capability: + return client.capability self.login() response = self.redfish_instance.get( "/redfish/v1/Systems/System.Embedded.1/", ) - cluster_capabilities = ClientCapabilities() - cluster_capabilities.core_count = int( + capability = schema.Capability() + capability.core_count = int( response.dict["ProcessorSummary"]["LogicalProcessorCount"] ) - 
cluster_capabilities.free_memory_mb = ( + capability.memory_mb = ( int(response.dict["MemorySummary"]["TotalSystemMemoryGiB"]) * 1024 ) self.logout() - return cluster_capabilities + + return capability def get_serial_console_log(self) -> str: response = self.redfish_instance.post( diff --git a/lisa/sut_orchestrator/baremetal/cluster/rackmanager.py b/lisa/sut_orchestrator/baremetal/cluster/rackmanager.py index 0889e19777..088f696d39 100644 --- a/lisa/sut_orchestrator/baremetal/cluster/rackmanager.py +++ b/lisa/sut_orchestrator/baremetal/cluster/rackmanager.py @@ -5,10 +5,9 @@ from lisa import features, schema from lisa.environment import Environment from lisa.node import quick_connect -from lisa.util.logger import get_logger from ..platform_ import BareMetalPlatform -from ..schema import ClientCapabilities, ClientSchema, RackManagerSchema +from ..schema import RackManagerSchema from .cluster import Cluster @@ -37,10 +36,9 @@ def _restart(self, wait: bool = True) -> None: class RackManager(Cluster): - def __init__(self, runbook: RackManagerSchema) -> None: - super().__init__(runbook) + def __init__(self, runbook: RackManagerSchema, **kwargs: Any) -> None: + super().__init__(runbook, **kwargs) self.rm_runbook: RackManagerSchema = self.runbook - self._log = get_logger("rackmanager", self.__class__.__name__) @classmethod def type_name(cls) -> str: @@ -73,14 +71,3 @@ def reset(self, operation: str) -> None: ), "management_port is required for rackmanager client" self.rm_node.execute(f"set system {operation} -i {client.management_port}") self._log.debug(f"client has been {operation} successfully") - - def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities: - if client.capabilities: - return client.capabilities - cluster_capabilities = ClientCapabilities() - cluster_capabilities.core_count = 0 - cluster_capabilities.free_memory_mb = 0 - return cluster_capabilities - - def cleanup(self) -> None: - pass diff --git 
a/lisa/sut_orchestrator/baremetal/context.py b/lisa/sut_orchestrator/baremetal/context.py index 0347b98808..0536faf06e 100644 --- a/lisa/sut_orchestrator/baremetal/context.py +++ b/lisa/sut_orchestrator/baremetal/context.py @@ -1,10 +1,9 @@ from dataclasses import dataclass, field -from functools import partial -from lisa import schema from lisa.environment import Environment from lisa.node import Node +from . import schema as baremetal_schema from .build import Build @@ -15,8 +14,11 @@ class EnvironmentContext: @dataclass class NodeContext: - connection: schema.ConnectionInfo = field( - default_factory=partial(schema.ConnectionInfo, password="mock") + cluster: baremetal_schema.ClusterSchema = field( + default_factory=baremetal_schema.ClusterSchema + ) + client: baremetal_schema.ClientSchema = field( + default_factory=baremetal_schema.ClientSchema ) diff --git a/lisa/sut_orchestrator/baremetal/features.py b/lisa/sut_orchestrator/baremetal/features.py index f157c1f66a..9c653bc6eb 100644 --- a/lisa/sut_orchestrator/baremetal/features.py +++ b/lisa/sut_orchestrator/baremetal/features.py @@ -2,7 +2,7 @@ # Licensed under the MIT license. 
-from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Type from lisa import schema from lisa.feature import Feature @@ -27,14 +27,17 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: ) self._inner.initialize() + def _get_inner_type(self) -> Type[Feature]: + raise NotImplementedError() + class StartStop(ClusterFeature): - def _get_inner_type(self) -> Feature: + def _get_inner_type(self) -> Type[Feature]: platform: BareMetalPlatform = self._platform # type: ignore - return platform.cluster.get_start_stop() # type: ignore + return platform.cluster.get_start_stop() class SerialConsole(ClusterFeature): - def _get_inner_type(self) -> Feature: + def _get_inner_type(self) -> Type[Feature]: platform: BareMetalPlatform = self._platform # type: ignore - return platform.cluster.get_serial_console() # type: ignore + return platform.cluster.get_serial_console() diff --git a/lisa/sut_orchestrator/baremetal/platform_.py b/lisa/sut_orchestrator/baremetal/platform_.py index d7ec6c14b0..9adfffd026 100644 --- a/lisa/sut_orchestrator/baremetal/platform_.py +++ b/lisa/sut_orchestrator/baremetal/platform_.py @@ -4,11 +4,10 @@ from pathlib import Path from typing import Any, List, Optional, Type -from lisa import RemoteNode, feature, schema, search_space +from lisa import feature, schema from lisa.environment import Environment from lisa.platform_ import Platform from lisa.util.logger import Logger -from lisa.util.shell import try_connect from lisa.util.subclasses import Factory from .. 
import BAREMETAL @@ -20,7 +19,7 @@ from .ip_getter import IpGetterChecker from .key_loader import KeyLoader from .readychecker import ReadyChecker -from .schema import BareMetalPlatformSchema, BuildSchema, ClientCapabilities +from .schema import BareMetalPlatformSchema, BuildSchema from .source import Source @@ -56,15 +55,21 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: # currently only support one cluster assert self._baremetal_runbook.cluster, "no cluster is specified in the runbook" self._cluster_runbook = self._baremetal_runbook.cluster[0] - self.cluster = self.cluster_factory.create_by_runbook(self._cluster_runbook) + + self.cluster = self.cluster_factory.create_by_runbook( + self._cluster_runbook, parent_logger=self._log + ) + self.cluster.initialize() def _prepare_environment(self, environment: Environment, log: Logger) -> bool: assert self.cluster.runbook.client, "no client is specified in the runbook" - client_capabilities = self.cluster.get_client_capabilities( - self.cluster.runbook.client[0] - ) - return self._configure_node_capabilities(environment, log, client_capabilities) + assert environment.runbook.nodes_requirement, "nodes requirement is required" + if len(environment.runbook.nodes_requirement) > 1: + # so far only supports one node + return False + + return self._check_capability(environment, log, self.cluster.client) def _deploy_environment(self, environment: Environment, log: Logger) -> None: # process the cluster elements from runbook @@ -85,19 +90,23 @@ def _deploy_environment(self, environment: Environment, log: Logger) -> None: if ready_checker: ready_checker.is_ready(node) + assert node_context.client.connection, "connection is required" # get ip address if self._cluster_runbook.ip_getter: ip_getter = self.ip_getter_factory.create_by_runbook( self._cluster_runbook.ip_getter ) - node_context.connection.address = ip_getter.get_ip() - assert isinstance(node, RemoteNode), f"actual: {type(node)}" + 
node_context.client.connection.address = ip_getter.get_ip() + node.name = f"node_{index}" - try_connect(node_context.connection) + node.initialize() self._log.debug(f"deploy environment {environment.name} successfully") + def _delete_environment(self, environment: Environment, log: Logger) -> None: + self.cluster.delete(environment, log) + def _copy(self, build_schema: BuildSchema, sources_path: List[Path]) -> None: if sources_path: build = self.build_factory.create_by_runbook(build_schema) @@ -161,13 +170,13 @@ def _predeploy_environment(self, environment: Environment, log: Logger) -> None: key_file = key_loader.load_key(self.local_artifacts_path) assert environment.runbook.nodes_requirement, "no node is specified" - for node_space in environment.runbook.nodes_requirement: + for index, node_space in enumerate(environment.runbook.nodes_requirement): assert isinstance( node_space, schema.NodeSpace ), f"actual: {type(node_space)}" - environment.create_node_from_requirement(node_space) + node = environment.create_node_from_requirement(node_space) - for index, node in enumerate(environment.nodes.list()): + node.features = feature.Features(node, self) node_context = get_node_context(node) if ( @@ -178,74 +187,29 @@ def _predeploy_environment(self, environment: Environment, log: Logger) -> None: index ].connection.private_key_file = key_file - connection_info = schema.ConnectionInfo( - address=self.cluster.runbook.client[index].connection.address, - port=self.cluster.runbook.client[index].connection.port, - username=self.cluster.runbook.client[index].connection.username, - private_key_file=self.cluster.runbook.client[ - index - ].connection.private_key_file, - password=self.cluster.runbook.client[index].connection.password, - ) - - node_context.connection = connection_info + node_context.client = self.cluster.runbook.client[index] + node_context.cluster = self.cluster.runbook index = index + 1 - def _configure_node_capabilities( + def _check_capability( self, 
environment: Environment, log: Logger, - cluster_capabilities: ClientCapabilities, + client_capability: schema.NodeSpace, ) -> bool: if not environment.runbook.nodes_requirement: return True - nodes_capabilities = self._create_node_capabilities(cluster_capabilities) - nodes_requirement = [] for node_space in environment.runbook.nodes_requirement: - if not node_space.check(nodes_capabilities): + if not node_space.check(client_capability): return False - node_requirement = node_space.generate_min_capability(nodes_capabilities) + node_requirement = node_space.generate_min_capability(client_capability) nodes_requirement.append(node_requirement) environment.runbook.nodes_requirement = nodes_requirement return True - def _create_node_capabilities( - self, cluster_capabilities: ClientCapabilities - ) -> schema.NodeSpace: - node_capabilities = schema.NodeSpace() - node_capabilities.name = "baremetal" - node_capabilities.node_count = 1 - node_capabilities.core_count = search_space.IntRange( - min=1, max=cluster_capabilities.core_count - ) - node_capabilities.memory_mb = cluster_capabilities.free_memory_mb - node_capabilities.disk = schema.DiskOptionSettings( - data_disk_count=search_space.IntRange(min=0), - data_disk_size=search_space.IntRange(min=1), - ) - node_capabilities.network_interface = schema.NetworkInterfaceOptionSettings() - node_capabilities.network_interface.max_nic_count = 1 - node_capabilities.network_interface.nic_count = 1 - node_capabilities.network_interface.data_path = search_space.SetSpace[ - schema.NetworkDataPath - ]( - is_allow_set=True, - items=[schema.NetworkDataPath.Sriov, schema.NetworkDataPath.Synthetic], - ) - node_capabilities.gpu_count = 0 - node_capabilities.features = search_space.SetSpace[schema.FeatureSettings]( - is_allow_set=True, - items=[ - schema.FeatureSettings.create(SerialConsole.name()), - schema.FeatureSettings.create(StartStop.name()), - ], - ) - - return node_capabilities - def _cleanup(self) -> None: self.cluster.cleanup() 
diff --git a/lisa/sut_orchestrator/baremetal/readychecker.py b/lisa/sut_orchestrator/baremetal/readychecker.py index 3559f3097b..dc9070b770 100644 --- a/lisa/sut_orchestrator/baremetal/readychecker.py +++ b/lisa/sut_orchestrator/baremetal/readychecker.py @@ -82,19 +82,13 @@ def is_ready(self, node: Node) -> bool: return os.path.exists(self.file_single_runbook.file) -@dataclass_json() -@dataclass -class SshSchema(ReadyCheckerSchema): - ... - - class SshChecker(ReadyChecker): def __init__( self, - runbook: SshSchema, + runbook: ReadyCheckerSchema, ) -> None: super().__init__(runbook=runbook) - self.ssh_runbook: SshSchema = self.runbook + self.ssh_runbook: ReadyCheckerSchema = self.runbook self._log = get_logger("ssh", self.__class__.__name__) @classmethod @@ -103,26 +97,27 @@ def type_name(cls) -> str: @classmethod def type_schema(cls) -> Type[schema.TypedSchema]: - return SshSchema + return ReadyCheckerSchema def is_ready(self, node: Node) -> bool: context = get_node_context(node) remote_node = cast(RemoteNode, node) + + assert context.client.connection, "connection is required for ssh checker" + connection = context.client.connection remote_node.set_connection_info( - address=context.connection.address, - public_port=context.connection.port, - username=context.connection.username, - password=cast( - str, - context.connection.password, - ), - private_key_file=cast( - str, - context.connection.private_key_file, - ), + address=connection.address, + port=connection.port, + username=connection.username, + password=connection.password, + private_key_file=connection.private_key_file, + use_public_address=False, ) self._log.debug(f"try to connect to client: {node}") - try_connect(context.connection, ssh_timeout=self.ssh_runbook.timeout) + try_connect( + connection.get_connection_info(is_public=False), + ssh_timeout=self.ssh_runbook.timeout, + ) self._log.debug("client has been connected successfully") return True diff --git a/lisa/sut_orchestrator/baremetal/schema.py 
b/lisa/sut_orchestrator/baremetal/schema.py index 49e90e5a4b..42d3b46457 100644 --- a/lisa/sut_orchestrator/baremetal/schema.py +++ b/lisa/sut_orchestrator/baremetal/schema.py @@ -10,20 +10,13 @@ from lisa.util import field_metadata -@dataclass_json() -@dataclass -class ClientCapabilities: - core_count: int = field(default=-1) - free_memory_mb: int = field(default=-1) - - @dataclass_json() @dataclass class ClientSchema: connection: Optional[schema.RemoteNode] = field( default=None, metadata=field_metadata(required=True) ) - capabilities: Optional[ClientCapabilities] = None + capability: Optional[schema.Capability] = None @dataclass_json() diff --git a/pyproject.toml b/pyproject.toml index 4fc509d126..d5c72ddfc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ azure = [ "azure-mgmt-storage ~= 21.2.1", "azure-storage-blob ~= 12.23.0", "azure-storage-file-share ~= 12.18.0", - "azure-keyvault-secrets ~= 4.7.0", + "azure-keyvault-secrets ~= 4.7.0", "azure-keyvault-certificates ~= 4.7.0", "msrestazure ~= 0.6.4", "cachetools ~= 5.2.0", @@ -104,6 +104,8 @@ libvirt = [ baremetal = [ "pysmb ~= 1.2.9.1", "redfish ~= 3.2.1", + "azure-devops ~= 7.1.0b3", + "requests ~= 2.32.0", ] mypy = [