Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BareMetal refactoring: Move capability to cluster, add logger for clusters, remove unused schema, fix dependencies and so on #3569

Merged
merged 15 commits into from
Jan 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion lisa/platform_.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,12 @@ def deploy_environment(self, environment: Environment) -> None:
# initialize features
# features may need platform, so create it in platform
for node in environment.nodes.list():
node.features = Features(node, self)
# Baremetal platform needs to initialize the SerialConsole feature to
# capture the serial log from the beginning, so the features are
# already created. If the SerialConsole were recreated, the service
# resource would leak, and the SerialConsole could not be opened again.
if not hasattr(node, "features"):
node.features = Features(node, self)
node.capture_azure_information = platform_runbook.capture_azure_information
node.capture_boot_time = platform_runbook.capture_boot_time
node.capture_kernel_config = (
Expand Down
99 changes: 55 additions & 44 deletions lisa/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,50 @@ class LocalNode(Node):
type: str = constants.ENVIRONMENTS_NODES_LOCAL


@dataclass_json()
@dataclass
class ConnectionInfo:
address: str = ""
port: int = field(
default=22,
metadata=field_metadata(
field_function=fields.Int, validate=validate.Range(min=1, max=65535)
),
)
username: str = constants.DEFAULT_USER_NAME
password: Optional[str] = ""
private_key_file: Optional[str] = ""

def __post_init__(self, *args: Any, **kwargs: Any) -> None:
add_secret(self.username, PATTERN_HEADTAIL)
add_secret(self.password)
add_secret(self.private_key_file)

if not self.password and not self.private_key_file:
raise LisaException(
"at least one of password or private_key_file need to be set when "
"connecting"
)
elif not self.private_key_file:
# use password
# spurplus doesn't process empty string correctly, use None
self.private_key_file = None
elif not self.password:
self.password = None
else:
# Password and private_key_file all exist
# Private key is attempted with high priority for authentication when
# connecting to a remote node using paramiko
if not Path(self.private_key_file).exists():
raise FileNotFoundError(self.private_key_file)

if not self.username:
raise LisaException("username must be set")

def __str__(self) -> str:
return f"{self.username}@{self.address}:{self.port}"


@dataclass_json()
@dataclass
class RemoteNode(Node):
Expand Down Expand Up @@ -1277,6 +1321,17 @@ def __post_init__(self, *args: Any, **kwargs: Any) -> None:
add_secret(self.password)
add_secret(self.private_key_file)

def get_connection_info(self, is_public: bool = False) -> "ConnectionInfo":
connection = ConnectionInfo(
address=self.public_address if is_public else self.address,
port=self.public_port if is_public else self.port,
username=self.username,
password=self.password,
private_key_file=self.private_key_file,
)

return connection


@dataclass_json()
@dataclass
Expand Down Expand Up @@ -1552,50 +1607,6 @@ def type_name(cls) -> str:
return constants.TESTCASE_TYPE_LEGACY


@dataclass_json()
@dataclass
class ConnectionInfo:
address: str = ""
port: int = field(
default=22,
metadata=field_metadata(
field_function=fields.Int, validate=validate.Range(min=1, max=65535)
),
)
username: str = constants.DEFAULT_USER_NAME
password: Optional[str] = ""
private_key_file: Optional[str] = ""

def __post_init__(self, *args: Any, **kwargs: Any) -> None:
add_secret(self.username, PATTERN_HEADTAIL)
add_secret(self.password)
add_secret(self.private_key_file)

if not self.password and not self.private_key_file:
raise LisaException(
"at least one of password or private_key_file need to be set when "
"connecting"
)
elif not self.private_key_file:
# use password
# spurplus doesn't process empty string correctly, use None
self.private_key_file = None
elif not self.password:
self.password = None
else:
# Password and private_key_file all exist
# Private key is attempted with high priority for authentication when
# connecting to a remote node using paramiko
if not Path(self.private_key_file).exists():
raise FileNotFoundError(self.private_key_file)

if not self.username:
raise LisaException("username must be set")

def __str__(self) -> str:
return f"{self.username}@{self.address}:{self.port}"


@dataclass_json()
@dataclass
class ProxyConnectionInfo(ConnectionInfo):
Expand Down
72 changes: 63 additions & 9 deletions lisa/sut_orchestrator/baremetal/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@

from typing import Any, Type

from lisa import features, schema
from lisa import features, schema, search_space
from lisa.environment import Environment
from lisa.util import InitializableMixin, subclasses
from lisa.util.logger import get_logger
from lisa.util.logger import Logger, get_logger

from ..schema import ClientCapabilities, ClientSchema, ClusterSchema
from ..schema import ClientSchema, ClusterSchema


class Cluster(subclasses.BaseClassWithRunbookMixin, InitializableMixin):
def __init__(
self,
runbook: ClusterSchema,
parent_logger: Logger,
) -> None:
super().__init__(runbook=runbook)
self.cluster_runbook: ClusterSchema = self.runbook
self._log = get_logger("cluster", self.__class__.__name__)
self._log = get_logger(name=self.__class__.__name__, parent=parent_logger)

@classmethod
def type_schema(cls) -> Type[schema.TypedSchema]:
Expand All @@ -27,17 +28,70 @@ def type_schema(cls) -> Type[schema.TypedSchema]:
def deploy(self, environment: Environment) -> Any:
raise NotImplementedError()

def has_serial_console(self) -> bool:
raise NotImplementedError()
def delete(self, environment: Environment, log: Logger) -> None:
# the delete is not required for all clusters.
pass

def get_serial_console(self) -> Type[features.SerialConsole]:
raise NotImplementedError()

def get_start_stop(self) -> Type[features.StartStop]:
raise NotImplementedError()

def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
raise NotImplementedError()
def get_client_capability(self, client: ClientSchema) -> schema.Capability:
# If the cluster doesn't support detecting capability, return an empty
# capability.
if client.capability:
return client.capability

capability = schema.Capability()
# Give minimun values to pass basic checks.
capability.core_count = 1
capability.memory_mb = 512
return capability

def cleanup(self) -> None:
raise NotImplementedError()
pass

def prepare_clients(self) -> None:
client_runbook = self.runbook.client[0]
client_capability = self.get_client_capability(client_runbook)

# to compatible with previous schema, use the whole client as extended
# runbook.
schema_type = self.runbook.type
extended_schema = client_runbook.to_dict()

if client_capability.extended_schemas is None:
client_capability.extended_schemas = {}
client_capability.extended_schemas[schema_type] = extended_schema
self._fill_capability(client_capability)

self.client = client_capability

def _initialize(self, *args: Any, **kwargs: Any) -> None:
self.prepare_clients()

def _fill_capability(self, node_capability: schema.NodeSpace) -> None:
node_capability.node_count = 1
node_capability.disk = schema.DiskOptionSettings(
data_disk_count=search_space.IntRange(min=0),
data_disk_size=search_space.IntRange(min=1),
)
node_capability.network_interface = schema.NetworkInterfaceOptionSettings()
node_capability.network_interface.max_nic_count = 1
node_capability.network_interface.nic_count = 1
node_capability.network_interface.data_path = search_space.SetSpace[
schema.NetworkDataPath
](
is_allow_set=True,
items=[schema.NetworkDataPath.Sriov, schema.NetworkDataPath.Synthetic],
)
node_capability.gpu_count = 0
node_capability.features = search_space.SetSpace[schema.FeatureSettings](
is_allow_set=True,
items=[
schema.FeatureSettings.create(features.SerialConsole.name()),
schema.FeatureSettings.create(features.StartStop.name()),
],
)
31 changes: 16 additions & 15 deletions lisa/sut_orchestrator/baremetal/cluster/idrac.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
from lisa import features, schema
from lisa.environment import Environment
from lisa.util import LisaException, check_till_timeout
from lisa.util.logger import get_logger
from lisa.util.perf_timer import create_timer

from ..platform_ import BareMetalPlatform
from ..schema import ClientCapabilities, ClientSchema, ClusterSchema, IdracSchema
from ..schema import ClientSchema, ClusterSchema, IdracClientSchema, IdracSchema
from .cluster import Cluster


Expand Down Expand Up @@ -86,15 +85,13 @@ class Idrac(Cluster):
"ForceOff": "Off",
}

def __init__(self, runbook: ClusterSchema) -> None:
super().__init__(runbook)
def __init__(self, runbook: ClusterSchema, **kwargs: Any) -> None:
super().__init__(runbook, **kwargs)
self.idrac_runbook: IdracSchema = self.runbook
self._log = get_logger("idrac", self.__class__.__name__)
assert_that(len(self.idrac_runbook.client)).described_as(
"only one client is supported for idrac, don't specify more than one client"
).is_equal_to(1)

self.client = self.idrac_runbook.client[0]
self._enable_serial_console()

@classmethod
Expand All @@ -114,10 +111,13 @@ def get_serial_console(self) -> Type[features.SerialConsole]:
def deploy(self, environment: Environment) -> Any:
self.login()
self._eject_virtual_media()
assert self.client.iso_http_url, "iso_http_url is required for idrac client"
client_runbook: IdracClientSchema = self.client.get_extended_runbook(
IdracClientSchema, "idrac"
)
assert client_runbook.iso_http_url, "iso_http_url is required for idrac client"
self._change_boot_order_once("VCD-DVD")
self.reset("ForceOff")
self._insert_virtual_media(self.client.iso_http_url)
self._insert_virtual_media(client_runbook.iso_http_url)
self.reset("On", force_run=True)
self.logout()

Expand All @@ -126,22 +126,23 @@ def cleanup(self) -> None:
self._clear_serial_console_log()
self.logout()

def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
if client.capabilities:
return client.capabilities
def get_client_capability(self, client: ClientSchema) -> schema.Capability:
if client.capability:
return client.capability
self.login()
response = self.redfish_instance.get(
"/redfish/v1/Systems/System.Embedded.1/",
)
cluster_capabilities = ClientCapabilities()
cluster_capabilities.core_count = int(
capability = schema.Capability()
capability.core_count = int(
response.dict["ProcessorSummary"]["LogicalProcessorCount"]
)
cluster_capabilities.free_memory_mb = (
capability.memory_mb = (
int(response.dict["MemorySummary"]["TotalSystemMemoryGiB"]) * 1024
)
self.logout()
return cluster_capabilities

return capability

def get_serial_console_log(self) -> str:
response = self.redfish_instance.post(
Expand Down
19 changes: 3 additions & 16 deletions lisa/sut_orchestrator/baremetal/cluster/rackmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from lisa import features, schema
from lisa.environment import Environment
from lisa.node import quick_connect
from lisa.util.logger import get_logger

from ..platform_ import BareMetalPlatform
from ..schema import ClientCapabilities, ClientSchema, RackManagerSchema
from ..schema import RackManagerSchema
from .cluster import Cluster


Expand Down Expand Up @@ -37,10 +36,9 @@ def _restart(self, wait: bool = True) -> None:


class RackManager(Cluster):
def __init__(self, runbook: RackManagerSchema) -> None:
super().__init__(runbook)
def __init__(self, runbook: RackManagerSchema, **kwargs: Any) -> None:
super().__init__(runbook, **kwargs)
self.rm_runbook: RackManagerSchema = self.runbook
self._log = get_logger("rackmanager", self.__class__.__name__)

@classmethod
def type_name(cls) -> str:
Expand Down Expand Up @@ -73,14 +71,3 @@ def reset(self, operation: str) -> None:
), "management_port is required for rackmanager client"
self.rm_node.execute(f"set system {operation} -i {client.management_port}")
self._log.debug(f"client has been {operation} successfully")

def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
if client.capabilities:
return client.capabilities
cluster_capabilities = ClientCapabilities()
cluster_capabilities.core_count = 0
cluster_capabilities.free_memory_mb = 0
return cluster_capabilities

def cleanup(self) -> None:
pass
10 changes: 6 additions & 4 deletions lisa/sut_orchestrator/baremetal/context.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from dataclasses import dataclass, field
from functools import partial

from lisa import schema
from lisa.environment import Environment
from lisa.node import Node

from . import schema as baremetal_schema
from .build import Build


Expand All @@ -15,8 +14,11 @@ class EnvironmentContext:

@dataclass
class NodeContext:
connection: schema.ConnectionInfo = field(
default_factory=partial(schema.ConnectionInfo, password="mock")
cluster: baremetal_schema.ClusterSchema = field(
default_factory=baremetal_schema.ClusterSchema
)
client: baremetal_schema.ClientSchema = field(
default_factory=baremetal_schema.ClientSchema
)


Expand Down
Loading
Loading