Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

set-2 cases and enhancing set-1 smartswitch cases #16020

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
186 changes: 174 additions & 12 deletions tests/smartswitch/common/device_utils_dpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import logging
import pytest
from tests.common.devices.sonic import * # noqa: F401,F403
import re
from tests.common.platform.device_utils import platform_api_conn # noqa: F401,F403
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import is not required. You can also remove "noqa" tags where platform_api_conn is used. For example,

-def num_dpu_modules(platform_api_conn): # noqa F811
+def num_dpu_modules(platform_api_conn):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `# noqa F811` tags were introduced in this commit by yotongzhang-microsoft in the smartswitch files as well,
so I followed suit.

475f52f#diff-1ff5cd58b35cd3c24b8e95e46748a838458539db3f66b1bf0915980c82d94416

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, can you please fix it in the files you are modifying? As I mentioned previously in one of my comments, this can be avoided simply by removing the import of platform_api_conn at line 7. Please give it a try. I have already tried it locally and it works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we go ahead and remove line 7 entirely?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change it, test it and update it here.

from tests.common.helpers.platform_api import chassis, module
from tests.common.utilities import wait_until
Expand Down Expand Up @@ -90,7 +90,8 @@ def dpu_power_on(duthost, platform_api_conn, num_dpu_modules): # noqa F811
duthost.shell("config chassis modules startup %s" % (dpu))

pytest_assert(wait_until(180, 60, 0, check_dpu_ping_status, # noqa: F405
duthost, ip_address_list), "Not all DPUs are operationally up")
duthost, ip_address_list),
"Not all DPUs are operationally up")


def check_dpu_ping_status(duthost, ip_address_list):
Expand Down Expand Up @@ -128,39 +129,200 @@ def check_dpu_module_status(duthost, power_status, dpu_name):
'show chassis module status | grep %s' % (dpu_name))

if "Offline" in output_dpu_status["stdout"]:
logging.info("'{}' is offline ...".format(dpu_name))
if power_status == "off":
logging.info("'{}' is offline ...".format(dpu_name))
return True
else:
logging.info("'{}' is online ...".format(dpu_name))
return False
else:
logging.info("'{}' is online ...".format(dpu_name))
if power_status == "on":
logging.info("'{}' is online ...".format(dpu_name))
return True
else:
logging.info("'{}' is offline ...".format(dpu_name))
return False


def check_dpu_reboot_cause(duthost, dpu_name, reason="Unknown"):
    """
    Check the reboot cause reported for a single DPU module
    Args:
        duthost : Host handle
        dpu_name: name of the dpu module
        reason: expected reboot cause; it is searched for as a substring
                of the 'show reboot-cause all' output filtered by dpu_name.
                Defaults to "Unknown", which is the only cause the earlier
                two-argument form of this helper looked for.
    Returns:
        Returns True when the expected reason appears in the output for
        the DPU, False otherwise
    """

    output_reboot_cause = duthost.shell(
        'show reboot-cause all | grep %s' % (dpu_name))

    output_str = output_reboot_cause["stdout"]
    if reason in output_str.strip():
        logging.info("'{}' - reboot cause is {} as expected".format(dpu_name,
                                                                    reason))
        return True

    logging.error("'{}' - reboot cause is not {}".format(dpu_name,
                                                         reason))
    return False


def check_pmon_status(duthost):
    """
    Report whether the PMON container shows up as running
    Args:
        duthost : Host handle
    Returns:
        Returns True when some line of 'docker ps' output contains both
        "pmon" and "Up", otherwise False
    """

    docker_ps_stdout = duthost.command('docker ps')['stdout']

    # A container counts as up when a single 'docker ps' row mentions
    # both the container name and an "Up" status.
    pmon_running = any("pmon" in entry and "Up" in entry
                       for entry in docker_ps_stdout.strip().split("\n"))

    if not pmon_running:
        logging.error("pmon container is not up")
        return False

    logging.info("pmon container is up")
    return True


def execute_dpu_commands(duthost, ipaddress, command, output=True):
    """
    Runs a command on a dpu through ssh and returns the output.
    The ssh session is opened by a one-line paramiko script that is run
    with 'python -c' on the host behind duthost.
    Username and Password for dpu access are read from
    duthost.facts['ssh_dpu'] (per the original note they come from
    platform.json).
    Args:
        duthost : Host handle
        ipaddress: ip address of dpu interface
        command: command to be run on DPU
        output: Flag to turn on or off for the output
                of the command executed on dpu
                Default it is on true.
    Returns:
        Returns the stdout of the 'python -c' invocation, i.e. the
        output of the given command when output is True
    """
    # Local import: only this helper needs shlex.
    import shlex

    username = duthost.facts['ssh_dpu']['username']
    password = duthost.facts['ssh_dpu']['password']

    if output:
        log = 'print(stdout.read().decode()); '
    else:
        # Output suppressed: the script only prints an empty line.
        log = 'print(); '

    # Embed every value as a Python string literal via repr() so that
    # quotes or backslashes in the command / credentials cannot break
    # (or inject into) the generated script.
    script = ('import paramiko; '
              'client = paramiko.SSHClient(); '
              'client.set_missing_host_key_policy(paramiko.AutoAddPolicy()); '
              'client.connect(%s, username=%s, password=%s); '
              '_, stdout, _ = client.exec_command(%s); '
              '%s'
              'client.close()'
              % (repr(str(ipaddress)), repr(str(username)),
                 repr(str(password)), repr(str(command)), log))

    # Quote the whole script as ONE shell word so the local shell does not
    # expand $, backticks or quotes that are meant for the DPU side.
    # NOTE(review): the password is still visible on the command line of
    # the host running this script - confirm that is acceptable.
    ssh_cmd = 'python -c %s' % shlex.quote(script)
    cmd_output = duthost.shell(ssh_cmd)
    return cmd_output['stdout']


def parse_dpu_memory_usage(dpu_memory):
    """
    Parse the DPU memory output and return the memory usage value
    Args:
        dpu_memory : output of show system-memory on DPU
    Returns:
        Returns the memory used as an integer percentage (rounded down).
        Returns 0 when no "Mem:" line is found or when the reported
        total is 0.
    """

    # Regular expression pattern to extract the total and used values:
    # the first two numeric columns that follow "Mem:"
    pattern = r"Mem:\s+(\d+)\s+(\d+)\s+"

    # Search for the pattern in the data
    match = re.search(pattern, dpu_memory)
    if not match:
        logging.error("Memory information not found.")
        return 0

    total_mem = int(match.group(1))
    used_mem = int(match.group(2))

    # Guard against a zero total so a bogus reading cannot raise
    # ZeroDivisionError in the middle of a test.
    if total_mem == 0:
        logging.error("Total memory is reported as 0.")
        return 0

    # Integer arithmetic avoids float truncation errors
    # (e.g. int(29 / 100 * 100) evaluates to 28, not 29).
    return used_mem * 100 // total_mem


def parse_system_health_summary(output_health_summary):
    """
    Parse the show system health summary cli output.
    Every "<name>: Status: <value>" entry found in the text must have
    the value "OK" (the original note lists HW, Service and SW status).
    Args:
        output_health_summary : output of show system-health summary
                                on Switch and DPU
    Returns:
        Returns True when at least one status entry is found and all
        of them are "OK"; returns False otherwise, including when no
        status entry could be parsed at all
    """
    # Regex to find all status names and values
    status_data = re.findall(r"(\w+):\s+Status:\s+(\w+)",
                             output_health_summary)

    # No entry at all means the output was empty or not a health summary;
    # that must not be reported as healthy.
    if not status_data:
        logging.error("No status entries found in system health summary")
        return False

    # Inspect every entry (not a name->status dict) so a repeated name
    # with a later "OK" cannot hide an earlier failure.
    not_ok = [name for name, status in status_data if status != "OK"]
    if not_ok:
        logging.error("System health status is not OK for: {}".format(
            ", ".join(not_ok)))
        return False

    return True


def check_dpu_link_and_status(duthost, dpu_on_list,
                              dpu_off_list, ip_address_list):
    """
    Asserts that every DPU is in its intended ON/OFF state
    and that the given DPU addresses answer the ping check
    Args:
        duthost: Host handle
        dpu_on_list: List of dpu names that are On
        dpu_off_list: List of dpu names that are Off
        ip_address_list: List of dpu ip address which
                         are on
    """

    # The wait_until arguments (210, 70, 0) are presumably timeout,
    # polling interval and initial delay - confirm against wait_until.
    for dpu_name in dpu_on_list:
        came_up = wait_until(210, 70, 0,
                             check_dpu_module_status,
                             duthost, "on", dpu_name)
        pytest_assert(came_up, "DPU is not operationally up")

    for dpu_name in dpu_off_list:
        went_down = wait_until(210, 70, 0,
                               check_dpu_module_status,
                               duthost, "off", dpu_name)
        pytest_assert(went_down, "DPU is not operationally down")

    ping_status = check_dpu_ping_status(duthost, ip_address_list)
    pytest_assert(ping_status == 1, "Ping to DPU has failed")


def get_dpu_link_status(duthost, num_dpu_modules,
                        platform_api_conn):  # noqa F811
    """
    Sorts the DPU modules into ON and OFF lists based on their
    current module status.
    Args:
        duthost: Host handle
        num_dpu_modules: Gets number of DPU modules
        platform_api_conn: connection handle passed to the module
                           platform API helpers
    Returns:
        Returns ip_address_list, dpu_on_list and dpu_off_list;
        ip_address_list holds the midplane ip of each DPU that is on
    """

    ip_address_list, dpu_on_list, dpu_off_list = [], [], []

    for dpu_index in range(num_dpu_modules):
        dpu_name = module.get_name(platform_api_conn, dpu_index)
        ip_address = module.get_midplane_ip(platform_api_conn, dpu_index)

        # A DPU that is not reported as on only goes to the off list.
        if not check_dpu_module_status(duthost, "on", dpu_name):
            dpu_off_list.append(dpu_name)
            continue

        dpu_on_list.append(dpu_name)
        ip_address_list.append(ip_address)

    return ip_address_list, dpu_on_list, dpu_off_list
Loading
Loading