diff --git a/ic-os/components/setupos-scripts/check-hardware.sh b/ic-os/components/setupos-scripts/check-hardware.sh
index e413647575e..0655e220c2a 100644
--- a/ic-os/components/setupos-scripts/check-hardware.sh
+++ b/ic-os/components/setupos-scripts/check-hardware.sh
@@ -249,6 +249,48 @@ function verify_disks() {
     fi
 }
 
+###############################################################################
+# Drive Health Verification
+###############################################################################
+
+# Runs `smartctl -H` on every drive returned by get_large_drives and prints a
+# warning for each drive whose health cannot be confirmed. Nothing here aborts
+# the installation; if any warning was printed, the function sleeps for 300
+# seconds before returning.
+function verify_drive_health() {
+    echo "* Verifying drive health..."
+
+    local drives=($(get_large_drives))
+    local warning_triggered=0
+    local drive smartctl_output smartctl_status
+
+    for drive in "${drives[@]}"; do
+        echo "* Checking drive /dev/${drive} health..."
+        # smartctl's exit status is a bitmask, so capture it instead of
+        # treating every non-zero value as "could not run".
+        smartctl_status=0
+        smartctl_output=$(smartctl -H "/dev/${drive}" 2>&1) || smartctl_status=$?
+        if ((smartctl_status & 7)); then
+            # Bits 0-2: bad command line, device open failed, or a SMART
+            # command to the device failed.
+            echo -e "\033[1;31mWARNING: Failed to run smartctl on /dev/${drive}.\033[0m"
+            warning_triggered=1
+        elif ((smartctl_status != 0)) || ! grep -Eqi 'PASSED|SMART Health Status: OK' <<<"${smartctl_output}"; then
+            # Remaining bits flag a failing or degraded drive. ATA and NVMe
+            # report "PASSED"; SCSI/SAS report "SMART Health Status: OK".
+            echo -e "\033[1;31mWARNING: Drive /dev/${drive} did not pass the SMART health check.\033[0m"
+            warning_triggered=1
+        else
+            echo "Drive /dev/${drive} health is OK."
+        fi
+    done
+
+    if [ "${warning_triggered}" -eq 1 ]; then
+        echo "Pausing for 5 minutes before continuing installation..."
+        sleep 300
+    fi
+}
+
 ###############################################################################
 # Deployment Path Verification
 ###############################################################################
@@ -279,6 +321,7 @@ main() {
         verify_cpu
         verify_memory
         verify_disks
+        verify_drive_health
         verify_deployment_path
     else
         echo "* Hardware checks skipped by request via kernel command line"
diff --git a/ic-os/components/setupos-scripts/setup-disk.sh b/ic-os/components/setupos-scripts/setup-disk.sh index 7f55b0523e2..fb51517af0a 100755 --- a/ic-os/components/setupos-scripts/setup-disk.sh +++ b/ic-os/components/setupos-scripts/setup-disk.sh @@ -11,29 +11,32 @@ source /opt/ic/bin/functions.sh function purge_partitions() { echo "* Purging partitions..." - # Destroy guest partitions + # Destroy guest partitions (for redeployments) vgscan --mknodes loop_device=$(losetup -P -f /dev/mapper/hostlvm-guestos --show) - if [ "${loop_device}" != "" ]; then + echo "Loop device detected: ${loop_device}. Wiping partitions." wipefs --all --force "${loop_device}"* if [ "${?}" -ne 0 ]; then - echo "Unable to purge GuestOS partitions" + echo "WARNING: Unable to purge GuestOS partitions on ${loop_device}" fi losetup -d "${loop_device}" + else + echo "Unable to detect GuestOS loop device (may not exist)" fi - # Destroy host partitions + # Destroy host partitions (for redeployments) wipefs --all --force "/dev/mapper/hostlvm"* if [ "${?}" -ne 0 ]; then - echo "Unable to purge HostOS partitions" + echo "Unable to purge HostOS partitions (may not exist)" fi vgremove --force hostlvm # Destroy master boot record and partition table large_drives=($(get_large_drives)) + for drive in "${large_drives[@]}"; do + echo "Wiping partitions on drive: /dev/${drive}." 
- for drive in $(echo ${large_drives[@]}); do wipefs --all --force "/dev/${drive}"* if [ "${?}" -ne 0 ]; then echo "Unable to purge partitions on drive: /dev/${drive}" @@ -42,11 +45,12 @@ function purge_partitions() { } function setup_storage() { - system_drive=$(find_first_drive) + echo "Starting storage setup..." + system_drive=$(find_first_drive) # Create PVs on each additional drive large_drives=($(get_large_drives)) - for drive in $(echo ${large_drives[@]}); do + for drive in "${large_drives[@]}"; do # Avoid creating PV on system drive if [ "/dev/${drive}" == "/dev/${system_drive}" ]; then continue @@ -55,8 +59,10 @@ function setup_storage() { test -b "/dev/${drive}" log_and_halt_installation_on_error "${?}" "Drive '/dev/${drive}' not found. Are all drives correctly installed?" + echo "Creating physical volume on /dev/${drive}." pvcreate "/dev/${drive}" log_and_halt_installation_on_error "${?}" "Unable to setup PV on drive '/dev/${drive}'." + echo "Physical volume created on /dev/${drive}." done } diff --git a/ic-os/setupos/context/packages.common b/ic-os/setupos/context/packages.common index aa5658d6604..3757682fa66 100644 --- a/ic-os/setupos/context/packages.common +++ b/ic-os/setupos/context/packages.common @@ -36,6 +36,7 @@ lvm2 net-tools parted python-is-python3 +smartmontools sudo udev usbutils