From e0344215d35aed8f09ddc17806096af1e92e28a6 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Tue, 25 Jun 2024 06:40:54 -0700 Subject: [PATCH] Configure multi-nics on Amazon Linux 2023 This configuration only happens when the instance has more than one network interface attached. This configuration guarantees two things: 1. Traffic from each network interface looks up its own route table to ensure the responding traffic goes through the same network interface. 2. In the default route table, the primary network interface has the highest priority. Therefore, traffic initiated from the instance uses the primary interface. This is useful to guarantee Elastic IP access, because the Elastic IP is attached to the primary interface by ParallelCluster logic. The configuration files are written in `/etc/systemd/network`, where they override the configurations from ec2-net-utils in `/run/systemd/network`. This commit also adds retries to a resource that failed sporadically while testing this commit. Signed-off-by: Hanwen --- .../configure_nw_interface.sh | 61 +++++++++++++++++++ .../recipes/config/network_interfaces.rb | 6 +- .../network_service_alinux2023.rb | 20 +++++- .../recipes/init/init_dns.rb | 2 + 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh diff --git a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh new file mode 100644 index 000000000..a9cfdf103 --- /dev/null +++ b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +set -ex + +if + [ -z "${DEVICE_NAME}" ] || # name of the device + [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_IP_ADDRESS}" ] || # ip of the device + [ -z "${MAC}" ] || # mac
address of the device + [ -z "${CIDR_BLOCK}" ] # CIDR block of the subnet +then + echo 'One or more environment variables missing' + exit 1 +fi +echo "Configuring NIC, Device name: ${DEVICE_NAME}, Device number: ${DEVICE_NUMBER}" + +configuration_directory="/etc/systemd/network" +file_name="70-${DEVICE_NAME}.network" +sub_directory="${configuration_directory}/${file_name}.d" +if [ ! -d "$sub_directory" ]; then + mkdir -p "$sub_directory"; +fi + +cd "$configuration_directory" + +ROUTE_TABLE=100${DEVICE_NUMBER} + +ln -s /usr/lib/systemd/network/80-ec2.network ${file_name} # Use default EC2 configuration. This includes MTU, etc. + +/bin/cat <<EOF > ${sub_directory}/eni.conf +# Configuration for ${DEVICE_NUMBER} generated by ParallelCluster +# This is inspired by https://github.com/amazonlinux/amazon-ec2-net-utils/blob/v2.4.1/lib/lib.sh +[Match] +MACAddress=${MAC} +[Network] +DHCP=yes + +[DHCPv4] +RouteMetric=$ROUTE_TABLE +UseRoutes=true +UseGateway=true + +[IPv6AcceptRA] +RouteMetric=$ROUTE_TABLE +UseGateway=true + +[Route] +Table=$ROUTE_TABLE +Gateway=_ipv6ra + +[Route] +Gateway=_dhcp4 +Table=$ROUTE_TABLE +[Route] +Table=$ROUTE_TABLE +Destination=$CIDR_BLOCK +[RoutingPolicyRule] +From=${DEVICE_IP_ADDRESS} +Priority=$ROUTE_TABLE +Table=$ROUTE_TABLE +EOF \ No newline at end of file diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb index 7293a606d..ef5898cb9 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb @@ -13,7 +13,7 @@ # limitations under the License. # amazon-ec2-net-utils is pre-installed in AL2023 and handles multi-nics instances properly -return if on_docker? || platform?('amazon') && node['platform_version'].to_i == 2023 +return if on_docker?
def network_card_index(mac, token) uri = URI("http://169.254.169.254/latest/meta-data/network/interfaces/macs/#{mac}/network-card") @@ -82,13 +82,15 @@ def cidr_to_netmask(cidr) group 'root' cwd "/tmp" environment( + # TODO: The variables are a superset of what's required by individual scripts. Consider simplification. 'DEVICE_NAME' => device_name, 'DEVICE_NUMBER' => "#{network_card_index}", # in configure_nw_interface DEVICE_NUMBER actually means network card index 'GW_IP_ADDRESS' => gw_ip_address, 'DEVICE_IP_ADDRESS' => device_ip_address, 'CIDR_PREFIX_LENGTH' => cidr_prefix_length, 'NETMASK' => netmask, - 'CIDR_BLOCK' => cidr_block + 'CIDR_BLOCK' => cidr_block, + 'MAC' => mac ) command 'sh /tmp/configure_nw_interface.sh' diff --git a/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb b/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb index 45cb06a60..56b0803f3 100644 --- a/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb +++ b/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb @@ -17,7 +17,25 @@ end use 'partial/_network_service' +use 'partial/_network_service_redhat_based' def network_service_name - 'systemd-resolved' + 'systemd-networkd' +end + +action :restart do + log "Restarting 'systemd-networkd systemd-resolved' service, platform #{node['platform']} '#{node['platform_version']}'" + + execute "Reload system configuration files before restarting services" do + command "systemctl daemon-reload" + end + + %w(systemd-networkd systemd-resolved).each do |service_name| + # Restart systemd-networkd to load configuration about NICs. + # Restart systemd-resolved to load configuration about DNS. 
+ service service_name do + action :restart + ignore_failure true + end + end end diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb b/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb index 4eaad5e25..3a5cdede7 100644 --- a/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb +++ b/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb @@ -90,4 +90,6 @@ path "/etc/hosts" line(lazy { "#{get_primary_ip} #{node['cluster']['assigned_hostname'].chomp('.')} #{node['cluster']['assigned_short_hostname']}" }) notifies :reload, "ohai[reload_hostname]" + retries 20 + retry_delay 5 end