From d3b60eae5b9fdc691e59c782c8b87947f22a8065 Mon Sep 17 00:00:00 2001
From: Hanwen
Date: Tue, 25 Jun 2024 06:40:54 -0700
Subject: [PATCH] Configure multi-nic

Signed-off-by: Hanwen
---
 .../configure_nw_interface.sh                 | 71 +++++++++++++++++++
 .../recipes/config/network_interfaces.rb      |  7 +-
 .../network_service_alinux2023.rb             | 18 +++++-
 .../recipes/init/init_dns.rb                  |  2 +
 4 files changed, 94 insertions(+), 4 deletions(-)
 create mode 100644 cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh

diff --git a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh
new file mode 100644
index 000000000..a53596877
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+
+# Configure a secondary network interface for systemd-networkd on Amazon Linux 2023.
+# The interface reuses the stock 80-ec2.network unit and adds a drop-in that sets up
+# a dedicated routing table and a source-based routing policy rule for the device.
+#
+# Required environment variables: DEVICE_NAME, DEVICE_NUMBER, DEVICE_IP_ADDRESS, MAC, CIDR_BLOCK.
+
+set -ex
+
+if
+  [ -z "${DEVICE_NAME}" ] ||          # name of the device
+  [ -z "${DEVICE_NUMBER}" ] ||        # number of the device
+  [ -z "${DEVICE_IP_ADDRESS}" ] ||    # ip of the device
+  [ -z "${MAC}" ] ||                  # mac address of the device
+  [ -z "${CIDR_BLOCK}" ]              # CIDR block of the subnet
+then
+  echo 'One or more environment variables missing'
+  exit 1
+fi
+echo "Device name: ${DEVICE_NAME}, Device number: ${DEVICE_NUMBER}"
+
+configuration_directory="/etc/systemd/network"
+file_name="70-${DEVICE_NAME}.network"
+sub_directory="${configuration_directory}/${file_name}.d"
+if [ ! -d "$sub_directory" ]; then
+  mkdir -p "$sub_directory";
+fi
+
+cd "$configuration_directory"
+
+# Routing table id (also used as route metric and rule priority) for this device.
+ROUTE_TABLE=200${DEVICE_NUMBER}
+
+# -f keeps the script re-runnable: a plain "ln -s" fails under "set -e" when the link already exists.
+ln -sf /usr/lib/systemd/network/80-ec2.network "${file_name}"
+
+# Write the drop-in only if no drop-in for this device already defines a routing policy rule.
+if ! grep -qs "RoutingPolicyRule" "${sub_directory}"/*.conf; then
+/bin/cat <<EOF > "${sub_directory}/eni.conf"
+# Configuration for ${DEVICE_NUMBER} generated by ParallelCluster
+[Match]
+MACAddress=${MAC}
+[Network]
+DHCP=yes
+
+[DHCPv4]
+RouteMetric=$ROUTE_TABLE
+UseRoutes=true
+UseGateway=true
+
+[IPv6AcceptRA]
+RouteMetric=$ROUTE_TABLE
+UseGateway=true
+
+[Route]
+Table=$ROUTE_TABLE
+Gateway=_ipv6ra
+
+[Route]
+Gateway=_dhcp4
+Table=$ROUTE_TABLE
+[Route]
+Table=$ROUTE_TABLE
+Destination=$CIDR_BLOCK
+[RoutingPolicyRule]
+From=${DEVICE_IP_ADDRESS}
+Priority=$ROUTE_TABLE
+Table=$ROUTE_TABLE
+EOF
+fi
diff --git a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb
index 7293a606d..fa308c3f5 100644
--- a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb
+++ b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb
@@ -13,7 +13,7 @@
 # limitations under the License.
 
-# amazon-ec2-net-utils is pre-installed in AL2023 and handles multi-nics instances properly
-return if on_docker? || platform?('amazon') && node['platform_version'].to_i == 2023
+# Network interfaces are not configured when running inside a Docker container
+return if on_docker?
 
 def network_card_index(mac, token)
   uri = URI("http://169.254.169.254/latest/meta-data/network/interfaces/macs/#{mac}/network-card")
@@ -88,7 +88,8 @@ def cidr_to_netmask(cidr)
         'DEVICE_IP_ADDRESS' => device_ip_address,
         'CIDR_PREFIX_LENGTH' => cidr_prefix_length,
         'NETMASK' => netmask,
-        'CIDR_BLOCK' => cidr_block
+        'CIDR_BLOCK' => cidr_block,
+        'MAC' => mac
       )
 
       command 'sh /tmp/configure_nw_interface.sh'
diff --git a/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb b/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb
index 45cb06a60..c15d78b0e 100644
--- a/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb
+++ b/cookbooks/aws-parallelcluster-environment/resources/network_service/network_service_alinux2023.rb
@@ -17,7 +17,23 @@ end
 
 
 use 'partial/_network_service'
+use 'partial/_network_service_redhat_based'
 
 def network_service_name
-  'systemd-resolved'
+  'systemd-networkd'
+end
+
+action :restart do
+  log "Restarting 'systemd-networkd systemd-resolved' service, platform #{node['platform']} '#{node['platform_version']}'"
+
+  execute "Reload system configuration" do
+    command "systemctl daemon-reload"
+  end
+
+  %w(systemd-networkd systemd-resolved).each do |service_name|
+    service service_name do
+      action :restart
+      ignore_failure true
+    end
+  end
 end
diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb b/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb
index 4eaad5e25..3a5cdede7 100644
--- a/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb
+++ b/cookbooks/aws-parallelcluster-slurm/recipes/init/init_dns.rb
@@ -90,4 +90,6 @@
   path "/etc/hosts"
   line(lazy { "#{get_primary_ip} #{node['cluster']['assigned_hostname'].chomp('.')} #{node['cluster']['assigned_short_hostname']}" })
   notifies :reload, "ohai[reload_hostname]"
+  retries 20
+  retry_delay 5
 end