From 7f2f9e5e0c6cd57c53f5a08882203bc4c1cfec50 Mon Sep 17 00:00:00 2001 From: Prateek Chaudhry Date: Tue, 17 Oct 2023 01:19:24 +0000 Subject: [PATCH] add al2keplergpu build recipe to build gpu amis for kepler arch --- Makefile | 4 +++ README.md | 5 ++- al2.pkr.hcl | 1 + al2keplergpu.pkr.hcl | 33 +++++++++++++++++ scripts/enable-ecs-agent-gpu-support.sh | 48 +++++++++++++++++-------- 5 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 al2keplergpu.pkr.hcl diff --git a/Makefile b/Makefile index 6284adf..a43977a 100644 --- a/Makefile +++ b/Makefile @@ -57,6 +57,10 @@ al2arm: check-region init validate release.auto.pkrvars.hcl al2gpu: check-region init validate release.auto.pkrvars.hcl ./packer build -only="amazon-ebs.al2gpu" -var "region=${REGION}" . +.PHONY: al2keplergpu +al2keplergpu: check-region init validate release.auto.pkrvars.hcl + ./packer build -only="amazon-ebs.al2keplergpu" -var "region=${REGION}" . + .PHONY: al2inf al2inf: check-region init validate release.auto.pkrvars.hcl ./packer build -only="amazon-ebs.al2inf" -var "region=${REGION}" . diff --git a/README.md b/README.md index 61cdcf2..2cbf2a1 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,15 @@ It will create a private AMI in whatever account you are running it in. ## Instructions 1. Setup AWS cli credentials. -2. Make the recipe that you want, REGION must be specified. Options are: al1, al2, al2arm, al2gpu, al2inf, +2. Make the recipe that you want, REGION must be specified. Options are: al1, al2, al2arm, al2gpu, al2keplergpu, al2inf, al2kernel5dot10, al2kernel5dot10arm, al2023, al2023arm, al2023neu. ``` REGION=us-west-2 make al2 ``` +**NOTE**: `al2keplergpu` is a build recipe that this package supports for building ECS-Optimized GPU AMIs for instances whose GPUs +use the Kepler architecture (such as P2 instances). ECS-Optimized GPU AMIs for this target are not officially built or published. + ## Configuration This recipe allows for configuration of your AMI. 
All configuration variables are defined and documented diff --git a/al2.pkr.hcl b/al2.pkr.hcl index 5c47e8b..225661d 100644 --- a/al2.pkr.hcl +++ b/al2.pkr.hcl @@ -37,6 +37,7 @@ build { "source.amazon-ebs.al2", "source.amazon-ebs.al2arm", "source.amazon-ebs.al2gpu", + "source.amazon-ebs.al2keplergpu", "source.amazon-ebs.al2inf", "source.amazon-ebs.al2kernel5dot10", "source.amazon-ebs.al2kernel5dot10arm" diff --git a/al2keplergpu.pkr.hcl b/al2keplergpu.pkr.hcl new file mode 100644 index 0000000..84dcdc8 --- /dev/null +++ b/al2keplergpu.pkr.hcl @@ -0,0 +1,33 @@ +locals { + ami_name_al2keplergpu = "${var.ami_name_prefix_al2}-kepler-gpu-hvm-2.0.${var.ami_version}-x86_64-ebs" +} + +source "amazon-ebs" "al2keplergpu" { + ami_name = "${local.ami_name_al2keplergpu}" + ami_description = "Amazon Linux AMI 2.0.${var.ami_version} x86_64 ECS HVM GP2" + instance_type = var.gpu_instance_types[0] + launch_block_device_mappings { + volume_size = var.block_device_size_gb + delete_on_termination = true + volume_type = "gp2" + device_name = "/dev/xvda" + } + region = var.region + source_ami_filter { + filters = { + name = "${var.source_ami_al2}" + } + owners = ["amazon"] + most_recent = true + } + ssh_interface = "public_ip" + ssh_username = "ec2-user" + tags = { + os_version = "Amazon Linux 2" + source_image_name = "{{ .SourceAMIName }}" + ecs_runtime_version = "Docker version ${var.docker_version}" + ecs_agent_version = "${var.ecs_agent_version}" + ami_type = "al2keplergpu" + ami_version = "2.0.${var.ami_version}" + } +} diff --git a/scripts/enable-ecs-agent-gpu-support.sh b/scripts/enable-ecs-agent-gpu-support.sh index df274c8..b1c622f 100644 --- a/scripts/enable-ecs-agent-gpu-support.sh +++ b/scripts/enable-ecs-agent-gpu-support.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -ex -if [[ $AMI_TYPE != "al2gpu" ]]; then +if [[ $AMI_TYPE != "al2gpu" && $AMI_TYPE != "al2keplergpu" ]]; then exit 0 fi @@ -28,21 +28,41 @@ sudo mv $tmpfile /etc/yum.repos.d/amzn2-nvidia-tmp.repo sudo yum install 
-y system-release-nvidia sudo rm /etc/yum.repos.d/amzn2-nvidia-tmp.repo -sudo yum install -y kernel-devel-$(uname -r) \ - system-release-nvidia \ - nvidia-driver-latest-dkms \ - nvidia-fabric-manager \ - pciutils \ - xorg-x11-server-Xorg \ - docker-runtime-nvidia \ - oci-add-hooks \ - libnvidia-container \ - libnvidia-container-tools \ - nvidia-container-runtime-hook +# When building AMIs for GPUs with the Kepler architecture, pin the package versions. +# Also exclude nvidia and cuda packages from future yum updates, because newer NVIDIA drivers do not support the Kepler architecture. +# TODO: The package versions are pinned for Kepler and must be updated manually whenever there is a minor version update in the AL repo. +if [[ $AMI_TYPE == "al2keplergpu" ]]; then + sudo yum install -y kernel-devel-$(uname -r) \ + system-release-nvidia \ + nvidia-driver-latest-dkms-470.182.03 \ + nvidia-fabric-manager-470.182.03-1 \ + pciutils-3.5.1-2.amzn2 \ + xorg-x11-server-Xorg \ + docker-runtime-nvidia-1 \ + oci-add-hooks \ + libnvidia-container-1.4.0 \ + libnvidia-container-tools-1.4.0 \ + nvidia-container-runtime-hook-1.4.0 -sudo yum install -y cuda-drivers \ - cuda + sudo yum install -y cuda-toolkit-11-4 + echo "exclude=*nvidia* *cuda*" | sudo tee -a /etc/yum.conf +else + # Default GPU AMI + sudo yum install -y kernel-devel-$(uname -r) \ + system-release-nvidia \ + nvidia-driver-latest-dkms \ + nvidia-fabric-manager \ + pciutils \ + xorg-x11-server-Xorg \ + docker-runtime-nvidia \ + oci-add-hooks \ + libnvidia-container \ + libnvidia-container-tools \ + nvidia-container-runtime-hook + sudo yum install -y cuda-drivers \ + cuda +fi # The Fabric Manager service needs to be started and enabled on EC2 P4d instances # in order to configure NVLinks and NVSwitches sudo systemctl enable nvidia-fabricmanager