diff --git a/Makefile b/Makefile index 59a60ba..29f63a7 100644 --- a/Makefile +++ b/Makefile @@ -93,6 +93,10 @@ al2kernel5dot10inf: check-region init validate release-al2.auto.pkrvars.hcl al2023: check-region init validate release-al2023.auto.pkrvars.hcl ./packer build -only="amazon-ebs.al2023" -var "region=${REGION}" . +.PHONY: al2023gpu +al2023gpu: check-region init validate release-al2023.auto.pkrvars.hcl + ./packer build -only="amazon-ebs.al2023gpu" -var "region=${REGION}" . + .PHONY: al2023arm al2023arm: check-region init validate release-al2023.auto.pkrvars.hcl ./packer build -only="amazon-ebs.al2023arm" -var "region=${REGION}" . diff --git a/al2023.pkr.hcl b/al2023.pkr.hcl index b4041de..8ad2967 100644 --- a/al2023.pkr.hcl +++ b/al2023.pkr.hcl @@ -42,6 +42,7 @@ source "amazon-ebs" "al2023" { build { sources = [ "source.amazon-ebs.al2023", + "source.amazon-ebs.al2023gpu", "source.amazon-ebs.al2023arm", "source.amazon-ebs.al2023neu" ] @@ -172,6 +173,13 @@ build { script = "scripts/enable-ecs-agent-inferentia-support.sh" } + provisioner "shell" { + environment_vars = [ + "AMI_TYPE=${source.name}" + ] + script = "scripts/enable-ecs-agent-gpu-support.sh" + } + provisioner "shell" { inline_shebang = "/bin/sh -ex" inline = [ diff --git a/al2023gpu.pkr.hcl b/al2023gpu.pkr.hcl new file mode 100644 index 0000000..64c5f52 --- /dev/null +++ b/al2023gpu.pkr.hcl @@ -0,0 +1,40 @@ +locals { + ami_name_al2023gpu = "${var.ami_name_prefix_al2023}-gpu-hvm-2023.0.${var.ami_version_al2023}${var.kernel_version_al2023}-x86_64" + default_tags = { + os_version = "Amazon Linux 2023" + source_image_name = "{{ .SourceAMIName }}" + ecs_runtime_version = "Docker version ${var.docker_version_al2023}" + ecs_agent_version = "${var.ecs_agent_version}" + ami_type = "al2023gpu" + ami_version = "2023.0.${var.ami_version_al2023}" + } + merged_tags = merge("${local.default_tags}", "${var.tags}") +} + +source "amazon-ebs" "al2023gpu" { + ami_name = "${local.ami_name_al2023gpu}" + ami_description = "Amazon Linux AMI 2023.0.${var.ami_version_al2023} x86_64 ECS GPU HVM EBS" + instance_type = var.gpu_instance_types[0] + launch_block_device_mappings { + volume_size = var.block_device_size_gb + delete_on_termination = true + volume_type = "gp3" + device_name = "/dev/xvda" + } + region = var.region + source_ami_filter { + filters = { + name = "${var.source_ami_al2023}" + } + owners = ["amazon"] + most_recent = true + include_deprecated = true + } + ami_ou_arns = "${var.ami_ou_arns}" + ami_org_arns = "${var.ami_org_arns}" + ami_users = "${var.ami_users}" + ssh_interface = "public_ip" + ssh_username = "ec2-user" + tags = "${local.merged_tags}" + run_tags = "${var.run_tags}" +} diff --git a/scripts/enable-ecs-agent-gpu-support.sh b/scripts/enable-ecs-agent-gpu-support.sh index 9677eb8..0330f65 100644 --- a/scripts/enable-ecs-agent-gpu-support.sh +++ b/scripts/enable-ecs-agent-gpu-support.sh @@ -15,14 +15,34 @@ if [[ $AMI_TYPE != "al2"*"gpu" ]]; then exit 0 fi -# set up amzn2-nvidia repo -GPG_CHECK=1 -# don't do the gpg check in air-gapped regions -if [ -n "$AIR_GAPPED" ]; then - GPG_CHECK=0 -fi -tmpfile=$(mktemp) -cat >$tmpfile <$tmpfile <<"EOF" +[nvidia-container-toolkit] +name=nvidia-container-toolkit +baseurl=https://nvidia.github.io/libnvidia-container/stable/rpm/$basearch +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +gpgkey=https://nvidia.github.io/libnvidia-container/gpgkey +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +EOF + sudo mv $tmpfile "/etc/yum.repos.d/nvidia-container-toolkit.repo" + + # https://github.com/aws/amazon-ecs-ami/issues/319#issuecomment-2471834667 + sudo dnf install -y nvidia-release + sudo dnf clean all +else + # set up amzn2-nvidia repo + GPG_CHECK=1 + # don't do the gpg check in air-gapped regions + if [ -n "$AIR_GAPPED" ]; then + GPG_CHECK=0 + fi + tmpfile=$(mktemp) + cat >$tmpfile <