Skip to content

Commit

Permalink
add al2keplergpu build recipe to build gpu amis for kepler arch
Browse files Browse the repository at this point in the history
  • Loading branch information
prateekchaudhry committed Oct 18, 2023
1 parent f8e9d67 commit ee97bc4
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 15 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ al2arm: check-region init validate release.auto.pkrvars.hcl
al2gpu: check-region init validate release.auto.pkrvars.hcl
./packer build -only="amazon-ebs.al2gpu" -var "region=${REGION}" .

# Build only the "amazon-ebs.al2keplergpu" Packer source in the region given by REGION.
# Runs the check-region, init and validate prerequisites first, like the neighbouring AMI targets.
.PHONY: al2keplergpu
al2keplergpu: check-region init validate release.auto.pkrvars.hcl
./packer build -only="amazon-ebs.al2keplergpu" -var "region=${REGION}" .

.PHONY: al2inf
al2inf: check-region init validate release.auto.pkrvars.hcl
./packer build -only="amazon-ebs.al2inf" -var "region=${REGION}" .
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ It will create a private AMI in whatever account you are running it in.
## Instructions

1. Setup AWS cli credentials.
2. Make the recipe that you want, REGION must be specified. Options are: al1, al2, al2arm, al2gpu, al2inf,
2. Make the recipe that you want, REGION must be specified. Options are: al1, al2, al2arm, al2gpu, al2keplergpu, al2inf,
al2kernel5dot10, al2kernel5dot10arm, al2023, al2023arm, al2023neu.
```
REGION=us-west-2 make al2
```

**NOTE**: `al2keplergpu` is a build recipe for ECS-Optimized GPU AMIs that target instances whose GPUs use the
Kepler architecture (such as P2 instances). ECS-Optimized GPU AMIs for this target are not officially built or published.

## Configuration

This recipe allows for configuration of your AMI. All configuration variables are defined and documented
Expand Down
1 change: 1 addition & 0 deletions al2.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ build {
"source.amazon-ebs.al2",
"source.amazon-ebs.al2arm",
"source.amazon-ebs.al2gpu",
"source.amazon-ebs.al2keplergpu",
"source.amazon-ebs.al2inf",
"source.amazon-ebs.al2kernel5dot10",
"source.amazon-ebs.al2kernel5dot10arm"
Expand Down
33 changes: 33 additions & 0 deletions al2keplergpu.pkr.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Packer source definition for the Amazon Linux 2 ECS GPU AMI variant aimed at
# Kepler-architecture GPUs ("al2keplergpu").

locals {
# AMI name: the AL2 name prefix plus a "kepler-gpu" marker and the AMI version.
ami_name_al2keplergpu = "${var.ami_name_prefix_al2}-kepler-gpu-hvm-2.0.${var.ami_version}-x86_64-ebs"
}

source "amazon-ebs" "al2keplergpu" {
ami_name = "${local.ami_name_al2keplergpu}"
ami_description = "Amazon Linux AMI 2.0.${var.ami_version} x86_64 ECS HVM GP2"
# Build on the first entry of the GPU instance type list.
# NOTE(review): the contents of var.gpu_instance_types are not visible here — confirm
# the first entry is suitable for building the Kepler variant.
instance_type = var.gpu_instance_types[0]
# Volume attached at /dev/xvda: gp2, sized by var.block_device_size_gb, deleted on termination.
launch_block_device_mappings {
volume_size = var.block_device_size_gb
delete_on_termination = true
volume_type = "gp2"
device_name = "/dev/xvda"
}
region = var.region
# Base image: the most recent Amazon-owned AMI whose name matches var.source_ami_al2.
source_ami_filter {
filters = {
name = "${var.source_ami_al2}"
}
owners = ["amazon"]
most_recent = true
}
# Connect to the build instance over its public IP as ec2-user.
ssh_interface = "public_ip"
ssh_username = "ec2-user"
# Tags recording the source image, Docker/ECS agent versions and AMI type/version.
tags = {
os_version = "Amazon Linux 2"
source_image_name = "{{ .SourceAMIName }}"
ecs_runtime_version = "Docker version ${var.docker_version}"
ecs_agent_version = "${var.ecs_agent_version}"
# ami_type value "al2keplergpu" is the same string the GPU provisioning script compares AMI_TYPE against.
ami_type = "al2keplergpu"
ami_version = "2.0.${var.ami_version}"
}
}
48 changes: 34 additions & 14 deletions scripts/enable-ecs-agent-gpu-support.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -ex

if [[ $AMI_TYPE != "al2gpu" ]]; then
if [[ $AMI_TYPE != "al2gpu" && $AMI_TYPE != "al2keplergpu" ]]; then
exit 0
fi

Expand All @@ -28,21 +28,41 @@ sudo mv $tmpfile /etc/yum.repos.d/amzn2-nvidia-tmp.repo
sudo yum install -y system-release-nvidia
sudo rm /etc/yum.repos.d/amzn2-nvidia-tmp.repo

sudo yum install -y kernel-devel-$(uname -r) \
system-release-nvidia \
nvidia-driver-latest-dkms \
nvidia-fabric-manager \
pciutils \
xorg-x11-server-Xorg \
docker-runtime-nvidia \
oci-add-hooks \
libnvidia-container \
libnvidia-container-tools \
nvidia-container-runtime-hook
# Install the NVIDIA driver, container runtime and CUDA packages for the GPU AMI.
#
# Kepler builds (AMI_TYPE == "al2keplergpu") pin package versions because newer
# NVIDIA drivers do not support the Kepler architecture. They also add a yum
# exclude rule so that nvidia and cuda packages are not updated afterwards.
# TODO: The package versions are fixed for Kepler. They have to be updated
# manually when there is a minor version update in the AL repo.
if [[ $AMI_TYPE == "al2keplergpu" ]]; then
# Kepler GPU AMI: same package set as the default branch, with explicit versions
# on the driver, fabric manager, pciutils and container packages.
sudo yum install -y kernel-devel-$(uname -r) \
system-release-nvidia \
nvidia-driver-latest-dkms-470.182.03 \
nvidia-fabric-manager-470.182.03-1 \
pciutils-3.5.1-2.amzn2 \
xorg-x11-server-Xorg \
docker-runtime-nvidia-1 \
oci-add-hooks \
libnvidia-container-1.4.0 \
libnvidia-container-tools-1.4.0 \
nvidia-container-runtime-hook-1.4.0

# NOTE(review): cuda-drivers and cuda are installed here without a version pin,
# unlike the packages above — confirm these two lines belong in the Kepler branch.
sudo yum install -y cuda-drivers \
cuda
# Install the CUDA 11.4 toolkit package by its versioned name.
sudo yum install -y cuda-toolkit-11-4
# Append an exclude rule to /etc/yum.conf so yum skips packages matching *nvidia* or *cuda*.
echo "exclude=*nvidia* *cuda*" | sudo tee -a /etc/yum.conf
else
# Default GPU AMI: install the same package set without version pins.
sudo yum install -y kernel-devel-$(uname -r) \
system-release-nvidia \
nvidia-driver-latest-dkms \
nvidia-fabric-manager \
pciutils \
xorg-x11-server-Xorg \
docker-runtime-nvidia \
oci-add-hooks \
libnvidia-container \
libnvidia-container-tools \
nvidia-container-runtime-hook

# Unpinned CUDA driver and toolkit meta-packages.
sudo yum install -y cuda-drivers \
cuda
fi
# The Fabric Manager service needs to be started and enabled on EC2 P4d instances
# in order to configure NVLinks and NVSwitches
sudo systemctl enable nvidia-fabricmanager
Expand Down

0 comments on commit ee97bc4

Please sign in to comment.