update

Signed-off-by: Charlie Drage <[email protected]>
cdrage · Nov 23, 2024 · edaf54c · edaf54c
1 parent f3d94cb
commit edaf54c
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 0 deletions.
diff --git a/instructlab-nvidia/Containerfile b/instructlab-nvidia/Containerfile
@@ -0,0 +1,22 @@
+# **Description:** InstructLab CUDA workbench image with models pre-downloaded and /main.sh as the entrypoint.
+
+FROM quay.io/rh-aiservices-bu/instructlab-workbench-code-server-cuda:0.21.0
+
+#! TODO: /usr/local/cuda/compat is only stripped from $LD_LIBRARY_PATH at runtime by main.sh; also remove the directory itself
+
+#! Required arguments. NOTE(review): ARG values are recorded in image history; consider a build secret mount for this token
+ARG HUGGINGFACE_API
+
+#! Initialize the "ilab" configuration so it does not complain
+RUN ilab config init --non-interactive
+
+#! Download models to the default directory so we can bundle them in the image when training (fails fast without a token)
+RUN ilab model download --hf-token "${HUGGINGFACE_API:?HUGGINGFACE_API build argument is required}"
+
+RUN ilab model download -rp instructlab/granite-7b-lab
+
+#! Add the script (COPY rather than ADD: it is a plain local file, no archive extraction or URL fetch is wanted)
+COPY main.sh /main.sh
+
+#! Entrypoint
+ENTRYPOINT ["/main.sh"]
diff --git a/instructlab-nvidia/main.sh b/instructlab-nvidia/main.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Completely remove /usr/local/cuda/compat from $LD_LIBRARY_PATH, export it then remove /usr/local/cuda/compat
+# Why? Because we are running this in nvidia-container-toolkit, the drivers can get confused if they see the compat directory
+# since it'll be different versions sometimes vs the host.
+export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's|/usr/local/cuda/compat:||g')
+
+# Make directories
+mkdir -p /output/generated_data
+mkdir -p /output/trained_model
+
+# Git clone from ARG in container to /workspace folder
+git clone $GIT_REPO workspace
+
+# Remove any previous configurations
+rm -rf ~/.config/instructlab
+
+# Copy the config file over / overriding the current one
+ilab config init --config workspace/config.yaml --taxonomy-path workspace --non-interactive
+
+# Run the synthetic data generation command
+ilab data generate --taxonomy-base empty --output-dir /output/generated_data
+
+# Find what file starts with train_* and use that
+TRAIN_FILE=$(find /output/generated_data -type f -name "train_*" | head -n 1)
+
+# Train
+# TODO: Specify the ACTUAL model you want to train with...
+# training can be done on any model I believe.
+ilab train --data-path /output/generated_data --data-output-dir $(echo $TRAIN_FILE) --model-path ~/.cache/instructlab/models/instructlab/granite-7b-lab --device cuda
+
+# Convert to GGUF?
+ilab model convert --model-dir /output/trained_model