Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
Signed-off-by: Charlie Drage <[email protected]>
  • Loading branch information
cdrage committed Nov 23, 2024
1 parent f3d94cb commit edaf54c
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
22 changes: 22 additions & 0 deletions instructlab-nvidia/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# **Description:**
# InstructLab (CUDA) image: initializes the "ilab" configuration, downloads the
# models at build time so they are bundled in the image, and runs /main.sh as
# the entrypoint.

FROM quay.io/rh-aiservices-bu/instructlab-workbench-code-server-cuda:0.21.0

#! TODO: Completely remove /usr/local/cuda/compat from $LD_LIBRARY_PATH, export it then remove /usr/local/cuda/compat
#! (main.sh already strips it from $LD_LIBRARY_PATH at container start; the
#! directory itself is still present in the image.)

#! Required arguments
#! NOTE(review): build args are recorded in the image history, so this token is
#! recoverable from the built image. Consider switching to a build secret
#! (RUN --mount=type=secret) once the callers of this build can be updated.
ARG HUGGINGFACE_API

#! Initialize the "ilab" configuration so it does not complain
RUN ilab config init --non-interactive

#! Download models to the default directory so we can bundle them in the image when training
#! Fail with a clear message when the token was not supplied, and quote it so
#! it always reaches ilab as exactly one argument.
RUN : "${HUGGINGFACE_API:?build with --build-arg HUGGINGFACE_API=<token>}" \
    && ilab model download --hf-token "$HUGGINGFACE_API"

RUN ilab model download -rp instructlab/granite-7b-lab

#! Add the script
#! COPY (not ADD): this is a plain local file, no archive extraction or URL fetch.
#! main.sh must be executable in the build context, because the exec-form
#! ENTRYPOINT below runs it directly.
COPY main.sh /main.sh

#! Entrypoint
ENTRYPOINT ["/main.sh"]
33 changes: 33 additions & 0 deletions instructlab-nvidia/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Container entrypoint: clones the repository named by $GIT_REPO into
# ./workspace, generates synthetic data with ilab, trains on it and converts
# the result. Generated artifacts are written under /output.
#
# Required environment:
#   GIT_REPO  URL of the git repository providing config.yaml and the taxonomy.

# Abort on the first failing step instead of running the later steps against
# missing or partial inputs; treat unset variables as errors.
set -euo pipefail

# GIT_REPO is read from the environment at runtime; fail early if it is missing.
: "${GIT_REPO:?GIT_REPO must be set to the git repository to clone}"

# Completely remove /usr/local/cuda/compat from $LD_LIBRARY_PATH and export it.
# Why? Because we are running this in nvidia-container-toolkit, the drivers can get confused if they see the compat directory
# since it'll be different versions sometimes vs the host.
# The path is wrapped in colons so the entry is matched whole, wherever it sits
# (first, middle, last or only entry); the loop handles adjacent duplicates.
# NOTE: the /usr/local/cuda/compat directory itself is not deleted here.
lib_path=":${LD_LIBRARY_PATH:-}:"
while [[ $lib_path == *":/usr/local/cuda/compat:"* ]]; do
  lib_path=${lib_path//":/usr/local/cuda/compat:"/":"}
done
lib_path=${lib_path#:}
lib_path=${lib_path%:}
export LD_LIBRARY_PATH="$lib_path"

# Make directories
mkdir -p /output/generated_data /output/trained_model

# Git clone $GIT_REPO into the workspace folder (relative to the current
# directory). Skipped when a clone is already present, so a restarted
# container does not abort on "destination path already exists".
if [ ! -d workspace/.git ]; then
  git clone -- "$GIT_REPO" workspace
fi

# Remove any previous configurations
rm -rf ~/.config/instructlab

# Re-initialize the configuration from the cloned repository, replacing the one removed above
ilab config init --config workspace/config.yaml --taxonomy-path workspace --non-interactive

# Run the synthetic data generation command
ilab data generate --taxonomy-base empty --output-dir /output/generated_data

# Find what file starts with train_* and use that (first match only)
TRAIN_FILE=$(find /output/generated_data -type f -name "train_*" -print -quit)
if [ -z "$TRAIN_FILE" ]; then
  echo "No train_* file found in /output/generated_data; cannot train." >&2
  exit 1
fi

# Train
# TODO: Specify the ACTUAL model you want to train with...
# training can be done on any model I believe.
# NOTE(review): --data-path receives the directory and --data-output-dir the
# train_* file; this looks swapped - confirm against the ilab CLI help.
ilab train \
  --data-path /output/generated_data \
  --data-output-dir "$TRAIN_FILE" \
  --model-path ~/.cache/instructlab/models/instructlab/granite-7b-lab \
  --device cuda

# Convert to GGUF?
# NOTE(review): no step above is explicitly told to write to
# /output/trained_model - confirm where "ilab train" places its output.
ilab model convert --model-dir /output/trained_model

0 comments on commit edaf54c

Please sign in to comment.