Skip to content

Commit

Permalink
Merge pull request #297 from vishnoianil/fix-ansible-sdg
Browse files Browse the repository at this point in the history
SDG needs certs, tls-insecure is only required for precheck endpoint
  • Loading branch information
nerdalert authored Apr 23, 2024
2 parents 9e3fde5 + 859f56c commit 9f97446
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 34 deletions.
10 changes: 9 additions & 1 deletion deploy/ansible/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,18 @@ This playbook installs all the components in the containers.
ansible-playbook -i inventory.txt -e @secrets.enc --ask-vault-pass deploy-bot-stack.yml
```

## Install or reconfigure the Worker stack
## Install the Worker stack (first time installation)

This playbook installs all the required components on the host itself.

```console
ansible-playbook -i inventory.txt -e @secrets.enc --ask-vault-pass deploy-worker-stack.yml
```

## Reconfigure the existing Worker stack

To reconfigure an existing, running worker stack, run the following command. This reconfigures the worker stack with the latest changes and skips all the steps that are only required for a first-time installation.

```console
ansible-playbook -i inventory.txt -e @secrets.enc --ask-vault-pass deploy-worker-stack.yml --extra-vars "reconfigure_worker=true"
```
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-bot-prereqs.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Deploy Node Prereqs
hosts: labNodes
hosts: botNode
roles:
- role: geerlingguy.docker
become: true
Expand Down
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-gobot.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- name: Deploy Lab Bot (Go version)
hosts: labNodes
hosts: botNode
roles:
- role: gobot
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-grafana.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Deploy Redis
hosts: labNodes
hosts: botNode
roles:
- role: grafana
become: true
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-nexodus.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Deploy Nexodus Agent
hosts: labNodes
hosts: labNodes:botNode
roles:
- role: nexodus
become: true
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-redis-docker.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Deploy Redis in container
hosts: labNodes
hosts: botNode
roles:
- role: redis-docker
become: true
2 changes: 1 addition & 1 deletion deploy/ansible/deploy-redis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Deploy Redis
hosts: labNodes
hosts: botNode
roles:
- role: redis
become: true
25 changes: 13 additions & 12 deletions deploy/ansible/worker/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
ansible.builtin.git:
repo: 'https://{{ github_token }}@github.com/instructlab/instructlab-bot'
dest: "~/instructlab-bot"
version: "main"

- name: Set default args for install-worker
ansible.builtin.set_fact:
Expand Down Expand Up @@ -51,13 +52,6 @@
precheck_endpoint_url != None and
precheck_endpoint_url | trim != ''

- name: Set args for sdg_endpoint_url
ansible.builtin.set_fact:
worker_args: "{{ worker_args }} --sdg-endpoint-url {{ sdg_endpoint_url }}"
when: sdg_endpoint_url is defined and
sdg_endpoint_url != None and
sdg_endpoint_url | trim != ''

- name: Set tls-insecure if enabled
ansible.builtin.set_fact:
worker_args: "{{ worker_args }} --tls-insecure \"true\""
Expand All @@ -70,15 +64,13 @@

- name: Set certs for sdg_endpoint_url if provided
ansible.builtin.set_fact:
worker_args: "{{ worker_args }} --tls-client-cert \"{{ tls_client_cert }}\" \
worker_args: "{{ worker_args }} --sdg-endpoint-url {{ sdg_endpoint_url }} \
--tls-client-cert \"{{ tls_client_cert }}\" \
--tls-client-key \"{{ tls_client_key }}\" \
--tls-server-ca-cert \"{{ tls_server_ca_cert }}\""
when: sdg_endpoint_url is defined and
sdg_endpoint_url != None and
sdg_endpoint_url | trim != '' and
tls_insecure is defined and
tls_insecure != "true" and
tls_insecure | trim != ''
sdg_endpoint_url | trim != ''

- name: Dump the args
ansible.builtin.debug:
Expand All @@ -105,17 +97,26 @@
- name: Reboot the system
ansible.builtin.reboot:
become: true
when: reconfigure_worker is not defined or
reconfigure_worker == "false"


- name: Wait for the system to come back
ansible.builtin.wait_for_connection:
delay: 60
timeout: 500
when: reconfigure_worker is not defined or
reconfigure_worker == "false"

- name: Run the install script again to setup worker and log output # noqa no-changed-when
ansible.builtin.shell: |
cd ~/instructlab-bot/scripts
nohup ./install-worker.sh install {{ worker_args }} > install-worker.log 2>&1 &
when: reconfigure_worker is not defined or
reconfigure_worker == "false"

- name: Pause for 10 minutes to allow the script to complete
ansible.builtin.pause:
minutes: 10
when: reconfigure_worker is not defined or
reconfigure_worker == "false"
33 changes: 18 additions & 15 deletions scripts/install-worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WORK_DIR=${WORK_DIR:-"${HOME}/instructlab-bot"}

PRECHECK_ENDPOINT_URL=${PRECHECK_ENDPOINT_URL:-"http://localhost:8000/v1"}
SDG_ENDPOINT_URL=${SDG_ENDPOINT_URL:-""}
TLS_INSECURE=${TLS_INSECURE:-"true"}
TLS_INSECURE=${TLS_INSECURE:-"false"}

TLS_CLIENT_KEY=${TLS_CLIENT_KEY:-""}
TLS_CLIENT_CERT=${TLS_CLIENT_CERT:-""}
Expand All @@ -25,6 +25,9 @@ TLS_SERVER_CA_CERT=${TLS_SERVER_CA_CERT:-""}
TLS_SECRETS_DIR=${TLS_SECRETS_DIR:-"${HOME}/instructlab-bot"}
TLS_SECRETS_EXISTS=0

readonly TLS_CLIENT_KEY_FILE="client-tls-key.pem2"
readonly TLS_CLIENT_CERT_FILE="client-tls-crt.pem2"
readonly TLS_SERVER_CA_CERT_FILE="server-ca-crt.pem2"
EXTRA_ARGS=""

# Export CUDA environment variables
Expand Down Expand Up @@ -188,12 +191,12 @@ install_nexodus() {
install_tls_secrets() {
if [ -n "${TLS_CLIENT_KEY}" ] && [ -n "${TLS_CLIENT_CERT}" ] && [ -n "${TLS_SERVER_CA_CERT}" ]; then
sudo mkdir -p "${TLS_SECRETS_DIR}"
sudo echo "${TLS_CLIENT_KEY}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/tls-client.key"
sudo echo "${TLS_CLIENT_CERT}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/tls-cert.key"
sudo echo "${TLS_SERVER_CA_CERT}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/tls-server-ca.crt"
sudo echo "${TLS_CLIENT_KEY}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/$TLS_CLIENT_KEY_FILE"
sudo echo "${TLS_CLIENT_CERT}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/$TLS_CLIENT_CERT_FILE"
sudo echo "${TLS_SERVER_CA_CERT}" | sudo install -m 0644 -D /dev/stdin "${TLS_SECRETS_DIR}/$TLS_SERVER_CA_CERT_FILE"
TLS_SECRETS_EXISTS=1
elif
[ -f "${TLS_SECRETS_DIR}/tls-client.key" ] && [ -f "${TLS_SECRETS_DIR}/tls-cert.key" ] && [ -f "${TLS_SECRETS_DIR}/tls-server-ca.crt" ]; then
[ -f "${TLS_SECRETS_DIR}/$TLS_CLIENT_KEY_FILE" ] && [ -f "${TLS_SECRETS_DIR}/$TLS_CLIENT_CERT_FILE" ] && [ -f "${TLS_SECRETS_DIR}/$TLS_SERVER_CA_CERT_FILE" ]; then
echo "TLS secrets already exist in ${TLS_SECRETS_DIR}"
TLS_SECRETS_EXISTS=1
fi
Expand Down Expand Up @@ -234,14 +237,14 @@ EOF

install_lab() {
cd "${WORK_DIR}" || (echo "Failed to change to work directory: ${WORK_DIR}" && exit 1)
if ! command_exists "ilab"; then
sudo pip install "git+https://instructlab-bot:${GITHUB_TOKEN}@github.com/instructlab/instructlab.git@stable"
if [ "${GPU_TYPE}" = "cuda" ]; then
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --force-reinstall --no-cache-dir llama-cpp-python
elif [ -n "${GPU_TYPE}" ]; then
echo "Unsupported GPU_TYPE: ${GPU_TYPE}"
exit 1
fi
# Always attempt to install instructlab to make sure the bot is running the latest version of the stable branch.

sudo pip install --upgrade --force-reinstall "git+https://instructlab-bot:${GITHUB_TOKEN}@github.com/instructlab/instructlab.git@stable"
if [ "${GPU_TYPE}" = "cuda" ]; then
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --force-reinstall --no-cache-dir llama-cpp-python
elif [ -n "${GPU_TYPE}" ]; then
echo "Unsupported GPU_TYPE: ${GPU_TYPE}"
exit 1
fi
if [ ! -f config.yaml ]; then
ilab init --non-interactive
Expand Down Expand Up @@ -276,7 +279,7 @@ EOF
EXTRA_ARGS="${EXTRA_ARGS} --precheck-endpoint-url ${PRECHECK_ENDPOINT_URL}"
fi

# Check if SDG_ENDPOINT_URL is set and TLS_SECRETS_EXISTS is true
# Check if SDG_ENDPOINT_URL is set
if [ -n "${SDG_ENDPOINT_URL}" ]; then
EXTRA_ARGS="${EXTRA_ARGS} --sdg-endpoint-url ${SDG_ENDPOINT_URL} "
fi
Expand All @@ -288,7 +291,7 @@ EOF

# Check if tls cert and key are set
if [ "${TLS_SECRETS_EXISTS}" -eq 1 ]; then
EXTRA_ARGS="${EXTRA_ARGS} --tls-client-key ${TLS_SECRETS_DIR}/tls-client.key --tls-client-cert ${TLS_SECRETS_DIR}/tls-cert.key --tls-server-ca-cert ${TLS_SECRETS_DIR}/tls-server-ca.crt"
EXTRA_ARGS="${EXTRA_ARGS} --tls-client-key ${TLS_SECRETS_DIR}/$TLS_CLIENT_KEY_FILE --tls-client-cert ${TLS_SECRETS_DIR}/$TLS_CLIENT_CERT_FILE --tls-server-ca-cert ${TLS_SECRETS_DIR}/$TLS_SERVER_CA_CERT_FILE"
fi

cat << EOF > labbotworker.service
Expand Down

0 comments on commit 9f97446

Please sign in to comment.