From f2bf47e4ae32411d8f4e4bc63b6aca73d685b2bd Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 4 Feb 2025 17:08:23 -0500 Subject: [PATCH 01/19] Move user setup script inside src for installation with package --- {scripts => src/htr2hpc/train}/user_setup.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {scripts => src/htr2hpc/train}/user_setup.sh (100%) diff --git a/scripts/user_setup.sh b/src/htr2hpc/train/user_setup.sh similarity index 100% rename from scripts/user_setup.sh rename to src/htr2hpc/train/user_setup.sh From e615c00979f125f3c6e567422317ec34844a7d80 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 4 Feb 2025 17:09:38 -0500 Subject: [PATCH 02/19] Tweak language and conda module version --- src/htr2hpc/train/user_setup.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/htr2hpc/train/user_setup.sh b/src/htr2hpc/train/user_setup.sh index c7ce97c..0de0589 100755 --- a/src/htr2hpc/train/user_setup.sh +++ b/src/htr2hpc/train/user_setup.sh @@ -6,7 +6,7 @@ # - create htr2hpc working directory in scratch echo "Setting up your account for htr2hpc ...." -echo "This process may take at least five minutes. Please do not exit until the process completes." +echo "This process may take five minutes or more on first run. Do not exit until the process completes." # ensure ssh directory exists if [ ! -d "$HOME/.ssh" ]; then @@ -25,7 +25,7 @@ fi # create conda environment named htr2hpc conda_env_name=htr2hpc -module load anaconda3/2024.2 +module load anaconda3/2024.6 if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then echo "htr2hpc conda env already exists" else From bf56f9b9e8e518d613689c653b6e17f55482fcfa Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 4 Feb 2025 17:11:33 -0500 Subject: [PATCH 03/19] Uninstall and reinstall htr2hpc to make it easy to update --- src/htr2hpc/train/user_setup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/htr2hpc/train/user_setup.sh b/src/htr2hpc/train/user_setup.sh index 0de0589..5516a60 100755 --- a/src/htr2hpc/train/user_setup.sh +++ b/src/htr2hpc/train/user_setup.sh @@ -34,6 +34,8 @@ else conda env create -f environment_cuda.yml -n $conda_env_name conda activate $conda_env_name pip install -q torchvision torch==2.1 torchaudio==2.1 + # ensure we have the most recent of htr2hpc version installed + pip uninstall -q htr2hpc pip install -q git+https://github.com/Princeton-CDH/htr2hpc.git@develop#egg=htr2hpc # go back to previous directory cd - From a9fd9ef8704b6f62c1a42f7e7ca5a39acc5a9ef8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 4 Feb 2025 18:03:00 -0500 Subject: [PATCH 04/19] Add options to skip ssh setup and reinstall htr2hpc --- src/htr2hpc/train/user_setup.sh | 59 ++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/htr2hpc/train/user_setup.sh b/src/htr2hpc/train/user_setup.sh index 5516a60..c2a3973 100755 --- a/src/htr2hpc/train/user_setup.sh +++ b/src/htr2hpc/train/user_setup.sh @@ -1,26 +1,45 @@ -#!/usr/bin/bash +#!/bin/bash # bash script to setup user account for htr2hpc pilot integration # - adds ssh key to authorized keys # - conda env setup # - create htr2hpc working directory in scratch +# defaults +ssh_setup=true +reinstall_htr2hpc=false + +# supported options: +# --skip-ssh-setup +# --reinstall-htr2hpc +for arg in "$@"; do + if [[ "$arg" == "--skip-ssh-setup" ]]; then + ssh_setup=false + elif [[ "$arg" == "--reinstall-htr2hpc" ]]; then + reinstall_htr2hpc=true + fi +done + echo "Setting up your account for htr2hpc ...." echo "This process may take five minutes or more on first run. Do not exit until the process completes." -# ensure ssh directory exists -if [ ! -d "$HOME/.ssh" ]; then - echo "Creating $HOME/.ssh directory" - mkdir ~/.ssh -fi -# add test-htr public key to authorized keys if not already present -ssh_key='ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu' -if ! grep -q "$ssh_key" $HOME/.ssh/authorized_keys; then - echo "Adding htr2hpc ssh key to authorized keys" - echo $ssh_key >> ~/.ssh/authorized_keys -else - echo "ssh key is already in authorized keys" +# skip ssh setup if --skip-ssh-setup is specified +if $ssh_setup; then + # ensure ssh directory exists + if [ ! -d "$HOME/.ssh" ]; then + echo "Creating $HOME/.ssh directory" + mkdir ~/.ssh + fi + + # add test-htr public key to authorized keys if not already present + ssh_key='ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu' + if ! grep -q "$ssh_key" $HOME/.ssh/authorized_keys; then + echo "Adding htr2hpc ssh key to authorized keys" + echo $ssh_key >> ~/.ssh/authorized_keys + else + echo "ssh key is already in authorized keys" + fi fi # create conda environment named htr2hpc @@ -28,17 +47,25 @@ conda_env_name=htr2hpc module load anaconda3/2024.6 if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then echo "htr2hpc conda env already exists" + + # when conda env already exists, if requested + # uninstall and reinstall htr2hpc + if $reinstall_htr2hpc; then + echo "Reinstalling htr2hpc" + conda activate $conda_env_name + pip uninstall -q --yes htr2hpc + pip install -q git+https://github.com/Princeton-CDH/htr2hpc.git@develop#egg=htr2hpc + fi + else echo "Creating conda environment and installing dependencies" cd /scratch/gpfs/rkoeser/htr2hpc_setup/kraken conda env create -f environment_cuda.yml -n $conda_env_name conda activate $conda_env_name pip install -q torchvision torch==2.1 torchaudio==2.1 - # ensure we have the most recent of htr2hpc version installed - pip uninstall -q htr2hpc pip install -q git+https://github.com/Princeton-CDH/htr2hpc.git@develop#egg=htr2hpc # go back to previous directory - cd - + cd - fi htrworkingdir=/scratch/gpfs/$USER/htr2hpc From c93a073ea00eaca03294f2337eccbcca66066ae2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 4 Feb 2025 18:03:37 -0500 Subject: [PATCH 05/19] Add task and view to run hpc user setup script --- src/htr2hpc/tasks.py | 51 ++++++++++++++++++++++++ src/htr2hpc/templates/users/profile.html | 16 ++++++++ src/htr2hpc/urls.py | 3 ++ src/htr2hpc/views.py | 20 ++++++++++ 4 files changed, 90 insertions(+) create mode 100644 src/htr2hpc/templates/users/profile.html create mode 100644 src/htr2hpc/views.py diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index 7745d20..c6e1aef 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -455,3 +455,54 @@ def train( # - mark model as no longer being trained model.training = False model.save() + + +@shared_task(default_retry_delay=60 * 60) +def hpc_user_setup(user_pk=None): + try: + user = User.objects.get(pk=user_pk) + except User.DoesNotExist: + # error / bail out + logger.error(f"hpc_user_setup called with invalid user_pk {user_pk}") + return + + # bash setup script is included with this package + user_setup_script = settings.HTR2HPC_INSTALL_DIR / "train" / "user_setup.sh" + + try: + with Connection( + host=settings.HPC_HOSTNAME, + user=user.username, + connect_timeout=10, + connect_kwargs={"key_filename": settings.HPC_SSH_KEYFILE}, + ) as conn: + # copy setup script to server + conn.put(user_setup_script) + # run the script with options; skip ssh setup (must already be setup + # for this task to run) and ensure htr2hpc install is up to date + result = conn.run( + f"./{user_setup_script.name} --skip-ssh-setup --reinstall-htr2hpc" + ) + # remove the setup script from the server + conn.run(f"rm ./{user_setup_script.name}") + if "Setup complete" in result.stdout: + user.notify( + "Remote setup completed", + id="htr2hpc-setup-success", + level="success", + ) + except AuthenticationException as err: + logger.error(f"Authentication exception to remote connection: {err}") + # notify the user of the error + user.notify( + "Authentication failed; check that your account on della is set up for remote access", + id="setup-error", + level="danger", + ) + except UnexpectedExit as err: + logger.error(f"Error running remote setup script: {err}") + user.notify( + "Something went wrong running remote user setup", + id="setup-error", + level="danger", + ) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html new file mode 100644 index 0000000..3d0b3e2 --- /dev/null +++ b/src/htr2hpc/templates/users/profile.html @@ -0,0 +1,16 @@ +{% extends "users/profile.html" %} + +{% block body %} +{{ block.super }} + +
+

HPC account setup

+

Setup or update your account on the HPC server della.

+ +
+ {% csrf_token %} + +
+
+ +{% endblock %} diff --git a/src/htr2hpc/urls.py b/src/htr2hpc/urls.py index dee8606..ab380a1 100644 --- a/src/htr2hpc/urls.py +++ b/src/htr2hpc/urls.py @@ -2,6 +2,9 @@ from escriptorium.urls import urlpatterns +from htr2hpc.views import remote_user_setup + urlpatterns += [ (path("accounts/", include("pucas.cas_urls"))), + path("profile/hpc-setup/", remote_user_setup, name="hpc-setup"), ] diff --git a/src/htr2hpc/views.py b/src/htr2hpc/views.py new file mode 100644 index 0000000..7222346 --- /dev/null +++ b/src/htr2hpc/views.py @@ -0,0 +1,20 @@ +from django.contrib.auth.decorators import login_required +from django.http import HttpResponseRedirect +from django.urls import reverse +from django.views.decorators.http import require_http_methods + +from htr2hpc.tasks import hpc_user_setup + + +@login_required +@require_http_methods(["POST"]) +def remote_user_setup(request): + # it seems that the taskreport must be associated with a document, + # so skip the task reporting logic here and just use notifications + + # queue the celery setup task + hpc_user_setup.delay(user_pk=request.user.pk) + # redirect back to the profile page + redirect = HttpResponseRedirect(reverse("profile")) + redirect.status_code = 303 # See other + return redirect From f183abe49be3cec7a0868cb3999596fa4ba31ff6 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Feb 2025 11:25:39 -0500 Subject: [PATCH 06/19] Log output from remote run of user setup script --- src/htr2hpc/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index c6e1aef..ce410e3 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -491,6 +491,8 @@ def hpc_user_setup(user_pk=None): id="htr2hpc-setup-success", level="success", ) + # log script output for debugging + logger.debug(result.stdout) except AuthenticationException as err: logger.error(f"Authentication exception to remote connection: {err}") # notify the user of the error From 48baa1dde9828eb45007cb58d51d903c95e1d55d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Feb 2025 11:26:03 -0500 Subject: [PATCH 07/19] Add command for initial ssh-key setup to profile --- src/htr2hpc/templates/users/profile.html | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html index 3d0b3e2..f27ae4e 100644 --- a/src/htr2hpc/templates/users/profile.html +++ b/src/htr2hpc/templates/users/profile.html @@ -5,7 +5,15 @@

HPC account setup

-

Setup or update your account on the HPC server della.

+ + +

If you have not already done so, enable key-based ssh access to your account + from the eScriptorium server. (This is a one-time step.)

+ + ssh {{ request.user.username }}@della.princeton.edu 'mkdir -p ~/.ssh && echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu" >> ~/.ssh/authorized_keys' +

+ +

Run HPC setup for initial environment setup or to update your account on the HPC server.

{% csrf_token %} From 36aa2b4378392af9b10cd0794e5221c340490ba1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Feb 2025 11:48:18 -0500 Subject: [PATCH 08/19] Add notification when user setup task starts --- src/htr2hpc/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index ce410e3..f7c6598 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -468,7 +468,11 @@ def hpc_user_setup(user_pk=None): # bash setup script is included with this package user_setup_script = settings.HTR2HPC_INSTALL_DIR / "train" / "user_setup.sh" - + user.notify( + "Running user setup script, on first run this may take a while...", + id="htr2hpc-setup", + level="info", + ) try: with Connection( host=settings.HPC_HOSTNAME, From 98bcf30ea3ac9eebf140b9effdd75f9fb45d09b2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Feb 2025 11:52:04 -0500 Subject: [PATCH 09/19] Improve user setup task logging --- src/htr2hpc/tasks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index f7c6598..6a7338b 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -466,6 +466,11 @@ def hpc_user_setup(user_pk=None): logger.error(f"hpc_user_setup called with invalid user_pk {user_pk}") return + # hostname and ssh key path set in django config + logger.debug( + f"Connecting to {settings.HPC_HOSTNAME} as {user.username} with keyfile {settings.HPC_SSH_KEYFILE}" + ) + # bash setup script is included with this package user_setup_script = settings.HTR2HPC_INSTALL_DIR / "train" / "user_setup.sh" user.notify( @@ -496,7 +501,7 @@ def hpc_user_setup(user_pk=None): level="success", ) # log script output for debugging - logger.debug(result.stdout) + logger.debug(f"user setup script output:\n{result.stdout}") except AuthenticationException as err: logger.error(f"Authentication exception to remote connection: {err}") # notify the user of the error From 28b89419040bd97c229e78e95566046df041a1f5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Feb 2025 12:02:56 -0500 Subject: [PATCH 10/19] Don't error on user setup script removal --- src/htr2hpc/tasks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index 6a7338b..75012cf 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -475,7 +475,7 @@ def hpc_user_setup(user_pk=None): user_setup_script = settings.HTR2HPC_INSTALL_DIR / "train" / "user_setup.sh" user.notify( "Running user setup script, on first run this may take a while...", - id="htr2hpc-setup", + id="htr2hpc-setup-start", level="info", ) try: @@ -492,8 +492,9 @@ def hpc_user_setup(user_pk=None): result = conn.run( f"./{user_setup_script.name} --skip-ssh-setup --reinstall-htr2hpc" ) - # remove the setup script from the server - conn.run(f"rm ./{user_setup_script.name}") + # remove the setup script from the server; don't error if not there + # (if user clicks the button twice it may already be removed) + conn.run(f"rm -f ./{user_setup_script.name}") if "Setup complete" in result.stdout: user.notify( "Remote setup completed", From b88d35543a0dfe60e8bc4214234f8e9df913765b Mon Sep 17 00:00:00 2001 From: cmroughan Date: Fri, 7 Feb 2025 09:46:49 -0500 Subject: [PATCH 11/19] updating hpc account management instructions --- src/htr2hpc/templates/users/profile.html | 38 ++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html index f27ae4e..b78ca4a 100644 --- a/src/htr2hpc/templates/users/profile.html +++ b/src/htr2hpc/templates/users/profile.html @@ -4,21 +4,29 @@ {{ block.super }}
-

HPC account setup

- - -

If you have not already done so, enable key-based ssh access to your account - from the eScriptorium server. (This is a one-time step.)

- - ssh {{ request.user.username }}@della.princeton.edu 'mkdir -p ~/.ssh && echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu" >> ~/.ssh/authorized_keys' -

- -

Run HPC setup for initial environment setup or to update your account on the HPC server.

- - - {% csrf_token %} - - +

HPC account management

+

First time setup

+
    +
  1. Enable key-based ssh access to your account from the eScriptorium server. + (This is a one-time step.) Open Terminal or Command Line on your computer, + paste in the below code, and press enter. You will be prompted for your NetID password.
    + ssh {{ request.user.username }}@della.princeton.edu 'mkdir -p ~/.ssh && echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu" >> ~/.ssh/authorized_keys' +
  2. + +
  3. Click the "HPC setup" button below for initial environment setup. This step may take + a few minutes to complete -- a notification should appear when it has finished. + After this, your account will be prepared to run training tasks on Princeton's HPC. +
    +
    + {% csrf_token %} + +
    +
  4. +
+

HPC account updates

+

This instance of eScriptorium is currently being tested and occasional updates might be necessary. + When prompted to update your HPC account, simply click the "HPC setup" button above to automatically + run the update process.

{% endblock %} From 8bfc51930ba6af63e1a33ed2e64735959ee5d7b8 Mon Sep 17 00:00:00 2001 From: cmroughan Date: Fri, 7 Feb 2025 11:40:00 -0500 Subject: [PATCH 12/19] clarifying need to be on uni network for ssh setup step --- src/htr2hpc/templates/users/profile.html | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html index b78ca4a..f4492c7 100644 --- a/src/htr2hpc/templates/users/profile.html +++ b/src/htr2hpc/templates/users/profile.html @@ -6,9 +6,13 @@

HPC account management

First time setup

+

To enable key-based ssh access to your account from the eScriptorium server, follow + the below three steps. (This is a one-time process.)

    -
  1. Enable key-based ssh access to your account from the eScriptorium server. - (This is a one-time step.) Open Terminal or Command Line on your computer, +
  2. Ensure that you are either on-campus connected to the University wifi or + that you are connected to the Princeton network through the VPN + like so.
  3. +
  4. Open Terminal or Command Line on your computer, paste in the below code, and press enter. You will be prompted for your NetID password.
    ssh {{ request.user.username }}@della.princeton.edu 'mkdir -p ~/.ssh && echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu" >> ~/.ssh/authorized_keys'
  5. From 6dbcd06c62ca3cf8d5fecd52ab7cac47544729a6 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 10 Feb 2025 15:42:48 -0500 Subject: [PATCH 13/19] Add setup script command and output to task report message --- src/htr2hpc/tasks.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index 75012cf..a57a000 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -457,8 +457,8 @@ def train( model.save() -@shared_task(default_retry_delay=60 * 60) -def hpc_user_setup(user_pk=None): +@shared_task(default_retry_delay=60 * 60, bind=True) +def hpc_user_setup(self, user_pk=None): try: user = User.objects.get(pk=user_pk) except User.DoesNotExist: @@ -466,6 +466,12 @@ def hpc_user_setup(user_pk=None): logger.error(f"hpc_user_setup called with invalid user_pk {user_pk}") return + # by default, escriptorium reporting code attaches signal handlers + # that should create a task group and task report for this task id + TaskReport = apps.get_model("reporting", "TaskReport") + # don't error if the task report can't be found + task_report = TaskReport.objects.filter(task_id=self.request.id).first() + # hostname and ssh key path set in django config logger.debug( f"Connecting to {settings.HPC_HOSTNAME} as {user.username} with keyfile {settings.HPC_SSH_KEYFILE}" @@ -489,12 +495,26 @@ def hpc_user_setup(user_pk=None): conn.put(user_setup_script) # run the script with options; skip ssh setup (must already be setup # for this task to run) and ensure htr2hpc install is up to date - result = conn.run( + + setup_cmd = ( f"./{user_setup_script.name} --skip-ssh-setup --reinstall-htr2hpc" ) + # document setup command options in task report + if task_report: + task_report.append(f"Running setup script:\n {setup_cmd}\n\n") + + result = conn.run(setup_cmd) # remove the setup script from the server; don't error if not there # (if user clicks the button twice it may already be removed) conn.run(f"rm -f ./{user_setup_script.name}") + + # add script output to task report + if task_report: + # script output is stored in result.stdout/result.stderr + task_report.append( + f"\n\nsetup script output:\n\n{result.stdout}\n\n{result.stderr}\n\n" + ) + if "Setup complete" in result.stdout: user.notify( "Remote setup completed", @@ -504,7 +524,10 @@ def hpc_user_setup(user_pk=None): # log script output for debugging logger.debug(f"user setup script output:\n{result.stdout}") except AuthenticationException as err: - logger.error(f"Authentication exception to remote connection: {err}") + error_message = f"Authentication exception to remote connection: {err}" + logger.error(error_message) + if task_report: + task_report.append(error_message) # notify the user of the error user.notify( "Authentication failed; check that your account on della is set up for remote access", @@ -512,7 +535,11 @@ def hpc_user_setup(user_pk=None): level="danger", ) except UnexpectedExit as err: - logger.error(f"Error running remote setup script: {err}") + error_message = f"Error running remote setup script: {err}" + logger.error(error_message) + if task_report: + task_report.append(error_message) + logger.error(error_message) user.notify( "Something went wrong running remote user setup", id="setup-error", From c99fe5a9a608c3ff5cdfb099199445355b83d0a5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 10 Feb 2025 16:08:20 -0500 Subject: [PATCH 14/19] Use anaconda3/2024.2 since initial installation with 2024.6 errors --- src/htr2hpc/train/slurm.py | 4 ++-- src/htr2hpc/train/user_setup.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/htr2hpc/train/slurm.py b/src/htr2hpc/train/slurm.py index 42ad5a7..f900079 100644 --- a/src/htr2hpc/train/slurm.py +++ b/src/htr2hpc/train/slurm.py @@ -37,7 +37,7 @@ def segtrain( # add commands for setup steps segtrain_slurm.add_cmd("module purge") - segtrain_slurm.add_cmd("module load anaconda3/2024.6") + segtrain_slurm.add_cmd("module load anaconda3/2024.2") segtrain_slurm.add_cmd("conda activate htr2hpc") logger.info(f"sbatch file\n: {segtrain_slurm}") # sbatch returns the job id for the created job @@ -74,7 +74,7 @@ def recognition_train( # time=datetime.timedelta(hours=2), ) recogtrain_slurm.add_cmd("module purge") - recogtrain_slurm.add_cmd("module load anaconda3/2024.6") + recogtrain_slurm.add_cmd("module load anaconda3/2024.2") recogtrain_slurm.add_cmd("conda activate htr2hpc") logger.info(f"sbatch file\n: {recogtrain_slurm}") # sbatch returns the job id for the created job diff --git a/src/htr2hpc/train/user_setup.sh b/src/htr2hpc/train/user_setup.sh index c2a3973..699aa66 100755 --- a/src/htr2hpc/train/user_setup.sh +++ b/src/htr2hpc/train/user_setup.sh @@ -44,9 +44,9 @@ fi # create conda environment named htr2hpc conda_env_name=htr2hpc -module load anaconda3/2024.6 +module load anaconda3/2024.2 if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then - echo "htr2hpc conda env already exists" + echo "conda env $conda_env_name already exists" # when conda env already exists, if requested # uninstall and reinstall htr2hpc @@ -58,7 +58,7 @@ if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then fi else - echo "Creating conda environment and installing dependencies" + echo "Creating conda environment $conda_env_name and installing dependencies" cd /scratch/gpfs/rkoeser/htr2hpc_setup/kraken conda env create -f environment_cuda.yml -n $conda_env_name conda activate $conda_env_name From 9abacd281f535f71ce624ba610b4bc59604ef070 Mon Sep 17 00:00:00 2001 From: cmroughan Date: Fri, 14 Feb 2025 15:33:11 -0500 Subject: [PATCH 15/19] testing update to hide hpc button after user clicks it --- src/htr2hpc/templates/users/profile.html | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html index f4492c7..ad34f05 100644 --- a/src/htr2hpc/templates/users/profile.html +++ b/src/htr2hpc/templates/users/profile.html @@ -21,9 +21,12 @@

    First time setup

    a few minutes to complete -- a notification should appear when it has finished. After this, your account will be prepared to run training tasks on Princeton's HPC.
    -
    + {% csrf_token %} - + +
@@ -33,4 +36,11 @@

HPC account updates

run the update process.

+ + {% endblock %} From 6d74b7d53a460525b202152634f6ae4a010b3a63 Mon Sep 17 00:00:00 2001 From: cmroughan Date: Fri, 14 Feb 2025 16:38:03 -0500 Subject: [PATCH 16/19] reverting 9abacd2 --- src/htr2hpc/templates/users/profile.html | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html index ad34f05..f4492c7 100644 --- a/src/htr2hpc/templates/users/profile.html +++ b/src/htr2hpc/templates/users/profile.html @@ -21,12 +21,9 @@

First time setup

a few minutes to complete -- a notification should appear when it has finished. After this, your account will be prepared to run training tasks on Princeton's HPC.
-
+ {% csrf_token %} - - +
@@ -36,11 +33,4 @@

HPC account updates

run the update process.

- - {% endblock %} From ab56afc4012df44eeb9adbecb85f037bd3dc158b Mon Sep 17 00:00:00 2001 From: cmroughan Date: Fri, 14 Feb 2025 16:40:00 -0500 Subject: [PATCH 17/19] checking if `level="info"` is failing to display notifs --- src/htr2hpc/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py index a57a000..47ec02d 100644 --- a/src/htr2hpc/tasks.py +++ b/src/htr2hpc/tasks.py @@ -482,7 +482,7 @@ def hpc_user_setup(self, user_pk=None): user.notify( "Running user setup script, on first run this may take a while...", id="htr2hpc-setup-start", - level="info", + #level="info", ) try: with Connection( From d9f693bf0bc4b6ef2adb5a37493f006f737da621 Mon Sep 17 00:00:00 2001 From: cmroughan Date: Sun, 16 Feb 2025 12:36:54 -0500 Subject: [PATCH 18/19] fix for failed pip dependency install --- src/htr2hpc/train/user_setup.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/htr2hpc/train/user_setup.sh b/src/htr2hpc/train/user_setup.sh index 699aa66..d6301f5 100755 --- a/src/htr2hpc/train/user_setup.sh +++ b/src/htr2hpc/train/user_setup.sh @@ -59,13 +59,16 @@ if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then else echo "Creating conda environment $conda_env_name and installing dependencies" - cd /scratch/gpfs/rkoeser/htr2hpc_setup/kraken + mkdir /scratch/gpfs/$USER/setup_htr2hpc + cp -r /scratch/gpfs/rkoeser/htr2hpc_setup/kraken /scratch/gpfs/$USER/setup_htr2hpc + cd /scratch/gpfs/$USER/setup_htr2hpc/kraken conda env create -f environment_cuda.yml -n $conda_env_name conda activate $conda_env_name - pip install -q torchvision torch==2.1 torchaudio==2.1 pip install -q git+https://github.com/Princeton-CDH/htr2hpc.git@develop#egg=htr2hpc - # go back to previous directory - cd - + pip install -q torchvision torch==2.1 torchaudio==2.1 + # go back to scratch and delete temp directory + cd /scratch/gpfs/$USER + rm -rf /scratch/gpfs/$USER/setup_htr2hpc fi htrworkingdir=/scratch/gpfs/$USER/htr2hpc From f71d73b1492828b11685be51a958e69aa0a9f0a5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 18 Feb 2025 15:06:49 -0500 Subject: [PATCH 19/19] Redirect to profile-api-key to force api key generation --- src/htr2hpc/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/htr2hpc/views.py b/src/htr2hpc/views.py index 7222346..917fc0f 100644 --- a/src/htr2hpc/views.py +++ b/src/htr2hpc/views.py @@ -15,6 +15,6 @@ def remote_user_setup(request): # queue the celery setup task hpc_user_setup.delay(user_pk=request.user.pk) # redirect back to the profile page - redirect = HttpResponseRedirect(reverse("profile")) + redirect = HttpResponseRedirect(reverse("profile-api-key")) redirect.status_code = 303 # See other return redirect