diff --git a/scripts/user_setup.sh b/scripts/user_setup.sh deleted file mode 100755 index c7ce97c..0000000 --- a/scripts/user_setup.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/bash - -# bash script to setup user account for htr2hpc pilot integration -# - adds ssh key to authorized keys -# - conda env setup -# - create htr2hpc working directory in scratch - -echo "Setting up your account for htr2hpc ...." -echo "This process may take at least five minutes. Please do not exit until the process completes." - -# ensure ssh directory exists -if [ ! -d "$HOME/.ssh" ]; then - echo "Creating $HOME/.ssh directory" - mkdir ~/.ssh -fi - -# add test-htr public key to authorized keys if not already present -ssh_key='ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu' -if ! grep -q "$ssh_key" $HOME/.ssh/authorized_keys; then - echo "Adding htr2hpc ssh key to authorized keys" - echo $ssh_key >> ~/.ssh/authorized_keys -else - echo "ssh key is already in authorized keys" -fi - -# create conda environment named htr2hpc -conda_env_name=htr2hpc -module load anaconda3/2024.2 -if { conda env list | grep $conda_env_name; } >/dev/null 2>&1; then - echo "htr2hpc conda env already exists" -else - echo "Creating conda environment and installing dependencies" - cd /scratch/gpfs/rkoeser/htr2hpc_setup/kraken - conda env create -f environment_cuda.yml -n $conda_env_name - conda activate $conda_env_name - pip install -q torchvision torch==2.1 torchaudio==2.1 - pip install -q git+https://github.com/Princeton-CDH/htr2hpc.git@develop#egg=htr2hpc - # go back to previous directory - cd - -fi - -htrworkingdir=/scratch/gpfs/$USER/htr2hpc -# create working directory -if [ ! -d $htrworkingdir ]; then - echo "Creating htr2hpc working directory in scratch: $htrworkingdir" - mkdir $htrworkingdir -else - echo "htr2hpc scratch working directory already exists: $htrworkingdir" -fi - -echo "Setup complete! 
🚀 🚃"
diff --git a/src/htr2hpc/tasks.py b/src/htr2hpc/tasks.py
index 7745d20..47ec02d 100644
--- a/src/htr2hpc/tasks.py
+++ b/src/htr2hpc/tasks.py
@@ -455,3 +455,114 @@ def train(
     # - mark model as no longer being trained
     model.training = False
     model.save()
+
+
+@shared_task(default_retry_delay=60 * 60, bind=True)
+def hpc_user_setup(self, user_pk=None):
+    """Run the htr2hpc user setup script on the HPC host as the given user.
+
+    Copies the bundled ``user_setup.sh`` to the remote host over ssh, runs
+    it with ``--skip-ssh-setup --reinstall-htr2hpc``, and removes it again
+    whether or not the run succeeded. The outcome is written to the log,
+    sent to the user as a notification, and appended to this task's report
+    when one can be found.
+
+    :param user_pk: primary key of the user to run setup for; if no such
+        user exists, an error is logged and the task returns
+    """
+    try:
+        user = User.objects.get(pk=user_pk)
+    except User.DoesNotExist:
+        # error / bail out
+        logger.error(f"hpc_user_setup called with invalid user_pk {user_pk}")
+        return
+
+    # by default, escriptorium reporting code attaches signal handlers
+    # that should create a task group and task report for this task id
+    TaskReport = apps.get_model("reporting", "TaskReport")
+    # don't error if the task report can't be found
+    task_report = TaskReport.objects.filter(task_id=self.request.id).first()
+
+    # hostname and ssh key path set in django config
+    logger.debug(
+        f"Connecting to {settings.HPC_HOSTNAME} as {user.username} with keyfile {settings.HPC_SSH_KEYFILE}"
+    )
+
+    # bash setup script is included with this package
+    user_setup_script = settings.HTR2HPC_INSTALL_DIR / "train" / "user_setup.sh"
+    user.notify(
+        "Running user setup script, on first run this may take a while...",
+        id="htr2hpc-setup-start",
+    )
+    try:
+        with Connection(
+            host=settings.HPC_HOSTNAME,
+            user=user.username,
+            connect_timeout=10,
+            connect_kwargs={"key_filename": settings.HPC_SSH_KEYFILE},
+        ) as conn:
+            # copy setup script to server
+            conn.put(user_setup_script)
+            # run the script with options; skip ssh setup (must already be setup
+            # for this task to run) and ensure htr2hpc install is up to date
+            setup_cmd = (
+                f"./{user_setup_script.name} --skip-ssh-setup --reinstall-htr2hpc"
+            )
+            # document setup command options in task report
+            if task_report:
+                task_report.append(f"Running setup script:\n {setup_cmd}\n\n")
+
+            try:
+                result = conn.run(setup_cmd)
+            finally:
+                # remove the setup script from the server even when the run
+                # failed, so a failed attempt doesn't leave a stale copy
+                # behind; -f and warn=True keep cleanup from raising if the
+                # file is already gone (e.g. user clicked the button twice)
+                conn.run(f"rm -f ./{user_setup_script.name}", warn=True)
+
+        # add script output to task report
+        if task_report:
+            # script output is stored in result.stdout/result.stderr
+            task_report.append(
+                f"\n\nsetup script output:\n\n{result.stdout}\n\n{result.stderr}\n\n"
+            )
+
+        if "Setup complete" in result.stdout:
+            user.notify(
+                "Remote setup completed",
+                id="htr2hpc-setup-success",
+                level="success",
+            )
+        else:
+            # the script exited cleanly but its output lacks the completion
+            # marker; tell the user instead of finishing silently
+            logger.warning("user setup script finished without 'Setup complete'")
+            user.notify(
+                "Remote setup finished without confirming completion; check the task report for details",
+                id="setup-error",
+                level="warning",
+            )
+        # log script output for debugging
+        logger.debug(f"user setup script output:\n{result.stdout}")
+    except AuthenticationException as err:
+        error_message = f"Authentication exception to remote connection: {err}"
+        logger.error(error_message)
+        if task_report:
+            task_report.append(error_message)
+        # notify the user of the error
+        user.notify(
+            "Authentication failed; check that your account on della is set up for remote access",
+            id="setup-error",
+            level="danger",
+        )
+    except UnexpectedExit as err:
+        error_message = f"Error running remote setup script: {err}"
+        logger.error(error_message)
+        if task_report:
+            task_report.append(error_message)
+        user.notify(
+            "Something went wrong running remote user setup",
+            id="setup-error",
+            level="danger",
+        )
diff --git a/src/htr2hpc/templates/users/profile.html b/src/htr2hpc/templates/users/profile.html
new file mode 100644
index 0000000..f4492c7
--- /dev/null
+++ b/src/htr2hpc/templates/users/profile.html
@@ -0,0 +1,36 @@
+{% extends "users/profile.html" %}
+
+{% block body %}
+{{ block.super }}
+
+
To enable key-based ssh access to your account from the eScriptorium server, follow + the three steps below. (This is a one-time process.)
+ssh {{ request.user.username }}@della.princeton.edu 'mkdir -p ~/.ssh && echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJzoR8jstrofzFKVoiXSFP5jGw/WbXHxFyIaS5b4vSWC test-htr.lib.princeton.edu" >> ~/.ssh/authorized_keys'
+ This instance of eScriptorium is currently being tested and occasional updates might be necessary. + When prompted to update your HPC account, simply click the "HPC setup" button above to automatically + run the update process.
+