From f05869eed74fb50ce28cfcab1c4a8b800fd869be Mon Sep 17 00:00:00 2001
From: Jagger Denhof <58100760+jdenhof@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:15:51 -0500
Subject: [PATCH] Added parallel configuration for conditional layers (#57)

* added version to run-compare
* updated run-compare to take command line args
* fixed argument parsing order for extra args
* proposed meaned loss function after recon + kl
* added experiment to isolate adv-cond config
* updated workflow memory requirements
* fixed find issues in run-compare
* fixed run-compare ls issues
* added mean and variance to tensorboard log
* renamed experiment config to parallel
* removed layer and made concat_config ConcatBlockConfig
* added parallel support for conditional layers
* added comments for alternate loss addition
* added scripts to gather from tsv file
---
 .../parallel/adversarial-conditional.yaml | 130 +++++++++++++++++
 configs/model/parallel/conditional.yaml   | 127 +++++++++++++++++
 scripts/gather-norms.py                   |  18 +++
 scripts/run-compare.sh                    | 133 +++++++++++++++---
 src/cmmvae/models/cmmvae_model.py         |   5 +
 src/cmmvae/modules/base/__init__.py       |   2 +
 src/cmmvae/modules/base/components.py     |  33 ++++-
 src/cmmvae/modules/clvae.py               |  47 +++++--
 src/cmmvae/modules/vae.py                 |  16 ++-
 workflow/profile/slurm/config.yaml        |  24 ++--
 10 files changed, 487 insertions(+), 48 deletions(-)
 create mode 100644 configs/model/parallel/adversarial-conditional.yaml
 create mode 100644 configs/model/parallel/conditional.yaml
 create mode 100644 scripts/gather-norms.py

diff --git a/configs/model/parallel/adversarial-conditional.yaml b/configs/model/parallel/adversarial-conditional.yaml
new file mode 100644
index 0000000..c18d5c4
--- /dev/null
+++ b/configs/model/parallel/adversarial-conditional.yaml
@@ -0,0 +1,130 @@
+class_path: cmmvae.models.CMMVAEModel
+init_args:
+  kl_annealing_fn:
+    class_path: cmmvae.modules.base.annealing_fn.LinearKLAnnealingFn
+    init_args:
+      min_kl_weight: 0.1
+      max_kl_weight: 0.5
+      warmup_steps: 1e4
+      climax_steps: 6e4
+  record_gradients: false
+  adv_weight: 25
+  gradient_record_cap: 20
+  autograd_config:
+    class_path: cmmvae.config.AutogradConfig
+    init_args:
+      adversarial_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+      vae_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+      expert_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+  module:
+    class_path: cmmvae.modules.CMMVAE
+    init_args:
+      vae:
+        class_path: cmmvae.modules.CLVAE
+        init_args:
+          latent_dim: 128
+          encoder_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 512, 256 ]
+              dropout_rate: 0.0
+              use_batch_norm: True
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+              return_hidden: True
+          decoder_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 128, 256, 512 ]
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+          conditional_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 128 ]
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: True
+              activation_fn: null
+          concat_config:
+            class_path: cmmvae.modules.base.ConcatBlockConfig
+            init_args:
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+          conditionals:
+            - assay
+            - dataset_id
+            - donor_id
+            - species
+            - tissue
+          selection_order:
+            - parallel
+      experts:
+        class_path: cmmvae.modules.base.Experts
+        init_args:
+          experts:
+            - class_path: cmmvae.modules.base.Expert
+              init_args:
+                id: human
+                encoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 60530, 1024, 512 ]
+                    dropout_rate: [ 0.1, 0.0 ]
+                    use_batch_norm: True
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+                decoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 512, 1024, 60530 ]
+                    dropout_rate: 0.0
+                    use_batch_norm: False
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+            - class_path: cmmvae.modules.base.Expert
+              init_args:
+                id: mouse
+                encoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 52437, 1024, 512 ]
+                    dropout_rate: [ 0.1, 0.0 ]
+                    use_batch_norm: True
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+                decoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 512, 1024, 52437 ]
+                    dropout_rate: 0.0
+                    use_batch_norm: False
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+      adversarials:
+        - class_path: cmmvae.modules.base.FCBlockConfig
+          init_args:
+            layers: [ 256, 128, 64, 1 ]
+            dropout_rate: 0.0
+            use_batch_norm: False
+            use_layer_norm: False
+            activation_fn:
+              - torch.nn.ReLU
+              - torch.nn.ReLU
+              - torch.nn.Sigmoid
diff --git a/configs/model/parallel/conditional.yaml b/configs/model/parallel/conditional.yaml
new file mode 100644
index 0000000..92b7420
--- /dev/null
+++ b/configs/model/parallel/conditional.yaml
@@ -0,0 +1,127 @@
+class_path: cmmvae.models.CMMVAEModel
+init_args:
+  kl_annealing_fn:
+    class_path: cmmvae.modules.base.annealing_fn.LinearKLAnnealingFn
+    init_args:
+      min_kl_weight: 0.1
+      max_kl_weight: 1.0
+      warmup_steps: 1e4
+      climax_steps: 4e4
+  record_gradients: false
+  adv_weight: 0
+  gradient_record_cap: 20
+  autograd_config:
+    class_path: cmmvae.config.AutogradConfig
+    init_args:
+      adversarial_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+      vae_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+      expert_gradient_clip:
+        class_path: cmmvae.config.GradientClipConfig
+        init_args:
+          val: 10
+          algorithm: norm
+  module:
+    class_path: cmmvae.modules.CMMVAE
+    init_args:
+      vae:
+        class_path: cmmvae.modules.CLVAE
+        init_args:
+          latent_dim: 128
+          encoder_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 512, 256 ]
+              dropout_rate: 0.0
+              use_batch_norm: True
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+              return_hidden: True
+          decoder_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 128, 256, 512 ]
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+          conditional_config:
+            class_path: cmmvae.modules.base.FCBlockConfig
+            init_args:
+              layers: [ 128 ]
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: True
+              activation_fn: null
+          concat_config:
+            class_path: cmmvae.modules.base.ConcatBlockConfig
+            init_args:
+              dropout_rate: 0.0
+              use_batch_norm: False
+              use_layer_norm: False
+              activation_fn: torch.nn.ReLU
+          conditionals:
+            - assay
+            - dataset_id
+            - donor_id
+            - species
+            - tissue
+          selection_order:
+            - parallel
+      experts:
+        class_path: cmmvae.modules.base.Experts
+        init_args:
+          experts:
+            - class_path: cmmvae.modules.base.Expert
+              init_args:
+                id: human
+                encoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 60530, 1024, 512 ]
+                    dropout_rate: [ 0.1, 0.0 ]
+                    use_batch_norm: True
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+                decoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 512, 1024, 60530 ]
+                    dropout_rate: 0.0
+                    use_batch_norm: False
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+            - class_path: cmmvae.modules.base.Expert
+              init_args:
+                id: mouse
+                encoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 52437, 1024, 512 ]
+                    dropout_rate: [ 0.1, 0.0 ]
+                    use_batch_norm: True
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+                decoder_config:
+                  class_path: cmmvae.modules.base.FCBlockConfig
+                  init_args:
+                    layers: [ 512, 1024, 52437 ]
+                    dropout_rate: 0.0
+                    use_batch_norm: False
+                    use_layer_norm: False
+                    activation_fn: torch.nn.ReLU
+      adversarials:
+        # - class_path: cmmvae.modules.base.FCBlockConfig
+        #   init_args:
+        #     layers: [ 256, 128, 64, 1 ]
+        #     dropout_rate: 0.0
+        #     use_batch_norm: False
+        #     use_layer_norm: False
+        #     activation_fn: torch.nn.Sigmoid
diff --git a/scripts/gather-norms.py b/scripts/gather-norms.py
new file mode 100644
index 0000000..242555a
--- /dev/null
+++ b/scripts/gather-norms.py
@@ -0,0 +1,18 @@
+import tensorflow as tf
+import pandas as pd
+
+# Replace with the path to your TensorBoard log file
+log_file = "/mnt/projects/debruinz_project/denhofja/cmmvae/lightning_logs/run-experiment/adversarial-conditional.5a9df4a./events.out.tfevents.1730760440.g001.clipper.gvsu.edu.3079995.0"
+data = []
+
+for event in tf.compat.v1.train.summary_iterator(log_file):
+    for value in event.summary.value:
+        # Modify 'grad_norm' to the exact tag name used for gradient norms in your logs
+        if "grad_norm" in value.tag:
+            data.append(
+                {"step": event.step, "grad_norm": value.simple_value, "tag": value.tag}
+            )
+
+# Convert to DataFrame and save to CSV
+df = pd.DataFrame(data)
+df.to_csv("gradient_data.csv")
diff --git a/scripts/run-compare.sh b/scripts/run-compare.sh
index 5d73963..1b63596 100755
--- a/scripts/run-compare.sh
+++ b/scripts/run-compare.sh
@@ -1,29 +1,124 @@
 #!/bin/bash
-if [ -z "$1" ]; then
-    echo "No argument provided for the name of the experiment!"
-    exit 1
+debug=false
+append_commit_hash=true
+root_dir="${CMMVAE_ROOT_DIR}"
+experiment="${CMMVAE_EXPERIMENT_NAME}"
+data="${CMMVAE_DATA_CONFIG}"
+compare=""
+max_epochs="${CMMVAE_MAX_EPOCHS}"
+commit_hash=""
+extra_args=""
+
+if [ -z "${max_epochs}" ]; then
+    max_epochs=5
+else
+    echo "CMMVAE_MAX_EPOCHS is set to '$CMMVAE_MAX_EPOCHS'"
 fi
-if [ -z "$2" ]; then
-    echo "No argument provided for model config filename!"
+if [ -z "${data}" ]; then
+    data=configs/data/local.yaml
+else
+    echo "CMMVAE_DATA_CONFIG is set to '$CMMVAE_DATA_CONFIG'"
+fi
+
+if [ -z "${root_dir}" ]; then
+    root_dir=lightning_logs
+else
+    echo "CMMVAE_ROOT_DIR is set to '$CMMVAE_ROOT_DIR'"
+fi
+
+if [ -z "${experiment}" ]; then
+    experiment=default
+else
+    echo "CMMVAE_EXPERIMENT_NAME is set to '$CMMVAE_EXPERIMENT_NAME'"
+fi
+
+for arg in "$@"
+do
+    case $arg in
+        --debug)
+        debug=true
+        shift
+        ;;
+        --no-commit-hash)
+        append_commit_hash=false
+        shift
+        ;;
+        root_dir=*)
+        root_dir="${arg#*=}"
+        shift
+        ;;
+        experiment=*)
+        experiment="${arg#*=}"
+        shift
+        ;;
+        compare=*)
+        compare="${arg#*=}"
+        shift
+        ;;
+        data=*)
+        data="${arg#*=}"
+        shift
+        ;;
+        max_epochs=*)
+        max_epochs="${arg#*=}"
+        shift
+        ;;
+        *)
+        extra_args="$extra_args $arg"
+        shift
+        ;;
+    esac
+done
+
+if [ -z "$compare" ]; then
+    echo "Please specify directory that contains model configs to compare."
     exit 1
 fi
-for file in "$2"/*.yaml
+if [ "$append_commit_hash" = true ]; then
+    if ! command -v git &> /dev/null; then
+        echo "Error: Git is not installed. Please install Git to use this script or specify --no-commit-hash."
+        exit 1
+    fi
+
+    if ! git rev-parse --is-inside-work-tree &> /dev/null; then
+        echo "Error: This is not a Git repository. Please run the script inside a Git repository or specify --no-commit-hash."
+        exit 1
+    fi
+
+    commit_hash=$(git rev-parse --short HEAD)
+    echo "Latest Commit Hash: $commit_hash"
+else
+    echo "Skipping commit hash display."
+fi
+
+for file in "$compare"/*.yaml
 do
-    filename=$(basename "$file" .yaml)
-    echo "Processing: $filename"
-    sbatch scripts/run-snakemake.sh --config \
-        experiment_name=$1\
-        run_name=$filename \
-        root_dir=lightning_logs \
-        train_command=\
-"\
-fit \
---data configs/data/local.yaml \
---model $file \
---trainer.max_epochs 5 \
-"
+    run_name=$(basename "$file" .yaml)
+
+    if [ "$commit_hash" != "" ]; then
+        run_name="${run_name}.${commit_hash}"
+    fi
+
+    ran_dirs=$(ls -d "$root_dir/$experiment/$run_name"* 2>/dev/null)
+
+    if [ -z "$ran_dirs" ]; then
+        version="V000"
+    else
+        version=$(echo "$ran_dirs" | grep -E 'V[0-9]{3}$' | sort -V | tail -n 1 | sed -E 's/.*V([0-9]{3})$/\1/' | awk '{printf "V%03d", $1 + 1}')
+    fi
+    echo "Processing: $file"
+    command="scripts/run-snakemake.sh --config \
+        root_dir=${root_dir} \
+        experiment_name=${experiment} \
+        run_name=${version}.${run_name} \
+        train_command=\"fit --model $file --data $data --trainer.max_epochs $max_epochs $extra_args\"
+    "
+    echo $command
+    if [ "$debug" = false ]; then
+        eval "sbatch $command"
+    fi
 done
diff --git a/src/cmmvae/models/cmmvae_model.py b/src/cmmvae/models/cmmvae_model.py
index 2a74abc..6ec411a 100644
--- a/src/cmmvae/models/cmmvae_model.py
+++ b/src/cmmvae/models/cmmvae_model.py
@@ -206,6 +206,7 @@ def training_step(
         qz, pz, z, xhats, hidden_representations = self.module(
             x=x, metadata=metadata, expert_id=expert_id
         )
+        # assert isinstance(qz, torch.distributions.Normal)
 
         if x.layout == torch.sparse_csr:
             x = x.to_dense()
@@ -214,6 +215,10 @@
         main_loss_dict = self.module.vae.elbo(
             qz, pz, x, xhats[expert_id], self.kl_annealing_fn.kl_weight
         )
+
+        main_loss_dict["Mean"] = qz.mean.mean()
+        main_loss_dict["Variance"] = qz.variance.mean()
+
         total_loss = main_loss_dict[RK.LOSS]
 
         adv_loss = None
diff --git a/src/cmmvae/modules/base/__init__.py b/src/cmmvae/modules/base/__init__.py
index 56a2312..90dfb1c 100644
--- a/src/cmmvae/modules/base/__init__.py
+++ b/src/cmmvae/modules/base/__init__.py
@@ -10,6 +10,7 @@
     ConditionalLayer,
     ConditionalLayers,
     GradientReversalFunction,
+    ConcatBlockConfig,
 )
 
 from cmmvae.modules.base.annealing_fn import KLAnnealingFn, LinearKLAnnealingFn
@@ -17,6 +18,7 @@
 __all__ = [
     "ConditionalLayer",
     "ConditionalLayers",
+    "ConcatBlockConfig",
     "Encoder",
     "Expert",
     "Experts",
diff --git a/src/cmmvae/modules/base/components.py b/src/cmmvae/modules/base/components.py
index 40dbc9c..19d7ae4 100644
--- a/src/cmmvae/modules/base/components.py
+++ b/src/cmmvae/modules/base/components.py
@@ -174,6 +174,22 @@ def validate(self):
                 self._validate_option(name, req_type, **kwargs)
 
 
+class ConcatBlockConfig(FCBlockConfig):
+    def __init__(
+        self,
+        dropout_rate: float = 0.0,
+        use_batch_norm: bool = False,
+        use_layer_norm: bool = False,
+        return_hidden: bool = False,
+        activation_fn: Optional[Type[nn.Module]] = None,
+    ):
+        self.dropout_rate = dropout_rate
+        self.use_batch_norm = use_batch_norm
+        self.use_layer_norm = use_layer_norm
+        self.return_hidden = return_hidden
+        self.activation_fn = activation_fn
+
+
 class FCBlock(nn.Module):
     """
     Fully Connected Block for building neural network layers.
@@ -500,7 +516,8 @@ def __init__(
         self.shared_conditionals = list(conditional_paths["shared"].keys())
 
         self.shuffle_selection_order = False
-        if not selection_order:
+        self.is_parallel = bool(selection_order) and selection_order[0] == "parallel"
+        if not selection_order or self.is_parallel:
             selection_order = conditionals
             self.shuffle_selection_order = True
 
@@ -589,6 +606,7 @@ def forward(
         else:
             order = self.selection_order
 
+        xs = []
         # Apply each layer in the determined order
        for conditional in order:
             layer = self.layers[conditional]
@@ -599,10 +617,17 @@
                 )
                 layer = layer[species]
             if isinstance(layer, ConditionalLayer):
-                x = layer(x, metadata)
+                if self.is_parallel:
+                    xs.append(layer(x, metadata))
+                else:
+                    x = layer(x, metadata)
             else:
-                x = layer(x)
-
+                if self.is_parallel:
+                    xs.append(layer(x))
+                else:
+                    x = layer(x)
+        if xs:
+            x = torch.cat(xs, dim=1)
         return x
diff --git a/src/cmmvae/modules/clvae.py b/src/cmmvae/modules/clvae.py
index f84199a..a446631 100644
--- a/src/cmmvae/modules/clvae.py
+++ b/src/cmmvae/modules/clvae.py
@@ -4,7 +4,7 @@
 import pandas as pd
 
 from cmmvae.modules.vae import VAE
-from cmmvae.modules.base import FCBlockConfig, ConditionalLayers
+from cmmvae.modules.base import FCBlockConfig, ConditionalLayers, ConcatBlockConfig
 
 
 class CLVAE(VAE):
@@ -36,27 +36,56 @@ def __init__(
         conditionals_directory: Optional[str] = None,
         conditionals: Optional[list[str]] = None,
         selection_order: Optional[list[str]] = None,
+        concat_config: Optional[ConcatBlockConfig] = None,
         **encoder_kwargs
     ):
-        super().__init__(
-            encoder_config=encoder_config,
-            decoder_config=decoder_config,
-            **encoder_kwargs,
-        )
-
+        conditionals_module = None
         if conditional_config and conditionals and conditionals_directory:
-            self.conditionals = ConditionalLayers(
+            conditionals_module = ConditionalLayers(
                 directory=conditionals_directory,
                 conditionals=conditionals,
                 fc_block_config=conditional_config,
                 selection_order=selection_order,
             )
         else:
-            self.conditionals = None
             import warnings
 
             warnings.warn("No conditionals found for vae")
 
+        if selection_order and selection_order[0] == "parallel":
+            if not concat_config:
+                raise RuntimeError(
+                    "Please define concat_config when selection_order = parallel"
+                )
+            concat_dim = (
+                len(conditionals_module.selection_order) * conditional_config.layers[-1]
+            )
+
+            decoder_config.layers = [concat_dim] + decoder_config.layers
+            decoder_config.activation_fn = [
+                concat_config.activation_fn
+            ] + decoder_config.activation_fn
+            decoder_config.dropout_rate = [
+                concat_config.dropout_rate
+            ] + decoder_config.dropout_rate
+            decoder_config.return_hidden = [
+                concat_config.return_hidden
+            ] + decoder_config.return_hidden
+            decoder_config.use_layer_norm = [
+                concat_config.use_layer_norm
+            ] + decoder_config.use_layer_norm
+            decoder_config.use_batch_norm = [
+                concat_config.use_batch_norm
+            ] + decoder_config.use_batch_norm
+
+        super().__init__(
+            encoder_config=encoder_config,
+            decoder_config=decoder_config,
+            **encoder_kwargs,
+        )
+
+        self.conditionals = conditionals_module
+
     def after_reparameterize(
         self, z: torch.Tensor, metadata: pd.DataFrame, **kwargs
     ) -> torch.Tensor:
diff --git a/src/cmmvae/modules/vae.py b/src/cmmvae/modules/vae.py
index 8d6de36..7964f08 100644
--- a/src/cmmvae/modules/vae.py
+++ b/src/cmmvae/modules/vae.py
@@ -133,19 +133,27 @@ def elbo(
             - RK.LOSS: Total loss.
             - RK.KL_WEIGHT: KL weight.
""" - z_kl_div = kl_divergence(qz, pz).sum(dim=-1) + z_kl_div = kl_divergence(qz, pz) + z_kl_div = z_kl_div.sum(dim=-1) + z_kl_div = z_kl_div.mean() if x.layout == torch.sparse_csr: x = x.to_dense() recon_loss = F.mse_loss(xhat, x, reduction="sum") + # recon_loss = F.mse_loss(xhat, x, reduction="none") + # recon_loss = recon_loss.sum(dim=1) - loss = recon_loss + kl_weight * z_kl_div.mean() + loss = recon_loss + (kl_weight * z_kl_div) + # loss = torch.mean(z_kl_div * kl_weight + recon_loss) + + recon_loss = recon_loss / x.numel() + # recon_loss = recon_loss.mean() return { - RK.RECON_LOSS: recon_loss / x.numel(), - RK.KL_LOSS: z_kl_div.mean(), RK.LOSS: loss, + RK.RECON_LOSS: recon_loss, + RK.KL_LOSS: z_kl_div, RK.KL_WEIGHT: kl_weight, } diff --git a/workflow/profile/slurm/config.yaml b/workflow/profile/slurm/config.yaml index c8193fb..04557ba 100644 --- a/workflow/profile/slurm/config.yaml +++ b/workflow/profile/slurm/config.yaml @@ -26,41 +26,41 @@ jobs: 10 set-resources: diff_expression: partition: bigmem - mem: 179GB + mem: 100GB gpus_per_node: "" cpus_per_task: 1 train: partition: gpu - mem: 179GB + mem: 100GB gpus_per_node: tesla_v100s:1 - cpus_per_task: 12 + cpus_per_task: 6 predict: partition: gpu - mem: 179GB + mem: 100GB gpus_per_node: 1 - cpus_per_task: 12 + cpus_per_task: 6 merge_predictions: partition: all - mem: 179GB + mem: 100GB gpus_per_node: "" cpus_per_task: 1 correlations: partition: gpu - mem: 179GB + mem: 100GB gpus_per_node: 1 - cpus_per_task: 12 + cpus_per_task: 6 run_correlations: partition: cpu - mem: 179GB + mem: 100GB gpus_per_node: "" cpus_per_task: 1 umap_predictions: partition: all - mem: 179GB + mem: 100GB gpus_per_node: "" cpus_per_task: 40 meta_discriminators: partition: gpu - mem: 179GB + mem: 100GB gpus_per_node: tesla_v100s:1 - cpus_per_task: 12 + cpus_per_task: 6