From 8d87c7a340aa8bc0b7117289d3605407d5e916bd Mon Sep 17 00:00:00 2001
From: Lars Lorentz Ludvigsen <59617571+larsll@users.noreply.github.com>
Date: Sat, 27 Jan 2024 15:58:45 +0100
Subject: [PATCH] Max Steps per Iteration (#152)

* Max Steps per Iteration

* Update dependencies

* Add to docs
---
 defaults/dependencies.json         | 4 ++--
 defaults/template-run.env          | 1 +
 docker/docker-compose-training.yml | 1 +
 docs/reference.md                  | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/defaults/dependencies.json b/defaults/dependencies.json
index 657dafa5..07d35ea9 100644
--- a/defaults/dependencies.json
+++ b/defaults/dependencies.json
@@ -1,8 +1,8 @@
 {
     "master_version": "5.1",
     "containers": {
-        "rl_coach": "5.1.1",
-        "robomaker": "5.1.1",
+        "rl_coach": "5.1.2",
+        "robomaker": "5.1.2",
         "sagemaker": "5.1.1"
     }
 }
diff --git a/defaults/template-run.env b/defaults/template-run.env
index 55da3c75..f56dd841 100644
--- a/defaults/template-run.env
+++ b/defaults/template-run.env
@@ -32,6 +32,7 @@ DR_TRAIN_MULTI_CONFIG=False
 DR_TRAIN_MIN_EVAL_TRIALS=5
 DR_TRAIN_BEST_MODEL_METRIC=progress
 #DR_TRAIN_RTF=1.0
+#DR_TRAIN_MAX_STEPS_PER_ITERATION=10000
 DR_LOCAL_S3_MODEL_PREFIX=rl-deepracer-sagemaker
 DR_LOCAL_S3_PRETRAINED=False
 DR_LOCAL_S3_PRETRAINED_PREFIX=rl-sagemaker-pretrained
diff --git a/docker/docker-compose-training.yml b/docker/docker-compose-training.yml
index 0924e6c0..8ca4dbd8 100644
--- a/docker/docker-compose-training.yml
+++ b/docker/docker-compose-training.yml
@@ -19,6 +19,7 @@ services:
       - HYPERPARAMETER_FILE_S3_KEY=${DR_LOCAL_S3_HYPERPARAMETERS_KEY}
       - MODELMETADATA_FILE_S3_KEY=${DR_LOCAL_S3_MODEL_METADATA_KEY}
       - CUDA_VISIBLE_DEVICES=${DR_SAGEMAKER_CUDA_DEVICES:-}
+      - MAX_MEMORY_STEPS=${DR_TRAIN_MAX_STEPS_PER_ITERATION:-}
     volumes:
       - "/var/run/docker.sock:/var/run/docker.sock"
       - "/tmp/sagemaker:/tmp/sagemaker"
diff --git a/docs/reference.md b/docs/reference.md
index 7ef5c44d..dadf5297 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -27,6 +27,7 @@ The scripts assume that two files `system.env` containing constant configuration
 | `DR_TRAIN_MIN_EVAL_TRIALS` | The minimum number of evaluation trials run between each training iteration.  Evaluations will continue as long as policy training is occuring and may be more than this number.  This establishes the minimum, and is generally useful if you want to speed up training especially when using gpu sagemaker containers.|
 | `DR_TRAIN_REVERSE_DIRECTION` | Set to `True` to reverse the direction in which the car traverses the track. |
 | `DR_TRAIN_BEST_MODEL_METRIC` | Can be used to control which model is kept as the "best" model. Set to `progress` to select the model with the highest evaluation completion percentage, set to `reward` to select the model with the highest evaluation reward.|
+| `DR_TRAIN_MAX_STEPS_PER_ITERATION` | Sets the maximum number of steps per iteration that are used for learning. Any excess steps are discarded to avoid out-of-memory situations. Default is 10000. |
 | `DR_LOCAL_S3_PRETRAINED` | Determines if training or evaluation shall be based on the model created in a previous session, held in `s3://{DR_LOCAL_S3_BUCKET}/{LOCAL_S3_PRETRAINED_PREFIX}`, accessible by credentials held in profile `{DR_LOCAL_S3_PROFILE}`.|
 | `DR_LOCAL_S3_PRETRAINED_PREFIX` | Prefix of pretrained model within S3 bucket.|
 | `DR_LOCAL_S3_MODEL_PREFIX` | Prefix of model within S3 bucket.|