Commit

deploy: c37af13

robert-lieck committed Feb 3, 2025
1 parent e2c265f commit 846b0bc

Showing 40 changed files with 72 additions and 71 deletions.
@@ -63,7 +63,7 @@ def train(self):


 env = rld.make("rldurham/Walker", render_mode="rgb_array")
-# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved WalkerEasy
+# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved the non-hardcore version

 # get statistics, logs, and videos
 env = rld.Recorder(
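The updated comment describes a two-stage workflow: solve the regular Walker first, then retrain with hardcore terrain. A hypothetical sketch of that gating, where SOLVED_THRESHOLD and train_and_evaluate are placeholders and not part of rldurham:

    import rldurham as rld

    SOLVED_THRESHOLD = 300.0  # assumed target score; check the coursework spec for the real value

    def train_and_evaluate(env) -> float:
        """Placeholder: run your training loop and return the mean episode reward."""
        raise NotImplementedError

    env = rld.make("rldurham/Walker", render_mode="rgb_array")
    if train_and_evaluate(env) >= SOLVED_THRESHOLD:
        # only switch on hardcore once the non-hardcore Walker is solved
        env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True)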
@@ -80,7 +80,7 @@
 },
 "outputs": [],
 "source": [
-"env = rld.make(\"rldurham/Walker\", render_mode=\"rgb_array\")\n# env = rld.make(\"rldurham/Walker\", render_mode=\"rgb_array\", hardcore=True) # only attempt this when your agent has solved WalkerEasy\n\n# get statistics, logs, and videos\nenv = rld.Recorder(\n env,\n smoothing=10, # track rolling averages (useful for plotting)\n video=True, # enable recording videos\n video_folder=\"videos\", # folder for videos\n video_prefix=\"xxxx00-agent-video\", # prefix for videos (replace xxxx00 with your username)\n logs=True, # keep logs\n)\n\n# training on CPU recommended\nrld.check_device()\n\n# environment info\ndiscrete_act, discrete_obs, act_dim, obs_dim = rld.env_info(env, print_out=True)\n\n# render start image\nenv.reset(seed=42)\nrld.render(env)"
+"env = rld.make(\"rldurham/Walker\", render_mode=\"rgb_array\")\n# env = rld.make(\"rldurham/Walker\", render_mode=\"rgb_array\", hardcore=True) # only attempt this when your agent has solved the non-hardcore version\n\n# get statistics, logs, and videos\nenv = rld.Recorder(\n env,\n smoothing=10, # track rolling averages (useful for plotting)\n video=True, # enable recording videos\n video_folder=\"videos\", # folder for videos\n video_prefix=\"xxxx00-agent-video\", # prefix for videos (replace xxxx00 with your username)\n logs=True, # keep logs\n)\n\n# training on CPU recommended\nrld.check_device()\n\n# environment info\ndiscrete_act, discrete_obs, act_dim, obs_dim = rld.env_info(env, print_out=True)\n\n# render start image\nenv.reset(seed=42)\nrld.render(env)"
 ]
 },
 {
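For readability, the updated notebook cell decoded from the JSON string above (content identical to the diff; block indentation restored):

    env = rld.make("rldurham/Walker", render_mode="rgb_array")
    # env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True)  # only attempt this when your agent has solved the non-hardcore version

    # get statistics, logs, and videos
    env = rld.Recorder(
        env,
        smoothing=10,                       # track rolling averages (useful for plotting)
        video=True,                         # enable recording videos
        video_folder="videos",              # folder for videos
        video_prefix="xxxx00-agent-video",  # prefix for videos (replace xxxx00 with your username)
        logs=True,                          # keep logs
    )

    # training on CPU recommended
    rld.check_device()

    # environment info
    discrete_act, discrete_obs, act_dim, obs_dim = rld.env_info(env, print_out=True)

    # render start image
    env.reset(seed=42)
    rld.render(env)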
Binary file modified _downloads/76c6bc5123ddcaf25de0c73d6996ce14/Lecture_2_Gym.zip
Binary file modified _downloads/d28530d240daf6b6c64c084dc148da37/rldurham_basics.zip
Binary file modified _images/sphx_glr_Coursework_Template_002.png
Binary file modified _images/sphx_glr_Coursework_Template_003.png
Binary file modified _images/sphx_glr_Coursework_Template_004.png
Binary file modified _images/sphx_glr_Coursework_Template_005.png
Binary file modified _images/sphx_glr_Coursework_Template_006.png
Binary file modified _images/sphx_glr_Coursework_Template_007.png
Binary file modified _images/sphx_glr_Coursework_Template_008.png
Binary file modified _images/sphx_glr_Coursework_Template_009.png
Binary file modified _images/sphx_glr_Coursework_Template_010.png
Binary file modified _images/sphx_glr_Coursework_Template_011.png
Binary file modified _images/sphx_glr_Lecture_2_Gym_012.png
11 changes: 6 additions & 5 deletions _modules/rldurham.html
@@ -268,7 +268,7 @@ Source code for rldurham
     def step(self, action):
         obs, reward, terminated, truncated, info = super().step(action)
-        self._unscaled_reward = reward
+        self._unscaled_reward = float(reward)  # convert to float in case it is tensor/array
         return obs, reward, terminated, truncated, info

@@ -381,10 +381,10 @@ Source code for rldurham

         # episode stats
         self._episode_count = 0
-        self._episode_reward_sum = 0
-        self._episode_reward_sum_unscaled = 0
-        self._episode_squared_reward_sum = 0
-        self._episode_squared_reward_sum_unscaled = 0
+        self._episode_reward_sum = 0.
+        self._episode_reward_sum_unscaled = 0.
+        self._episode_squared_reward_sum = 0.
+        self._episode_squared_reward_sum_unscaled = 0.
         self._episode_length = 0
         # logging statistics
         self._episode_count_log = []

@@ -417,6 +417,7 @@ Source code for rldurham
         self._episode_started = True
         obs, reward, terminated, truncated, info = super().step(action)

+        reward = float(reward)  # convert to float in case it is tensor/array
         self._episode_reward_sum += reward
         self._episode_reward_sum_unscaled += getwrappedattr(self, "_unscaled_reward")
         self._episode_squared_reward_sum += reward ** 2
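Context for the float(reward) conversions above (an illustration, not code from the repository): environments and wrappers can hand back rewards as NumPy or PyTorch scalars, and accumulating those without conversion silently turns the episode statistics into array types. A minimal sketch of the failure mode, assuming a NumPy-typed reward:

    import numpy as np

    # hypothetical reward as an underlying environment might return it:
    # a NumPy scalar rather than a plain Python float
    reward = np.float32(-0.37)

    total = 0.0
    total += reward          # the running sum silently becomes a NumPy scalar
    print(type(total))       # a NumPy type (exact dtype depends on the NumPy version)

    total = 0.0
    total += float(reward)   # explicit conversion keeps the sum a plain Python float
    print(type(total))       # <class 'float'>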
4 changes: 2 additions & 2 deletions _sources/auto_examples/Coursework_Template.rst.txt
@@ -128,7 +128,7 @@ Prepare the environment and wrap it to capture statistics, logs, and videos

 env = rld.make("rldurham/Walker", render_mode="rgb_array")
-# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved WalkerEasy
+# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved the non-hardcore version

 # get statistics, logs, and videos
 env = rld.Recorder(

@@ -364,7 +364,7 @@ Prepare the environment and wrap it to capture statistics, logs, and videos

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (1 minutes 14.750 seconds)
+**Total running time of the script:** (1 minutes 14.565 seconds)


 .. _sphx_glr_download_auto_examples_Coursework_Template.py:
18 changes: 9 additions & 9 deletions _sources/auto_examples/Lecture_2_Gym.rst.txt
@@ -453,7 +453,7 @@ Different environments

 .. code-block:: none

@@ -727,7 +727,7 @@ Training an agent

 .. code-block:: none

-    /home/runner/work/rldurham/rldurham/rldurham/__init__.py:414: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
+    /home/runner/work/rldurham/rldurham/rldurham/__init__.py:415: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
     fig, ax = plt.subplots(1, 1)

@@ -891,16 +891,16 @@ Custom environments (here: multi-armed bandits)

 .. code-block:: none
5 -25
4 -25
1 -24.9
3 -25
5 45
4 -19
5 45
0 -25
3 -25
4 -25
1 -25
3 -25
3 -25
2 -25
4 -25
1 -25
@@ -1282,7 +1282,7 @@ Custom environments (here: multi-armed bandits)

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (0 minutes 7.234 seconds)
+**Total running time of the script:** (0 minutes 6.948 seconds)


 .. _sphx_glr_download_auto_examples_Lecture_2_Gym.py:
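The RuntimeWarning in the "Training an agent" hunk above is matplotlib's standard complaint once more than 20 figures are open at the same time; as the message itself suggests, closing figures explicitly avoids it. A minimal, repository-independent illustration:

    import matplotlib.pyplot as plt

    for i in range(30):
        fig, ax = plt.subplots(1, 1)
        ax.plot([0, 1], [0, i])
        fig.savefig(f"figure_{i}.png")
        plt.close(fig)  # release the figure; without this, >20 open figures trigger the warning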
_sources/auto_examples/Lecture_4_Dynamic_Programming.rst.txt
@@ -958,7 +958,7 @@ Policy evaluation

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (0 minutes 8.602 seconds)
+**Total running time of the script:** (0 minutes 9.071 seconds)


 .. _sphx_glr_download_auto_examples_Lecture_4_Dynamic_Programming.py:
_sources/auto_examples/Practical_3_Markov_Decision_Processes.rst.txt
@@ -125,7 +125,7 @@ Practical 3: Markov Decision Processes

 .. code-block:: none

-    [<matplotlib.lines.Line2D object at 0x7f8a0d1a0b90>, <matplotlib.lines.Line2D object at 0x7f8a0d1a1ac0>, <matplotlib.lines.Line2D object at 0x7f8a0d1a1b80>, <matplotlib.lines.Line2D object at 0x7f8a0d1a1f70>, <matplotlib.lines.Line2D object at 0x7f8a0d1a2030>, <matplotlib.lines.Line2D object at 0x7f8a0d1a1e50>, <matplotlib.lines.Line2D object at 0x7f8a0d1a21e0>, <matplotlib.lines.Line2D object at 0x7f8a0d1a2060>, <matplotlib.lines.Line2D object at 0x7f8a0d1a2300>, <matplotlib.lines.Line2D object at 0x7f8a0d1a24b0>]
+    [<matplotlib.lines.Line2D object at 0x7fc1360f2150>, <matplotlib.lines.Line2D object at 0x7fc1360f1ee0>, <matplotlib.lines.Line2D object at 0x7fc1360f2060>, <matplotlib.lines.Line2D object at 0x7fc1360f1f10>, <matplotlib.lines.Line2D object at 0x7fc1360f25a0>, <matplotlib.lines.Line2D object at 0x7fc1360f2540>, <matplotlib.lines.Line2D object at 0x7fc1360f2420>, <matplotlib.lines.Line2D object at 0x7fc1360f28d0>, <matplotlib.lines.Line2D object at 0x7fc1360f2720>, <matplotlib.lines.Line2D object at 0x7fc1360f2a20>]

@@ -158,7 +158,7 @@ Practical 3: Markov Decision Processes

 .. code-block:: none

-    <matplotlib.legend.Legend object at 0x7f8a0d2efaa0>
+    <matplotlib.legend.Legend object at 0x7fc136048ef0>

@@ -238,7 +238,7 @@ Practical 3: Markov Decision Processes

 .. code-block:: none

-    [<matplotlib.lines.Line2D object at 0x7f8a0ce1caa0>]
+    [<matplotlib.lines.Line2D object at 0x7fc13607ddc0>]

@@ -491,7 +491,7 @@ Practical 3: Markov Decision Processes

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (0 minutes 2.140 seconds)
+**Total running time of the script:** (0 minutes 2.113 seconds)


 .. _sphx_glr_download_auto_examples_Practical_3_Markov_Decision_Processes.py:
_sources/auto_examples/Practical_4_Dynamic_Programming.rst.txt
@@ -394,7 +394,7 @@ Practical 4: DynamicProgramming

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (0 minutes 1.552 seconds)
+**Total running time of the script:** (0 minutes 1.622 seconds)


 .. _sphx_glr_download_auto_examples_Practical_4_Dynamic_Programming.py:
2 changes: 1 addition & 1 deletion _sources/auto_examples/rldurham_basics.rst.txt
@@ -385,7 +385,7 @@ Render the environment with matplotlib (essentially a single video frame; requir

 .. rst-class:: sphx-glr-timing

-**Total running time of the script:** (0 minutes 1.206 seconds)
+**Total running time of the script:** (0 minutes 1.229 seconds)


 .. _sphx_glr_download_auto_examples_rldurham_basics.py:
14 changes: 7 additions & 7 deletions _sources/auto_examples/sg_execution_times.rst.txt
@@ -6,7 +6,7 @@

 Computation times
 =================
-**01:35.509** total execution time for 7 files **from auto_examples**:
+**01:35.572** total execution time for 7 files **from auto_examples**:

 .. container::

@@ -33,22 +33,22 @@ Computation times
    - Time
    - Mem (MB)
  * - :ref:`sphx_glr_auto_examples_Coursework_Template.py` (``Coursework_Template.py``)
-   - 01:14.750
+   - 01:14.565
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Lecture_4_Dynamic_Programming.py` (``Lecture_4_Dynamic_Programming.py``)
-   - 00:08.602
+   - 00:09.071
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Lecture_2_Gym.py` (``Lecture_2_Gym.py``)
-   - 00:07.234
+   - 00:06.948
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_3_Markov_Decision_Processes.py` (``Practical_3_Markov_Decision_Processes.py``)
-   - 00:02.140
+   - 00:02.113
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_4_Dynamic_Programming.py` (``Practical_4_Dynamic_Programming.py``)
-   - 00:01.552
+   - 00:01.622
    - 0.0
  * - :ref:`sphx_glr_auto_examples_rldurham_basics.py` (``rldurham_basics.py``)
-   - 00:01.206
+   - 00:01.229
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_2_Multi-Armed_Bandits.py` (``Practical_2_Multi-Armed_Bandits.py``)
    - 00:00.024
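A quick sanity check on the updated table (not part of the diff): 74.565 + 9.071 + 6.948 + 2.113 + 1.622 + 1.229 + 0.024 = 95.572 seconds, which matches the new **01:35.572** total exactly.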
14 changes: 7 additions & 7 deletions _sources/sg_execution_times.rst.txt
@@ -6,7 +6,7 @@

 Computation times
 =================
-**01:35.509** total execution time for 7 files **from all galleries**:
+**01:35.572** total execution time for 7 files **from all galleries**:

 .. container::

@@ -33,22 +33,22 @@ Computation times
    - Time
    - Mem (MB)
  * - :ref:`sphx_glr_auto_examples_Coursework_Template.py` (``../examples/Coursework_Template.py``)
-   - 01:14.750
+   - 01:14.565
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Lecture_4_Dynamic_Programming.py` (``../examples/Lecture_4_Dynamic_Programming.py``)
-   - 00:08.602
+   - 00:09.071
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Lecture_2_Gym.py` (``../examples/Lecture_2_Gym.py``)
-   - 00:07.234
+   - 00:06.948
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_3_Markov_Decision_Processes.py` (``../examples/Practical_3_Markov_Decision_Processes.py``)
-   - 00:02.140
+   - 00:02.113
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_4_Dynamic_Programming.py` (``../examples/Practical_4_Dynamic_Programming.py``)
-   - 00:01.552
+   - 00:01.622
    - 0.0
  * - :ref:`sphx_glr_auto_examples_rldurham_basics.py` (``../examples/rldurham_basics.py``)
-   - 00:01.206
+   - 00:01.229
    - 0.0
  * - :ref:`sphx_glr_auto_examples_Practical_2_Multi-Armed_Bandits.py` (``../examples/Practical_2_Multi-Armed_Bandits.py``)
    - 00:00.024
4 changes: 2 additions & 2 deletions auto_examples/Coursework_Template.html
@@ -134,7 +134,7 @@ Reinforcement learning agent

 Prepare the environment and wrap it to capture statistics, logs, and videos
 env = rld.make("rldurham/Walker", render_mode="rgb_array")
-# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved WalkerEasy
+# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved the non-hardcore version

 # get statistics, logs, and videos
 env = rld.Recorder(

@@ -264,7 +264,7 @@ Prepare the environment and wrap it to capture statistics, logs, and videos
 env.close()

-**Total running time of the script:** (1 minutes 14.750 seconds)
+**Total running time of the script:** (1 minutes 14.565 seconds)

 Download Jupyter notebook: Coursework_Template.ipynb