deploy: 24fb0c2

robert-lieck · Feb 3, 2025 · e2c265f · e2c265f
1 parent 09daed3
commit e2c265f
Show file tree

Hide file tree

Showing 46 changed files with 185 additions and 200 deletions.
diff --git a/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip b/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
diff --git a/_downloads/11d6d43464c14efce21535e2d86cf9ef/Coursework_Template.py b/_downloads/11d6d43464c14efce21535e2d86cf9ef/Coursework_Template.py
@@ -46,8 +46,11 @@ def __init__(self):
     def sample_action(self, s):
         return torch.rand(act_dim) * 2 - 1 # uniform random in [-1, 1]
 
+    def put_data(self, action, observation, reward):
+        pass
+
     def train(self):
-        return
+        pass
 
 
 # %%
@@ -119,6 +122,9 @@ def train(self):
         # take action in the environment
         observation, reward, terminated, truncated, info = env.step(action)
 
+        # remember
+        agent.put_data(action, observation, reward)
+
         # check whether done
         done = terminated or truncated
 
@@ -140,52 +146,32 @@ def train(self):
 env.write_log(folder="logs", file="xxxx00-agent-log.txt")  # replace xxxx00 with your username
 
 
-# A small demo with a predefined heuristic that is suboptimal and has no notion of balance...
+# A small demo with a predefined heuristic that is suboptimal and has no notion of balance (and is designed for the original BipedalWalker environment)...
 
 # %%
 
 
 from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics
 
-env = rld.make("rldurham/Walker", render_mode="human")
-# env = rld.make("rldurham/Walker", render_mode="human", hardcore=True)
+env = rld.make(
+    "rldurham/Walker",
+    # "BipedalWalker-v3",
+    render_mode="human",
+    # render_mode="rgb_array",
+    hardcore=False,
+    # hardcore=True,
+)
+_, obs, info = rld.seed_everything(42, env)
 
 heuristics = BipedalWalkerHeuristics()
 
-obs, info = env.reset(seed=0)
 act = heuristics.step_heuristic(obs)
-for _ in range(1000):
+for _ in range(500):
     obs, rew, terminated, truncated, info = env.step(act)
     act = heuristics.step_heuristic(obs)
     if terminated or truncated:
         break
+    if env.render_mode == "rgb_array":
+        rld.render(env, clear=True)
 env.close()
-
-
-# In[ ]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
diff --git a/_downloads/131b2b4f284f8b6a2286f29077475f41/Coursework_Template.ipynb b/_downloads/131b2b4f284f8b6a2286f29077475f41/Coursework_Template.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "class Agent(torch.nn.Module):\n    def __init__(self):\n        super(Agent, self).__init__()\n\n    def sample_action(self, s):\n        return torch.rand(act_dim) * 2 - 1 # unifrom random in [-1, 1]\n\n    def train(self):\n        return"
+        "class Agent(torch.nn.Module):\n    def __init__(self):\n        super(Agent, self).__init__()\n\n    def sample_action(self, s):\n        return torch.rand(act_dim) * 2 - 1 # unifrom random in [-1, 1]\n\n    def put_data(self, action, observation, reward):\n        pass\n\n    def train(self):\n        pass"
       ]
     },
     {
@@ -91,7 +91,7 @@
       },
       "outputs": [],
       "source": [
-        "# in the submission please use seed_everything with seed 42 for verification\nseed, observation, info = rld.seed_everything(42, env)\n\n# initialise agent\nagent = Agent()\nmax_episodes = 100\nmax_timesteps = 2000\n\n# track statistics for plotting\ntracker = rld.InfoTracker()\n\n# switch video recording off (only switch on every x episodes as this is slow)\nenv.video = False\n\n# training procedure\nfor episode in range(max_episodes):\n    \n    # recording statistics and video can be switched on and off (video recording is slow!)\n    # env.info = episode % 10 == 0   # track every x episodes (usually tracking every episode is fine)\n    # env.video = episode % 10 == 0  # record videos every x episodes (set BEFORE calling reset!)\n\n    # reset for new episode\n    observation, info = env.reset()\n\n    # run episode\n    for t in range(max_timesteps):\n        \n        # select the agent action\n        action = agent.sample_action(observation)\n\n        # take action in the environment\n        observation, reward, terminated, truncated, info = env.step(action)\n\n        # check whether done\n        done = terminated or truncated\n\n        # stop episode\n        if done:\n            break\n\n    # TRAIN THE AGENT HERE!\n            \n    # track and plot statistics\n    tracker.track(info)\n    if (episode + 1) % 10 == 0:\n        tracker.plot(r_mean_=True, r_std_=True, r_sum=dict(linestyle=':', marker='x'))\n\n# don't forget to close environment (e.g. triggers last video save)\nenv.close()\n\n# write log file (for coursework)\nenv.write_log(folder=\"logs\", file=\"xxxx00-agent-log.txt\")  # replace xxxx00 with your username\n\n\n# A small demo with a predefined heuristic that is suboptimal and has no notion of balance..."
+        "# in the submission please use seed_everything with seed 42 for verification\nseed, observation, info = rld.seed_everything(42, env)\n\n# initialise agent\nagent = Agent()\nmax_episodes = 100\nmax_timesteps = 2000\n\n# track statistics for plotting\ntracker = rld.InfoTracker()\n\n# switch video recording off (only switch on every x episodes as this is slow)\nenv.video = False\n\n# training procedure\nfor episode in range(max_episodes):\n    \n    # recording statistics and video can be switched on and off (video recording is slow!)\n    # env.info = episode % 10 == 0   # track every x episodes (usually tracking every episode is fine)\n    # env.video = episode % 10 == 0  # record videos every x episodes (set BEFORE calling reset!)\n\n    # reset for new episode\n    observation, info = env.reset()\n\n    # run episode\n    for t in range(max_timesteps):\n        \n        # select the agent action\n        action = agent.sample_action(observation)\n\n        # take action in the environment\n        observation, reward, terminated, truncated, info = env.step(action)\n\n        # remember\n        agent.put_data(action, observation, reward)\n\n        # check whether done\n        done = terminated or truncated\n\n        # stop episode\n        if done:\n            break\n\n    # TRAIN THE AGENT HERE!\n            \n    # track and plot statistics\n    tracker.track(info)\n    if (episode + 1) % 10 == 0:\n        tracker.plot(r_mean_=True, r_std_=True, r_sum=dict(linestyle=':', marker='x'))\n\n# don't forget to close environment (e.g. triggers last video save)\nenv.close()\n\n# write log file (for coursework)\nenv.write_log(folder=\"logs\", file=\"xxxx00-agent-log.txt\")  # replace xxxx00 with your username\n\n\n# A small demo with a predefined heuristic that is suboptimal and has no notion of balance (and is designed for the orignal BipedalWalker environment)..."
       ]
     },
     {
@@ -102,7 +102,7 @@
       },
       "outputs": [],
       "source": [
-        "from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics\n\nenv = rld.make(\"rldurham/Walker\", render_mode=\"human\")\n# env = rld.make(\"rldurham/Walker\", render_mode=\"human\", hardcore=True)\n\nheuristics = BipedalWalkerHeuristics()\n\nobs, info = env.reset(seed=0)\nact = heuristics.step_heuristic(obs)\nfor _ in range(1000):\n    obs, rew, terminated, truncated, info = env.step(act)\n    act = heuristics.step_heuristic(obs)\n    if terminated or truncated:\n        break\nenv.close()\n\n\n# In[ ]:"
+        "from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics\n\nenv = rld.make(\n    \"rldurham/Walker\",\n    # \"BipedalWalker-v3\",\n    render_mode=\"human\",\n    # render_mode=\"rgb_array\",\n    hardcore=False,\n    # hardcore=True,\n)\n_, obs, info = rld.seed_everything(42, env)\n\nheuristics = BipedalWalkerHeuristics()\n\nact = heuristics.step_heuristic(obs)\nfor _ in range(500):\n    obs, rew, terminated, truncated, info = env.step(act)\n    act = heuristics.step_heuristic(obs)\n    if terminated or truncated:\n        break\n    if env.render_mode == \"rgb_array\":\n        rld.render(env, clear=True)\nenv.close()"
       ]
     }
   ],

diff --git a/_downloads/13b21f3c8642c194cb8dda9e06e91bf0/Coursework_Template.zip b/_downloads/13b21f3c8642c194cb8dda9e06e91bf0/Coursework_Template.zip
diff --git a/_downloads/2937269fcac6caa3e1e811a84beb9ca7/Practical_2_Multi-Armed_Bandits.zip b/_downloads/2937269fcac6caa3e1e811a84beb9ca7/Practical_2_Multi-Armed_Bandits.zip
diff --git a/_downloads/4986d0acbe56329fb4fc328da27b949a/Practical_3_Markov_Decision_Processes.zip b/_downloads/4986d0acbe56329fb4fc328da27b949a/Practical_3_Markov_Decision_Processes.zip
diff --git a/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip b/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
diff --git a/_downloads/76c6bc5123ddcaf25de0c73d6996ce14/Lecture_2_Gym.zip b/_downloads/76c6bc5123ddcaf25de0c73d6996ce14/Lecture_2_Gym.zip
diff --git a/_downloads/805b9f38a7c295615a7076de9b2bf145/Practical_4_Dynamic_Programming.zip b/_downloads/805b9f38a7c295615a7076de9b2bf145/Practical_4_Dynamic_Programming.zip
diff --git a/_downloads/d02344b65e2aab5876dc80a92b67685c/Lecture_4_Dynamic_Programming.zip b/_downloads/d02344b65e2aab5876dc80a92b67685c/Lecture_4_Dynamic_Programming.zip
diff --git a/_downloads/d28530d240daf6b6c64c084dc148da37/rldurham_basics.zip b/_downloads/d28530d240daf6b6c64c084dc148da37/rldurham_basics.zip
diff --git a/_images/sphx_glr_Coursework_Template_001.png b/_images/sphx_glr_Coursework_Template_001.png
diff --git a/_images/sphx_glr_Coursework_Template_002.png b/_images/sphx_glr_Coursework_Template_002.png
diff --git a/_images/sphx_glr_Coursework_Template_003.png b/_images/sphx_glr_Coursework_Template_003.png
diff --git a/_images/sphx_glr_Coursework_Template_004.png b/_images/sphx_glr_Coursework_Template_004.png
diff --git a/_images/sphx_glr_Coursework_Template_005.png b/_images/sphx_glr_Coursework_Template_005.png
diff --git a/_images/sphx_glr_Coursework_Template_006.png b/_images/sphx_glr_Coursework_Template_006.png
diff --git a/_images/sphx_glr_Coursework_Template_007.png b/_images/sphx_glr_Coursework_Template_007.png
diff --git a/_images/sphx_glr_Coursework_Template_008.png b/_images/sphx_glr_Coursework_Template_008.png
diff --git a/_images/sphx_glr_Coursework_Template_009.png b/_images/sphx_glr_Coursework_Template_009.png
diff --git a/_images/sphx_glr_Coursework_Template_010.png b/_images/sphx_glr_Coursework_Template_010.png
diff --git a/_images/sphx_glr_Coursework_Template_011.png b/_images/sphx_glr_Coursework_Template_011.png
diff --git a/_images/sphx_glr_Coursework_Template_thumb.png b/_images/sphx_glr_Coursework_Template_thumb.png
diff --git a/_images/sphx_glr_Lecture_2_Gym_012.png b/_images/sphx_glr_Lecture_2_Gym_012.png