Skip to content

Commit

Permalink
deploy: 24fb0c2
Browse files Browse the repository at this point in the history
  • Loading branch information
robert-lieck committed Feb 3, 2025
1 parent 09daed3 commit e2c265f
Show file tree
Hide file tree
Showing 46 changed files with 185 additions and 200 deletions.
Binary file not shown.
54 changes: 20 additions & 34 deletions _downloads/11d6d43464c14efce21535e2d86cf9ef/Coursework_Template.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ def __init__(self):
def sample_action(self, s):
return torch.rand(act_dim) * 2 - 1 # uniform random in [-1, 1]

def put_data(self, action, observation, reward):
pass

def train(self):
return
pass


# %%
Expand Down Expand Up @@ -119,6 +122,9 @@ def train(self):
# take action in the environment
observation, reward, terminated, truncated, info = env.step(action)

# remember
agent.put_data(action, observation, reward)

# check whether done
done = terminated or truncated

Expand All @@ -140,52 +146,32 @@ def train(self):
env.write_log(folder="logs", file="xxxx00-agent-log.txt") # replace xxxx00 with your username


# A small demo with a predefined heuristic that is suboptimal and has no notion of balance...
# A small demo with a predefined heuristic that is suboptimal and has no notion of balance (and is designed for the original BipedalWalker environment)...

# %%


from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics

env = rld.make("rldurham/Walker", render_mode="human")
# env = rld.make("rldurham/Walker", render_mode="human", hardcore=True)
env = rld.make(
"rldurham/Walker",
# "BipedalWalker-v3",
render_mode="human",
# render_mode="rgb_array",
hardcore=False,
# hardcore=True,
)
_, obs, info = rld.seed_everything(42, env)

heuristics = BipedalWalkerHeuristics()

obs, info = env.reset(seed=0)
act = heuristics.step_heuristic(obs)
for _ in range(1000):
for _ in range(500):
obs, rew, terminated, truncated, info = env.step(act)
act = heuristics.step_heuristic(obs)
if terminated or truncated:
break
if env.render_mode == "rgb_array":
rld.render(env, clear=True)
env.close()


# In[ ]:


























Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
},
"outputs": [],
"source": [
"class Agent(torch.nn.Module):\n def __init__(self):\n super(Agent, self).__init__()\n\n def sample_action(self, s):\n return torch.rand(act_dim) * 2 - 1 # unifrom random in [-1, 1]\n\n def train(self):\n return"
"class Agent(torch.nn.Module):\n def __init__(self):\n super(Agent, self).__init__()\n\n def sample_action(self, s):\n return torch.rand(act_dim) * 2 - 1 # uniform random in [-1, 1]\n\n def put_data(self, action, observation, reward):\n pass\n\n def train(self):\n pass"
]
},
{
Expand Down Expand Up @@ -91,7 +91,7 @@
},
"outputs": [],
"source": [
"# in the submission please use seed_everything with seed 42 for verification\nseed, observation, info = rld.seed_everything(42, env)\n\n# initialise agent\nagent = Agent()\nmax_episodes = 100\nmax_timesteps = 2000\n\n# track statistics for plotting\ntracker = rld.InfoTracker()\n\n# switch video recording off (only switch on every x episodes as this is slow)\nenv.video = False\n\n# training procedure\nfor episode in range(max_episodes):\n \n # recording statistics and video can be switched on and off (video recording is slow!)\n # env.info = episode % 10 == 0 # track every x episodes (usually tracking every episode is fine)\n # env.video = episode % 10 == 0 # record videos every x episodes (set BEFORE calling reset!)\n\n # reset for new episode\n observation, info = env.reset()\n\n # run episode\n for t in range(max_timesteps):\n \n # select the agent action\n action = agent.sample_action(observation)\n\n # take action in the environment\n observation, reward, terminated, truncated, info = env.step(action)\n\n # check whether done\n done = terminated or truncated\n\n # stop episode\n if done:\n break\n\n # TRAIN THE AGENT HERE!\n \n # track and plot statistics\n tracker.track(info)\n if (episode + 1) % 10 == 0:\n tracker.plot(r_mean_=True, r_std_=True, r_sum=dict(linestyle=':', marker='x'))\n\n# don't forget to close environment (e.g. triggers last video save)\nenv.close()\n\n# write log file (for coursework)\nenv.write_log(folder=\"logs\", file=\"xxxx00-agent-log.txt\") # replace xxxx00 with your username\n\n\n# A small demo with a predefined heuristic that is suboptimal and has no notion of balance..."
"# in the submission please use seed_everything with seed 42 for verification\nseed, observation, info = rld.seed_everything(42, env)\n\n# initialise agent\nagent = Agent()\nmax_episodes = 100\nmax_timesteps = 2000\n\n# track statistics for plotting\ntracker = rld.InfoTracker()\n\n# switch video recording off (only switch on every x episodes as this is slow)\nenv.video = False\n\n# training procedure\nfor episode in range(max_episodes):\n \n # recording statistics and video can be switched on and off (video recording is slow!)\n # env.info = episode % 10 == 0 # track every x episodes (usually tracking every episode is fine)\n # env.video = episode % 10 == 0 # record videos every x episodes (set BEFORE calling reset!)\n\n # reset for new episode\n observation, info = env.reset()\n\n # run episode\n for t in range(max_timesteps):\n \n # select the agent action\n action = agent.sample_action(observation)\n\n # take action in the environment\n observation, reward, terminated, truncated, info = env.step(action)\n\n # remember\n agent.put_data(action, observation, reward)\n\n # check whether done\n done = terminated or truncated\n\n # stop episode\n if done:\n break\n\n # TRAIN THE AGENT HERE!\n \n # track and plot statistics\n tracker.track(info)\n if (episode + 1) % 10 == 0:\n tracker.plot(r_mean_=True, r_std_=True, r_sum=dict(linestyle=':', marker='x'))\n\n# don't forget to close environment (e.g. triggers last video save)\nenv.close()\n\n# write log file (for coursework)\nenv.write_log(folder=\"logs\", file=\"xxxx00-agent-log.txt\") # replace xxxx00 with your username\n\n\n# A small demo with a predefined heuristic that is suboptimal and has no notion of balance (and is designed for the original BipedalWalker environment)..."
]
},
{
Expand All @@ -102,7 +102,7 @@
},
"outputs": [],
"source": [
"from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics\n\nenv = rld.make(\"rldurham/Walker\", render_mode=\"human\")\n# env = rld.make(\"rldurham/Walker\", render_mode=\"human\", hardcore=True)\n\nheuristics = BipedalWalkerHeuristics()\n\nobs, info = env.reset(seed=0)\nact = heuristics.step_heuristic(obs)\nfor _ in range(1000):\n obs, rew, terminated, truncated, info = env.step(act)\n act = heuristics.step_heuristic(obs)\n if terminated or truncated:\n break\nenv.close()\n\n\n# In[ ]:"
"from gymnasium.envs.box2d.bipedal_walker import BipedalWalkerHeuristics\n\nenv = rld.make(\n \"rldurham/Walker\",\n # \"BipedalWalker-v3\",\n render_mode=\"human\",\n # render_mode=\"rgb_array\",\n hardcore=False,\n # hardcore=True,\n)\n_, obs, info = rld.seed_everything(42, env)\n\nheuristics = BipedalWalkerHeuristics()\n\nact = heuristics.step_heuristic(obs)\nfor _ in range(500):\n obs, rew, terminated, truncated, info = env.step(act)\n act = heuristics.step_heuristic(obs)\n if terminated or truncated:\n break\n if env.render_mode == \"rgb_array\":\n rld.render(env, clear=True)\nenv.close()"
]
}
],
Expand Down
Binary file modified _downloads/13b21f3c8642c194cb8dda9e06e91bf0/Coursework_Template.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified _downloads/76c6bc5123ddcaf25de0c73d6996ce14/Lecture_2_Gym.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified _downloads/d28530d240daf6b6c64c084dc148da37/rldurham_basics.zip
Binary file not shown.
Binary file modified _images/sphx_glr_Coursework_Template_001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_003.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_004.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_005.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_006.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_007.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_008.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_009.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_010.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_011.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Coursework_Template_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified _images/sphx_glr_Lecture_2_Gym_012.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit e2c265f

Please sign in to comment.