
Commit

version 0.0.5
robert-lieck committed Feb 3, 2025
1 parent 24fb0c2 commit c37af13
Showing 4 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion examples/Coursework_Template.py
@@ -63,7 +63,7 @@ def train(self):


env = rld.make("rldurham/Walker", render_mode="rgb_array")
-# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved WalkerEasy
+# env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True) # only attempt this when your agent has solved the non-hardcore version

# get statistics, logs, and videos
env = rld.Recorder(
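The updated comment makes the intended progression explicit: train on the standard Walker first, then switch to hardcore. A minimal sketch of that switch, using only the two `rld.make` calls shown in the hunk above (the `solved_standard` flag is hypothetical and not part of the template):

```python
import rldurham as rld

solved_standard = False  # set to True once your agent reliably solves the non-hardcore Walker

if solved_standard:
    env = rld.make("rldurham/Walker", render_mode="rgb_array", hardcore=True)
else:
    env = rld.make("rldurham/Walker", render_mode="rgb_array")
```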
4 changes: 2 additions & 2 deletions notebooks/Coursework_Template.ipynb

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions rldurham/__init__.py
@@ -166,7 +166,7 @@ def __getattr__(self, item):

def step(self, action):
obs, reward, terminated, truncated, info = super().step(action)
-self._unscaled_reward = reward
+self._unscaled_reward = float(reward) # convert to float in case it is tensor/array
return obs, reward, terminated, truncated, info


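The `float(reward)` cast matters because wrapped environments or reward shaping can return NumPy scalars, 0-d arrays, or tensors, and accumulating those would silently change the type of the running sums. A small illustration in plain Python/NumPy (not rldurham code):

```python
import numpy as np

reward = np.float32(0.5)   # e.g. a NumPy scalar coming out of a wrapped env

total = 0.0
total += reward            # total is now a NumPy scalar, not a Python float
print(type(total))         # <class 'numpy.float32'> (or float64, depending on NumPy version)

total = 0.0
total += float(reward)     # the cast keeps the accumulator a plain Python float
print(type(total))         # <class 'float'>
```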
@@ -258,10 +258,10 @@ def __init__(self, env, info=True, video=False, logs=False, key="recorder",

# episode stats
self._episode_count = 0
-self._episode_reward_sum = 0
-self._episode_reward_sum_unscaled = 0
-self._episode_squared_reward_sum = 0
-self._episode_squared_reward_sum_unscaled = 0
+self._episode_reward_sum = 0.
+self._episode_reward_sum_unscaled = 0.
+self._episode_squared_reward_sum = 0.
+self._episode_squared_reward_sum_unscaled = 0.
self._episode_length = 0
# logging statistics
self._episode_count_log = []
@@ -285,6 +285,7 @@ def step(self, action):
self._episode_started = True
obs, reward, terminated, truncated, info = super().step(action)

+reward = float(reward) # convert to float in case it is tensor/array
self._episode_reward_sum += reward
self._episode_reward_sum_unscaled += getwrappedattr(self, "_unscaled_reward")
self._episode_squared_reward_sum += reward ** 2
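Keeping running sums of rewards and squared rewards is the standard way to recover per-episode mean and variance without storing every reward. The hunk does not show how the Recorder uses these sums, so the snippet below is only a sketch of the idea, with hypothetical names:

```python
def episode_stats(reward_sum, squared_reward_sum, length):
    """Mean and (population) variance of per-step rewards from running sums."""
    mean = reward_sum / length
    variance = squared_reward_sum / length - mean ** 2
    return mean, variance

# example: rewards 1.0, 2.0, 3.0 -> sums 6.0 and 14.0 over 3 steps
print(episode_stats(6.0, 14.0, 3))  # (2.0, 0.666...)
```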
2 changes: 1 addition & 1 deletion setup.py
@@ -8,7 +8,7 @@

setuptools.setup(
name="rldurham",
version="0.0.4",
version="0.0.5",
author="Robert Lieck",
author_email="[email protected]",
description="Python package for the Reinforcement Learning course at Durham University",

