update practical 4
robert-lieck committed Feb 4, 2025
1 parent c37af13 commit 3aaa040
Showing 3 changed files with 313 additions and 102 deletions.
89 changes: 69 additions & 20 deletions examples/Practical_4_Dynamic_Programming.py
@@ -13,23 +13,38 @@
import rldurham as rld


# ## Frozen Lake Environment

# %%


name = 'FrozenLake-v1' # small version
# name = 'FrozenLake8x8-v1' # larger version
env = rld.make(name, is_slippery=False)
env = rld.make(
    'FrozenLake-v1',  # small version
    # 'FrozenLake8x8-v1',  # larger version
    # desc=["GFFS", "FHFH", "FFFH", "HFFG"],  # custom map
    render_mode="rgb_array",  # for rendering as image/video
    is_slippery=False,  # warning: slippery=True results in complex dynamics
)
rld.env_info(env, print_out=True)
rld.seed_everything(42, env)
LEFT, DOWN, RIGHT, UP = 0, 1, 2, 3
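

# %%


# The dynamic programming code below needs the environment's transition model.
# A sketch of how to inspect it, assuming the Gymnasium FrozenLake API where
# env.unwrapped.P[s][a] is a list of (prob, next_state, reward, done) tuples:
for prob, next_state, reward, done in env.unwrapped.P[0][RIGHT]:
    print(f"p={prob}, s'={next_state}, r={reward}, terminal={done}")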


# %%


print('action space: ' + str(env.action_space))
print('reward range: ' + str(env.reward_range))
print('observation space: ' + str(env.observation_space))
rld.plot_frozenlake(env=env)
# render the environment (requires render_mode="rgb_array")
rld.render(env)


# %%


# helper function that can also plot policies and value functions
rld.plot_frozenlake(env=env,
                    v=np.random.uniform(0, 1, 16),
                    policy=np.random.uniform(0, 1, (16, 4)),
                    draw_vals=True)


# %%
@@ -40,6 +55,8 @@ def uniform_policy(env):
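

# uniform_policy is defined in the collapsed region above; a minimal sketch,
# assuming a policy is an (n_states, n_actions) array of action probabilities:
def uniform_policy(env):
    return np.ones((env.observation_space.n, env.action_space.n)) / env.action_space.n

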
rld.plot_frozenlake(env=env, policy=uniform_policy(env))


# ## Policy Evaluation

# %%


@@ -88,13 +105,26 @@ def policy_eval_step_inplace(env, policy, gamma, v_init=None):
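

# policy_eval_step and policy_eval_step_inplace are defined in the collapsed
# region above; a minimal sketch of the out-of-place variant (one Bellman
# expectation backup), assuming env.unwrapped.P[s][a] is a list of
# (prob, next_state, reward, done) tuples:
def policy_eval_step(env, policy, gamma, v_init=None):
    if v_init is None:
        v_init = np.zeros(env.observation_space.n)
    new_v = np.zeros(env.observation_space.n)
    for s in range(env.observation_space.n):
        for a, pi_sa in enumerate(policy[s]):
            for prob, next_state, reward, done in env.unwrapped.P[s][a]:
                new_v[s] += pi_sa * prob * (reward + gamma * v_init[next_state])
    return new_v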
# %%


def policy_evaluation(env, policy, gamma, v_init=None,
                      print_iter=False, atol=1e-8, max_iter=10**10):
    v = np.zeros(env.observation_space.n)


# %%


v = policy_eval_step_inplace(env, uniform_policy(env), 1, v)
rld.plot_frozenlake(env, v, uniform_policy(env), draw_vals=True)


# %%


def policy_evaluation(env, policy, gamma, v_init=None, print_iter=False, atol=1e-8, max_iter=10**10):
    if v_init is None:
        v_init = np.zeros(env.observation_space.n)
    v = v_init
    for i in range(1, max_iter + 1):
        new_v = policy_eval_step(env, policy, gamma, v)
        # new_v = policy_eval_step_inplace(env, policy, gamma, v)
        # stop once the values no longer change (within atol)
        if np.allclose(v, new_v, atol=atol):
            break
        v = new_v
@@ -106,6 +136,15 @@ def policy_evaluation(env, policy, gamma, v_init=None,
# %%


v = policy_evaluation(env, uniform_policy(env), 1, print_iter=True)
rld.plot_frozenlake(env, v, uniform_policy(env), draw_vals=True)
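

# %%


# Sanity check (sketch): policy_evaluation returns once a further backup
# changes v by less than atol (1e-8 by default), so one more call of
# policy_eval_step should leave v unchanged:
assert np.allclose(v, policy_eval_step(env, uniform_policy(env), 1, v), atol=1e-8)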


# ## Policy Improvement

# %%


def q_from_v(env, v, s, gamma):
    q = np.zeros(env.action_space.n)
    for a in range(env.action_space.n):
@@ -136,31 +175,41 @@ def policy_improvement(env, v, gamma, deterministic=False):
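

# The remainder of q_from_v and the body of policy_improvement are collapsed
# above; a minimal sketch under the same env.unwrapped.P assumption as before
# (ties in q are split uniformly unless deterministic=True):
def q_from_v(env, v, s, gamma):
    q = np.zeros(env.action_space.n)
    for a in range(env.action_space.n):
        for prob, next_state, reward, done in env.unwrapped.P[s][a]:
            q[a] += prob * (reward + gamma * v[next_state])
    return q


def policy_improvement(env, v, gamma, deterministic=False):
    policy = np.zeros((env.observation_space.n, env.action_space.n))
    for s in range(env.observation_space.n):
        q = q_from_v(env, v, s, gamma)
        if deterministic:
            policy[s, np.argmax(q)] = 1
        else:
            best = np.flatnonzero(np.isclose(q, q.max()))
            policy[s, best] = 1 / len(best)
    return policy
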

env = rld.make('FrozenLake8x8-v1', is_slippery=False)
rld.seed_everything(42, env)
gamma = 1
policy = uniform_policy(env)
v = policy_evaluation(env, policy, gamma=1)
rld.plot_frozenlake(env, v, policy, draw_vals=True)


# %%


new_policy = policy_improvement(env, v, gamma=1)
rld.plot_frozenlake(env, v, new_policy, draw_vals=True)
v = policy_evaluation(env, policy, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=policy, draw_vals=True)


# %%


gamma = 1
v = policy_evaluation(env, new_policy, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=new_policy, draw_vals=True)
print(v)
new_policy = policy_improvement(env, v, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=new_policy, draw_vals=True)
policy = policy_improvement(env, v, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=policy, draw_vals=True)


# ## Policy Iteration

# In[ ]:
# %%


env = rld.make('FrozenLake8x8-v1', is_slippery=False)
rld.seed_everything(42, env)
policy = uniform_policy(env)
gamma = 1


# %%


v = policy_evaluation(env, policy, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=policy, draw_vals=True)
print(v)
policy = policy_improvement(env, v, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=policy, draw_vals=True)
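

# %%


# The evaluation/improvement cells above can be repeated until the policy
# stops changing; a minimal policy-iteration loop (sketch, assuming the
# functions defined earlier in this practical):
def policy_iteration(env, gamma, atol=1e-8):
    policy = uniform_policy(env)
    while True:
        v = policy_evaluation(env, policy, gamma=gamma, atol=atol)
        new_policy = policy_improvement(env, v, gamma=gamma)
        if np.allclose(policy, new_policy):  # policy stable, so improvement has converged
            return new_policy, v
        policy = new_policy


policy, v = policy_iteration(env, gamma=gamma)
rld.plot_frozenlake(env, v=v, policy=policy, draw_vals=True)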
