Skip to content

Commit

Permalink
Issue #10: Changed input to build_policy_network for fcn to use single lists, and action_size
Browse files Browse the repository at this point in the history
  • Loading branch information
i-gayo committed Aug 11, 2023
1 parent 617974c commit 550a314
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 18 deletions.
5 changes: 2 additions & 3 deletions demos/cartpole_single_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
state_shape = env.observation_space.shape
action_space = env.action_space
num_actions = action_space.n
output_shape = (action_space.n,)

# Build policy network
policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [64, 32, 14]])

Expand Down
7 changes: 3 additions & 4 deletions demos/dummy_single_agent_continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,13 @@ def reset(self):
env = DummyEnv(action_size)

action_space = env.action_space
output_shape = env.action_space.shape
state_shape = env.observation_space.shape

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = action_size,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [128]],
layers = [128],
activation_fn = 'linear')

reinforce = REINFORCE(env, policy_network, scale=0.2)
Expand Down
7 changes: 3 additions & 4 deletions demos/dummy_single_agent_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,11 @@ def reset(self):
# Building policy network
state_shape = env.observation_space.shape
action_space = env.action_space
output_shape = (action_space.n,)
policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[],[128]])
layers = [128])

reinforce = REINFORCE(env, policy_network)

Expand Down
6 changes: 3 additions & 3 deletions demos/pendulum_single_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
env = gym.make("Pendulum-v1")

action_space = env.action_space
output_shape = env.action_space.shape
action_size = env.action_space.shape
state_shape = env.observation_space.shape

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = action_size,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [128, 64, 32]],
activation_fn = 'tanh')
Expand Down
8 changes: 4 additions & 4 deletions demos/tictactoe_sequential_selfplay.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

state_shape = env.observation_space.shape
action_space = env.action_space
output_shape = (action_space.n,)
num_actions = (action_space.n,)

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[],[128, 64, 32]])
layers = [128, 64, 32])

reinforce = REINFORCE(env, policy_network, artificial_truncation=256)

Expand Down

0 comments on commit 550a314

Please sign in to comment.