Skip to content

Commit

Permalink
Issue #10: Changed input to build_policy_network for fcn to use single lists, and action_size
Browse files Browse the repository at this point in the history
  • Loading branch information
i-gayo committed Aug 11, 2023
1 parent 617974c commit 550a314
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 18 deletions.
5 changes: 2 additions & 3 deletions demos/cartpole_single_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
state_shape = env.observation_space.shape
action_space = env.action_space
num_actions = action_space.n
output_shape = (action_space.n,)

# Build policy network
policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [64, 32, 14]])

Expand Down
7 changes: 3 additions & 4 deletions demos/dummy_single_agent_continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,13 @@ def reset(self):
env = DummyEnv(action_size)

action_space = env.action_space
output_shape = env.action_space.shape
state_shape = env.observation_space.shape

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = action_size,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [128]],
layers = [128],
activation_fn = 'linear')

reinforce = REINFORCE(env, policy_network, scale=0.2)
Expand Down
7 changes: 3 additions & 4 deletions demos/dummy_single_agent_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,11 @@ def reset(self):
# Building policy network
state_shape = env.observation_space.shape
action_space = env.action_space
output_shape = (action_space.n,)
policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[],[128]])
layers = [128])

reinforce = REINFORCE(env, policy_network)

Expand Down
6 changes: 3 additions & 3 deletions demos/pendulum_single_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
env = gym.make("Pendulum-v1")

action_space = env.action_space
output_shape = env.action_space.shape
action_size = env.action_space.shape
state_shape = env.observation_space.shape

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = action_size,
action_space = action_space,
policy_type = 'fcn',
layers = [[], [128, 64, 32]],
activation_fn = 'tanh')
Expand Down
8 changes: 4 additions & 4 deletions demos/tictactoe_sequential_selfplay.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

state_shape = env.observation_space.shape
action_space = env.action_space
output_shape = (action_space.n,)
num_actions = (action_space.n,)

policy_network = build_policy_network(state_shape,
output_shape,
action_space,
action_size = num_actions,
action_space = action_space,
policy_type = 'fcn',
layers = [[],[128, 64, 32]])
layers = [128, 64, 32])

reinforce = REINFORCE(env, policy_network, artificial_truncation=256)

Expand Down

0 comments on commit 550a314

Please sign in to comment.