Commit 9b78c19
Issue #10: Changed input parameters to obs and action space
i-gayo committed Aug 11, 2023
1 parent c987c19 commit 9b78c19
Showing 5 changed files with 51 additions and 68 deletions.
demos/cartpole_single_agent.py (15 additions, 20 deletions)
@@ -10,31 +10,26 @@
 tf.random.set_seed(42)
 np.random.seed(42)

-<<<<<<< HEAD
-=======
-def build_policy_network(state_shape, num_actions):
-    inputs = tf.keras.layers.Input(shape=state_shape)
-    flat = tf.keras.layers.Flatten()(inputs)
-    dense1 = tf.keras.layers.Dense(64, activation='relu')(flat)
-    dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
-    dense2 = tf.keras.layers.Dense(32, activation='relu')(dropout1)
-    dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
-    dense3 = tf.keras.layers.Dense(14, activation='relu')(dropout2)
-    dense4 = tf.keras.layers.Dense(num_actions, activation='softmax')(dense3)
-    policy_network = tf.keras.Model(inputs=inputs, outputs=dense4)
-    return policy_network
-
->>>>>>> main
+# def build_policy_network(state_shape, num_actions):
+#     inputs = tf.keras.layers.Input(shape=state_shape)
+#     flat = tf.keras.layers.Flatten()(inputs)
+#     dense1 = tf.keras.layers.Dense(64, activation='relu')(flat)
+#     dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
+#     dense2 = tf.keras.layers.Dense(32, activation='relu')(dropout1)
+#     dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
+#     dense3 = tf.keras.layers.Dense(14, activation='relu')(dropout2)
+#     dense4 = tf.keras.layers.Dense(num_actions, activation='softmax')(dense3)
+#     policy_network = tf.keras.Model(inputs=inputs, outputs=dense4)
+#     return policy_network

 env = gym.make('CartPole-v1')

-state_shape = env.observation_space.shape
+state_space = env.observation_space
 action_space = env.action_space
 num_actions = action_space.n

 # Build policy network
-policy_network = build_policy_network(state_shape,
-                                      action_size = num_actions,
-                                      action_space = action_space,
+policy_network = build_policy_network(state_space,
+                                      action_space,
                                       policy_type = 'fcn',
                                       layers = [64, 32, 14])

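Editor's note: the new two-argument call passes the space objects themselves, so the factory can read the input shape and the output head off them. As a rough sketch of what a build_policy_network consistent with these call sites might look like (the signature, dropout rate, and layer pattern are assumptions inferred from the demos and the commented-out networks above, not the repository's actual implementation):

import numpy as np
import tensorflow as tf
from gym import spaces

# Hypothetical sketch only; inferred from the demo call sites, not the
# repository's real build_policy_network.
def build_policy_network(state_space, action_space,
                         policy_type='fcn', layers=(64, 32),
                         activation_fn=None):
    if policy_type != 'fcn':
        raise NotImplementedError("only 'fcn' is sketched here")
    inputs = tf.keras.layers.Input(shape=state_space.shape)
    x = tf.keras.layers.Flatten()(inputs)
    for units in layers:
        x = tf.keras.layers.Dense(units, activation='relu')(x)
        x = tf.keras.layers.Dropout(0.4)(x)  # 0.4 matches the old demo networks
    if isinstance(action_space, spaces.Discrete):
        # Discrete space: one probability per action.
        outputs = tf.keras.layers.Dense(action_space.n,
                                        activation=activation_fn or 'softmax')(x)
    else:
        # Continuous (Box) space: one output per action dimension.
        outputs = tf.keras.layers.Dense(int(np.prod(action_space.shape)),
                                        activation=activation_fn or 'linear')(x)
    return tf.keras.Model(inputs=inputs, outputs=outputs)

Deriving the output head from action_space is what lets all five demos drop their separate action_size / num_actions arguments.
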
demos/dummy_single_agent_continuous.py (3 additions, 4 deletions)
@@ -39,11 +39,10 @@ def reset(self):
 env = DummyEnv(action_size)

 action_space = env.action_space
-state_shape = env.observation_space.shape
+state_space = env.observation_space

-policy_network = build_policy_network(state_shape,
-                                      action_size = action_size,
-                                      action_space = action_space,
+policy_network = build_policy_network(state_space,
+                                      action_space,
                                       policy_type = 'fcn',
                                       layers = [128],
                                       activation_fn = 'linear')
demos/dummy_single_agent_discrete.py (3 additions, 4 deletions)
@@ -38,11 +38,10 @@ def reset(self):
 env = DummyEnv(num_actions)

 # Building policy network
-state_shape = env.observation_space.shape
+state_space = env.observation_space
 action_space = env.action_space
-policy_network = build_policy_network(state_shape,
-                                      action_size = num_actions,
-                                      action_space = action_space,
+policy_network = build_policy_network(state_space,
+                                      action_space,
                                       policy_type = 'fcn',
                                       layers = [128])
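Editor's note: the two dummy demos only need the env to expose observation_space and action_space, which is why the new two-argument call works for custom environments too. A minimal illustrative stand-in is below; DummyEnv's actual definition is collapsed out of this diff, so the class name and shapes here are assumptions:

import gym
import numpy as np
from gym import spaces

# Illustrative stand-in for the collapsed DummyEnv; not the demo's real code.
class MinimalDiscreteEnv(gym.Env):
    def __init__(self, num_actions):
        self.observation_space = spaces.Box(low=-1.0, high=1.0,
                                            shape=(4,), dtype=np.float32)
        self.action_space = spaces.Discrete(num_actions)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        # Dummy dynamics: random next observation, zero reward, never done.
        return self.observation_space.sample(), 0.0, False, {}

env = MinimalDiscreteEnv(num_actions=2)
# Any env exposing these two attributes satisfies the new signature:
# policy_network = build_policy_network(env.observation_space, env.action_space,
#                                       policy_type='fcn', layers=[128])
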
demos/pendulum_single_agent.py (15 additions, 20 deletions)
@@ -10,33 +10,28 @@
 tf.random.set_seed(42)
 np.random.seed(42)

-<<<<<<< HEAD
-=======
-def build_policy_network(state_shape, action_size):
-    inputs = tf.keras.layers.Input(shape=state_shape)
-    flat = tf.keras.layers.Flatten()(inputs)
-    dense1 = tf.keras.layers.Dense(128, activation='relu')(flat)
-    dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
-    dense2 = tf.keras.layers.Dense(64, activation='relu')(dropout1)
-    dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
-    dense3 = tf.keras.layers.Dense(32, activation='relu')(dropout2)
-    dense4 = tf.keras.layers.Dense(np.prod(action_size), activation='tanh')(dense3)
+# def build_policy_network(state_shape, action_size):
+#     inputs = tf.keras.layers.Input(shape=state_shape)
+#     flat = tf.keras.layers.Flatten()(inputs)
+#     dense1 = tf.keras.layers.Dense(128, activation='relu')(flat)
+#     dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
+#     dense2 = tf.keras.layers.Dense(64, activation='relu')(dropout1)
+#     dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
+#     dense3 = tf.keras.layers.Dense(32, activation='relu')(dropout2)
+#     dense4 = tf.keras.layers.Dense(np.prod(action_size), activation='tanh')(dense3)

-    scaled_outputs = tf.keras.layers.Lambda(lambda x: (x + 1) * 2 - 2)(dense4)  # scale to action space
+#     scaled_outputs = tf.keras.layers.Lambda(lambda x: (x + 1) * 2 - 2)(dense4)  # scale to action space

-    policy_network = tf.keras.Model(inputs=inputs, outputs=scaled_outputs)
-    return policy_network
+#     policy_network = tf.keras.Model(inputs=inputs, outputs=scaled_outputs)
+#     return policy_network

->>>>>>> main
 env = gym.make("Pendulum-v1")

 action_space = env.action_space
-action_size = env.action_space.shape
-state_shape = env.observation_space.shape
+state_space = env.observation_space

-policy_network = build_policy_network(state_shape,
-                                      action_size = action_size,
-                                      action_space = action_space,
+policy_network = build_policy_network(state_space,
+                                      action_space,
                                       policy_type = 'fcn',
                                       layers = [128, 64, 32],
                                       activation_fn = 'tanh')
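Editor's note: the deleted Pendulum network baked its output scaling into a Lambda layer; with the new factory call the head is plain tanh (activation_fn = 'tanh'), so rescaling to the env's action bounds has to happen outside the model. A small sketch assuming that convention; for Pendulum-v1, low = [-2.0] and high = [2.0], so this reproduces the deleted (x + 1) * 2 - 2 mapping:

import numpy as np

def scale_action(tanh_action, action_space):
    # Map a tanh output in [-1, 1] onto [low, high] elementwise.
    low, high = action_space.low, action_space.high
    return low + (tanh_action + 1.0) * (high - low) / 2.0
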
demos/tictactoe_sequential_selfplay.py (15 additions, 20 deletions)
@@ -18,21 +18,18 @@
 tf.random.set_seed(42)
 np.random.seed(42)

-<<<<<<< HEAD
-=======
-def build_policy_network(state_shape, action_size):
-    inputs = tf.keras.layers.Input(shape=state_shape)
-    flat = tf.keras.layers.Flatten()(inputs)
-    dense1 = tf.keras.layers.Dense(128, activation='relu')(flat)
-    dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
-    dense2 = tf.keras.layers.Dense(64, activation='relu')(dropout1)
-    dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
-    dense3 = tf.keras.layers.Dense(32, activation='relu')(dropout2)
-    dense4 = tf.keras.layers.Dense(np.prod(action_size), activation='softmax')(dense3)
-    policy_network = tf.keras.Model(inputs=inputs, outputs=dense4)
-    return policy_network
-
->>>>>>> main
+# def build_policy_network(state_shape, action_size):
+#     inputs = tf.keras.layers.Input(shape=state_shape)
+#     flat = tf.keras.layers.Flatten()(inputs)
+#     dense1 = tf.keras.layers.Dense(128, activation='relu')(flat)
+#     dropout1 = tf.keras.layers.Dropout(0.4)(dense1)
+#     dense2 = tf.keras.layers.Dense(64, activation='relu')(dropout1)
+#     dropout2 = tf.keras.layers.Dropout(0.4)(dense2)
+#     dense3 = tf.keras.layers.Dense(32, activation='relu')(dropout2)
+#     dense4 = tf.keras.layers.Dense(np.prod(action_size), activation='softmax')(dense3)
+#     policy_network = tf.keras.Model(inputs=inputs, outputs=dense4)
+#     return policy_network

 env = TicTacToeEnv()

 class REINFORCE_TicTacToe(REINFORCE):
@@ -43,13 +40,11 @@ def invert_state(self, state):
         state[:, :, 0] *= -1
         return state

-state_shape = env.observation_space.shape
+state_space = env.observation_space
 action_space = env.action_space
 num_actions = (action_space.n,)

-policy_network = build_policy_network(state_shape,
-                                      action_size = num_actions,
-                                      action_space = action_space,
+policy_network = build_policy_network(state_space,
+                                      action_space,
                                       policy_type = 'fcn',
                                       layers = [128, 64, 32])
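Editor's note: the invert_state override is what lets a single policy network play both sides during self-play: negating the piece channel re-expresses the board from the current player's perspective. A small illustration, assuming a (3, 3, 1) board with +1 for the agent's marks and -1 for the opponent's (the encoding suggested by state[:, :, 0] *= -1; TicTacToeEnv's actual layout is not shown in this diff):

import numpy as np

board = np.zeros((3, 3, 1), dtype=np.float32)
board[0, 0, 0] = 1.0   # current player's mark
board[1, 1, 0] = -1.0  # opponent's mark

inverted = board.copy()
inverted[:, :, 0] *= -1  # same operation as REINFORCE_TicTacToe.invert_state

# After inversion, the opponent's marks read as the mover's own,
# so the same network can be queried for either player's turn.
assert inverted[1, 1, 0] == 1.0 and inverted[0, 0, 0] == -1.0
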
