fixed entry_point for SoccerAgainstKeeper-v0 #6

Open · wants to merge 5 commits into master
Changes from all commits
14 changes: 10 additions & 4 deletions gym_soccer/__init__.py
@@ -6,23 +6,29 @@
 register(
     id='Soccer-v0',
     entry_point='gym_soccer.envs:SoccerEnv',
-    timestep_limit=1000,
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=1.0,
     nondeterministic = True,
 )

 register(
     id='SoccerEmptyGoal-v0',
     entry_point='gym_soccer.envs:SoccerEmptyGoalEnv',
-    timestep_limit=1000,
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=10.0,
     nondeterministic = True,
 )

 register(
     id='SoccerAgainstKeeper-v0',
-    entry_point='gym.envs:SoccerAgainstKeeperEnv',
-    timestep_limit=1000,
+    entry_point='gym_soccer.envs:SoccerAgainstKeeperEnv',
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=8.0,
     nondeterministic = True,
 )
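
A quick way to sanity-check the fix (a sketch, not part of this PR): the old entry_point 'gym.envs:SoccerAgainstKeeperEnv' pointed into the gym package itself, so gym.make could not resolve it. With the corrected path, and assuming hfo_py plus the HFO server binaries are installed (constructing the env launches a server), the registration should resolve:

import gym
import gym_soccer  # noqa: F401 -- importing runs the register() calls above

env = gym.make('SoccerAgainstKeeper-v0')
print(env.spec.id)                 # 'SoccerAgainstKeeper-v0'
# In gym versions that replaced timestep_limit, the TimeLimit wrapper
# enforces this cap:
print(env.spec.max_episode_steps)  # 1000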
34 changes: 33 additions & 1 deletion gym_soccer/envs/soccer_empty_goal.py
@@ -24,6 +24,38 @@ def __init__(self):
         self.got_kickable_reward = False
         self.first_step = True
 
+    def _configure_environment(self):
+        """
+        Provides a chance for subclasses to override this method and supply
+        a different server configuration.
+        """
+        # Set the random seed; divide to make the number smaller,
+        # otherwise the server raises an error
+        seed = self.seed()[0] / 1e13
+        kwargs = dict(
+            # This should match max_episode_steps in __init__.py
+            frames_per_trial=1000,
+            # To keep the problem simple we do not use this value,
+            # so set it to something greater than frames_per_trial
+            untouched_time=1200,
+            offense_agents=1,
+            defense_agents=0,
+            offense_npcs=0,
+            defense_npcs=0,
+            sync_mode=True,
+            port=6000,
+            offense_on_ball=0,
+            fullstate=True,
+            seed=seed,
+            ball_x_min=0.0,
+            ball_x_max=0.2,
+            verbose=False,
+            log_game=False,
+            log_dir="log"
+        )
+        self._start_hfo_server(**kwargs)
+
     def _get_reward(self):
         """
         Agent is rewarded for minimizing the distance between itself and
@@ -66,7 +98,7 @@ def _get_reward(self):
             reward += 1.
             self.got_kickable_reward = True
         # Reward the agent for kicking towards the goal
-        reward += 0.6 * -ball_dist_goal_delta
+        reward += 3.0 * -ball_dist_goal_delta
         # Reward the agent for scoring
         if self.status == hfo_py.GOAL:
             reward += 5.0
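
Putting the coefficient change in context, here is a minimal sketch of the shaped reward this hunk tunes; the move-toward-ball term sits in the elided lines above, so its exact form below is an assumption:

def shaped_reward(ball_prox_delta, kickable, got_kickable_reward,
                  ball_dist_goal_delta, scored):
    reward = 0.0
    reward += ball_prox_delta               # assumed: approach-the-ball term
    if kickable and not got_kickable_reward:
        reward += 1.0                       # one-time bonus for reaching the ball
    reward += 3.0 * -ball_dist_goal_delta   # kick toward goal (this PR: 0.6 -> 3.0)
    if scored:
        reward += 5.0                       # scoring bonus
    return reward

Raising the kick-toward-goal weight from 0.6 to 3.0 makes moving the ball toward the goal dominate the shaping signal.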
53 changes: 42 additions & 11 deletions gym_soccer/envs/soccer_env.py
@@ -1,4 +1,5 @@
 import os, subprocess, time, signal
+import numpy as np
 import gym
 from gym import error, spaces
 from gym import utils
@@ -26,12 +27,21 @@ def __init__(self):
         self.observation_space = spaces.Box(low=-1, high=1,
                                             shape=(self.env.getStateSize()))
         # Action space omits the Tackle/Catch actions, which are useful on defense
-        self.action_space = spaces.Tuple((spaces.Discrete(3),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1)))
+        # self.action_space = spaces.Tuple((spaces.Discrete(3),
+        #                                   spaces.Box(low=0, high=100, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1),
+        #                                   spaces.Box(low=0, high=100, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1)))
+
+        # Modified action space: the parameters that belong to the same
+        # discrete action are combined into a single Box space
+        self.action_space = spaces.Tuple((
+            spaces.Discrete(3),
+            spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0])),  # DASH: power, direction
+            spaces.Box(low=np.array([-180.0]), high=np.array([180.0])),              # TURN: direction
+            spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0]))))  # KICK: power, direction
+
         self.status = hfo_py.IN_GAME
 
     def __del__(self):
@@ -50,7 +60,7 @@ def _configure_environment(self):
         self._start_hfo_server()
 
     def _start_hfo_server(self, frames_per_trial=500,
-                          untouched_time=100, offense_agents=1,
+                          untouched_time=600, offense_agents=1,
                           defense_agents=0, offense_npcs=0,
                           defense_npcs=0, sync_mode=True, port=6000,
                           offense_on_ball=0, fullstate=True, seed=-1,
@@ -113,17 +123,38 @@ def _step(self, action):

     def _take_action(self, action):
         """ Converts the action space into an HFO action. """
-        action_type = ACTION_LOOKUP[action[0]]
+        action_index = action[0]
+        action_type = ACTION_LOOKUP[action_index]
+        # Select the Box sample that belongs to the chosen discrete action
+        parameter = action[1 + action_index]
+        # A discrete action has at most 2 parameters
+        parameter_list = np.array_split(parameter, 2)
         if action_type == hfo_py.DASH:
-            self.env.act(action_type, action[1], action[2])
+            self.env.act(action_type, parameter_list[0], parameter_list[1])
         elif action_type == hfo_py.TURN:
-            self.env.act(action_type, action[3])
+            self.env.act(action_type, parameter_list[0])
         elif action_type == hfo_py.KICK:
-            self.env.act(action_type, action[4], action[5])
+            self.env.act(action_type, parameter_list[0], parameter_list[1])
         else:
             print('Unrecognized action %d' % action_type)
             self.env.act(hfo_py.NOOP)
 
+    # def _take_action(self, action):
+    #     """ Converts the action space into an HFO action. """
+    #     action_type = ACTION_LOOKUP[action[0]]
+    #     if action_type == hfo_py.DASH:
+    #         self.env.act(action_type, action[1], action[2])
+    #     elif action_type == hfo_py.TURN:
+    #         self.env.act(action_type, action[3])
+    #     elif action_type == hfo_py.KICK:
+    #         self.env.act(action_type, action[4], action[5])
+    #     else:
+    #         print('Unrecognized action %d' % action_type)
+    #         self.env.act(hfo_py.NOOP)
+
+    def _seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
     def _get_reward(self):
         """ Reward is given for scoring a goal. """
         if self.status == hfo_py.GOAL:
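
To see how the modified Tuple space and the new _take_action fit together, here is a standalone sketch; the action order (DASH, TURN, KICK) follows ACTION_LOOKUP in the diff and the numeric values are made up:

import numpy as np

action = (0,                        # Discrete(3): 0 selects DASH here
          np.array([55.0, 30.0]),   # DASH parameters: power, direction
          np.array([-90.0]),        # TURN parameter: direction
          np.array([80.0, 10.0]))   # KICK parameters: power, direction

action_index = action[0]
parameter = action[1 + action_index]           # Box sample for the chosen action
parameter_list = np.array_split(parameter, 2)  # always yields two pieces
print(parameter_list)                          # [array([55.]), array([30.])]
# For TURN the split is [array([-90.]), array([])], which is why that
# branch passes only parameter_list[0] to env.act.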
6 changes: 4 additions & 2 deletions setup.py
@@ -1,7 +1,9 @@
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(name='gym_soccer',
       version='0.0.1',
       install_requires=['gym>=0.2.3',
-                        'hfo_py>=0.2']
+                        'hfo_py>=0.2'],
+      packages=[package for package in find_packages()
+                if package.startswith('gym')],
 )