diff --git a/gym_soccer/__init__.py b/gym_soccer/__init__.py index a682811..e14ca6a 100644 --- a/gym_soccer/__init__.py +++ b/gym_soccer/__init__.py @@ -6,7 +6,9 @@ register( id='Soccer-v0', entry_point='gym_soccer.envs:SoccerEnv', - timestep_limit=1000, + # Previously timestep_limit, changed due to upgrade of gym + max_episode_steps=1000, + # timestep_limit=1000, reward_threshold=1.0, nondeterministic = True, ) @@ -14,15 +16,19 @@ register( id='SoccerEmptyGoal-v0', entry_point='gym_soccer.envs:SoccerEmptyGoalEnv', - timestep_limit=1000, + # Previously timestep_limit, changed due to upgrade of gym + max_episode_steps=1000, + # timestep_limit=1000, reward_threshold=10.0, nondeterministic = True, ) register( id='SoccerAgainstKeeper-v0', - entry_point='gym.envs:SoccerAgainstKeeperEnv', - timestep_limit=1000, + entry_point='gym_soccer.envs:SoccerAgainstKeeperEnv', + # Previously timestep_limit, changed due to upgrade of gym + max_episode_steps=1000, + # timestep_limit=1000, reward_threshold=8.0, nondeterministic = True, ) diff --git a/gym_soccer/envs/soccer_empty_goal.py b/gym_soccer/envs/soccer_empty_goal.py index 143641e..e754173 100644 --- a/gym_soccer/envs/soccer_empty_goal.py +++ b/gym_soccer/envs/soccer_empty_goal.py @@ -24,6 +24,38 @@ def __init__(self): self.got_kickable_reward = False self.first_step = True + def _configure_environment(self): + """ + Provides a chance for subclasses to override this method and supply + a different server configuration. + """ + # Set the random seed; it is divided to make the number + # smaller, otherwise the server will raise an error + seed = self.seed()[0] / 1e13 + kwargs = dict( + # This should match max_episode_steps in __init__.py + frames_per_trial=1000, + # To make the problem simpler, we do not use this + # value. 
Thus, set it to a value greater than + # frames_per_trial + untouched_time=1200, + offense_agents=1, + defense_agents=0, + offense_npcs=0, + defense_npcs=0, + sync_mode=True, + port=6000, + offense_on_ball=0, + fullstate=True, + seed=seed, + ball_x_min=0.0, + ball_x_max=0.2, + verbose=False, + log_game=False, + log_dir="log" + ) + self._start_hfo_server(**kwargs) + def _get_reward(self): """ Agent is rewarded for minimizing the distance between itself and @@ -66,7 +98,7 @@ def _get_reward(self): reward += 1. self.got_kickable_reward = True # Reward the agent for kicking towards the goal - reward += 0.6 * -ball_dist_goal_delta + reward += 3.0 * -ball_dist_goal_delta # Reward the agent for scoring if self.status == hfo_py.GOAL: reward += 5.0 diff --git a/gym_soccer/envs/soccer_env.py b/gym_soccer/envs/soccer_env.py index 9c58c24..21ffc95 100644 --- a/gym_soccer/envs/soccer_env.py +++ b/gym_soccer/envs/soccer_env.py @@ -1,4 +1,5 @@ import os, subprocess, time, signal +import numpy as np import gym from gym import error, spaces from gym import utils @@ -26,12 +27,21 @@ def __init__(self): self.observation_space = spaces.Box(low=-1, high=1, shape=(self.env.getStateSize())) # Action space omits the Tackle/Catch actions, which are useful on defense - self.action_space = spaces.Tuple((spaces.Discrete(3), - spaces.Box(low=0, high=100, shape=1), - spaces.Box(low=-180, high=180, shape=1), - spaces.Box(low=-180, high=180, shape=1), - spaces.Box(low=0, high=100, shape=1), - spaces.Box(low=-180, high=180, shape=1))) + # self.action_space = spaces.Tuple((spaces.Discrete(3), + # spaces.Box(low=0, high=100, shape=1), + # spaces.Box(low=-180, high=180, shape=1), + # spaces.Box(low=-180, high=180, shape=1), + # spaces.Box(low=0, high=100, shape=1), + # spaces.Box(low=-180, high=180, shape=1))) + + # Modified action space: this modification combines the parameters for the + # same discrete action into a single action space + self.action_space = spaces.Tuple(( + spaces.Discrete(3), 
+ spaces.Box(low=np.array([0.0,-180.0]), high=np.array([100.0, 180.0])), + spaces.Box(low=np.array([-180.0]), high=np.array([180.0])), + spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0,180.0])))) + self.status = hfo_py.IN_GAME def __del__(self): @@ -50,7 +60,7 @@ def _configure_environment(self): self._start_hfo_server() def _start_hfo_server(self, frames_per_trial=500, - untouched_time=100, offense_agents=1, + untouched_time=600, offense_agents=1, defense_agents=0, offense_npcs=0, defense_npcs=0, sync_mode=True, port=6000, offense_on_ball=0, fullstate=True, seed=-1, @@ -113,17 +123,38 @@ def _step(self, action): def _take_action(self, action): """ Converts the action space into an HFO action. """ - action_type = ACTION_LOOKUP[action[0]] + action_index = action[0] + action_type = ACTION_LOOKUP[action_index] + parameter = action[1 + action_index] + # A discrete action has at most 2 parameters + parameter_list = np.array_split(parameter, 2) if action_type == hfo_py.DASH: - self.env.act(action_type, action[1], action[2]) + self.env.act(action_type, parameter_list[0], parameter_list[1]) elif action_type == hfo_py.TURN: - self.env.act(action_type, action[3]) + self.env.act(action_type, parameter_list[0]) elif action_type == hfo_py.KICK: - self.env.act(action_type, action[4], action[5]) + self.env.act(action_type, parameter_list[0], parameter_list[1]) else: print('Unrecognized action %d' % action_type) self.env.act(hfo_py.NOOP) + # def _take_action(self, action): + # """ Converts the action space into an HFO action. 
""" + # action_type = ACTION_LOOKUP[action[0]] + # if action_type == hfo_py.DASH: + # self.env.act(action_type, action[1], action[2]) + # elif action_type == hfo_py.TURN: + # self.env.act(action_type, action[3]) + # elif action_type == hfo_py.KICK: + # self.env.act(action_type, action[4], action[5]) + # else: + # print('Unrecognized action %d' % action_type) + # self.env.act(hfo_py.NOOP) + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + def _get_reward(self): """ Reward is given for scoring a goal. """ if self.status == hfo_py.GOAL: diff --git a/setup.py b/setup.py index 2205797..31349d5 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ -from setuptools import setup +from setuptools import setup, find_packages setup(name='gym_soccer', version='0.0.1', install_requires=['gym>=0.2.3', - 'hfo_py>=0.2'] + 'hfo_py>=0.2'], + packages=[package for package in find_packages() + if package.startswith('gym')], )