fixed entry_point for SoccerAgainstKeeper-v0 #6

Open · wants to merge 5 commits into master
Changes from all commits
14 changes: 10 additions & 4 deletions gym_soccer/__init__.py
@@ -6,23 +6,29 @@
 register(
     id='Soccer-v0',
     entry_point='gym_soccer.envs:SoccerEnv',
-    timestep_limit=1000,
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=1.0,
     nondeterministic = True,
 )

 register(
     id='SoccerEmptyGoal-v0',
     entry_point='gym_soccer.envs:SoccerEmptyGoalEnv',
-    timestep_limit=1000,
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=10.0,
     nondeterministic = True,
 )

 register(
     id='SoccerAgainstKeeper-v0',
-    entry_point='gym.envs:SoccerAgainstKeeperEnv',
-    timestep_limit=1000,
+    entry_point='gym_soccer.envs:SoccerAgainstKeeperEnv',
+    # Previously timestep_limit, changed due to upgrade of gym
+    max_episode_steps=1000,
+    # timestep_limit=1000,
     reward_threshold=8.0,
     nondeterministic = True,
 )
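
A quick way to sanity-check the fix (a sketch, not part of this PR): the old entry_point 'gym.envs:SoccerAgainstKeeperEnv' pointed into the gym package itself, so gym.make could not resolve it. With the corrected path, and assuming hfo_py plus the HFO server binaries are installed (constructing the env launches a server), the registration should resolve:

import gym
import gym_soccer  # noqa: F401 -- importing runs the register() calls above

env = gym.make('SoccerAgainstKeeper-v0')
print(env.spec.id)                 # 'SoccerAgainstKeeper-v0'
# In gym versions that replaced timestep_limit, the TimeLimit wrapper
# enforces this cap:
print(env.spec.max_episode_steps)  # 1000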
34 changes: 33 additions & 1 deletion gym_soccer/envs/soccer_empty_goal.py
@@ -24,6 +24,38 @@ def __init__(self):
         self.got_kickable_reward = False
         self.first_step = True
 
+    def _configure_environment(self):
+        """
+        Provides a chance for subclasses to override this method and supply
+        a different server configuration.
+        """
+        # Set the random seed; divide to make the number smaller,
+        # otherwise the server raises an error
+        seed = self.seed()[0] / 1e13
+        kwargs = dict(
+            # This should match max_episode_steps in __init__.py
+            frames_per_trial=1000,
+            # To keep the problem simple we do not use this value,
+            # so set it to something greater than frames_per_trial
+            untouched_time=1200,
+            offense_agents=1,
+            defense_agents=0,
+            offense_npcs=0,
+            defense_npcs=0,
+            sync_mode=True,
+            port=6000,
+            offense_on_ball=0,
+            fullstate=True,
+            seed=seed,
+            ball_x_min=0.0,
+            ball_x_max=0.2,
+            verbose=False,
+            log_game=False,
+            log_dir="log"
+        )
+        self._start_hfo_server(**kwargs)
+
     def _get_reward(self):
         """
         Agent is rewarded for minimizing the distance between itself and
@@ -66,7 +98,7 @@ def _get_reward(self):
             reward += 1.
             self.got_kickable_reward = True
         # Reward the agent for kicking towards the goal
-        reward += 0.6 * -ball_dist_goal_delta
+        reward += 3.0 * -ball_dist_goal_delta
         # Reward the agent for scoring
         if self.status == hfo_py.GOAL:
             reward += 5.0
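
Putting the coefficient change in context, here is a minimal sketch of the shaped reward this hunk tunes; the move-toward-ball term sits in the elided lines above, so its exact form below is an assumption:

def shaped_reward(ball_prox_delta, kickable, got_kickable_reward,
                  ball_dist_goal_delta, scored):
    reward = 0.0
    reward += ball_prox_delta               # assumed: approach-the-ball term
    if kickable and not got_kickable_reward:
        reward += 1.0                       # one-time bonus for reaching the ball
    reward += 3.0 * -ball_dist_goal_delta   # kick toward goal (this PR: 0.6 -> 3.0)
    if scored:
        reward += 5.0                       # scoring bonus
    return reward

Raising the kick-toward-goal weight from 0.6 to 3.0 makes moving the ball toward the goal dominate the shaping signal.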
53 changes: 42 additions & 11 deletions gym_soccer/envs/soccer_env.py
@@ -1,4 +1,5 @@
 import os, subprocess, time, signal
+import numpy as np
 import gym
 from gym import error, spaces
 from gym import utils
@@ -26,12 +27,21 @@ def __init__(self):
         self.observation_space = spaces.Box(low=-1, high=1,
                                             shape=(self.env.getStateSize()))
         # Action space omits the Tackle/Catch actions, which are useful on defense
-        self.action_space = spaces.Tuple((spaces.Discrete(3),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1),
-                                          spaces.Box(low=0, high=100, shape=1),
-                                          spaces.Box(low=-180, high=180, shape=1)))
+        # self.action_space = spaces.Tuple((spaces.Discrete(3),
+        #                                   spaces.Box(low=0, high=100, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1),
+        #                                   spaces.Box(low=0, high=100, shape=1),
+        #                                   spaces.Box(low=-180, high=180, shape=1)))
+
+        # Modified action space: the parameters that belong to the same
+        # discrete action are combined into a single Box space
+        self.action_space = spaces.Tuple((
+            spaces.Discrete(3),
+            spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0])),  # DASH: power, direction
+            spaces.Box(low=np.array([-180.0]), high=np.array([180.0])),              # TURN: direction
+            spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0]))))  # KICK: power, direction
+
         self.status = hfo_py.IN_GAME
 
     def __del__(self):
@@ -50,7 +60,7 @@ def _configure_environment(self):
         self._start_hfo_server()
 
     def _start_hfo_server(self, frames_per_trial=500,
-                          untouched_time=100, offense_agents=1,
+                          untouched_time=600, offense_agents=1,
                           defense_agents=0, offense_npcs=0,
                           defense_npcs=0, sync_mode=True, port=6000,
                           offense_on_ball=0, fullstate=True, seed=-1,
@@ -113,17 +123,38 @@ def _step(self, action):

     def _take_action(self, action):
         """ Converts the action space into an HFO action. """
-        action_type = ACTION_LOOKUP[action[0]]
+        action_index = action[0]
+        action_type = ACTION_LOOKUP[action_index]
+        # Select the Box sample that belongs to the chosen discrete action
+        parameter = action[1 + action_index]
+        # A discrete action has at most 2 parameters
+        parameter_list = np.array_split(parameter, 2)
         if action_type == hfo_py.DASH:
-            self.env.act(action_type, action[1], action[2])
+            self.env.act(action_type, parameter_list[0], parameter_list[1])
         elif action_type == hfo_py.TURN:
-            self.env.act(action_type, action[3])
+            self.env.act(action_type, parameter_list[0])
         elif action_type == hfo_py.KICK:
-            self.env.act(action_type, action[4], action[5])
+            self.env.act(action_type, parameter_list[0], parameter_list[1])
         else:
             print('Unrecognized action %d' % action_type)
             self.env.act(hfo_py.NOOP)
 
+    # def _take_action(self, action):
+    #     """ Converts the action space into an HFO action. """
+    #     action_type = ACTION_LOOKUP[action[0]]
+    #     if action_type == hfo_py.DASH:
+    #         self.env.act(action_type, action[1], action[2])
+    #     elif action_type == hfo_py.TURN:
+    #         self.env.act(action_type, action[3])
+    #     elif action_type == hfo_py.KICK:
+    #         self.env.act(action_type, action[4], action[5])
+    #     else:
+    #         print('Unrecognized action %d' % action_type)
+    #         self.env.act(hfo_py.NOOP)
+
+    def _seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
     def _get_reward(self):
         """ Reward is given for scoring a goal. """
         if self.status == hfo_py.GOAL:
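
To see how the modified Tuple space and the new _take_action fit together, here is a standalone sketch; the action order (DASH, TURN, KICK) follows ACTION_LOOKUP in the diff and the numeric values are made up:

import numpy as np

action = (0,                        # Discrete(3): 0 selects DASH here
          np.array([55.0, 30.0]),   # DASH parameters: power, direction
          np.array([-90.0]),        # TURN parameter: direction
          np.array([80.0, 10.0]))   # KICK parameters: power, direction

action_index = action[0]
parameter = action[1 + action_index]           # Box sample for the chosen action
parameter_list = np.array_split(parameter, 2)  # always yields two pieces
print(parameter_list)                          # [array([55.]), array([30.])]
# For TURN the split is [array([-90.]), array([])], which is why that
# branch passes only parameter_list[0] to env.act.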
6 changes: 4 additions & 2 deletions setup.py
@@ -1,7 +1,9 @@
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(name='gym_soccer',
       version='0.0.1',
       install_requires=['gym>=0.2.3',
-                        'hfo_py>=0.2']
+                        'hfo_py>=0.2'],
+      packages=[package for package in find_packages()
+                if package.startswith('gym')],
 )