forked from rail-berkeley/rlkit
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add run script and update exploration noise
- Loading branch information
Showing
8 changed files
with
207 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
""" | ||
This should result in an average return of ~3000 by the end of training.
Usually hits 3000 around epoch 80-100. Within a seed, the performance will be
a bit noisy from one epoch to the next (occasionally dips down to ~2000).
Note that one epoch = 5k steps, so 200 epochs = 1 million steps. | ||
""" | ||
import gym | ||
|
||
import rlkit.torch.pytorch_util as ptu | ||
from rlkit.exploration_strategies.base import ( | ||
PolicyWrappedWithExplorationStrategy | ||
) | ||
from rlkit.exploration_strategies.gaussian_and_epsilon_strategy import ( | ||
GaussianAndEpislonStrategy | ||
) | ||
from rlkit.launchers.launcher_util import setup_logger | ||
from rlkit.torch.her.her import HerTd3 | ||
from rlkit.torch.her.obs_dict_replay_buffer import ObsDictRelabelingBuffer | ||
from rlkit.torch.networks import FlattenMlp, TanhMlpPolicy | ||
|
||
|
||
def experiment(variant):
    """Build and train a HER-TD3 agent on FetchReach-v1.

    :param variant: dict with two sub-dicts, ``'algo_kwargs'`` (forwarded to
        ``HerTd3``) and ``'replay_buffer_kwargs'`` (forwarded to
        ``ObsDictRelabelingBuffer``).
    """
    env = gym.make('FetchReach-v1')
    exploration_strategy = GaussianAndEpislonStrategy(
        action_space=env.action_space,
        max_sigma=0.2,
        min_sigma=0.2,  # constant sigma: no decay over training
        epsilon=0.3,
    )
    obs_spaces = env.observation_space.spaces
    obs_dim = obs_spaces['observation'].low.size
    goal_dim = obs_spaces['desired_goal'].low.size
    action_dim = env.action_space.low.size

    # Twin Q-functions for TD3's clipped double-Q estimate; both see the
    # concatenated (observation, goal, action) input.
    qf1, qf2 = (
        FlattenMlp(
            input_size=obs_dim + goal_dim + action_dim,
            output_size=1,
            hidden_sizes=[400, 300],
        )
        for _ in range(2)
    )
    policy = TanhMlpPolicy(
        input_size=obs_dim + goal_dim,
        output_size=action_dim,
        hidden_sizes=[400, 300],
    )
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=exploration_strategy,
        policy=policy,
    )
    replay_buffer = ObsDictRelabelingBuffer(
        env=env,
        **variant['replay_buffer_kwargs']
    )
    algorithm = HerTd3(
        env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        replay_buffer=replay_buffer,
        **variant['algo_kwargs']
    )
    algorithm.to(ptu.device)  # move all networks to the configured device
    algorithm.train()
|
||
|
||
if __name__ == "__main__":
    # Hyperparameters roughly mirror the FetchReach setup from the HER paper.
    variant = {
        'algo_kwargs': {
            'num_epochs': 100,
            'num_steps_per_epoch': 1000,
            'num_steps_per_eval': 1000,
            'max_path_length': 50,
            'batch_size': 128,
            'discount': 0.99,
        },
        'replay_buffer_kwargs': {
            'max_size': 100000,
            # 0.2 rollout goals <=> 4 relabeled goals per real one,
            # i.e. k = 4 in the HER paper.
            'fraction_goals_rollout_goals': 0.2,
            'fraction_goals_env_goals': 0.0,
        },
    }
    setup_logger('her-td3-fetch-experiment', variant=variant)
    experiment(variant)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
rlkit/exploration_strategies/gaussian_and_epsilon_strategy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import random | ||
from rlkit.exploration_strategies.base import RawExplorationStrategy | ||
from rlkit.core.serializable import Serializable | ||
import numpy as np | ||
|
||
|
||
class GaussianAndEpislonStrategy(RawExplorationStrategy, Serializable):
    """
    With probability epsilon, take a completely random action.
    With probability 1-epsilon, add Gaussian noise to the action taken by a
    deterministic policy; the noise scale decays linearly from max_sigma to
    min_sigma over decay_period timesteps.

    NOTE: the class name keeps the historical "Epislon" spelling because
    callers import it under that exact name.
    """
    def __init__(self, action_space, epsilon, max_sigma=1.0, min_sigma=None,
                 decay_period=1000000):
        """
        :param action_space: 1-D gym Box action space (asserted below).
        :param epsilon: probability of taking a uniformly random action.
        :param max_sigma: initial Gaussian noise scale.
        :param min_sigma: final noise scale; defaults to max_sigma, which
            makes sigma constant over training.
        :param decay_period: number of timesteps over which sigma decays.
        """
        assert len(action_space.shape) == 1
        Serializable.quick_init(self, locals())
        if min_sigma is None:
            min_sigma = max_sigma
        self._max_sigma = max_sigma
        self._epsilon = epsilon
        self._min_sigma = min_sigma
        self._decay_period = decay_period
        self._action_space = action_space

    def get_action_from_raw_action(self, action, t=None, **kwargs):
        """Return the exploration action for raw policy output ``action``.

        :param action: deterministic action proposed by the policy.
        :param t: current timestep used for sigma decay; if omitted, no
            decay is applied (sigma = max_sigma).
        """
        if random.random() < self._epsilon:
            return self._action_space.sample()
        if t is None:
            # BUG FIX: t defaults to None, and `t * 1.0` below raised a
            # TypeError when callers omitted it. Treat a missing timestep
            # as "no decay yet" (t = 0 gives sigma = max_sigma).
            t = 0
        sigma = (
            self._max_sigma - (self._max_sigma - self._min_sigma)
            * min(1.0, t * 1.0 / self._decay_period)
        )
        return np.clip(
            action + np.random.normal(size=len(action)) * sigma,
            self._action_space.low,
            self._action_space.high,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import argparse | ||
import pickle | ||
|
||
from rlkit.core import logger | ||
from rlkit.samplers.rollout_functions import multitask_rollout | ||
from rlkit.torch import pytorch_util as ptu | ||
|
||
|
||
def simulate_policy(args):
    """Load a snapshot and roll out its policy forever, logging diagnostics.

    Each iteration performs one rollout of at most ``args.H`` steps, appends
    it to the running list of paths, and dumps per-environment diagnostics.
    Interrupt (Ctrl-C) to stop.

    :param args: parsed CLI namespace (file, H, gpu, pause, hide, ...).
    """
    if args.pause:
        import ipdb; ipdb.set_trace()
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file -- only open snapshot files from trusted sources.
    # FIX: use a context manager so the snapshot file handle is closed
    # (the original left the file object open).
    with open(args.file, "rb") as snapshot_file:
        data = pickle.load(snapshot_file)
    policy = data['policy']
    env = data['env']
    print("Policy and environment loaded")
    if args.gpu:
        ptu.set_gpu_mode(True)
        policy.to(ptu.device)
    policy.train(False)  # evaluation mode for the loaded policy
    paths = []
    while True:
        paths.append(multitask_rollout(
            env,
            policy,
            max_path_length=args.H,
            animated=not args.hide,
            observation_key='observation',
            desired_goal_key='desired_goal',
        ))
        # Diagnostics are recomputed over ALL paths collected so far.
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics(paths)
        if hasattr(env, "get_diagnostics"):
            for k, v in env.get_diagnostics(paths).items():
                logger.record_tabular(k, v)
        logger.dump_tabular()
|
||
|
||
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('file', type=str,
                        help='path to the snapshot file')
    parser.add_argument('--H', type=int, default=300,
                        help='Max length of rollout')
    parser.add_argument('--speedup', type=float, default=10,
                        help='Speedup')
    parser.add_argument('--mode', default='video_env', type=str,
                        help='env mode')
    # Boolean toggles share the same store_true shape, so register them
    # together.
    for flag in ('--gpu', '--pause', '--enable_render', '--multitaskpause',
                 '--hide'):
        parser.add_argument(flag, action='store_true')
    args = parser.parse_args()
    simulate_policy(args)