Skip to content

Commit

Permalink
[Feature] Allow multiple inputs to models (#73)
Browse files Browse the repository at this point in the history
* add multiagent cnn implementation and tests

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* docs

* mend

* mend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

---------

Co-authored-by: ezhang7423 <[email protected]>
  • Loading branch information
matteobettini and ezhang7423 authored Apr 8, 2024
1 parent e272278 commit 3a9a40c
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 158 deletions.
29 changes: 3 additions & 26 deletions benchmarl/algorithms/iddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Tuple, Type

import torch
from tensordict import TensorDictBase
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec
Expand Down Expand Up @@ -188,34 +187,12 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
def get_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[
(group, group_observation_key),
(group, "action"),
],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)
critic_output_spec = CompositeSpec(
Expand Down
26 changes: 3 additions & 23 deletions benchmarl/algorithms/isac.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Optional, Tuple, Type, Union

import torch
from tensordict import TensorDictBase
from tensordict.nn import NormalParamExtractor, TensorDictModule, TensorDictSequential
from torch.distributions import Categorical
Expand Down Expand Up @@ -315,31 +314,12 @@ def get_discrete_value_module(self, group: str) -> TensorDictModule:
def get_continuous_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
47 changes: 11 additions & 36 deletions benchmarl/algorithms/maddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Tuple, Type

import torch
from tensordict import TensorDictBase
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec
Expand Down Expand Up @@ -41,7 +40,7 @@ def __init__(
loss_function: str,
delay_value: bool,
use_tanh_mapping: bool,
**kwargs
**kwargs,
):
super().__init__(**kwargs)

Expand Down Expand Up @@ -188,7 +187,6 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
def get_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

if self.share_param_critic:
critic_output_spec = CompositeSpec(
Expand All @@ -209,23 +207,18 @@ def get_value_module(self, group: str) -> TensorDictModule:
)

if self.state_spec is not None:
global_state_key = list(self.state_spec.keys())[0]
modules.append(
TensorDictModule(
lambda state, action: torch.cat(
[state, action.reshape(*action.shape[:-2], -1)], dim=-1
),
in_keys=[global_state_key, (group, "action")],
out_keys=["state_action"],
lambda action: action.reshape(*action.shape[:-2], -1),
in_keys=[(group, "action")],
out_keys=["global_action"],
)
)
critic_input_spec = CompositeSpec(

critic_input_spec = self.state_spec.clone().update(
{
"state_action": UnboundedContinuousTensorSpec(
shape=(
self.state_spec[global_state_key].shape[-1]
+ self.action_spec[group, "action"].shape[-1] * n_agents,
)
"global_action": UnboundedContinuousTensorSpec(
shape=(self.action_spec[group, "action"].shape[-1] * n_agents,)
)
}
)
Expand All @@ -245,29 +238,11 @@ def get_value_module(self, group: str) -> TensorDictModule:
)

else:
modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
46 changes: 11 additions & 35 deletions benchmarl/algorithms/masac.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Optional, Tuple, Type, Union

import torch
from tensordict import TensorDictBase
from tensordict.nn import NormalParamExtractor, TensorDictModule, TensorDictSequential
from torch.distributions import Categorical
Expand Down Expand Up @@ -342,7 +341,6 @@ def get_discrete_value_module(self, group: str) -> TensorDictModule:
def get_continuous_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

if self.share_param_critic:
critic_output_spec = CompositeSpec(
Expand All @@ -363,23 +361,19 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
)

if self.state_spec is not None:
global_state_key = list(self.state_spec.keys())[0]

modules.append(
TensorDictModule(
lambda state, action: torch.cat(
[state, action.reshape(*action.shape[:-2], -1)], dim=-1
),
in_keys=[global_state_key, (group, "action")],
out_keys=["state_action"],
lambda action: action.reshape(*action.shape[:-2], -1),
in_keys=[(group, "action")],
out_keys=["global_action"],
)
)
critic_input_spec = CompositeSpec(

critic_input_spec = self.state_spec.clone().update(
{
"state_action": UnboundedContinuousTensorSpec(
shape=(
self.state_spec[global_state_key].shape[-1]
+ self.action_spec[group, "action"].shape[-1] * n_agents,
)
"global_action": UnboundedContinuousTensorSpec(
shape=(self.action_spec[group, "action"].shape[-1] * n_agents,)
)
}
)
Expand All @@ -399,29 +393,11 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
)

else:
modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
Loading

0 comments on commit 3a9a40c

Please sign in to comment.