Skip to content

Commit

Permalink
[Feature] Allow multiple inputs to models (#73)
Browse files Browse the repository at this point in the history
* add multiagent cnn implementation and tests

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* docs

* mend

* mend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

* amend

---------

Co-authored-by: ezhang7423 <[email protected]>
  • Loading branch information
matteobettini and ezhang7423 authored Apr 8, 2024
1 parent e272278 commit 3a9a40c
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 158 deletions.
29 changes: 3 additions & 26 deletions benchmarl/algorithms/iddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Tuple, Type

import torch
from tensordict import TensorDictBase
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec
Expand Down Expand Up @@ -188,34 +187,12 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
def get_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[
(group, group_observation_key),
(group, "action"),
],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)
critic_output_spec = CompositeSpec(
Expand Down
26 changes: 3 additions & 23 deletions benchmarl/algorithms/isac.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Optional, Tuple, Type, Union

import torch
from tensordict import TensorDictBase
from tensordict.nn import NormalParamExtractor, TensorDictModule, TensorDictSequential
from torch.distributions import Categorical
Expand Down Expand Up @@ -315,31 +314,12 @@ def get_discrete_value_module(self, group: str) -> TensorDictModule:
def get_continuous_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
47 changes: 11 additions & 36 deletions benchmarl/algorithms/maddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Tuple, Type

import torch
from tensordict import TensorDictBase
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec
Expand Down Expand Up @@ -41,7 +40,7 @@ def __init__(
loss_function: str,
delay_value: bool,
use_tanh_mapping: bool,
**kwargs
**kwargs,
):
super().__init__(**kwargs)

Expand Down Expand Up @@ -188,7 +187,6 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
def get_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

if self.share_param_critic:
critic_output_spec = CompositeSpec(
Expand All @@ -209,23 +207,18 @@ def get_value_module(self, group: str) -> TensorDictModule:
)

if self.state_spec is not None:
global_state_key = list(self.state_spec.keys())[0]
modules.append(
TensorDictModule(
lambda state, action: torch.cat(
[state, action.reshape(*action.shape[:-2], -1)], dim=-1
),
in_keys=[global_state_key, (group, "action")],
out_keys=["state_action"],
lambda action: action.reshape(*action.shape[:-2], -1),
in_keys=[(group, "action")],
out_keys=["global_action"],
)
)
critic_input_spec = CompositeSpec(

critic_input_spec = self.state_spec.clone().update(
{
"state_action": UnboundedContinuousTensorSpec(
shape=(
self.state_spec[global_state_key].shape[-1]
+ self.action_spec[group, "action"].shape[-1] * n_agents,
)
"global_action": UnboundedContinuousTensorSpec(
shape=(self.action_spec[group, "action"].shape[-1] * n_agents,)
)
}
)
Expand All @@ -245,29 +238,11 @@ def get_value_module(self, group: str) -> TensorDictModule:
)

else:
modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
46 changes: 11 additions & 35 deletions benchmarl/algorithms/masac.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dataclasses import dataclass, MISSING
from typing import Dict, Iterable, Optional, Tuple, Type, Union

import torch
from tensordict import TensorDictBase
from tensordict.nn import NormalParamExtractor, TensorDictModule, TensorDictSequential
from torch.distributions import Categorical
Expand Down Expand Up @@ -342,7 +341,6 @@ def get_discrete_value_module(self, group: str) -> TensorDictModule:
def get_continuous_value_module(self, group: str) -> TensorDictModule:
n_agents = len(self.group_map[group])
modules = []
group_observation_key = list(self.observation_spec[group].keys())[0]

if self.share_param_critic:
critic_output_spec = CompositeSpec(
Expand All @@ -363,23 +361,19 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
)

if self.state_spec is not None:
global_state_key = list(self.state_spec.keys())[0]

modules.append(
TensorDictModule(
lambda state, action: torch.cat(
[state, action.reshape(*action.shape[:-2], -1)], dim=-1
),
in_keys=[global_state_key, (group, "action")],
out_keys=["state_action"],
lambda action: action.reshape(*action.shape[:-2], -1),
in_keys=[(group, "action")],
out_keys=["global_action"],
)
)
critic_input_spec = CompositeSpec(

critic_input_spec = self.state_spec.clone().update(
{
"state_action": UnboundedContinuousTensorSpec(
shape=(
self.state_spec[global_state_key].shape[-1]
+ self.action_spec[group, "action"].shape[-1] * n_agents,
)
"global_action": UnboundedContinuousTensorSpec(
shape=(self.action_spec[group, "action"].shape[-1] * n_agents,)
)
}
)
Expand All @@ -399,29 +393,11 @@ def get_continuous_value_module(self, group: str) -> TensorDictModule:
)

else:
modules.append(
TensorDictModule(
lambda obs, action: torch.cat([obs, action], dim=-1),
in_keys=[(group, group_observation_key), (group, "action")],
out_keys=[(group, "obs_action")],
)
)
critic_input_spec = CompositeSpec(
{
group: CompositeSpec(
{
"obs_action": UnboundedContinuousTensorSpec(
shape=(
n_agents,
self.observation_spec[
group, group_observation_key
].shape[-1]
+ self.action_spec[group, "action"].shape[-1],
)
)
},
shape=(n_agents,),
)
group: self.observation_spec[group]
.clone()
.update(self.action_spec[group])
}
)

Expand Down
Loading

0 comments on commit 3a9a40c

Please sign in to comment.