Merge pull request #4 from opimwue/nv_variable_sl
Nv variable sl
majoma7 authored Aug 17, 2024
2 parents b918d78 + efcabf4 commit f59971b
Showing 27 changed files with 2,889 additions and 448 deletions.
67 changes: 61 additions & 6 deletions ddopnew/_modidx.py

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion ddopnew/agents/base.py
@@ -48,10 +48,16 @@ def draw_action(self, observation: np.ndarray) -> np.ndarray: #
         Internal logic of the agent to be implemented in draw_action_ method.
         """

-        observation = self.add_batch_dim(observation)
+        batch_added = False
+        if not isinstance(observation, dict):
+            observation = self.add_batch_dim(observation)
+            batch_added = True

         for obsprocessor in self.obsprocessors:
             observation = obsprocessor(observation)
+            if not isinstance(observation, dict) and not batch_added:
+                observation = self.add_batch_dim(observation)
+                batch_added = True

         action = self.draw_action_(observation)
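The new guard defers add_batch_dim until the observation is no longer a dict, and applies it at most once. A minimal standalone sketch of that behavior (add_batch_dim and flatten_dict are illustrative stand-ins, not the repository's implementations):

import numpy as np

def add_batch_dim(obs):
    return obs[None, ...]  # (F,) -> (1, F)

def flatten_dict(obs):
    # example obsprocessor: dict observation -> flat feature array
    return np.concatenate([np.asarray(v).ravel() for v in obs.values()])

observation = {"demand": np.array([3.0, 5.0]), "sl": np.array([0.9])}
batch_added = False
if not isinstance(observation, dict):
    observation = add_batch_dim(observation)
    batch_added = True
for obsprocessor in [flatten_dict]:
    observation = obsprocessor(observation)
    if not isinstance(observation, dict) and not batch_added:
        observation = add_batch_dim(observation)
        batch_added = True
print(observation.shape)  # (1, 3) -- batch dim added exactly once, after flattening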
9 changes: 9 additions & 0 deletions ddopnew/agents/class_names.py
@@ -6,11 +6,20 @@
 # %% ../../nbs/40_base_agents/10_AGENT_CLASSES.ipynb 3
 AGENT_CLASSES = {
     "RandomAgent": "ddopnew.agents.saa.SAA",
+
     "SAA": "ddopnew.agents.newsvendor.saa.NewsvendorSAAagent",
     "wSAA": "ddopnew.agents.newsvendor.saa.NewsvendorRFwSAAagent",
+    "RFwSAA": "ddopnew.agents.newsvendor.saa.NewsvendorRFwSAAagent",
+
     "lERM": "ddopnew.agents.newsvendor.erm.NewsvendorlERMAgent",
     "DLNV": "ddopnew.agents.newsvendor.erm.NewsvendorDLAgent",
     "DLNVRNN": "ddopnew.agents.newsvendor.erm.NewsvendorDLRNNAgent",
     "DLNVTransformer": "ddopnew.agents.newsvendor.erm.NewsvendorDLTransformerAgent",
+
+    "lERMMeta": "ddopnew.agents.newsvendor.erm.NewsvendorlERMMetaAgent",
+    "DLNVMeta": "ddopnew.agents.newsvendor.erm.NewsvendorDLMetaAgent",
+    "DLNVRNNMeta": "ddopnew.agents.newsvendor.erm.NewsvendorDLRNNMetaAgent",
+    "DLNVTransformerMeta": "ddopnew.agents.newsvendor.erm.NewsvendorDLTransformerMetaAgent",
+
     "SAC": "ddopnew.agents.rl.sac.SACAgent",
     "SACRNN": "ddopnew.agents.rl.sac.SACRNNAgent",
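The registry maps short agent names to fully qualified import paths. A typical lookup helper for such a mapping (resolve_agent_class is illustrative and not part of this diff):

import importlib

AGENT_CLASSES = {
    "lERMMeta": "ddopnew.agents.newsvendor.erm.NewsvendorlERMMetaAgent",
}

def resolve_agent_class(name: str) -> type:
    """Split 'package.module.ClassName' and import the class dynamically."""
    module_path, class_name = AGENT_CLASSES[name].rsplit(".", 1)
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# AgentClass = resolve_agent_class("lERMMeta")  # requires ddopnew to be installed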
203 changes: 180 additions & 23 deletions ddopnew/agents/newsvendor/erm.py
@@ -1,7 +1,8 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../../nbs/41_NV_agents/11_NV_erm_agents.ipynb.

 # %% auto 0
-__all__ = ['SGDBaseAgent', 'NVBaseAgent', 'NewsvendorlERMAgent', 'NewsvendorDLAgent']
+__all__ = ['SGDBaseAgent', 'NVBaseAgent', 'NewsvendorlERMAgent', 'NewsvendorDLAgent', 'BaseMetaAgent', 'NewsvendorlERMMetaAgent',
+           'NewsvendorDLMetaAgent']

 # %% ../../../nbs/41_NV_agents/11_NV_erm_agents.ipynb 3
 import logging
@@ -14,7 +15,7 @@

 from ...envs.base import BaseEnvironment
 from ..base import BaseAgent
-from ...utils import MDPInfo, Parameter, DatasetWrapper
+from ...utils import MDPInfo, Parameter, DatasetWrapper, DatasetWrapperMeta
 from ...torch_utils.loss_functions import TorchQuantileLoss
 from ...torch_utils.obsprocessors import FlattenTimeDim
@@ -55,7 +56,9 @@ def __init__(self,
         self.device = self.set_device(device)

         self.set_dataloader(dataloader, dataloader_params)
+
         self.set_model(input_shape, output_shape)
+        self.loss_function_params = None # default
         self.set_loss_function()
         self.set_optimizer(optimizer_params)
         self.set_learning_rate_scheduler(learning_rate_scheduler)
@@ -89,8 +92,12 @@ def set_dataloader(self,
         Set the dataloader for the agent by wrapping it into a Torch Dataset
         """
-        dataset = DatasetWrapper(dataloader)
-        self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)
+
+        # check if the class already has a dataloader
+        if not hasattr(self, 'dataloader'):
+
+            dataset = DatasetWrapper(dataloader)
+            self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)

     @abstractmethod
     def set_loss_function(self):
@@ -105,18 +112,21 @@ def set_model(self, input_shape: Tuple, output_shape: Tuple):
     def set_optimizer(self, optimizer_params: dict): # dict with keys: optimizer, lr, weight_decay

         """ Set the optimizer for the model """
-        optimizer = optimizer_params["optimizer"]
-        optimizer_params_copy = optimizer_params.copy()
-        del optimizer_params_copy["optimizer"]
-
-        if optimizer == "Adam":
-            self.optimizer = torch.optim.Adam(self.model.parameters(), **optimizer_params_copy)
-        elif optimizer == "SGD":
-            self.optimizer = torch.optim.SGD(self.model.parameters(), **optimizer_params_copy)
-        elif optimizer == "RMSprop":
-            self.optimizer = torch.optim.RMSprop(self.model.parameters(), **optimizer_params_copy)
-        else:
-            raise ValueError(f"Optimizer {optimizer} not supported")
+        if not hasattr(self, 'optimizer'):
+
+            optimizer = optimizer_params["optimizer"]
+            optimizer_params_copy = optimizer_params.copy()
+            del optimizer_params_copy["optimizer"]
+
+            if optimizer == "Adam":
+                self.optimizer = torch.optim.Adam(self.model.parameters(), **optimizer_params_copy)
+            elif optimizer == "SGD":
+                self.optimizer = torch.optim.SGD(self.model.parameters(), **optimizer_params_copy)
+            elif optimizer == "RMSprop":
+                self.optimizer = torch.optim.RMSprop(self.model.parameters(), **optimizer_params_copy)
+            else:
+                raise ValueError(f"Optimizer {optimizer} not supported")

     def set_learning_rate_scheduler(self, learning_rate_scheduler: None = None): #
         """ Set learning rate scheduler (can be None) """
@@ -135,7 +145,11 @@ def fit_epoch(self):
         for i, output in enumerate(self.dataloader):

-            X, y = output
+            if len(output)==3:
+                X, y, loss_function_params = output
+            else:
+                X, y = output
+                loss_function_params = None

             # convert X and y to float32
             X = X.type(torch.float32)
@@ -150,10 +164,12 @@

             y_pred = self.model(X)

-            if self.loss_function_params==None:
-                loss = self.loss_function(y_pred, y)
+            if loss_function_params is not None:
+                loss = self.loss_function(y_pred, y, **loss_function_params)
+            elif self.loss_function_params is not None:
+                loss = self.loss_function(y_pred, y, **self.loss_function_params)
             else:
-                loss = self.loss_function(y_pred, y, **self.loss_function_params) # TODO: add reduction param when defining loss function
+                loss = self.loss_function(y_pred, y)

             loss.backward()
             self.optimizer.step()
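The dispatch order matters: per-batch parameters delivered by the dataloader (the variable-sl case) take precedence over the agent-level self.loss_function_params, which in turn beats a bare call. A self-contained sketch with a stand-in quantile loss (names are illustrative, not the repo's):

import torch

def loss_function(y_pred, y, quantile=0.5):
    e = y - y_pred
    return torch.mean(torch.maximum(quantile * e, (quantile - 1) * e))

y_pred, y = torch.tensor([8.0]), torch.tensor([10.0])
agent_level_params = {"quantile": 0.9}    # stand-in for self.loss_function_params
loss_function_params = {"quantile": 0.7}  # per-batch, e.g. from the meta dataloader

if loss_function_params is not None:
    loss = loss_function(y_pred, y, **loss_function_params)   # this branch wins
elif agent_level_params is not None:
    loss = loss_function(y_pred, y, **agent_level_params)
else:
    loss = loss_function(y_pred, y)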
@@ -286,7 +302,6 @@ def __init__(self,
             agent_name: str | None = None,
         ):

-
         cu = self.convert_to_numpy_array(cu)
         co = self.convert_to_numpy_array(co)
@@ -306,12 +321,13 @@
             device=device,
             agent_name=agent_name
         )

     def set_loss_function(self):

         """Set the loss function for the model to the quantile loss. For training,
         the model uses the quantile loss rather than the pinball loss with specific cu
         and co values, to ensure a similar scale of the feedback signal during training."""

+        self.loss_function_params = {"quantile": self.sl}
         self.loss_function = TorchQuantileLoss(reduction="mean")
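The docstring's scale argument can be checked by hand: training at the critical ratio sl = cu / (cu + co) has the same minimizer as the cu/co pinball loss, but the feedback signal no longer grows with the cost magnitudes (made-up numbers):

cu, co = 9.0, 1.0
sl = cu / (cu + co)  # critical ratio: 0.9

e = 2.0  # forecast error (underage of 2 units)
pinball = cu * max(e, 0.0) + co * max(-e, 0.0)           # 18.0 -- scales with cu + co
quantile = sl * max(e, 0.0) + (1.0 - sl) * max(-e, 0.0)  # 1.8  -- pinball / (cu + co)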
@@ -448,3 +464,144 @@ def set_model(self, input_shape, output_shape):

from ddopnew.approximators import MLP
self.model = MLP(input_size=input_size, output_size=output_size, **self.model_params)

# %% ../../../nbs/41_NV_agents/11_NV_erm_agents.ipynb 35
class BaseMetaAgent():

def set_meta_dataloader(
self,
dataloader: BaseDataLoader,
dataloader_params, # dict with keys: batch_size, shuffle
draw_parameter_function: callable, # function to draw parameters from distribution
distribution: str, # distribution for params during training
bounds_low: Union[int, float], # lower bound for params during training
bounds_high: Union[int, float], # upper bound for params during training
obsprocessor: callable, # function to process observations
parameter_names: List[str] = None, # names of parameters
) -> None:

""" """

# check if class already have a dataloader

print("setting meta datloader")

dataset = DatasetWrapperMeta(
dataloader = dataloader,
draw_parameter_function = draw_parameter_function,
distribution = distribution,
bounds_low = bounds_low,
bounds_high = bounds_high,
obsprocessor = obsprocessor,
parameter_names = parameter_names,
)

self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)

# %% ../../../nbs/41_NV_agents/11_NV_erm_agents.ipynb 36
class NewsvendorlERMMetaAgent(NewsvendorlERMAgent, BaseMetaAgent):

"""
    Newsvendor agent implementing the Empirical Risk Minimization (ERM) approach
    based on a linear (regression) model. In addition to the features, the agent
    also receives the service level (sl) as input so that it can forecast the
    optimal order quantity for different sl values. Depending on the training
    pipeline, this model can be adapted into a full meta-learning algorithm
    across products and across service levels.
"""

def __init__(self,
# Parameters for meta Agent
dataset_meta_params: dict, # Parameters for meta dataloader

# Parameters for lERM agent
environment_info: MDPInfo,
dataloader: BaseDataLoader,
cu: np.ndarray | Parameter,
co: np.ndarray | Parameter,
input_shape: Tuple,
output_shape: Tuple,
optimizer_params: dict | None = None, # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
learning_rate_scheduler = None, # TODO: add base class for learning rate scheduler for typing
model_params: dict | None = None, # default: {"relu_output": False}
dataloader_params: dict | None = None, # default: {"batch_size": 32, "shuffle": True}
obsprocessors: list | None = None, # default: []
torch_obsprocessors: list | None = None, # default: [FlattenTimeDim(allow_2d=False)]
device: str = "cpu", # "cuda" or "cpu"
agent_name: str | None = "lERMMeta"
):

self.set_meta_dataloader(dataloader, dataloader_params, **dataset_meta_params)

super().__init__(
environment_info=environment_info,
dataloader=dataloader,
cu=cu,
co=co,
input_shape=input_shape,
output_shape=output_shape,
optimizer_params=optimizer_params,
learning_rate_scheduler=learning_rate_scheduler,
model_params=model_params,
dataloader_params=dataloader_params,
obsprocessors=obsprocessors,
torch_obsprocessors=torch_obsprocessors,
device=device,
agent_name=agent_name
)

# %% ../../../nbs/41_NV_agents/11_NV_erm_agents.ipynb 37
class NewsvendorDLMetaAgent(NewsvendorDLAgent, BaseMetaAgent):

"""
    Newsvendor agent implementing the Empirical Risk Minimization (ERM) approach
    based on a neural network. In addition to the features, the agent
    also receives the service level (sl) as input so that it can forecast the
    optimal order quantity for different sl values. Depending on the training
    pipeline, this model can be adapted into a full meta-learning algorithm
    across products and across service levels.
"""

def __init__(self,
# Parameters for meta Agent
dataset_meta_params: dict, # Parameters for meta dataloader

environment_info: MDPInfo,
dataloader: BaseDataLoader,
cu: np.ndarray | Parameter,
co: np.ndarray | Parameter,
input_shape: Tuple,
output_shape: Tuple,
learning_rate_scheduler = None, # TODO: add base class for learning rate scheduler for typing

# parameters in yaml file
optimizer_params: dict | None = None, # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
model_params: dict | None = None, # default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False}
dataloader_params: dict | None = None, # default: {"batch_size": 32, "shuffle": True}
device: str = "cpu", # "cuda" or "cpu"

obsprocessors: list | None = None, # default: []
torch_obsprocessors: list | None = None, # default: [FlattenTimeDim(allow_2d=False)]
agent_name: str | None = "DLNV",
):

self.set_meta_dataloader(dataloader, dataloader_params, **dataset_meta_params)

super().__init__(
environment_info=environment_info,
dataloader=dataloader,
cu=cu,
co=co,
input_shape=input_shape,
output_shape=output_shape,
learning_rate_scheduler=learning_rate_scheduler,

optimizer_params=optimizer_params,
model_params=model_params,
dataloader_params=dataloader_params,
device=device,

obsprocessors=obsprocessors,
torch_obsprocessors=torch_obsprocessors,
agent_name=agent_name
)
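The meta dataloader is what makes the service level variable at training time: each sample draws its own sl, which reaches the loss through the (X, y, loss_function_params) batches handled in fit_epoch above. A hypothetical draw_parameter_function matching set_meta_dataloader's signature (the actual contract lives in DatasetWrapperMeta in ddopnew.utils):

import numpy as np

def draw_sl(distribution: str, bounds_low: float, bounds_high: float) -> float:
    """Illustrative parameter-drawing function; not part of this diff."""
    rng = np.random.default_rng()
    if distribution == "uniform":
        return float(rng.uniform(bounds_low, bounds_high))
    raise ValueError(f"Distribution {distribution} not supported")

# Each training sample then carries its own service level:
sl = draw_sl("uniform", 0.5, 0.99)
loss_function_params = {"quantile": sl}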
14 changes: 11 additions & 3 deletions ddopnew/agents/rl/sac.py
@@ -28,6 +28,7 @@
 import torch
 import torch.nn.functional as F
 from torchinfo import summary
+from IPython import get_ipython

 from copy import deepcopy

@@ -166,7 +167,10 @@ def __init__(self,
         else:
             input_tensor = torch.randn(batch_dim, *actor_mu_params["input_shape"]).to(self.device)
             input_tuple = (input_tensor,)
-        print(summary(self.actor, input_data=input_tuple, device=self.device))
+        if get_ipython() is not None:
+            print(summary(self.actor, input_data=input_tuple, device=self.device))
+        else:
+            summary(self.actor, input_data=input_tuple, device=self.device)
         time.sleep(0.2)

         logging.info("################################################################################")
@@ -183,7 +187,11 @@ def __init__(self,
             state_mlp_sample = torch.randn(batch_dim, *critic_params["input_shape"][0][1]).to(self.device)
             state_sample = torch.cat((state_sample, state_mlp_sample), dim=1)
             input_tuple = (state_sample, action_sample)
-        print(summary(self.critic, input_data=input_tuple, device=self.device))
+        if get_ipython() is not None:
+            print(summary(self.critic, input_data=input_tuple, device=self.device))
+        else:
+            summary(self.critic, input_data=input_tuple, device=self.device)
+        # print(summary(self.critic, input_data=input_tuple, device=self.device))

     def get_network_list(self, set_actor_critic_attributes: bool = True):
         """ Get the list of networks in the agent for the save and load functions
@@ -207,7 +215,7 @@ def get_network_list(self, set_actor_critic_attributes: bool = True):
     def predict_(self, observation: np.ndarray) -> np.ndarray: #
         """ Do one forward pass of the model directly and return the prediction.
         Apply tanh as implemented for the SAC actor in mushroom_rl """
-
+        # make observation torch tensor
         device = next(self.actor.parameters()).device
         observation = torch.tensor(observation, dtype=torch.float32).to(device)
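The get_ipython() guard added above distinguishes notebook from plain-script execution, so the torchinfo summary is only explicitly printed in interactive sessions. The detection pattern in isolation (requires IPython to be installed):

from IPython import get_ipython

def in_notebook() -> bool:
    # get_ipython() returns the active interactive shell, or None in a plain script
    return get_ipython() is not None

print("interactive:", in_notebook())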
4 changes: 2 additions & 2 deletions ddopnew/envs/base.py
@@ -3,7 +3,7 @@
 # %% auto 0
 __all__ = ['BaseEnvironment']

-# %% ../../nbs/20_base_env/10_base_env.ipynb 3
+# %% ../../nbs/20_base_env/10_base_env.ipynb 4
 import gymnasium as gym
 from abc import ABC, abstractmethod
 from typing import Union, List
@@ -12,7 +12,7 @@
 from ..utils import MDPInfo, Parameter, set_param
 import time

-# %% ../../nbs/20_base_env/10_base_env.ipynb 4
+# %% ../../nbs/20_base_env/10_base_env.ipynb 5
 class BaseEnvironment(gym.Env, ABC):

     """
2 changes: 1 addition & 1 deletion ddopnew/envs/inventory/multi_period.py
@@ -198,7 +198,7 @@ def get_observation(self):

return observation, Y_item


def reset(self,
start_index: int | str = None, # index to start from
state: np.ndarray = None # initial state
