From 940cf74e0d9d5bd2dde94c914ec4f6350a8538fb Mon Sep 17 00:00:00 2001
From: Tim Lachner
Date: Mon, 11 Nov 2024 08:47:59 +0100
Subject: [PATCH] pricing env, main TODOs: mode and dataloader.

---
 .../22_envs_pricing/10_base_pricing_env.ipynb | 168 +++++++++++++++++-
 .../20_dynamic_pricing_env.ipynb              | 124 +++++++++++++
 2 files changed, 284 insertions(+), 8 deletions(-)
 create mode 100644 nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb

diff --git a/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb b/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
index 278e9aa..c3d8404 100644
--- a/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
+++ b/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
@@ -10,27 +10,179 @@
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
+   "outputs": [],
    "source": [
     "from nbdev.showdoc import *"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "from abc import ABC, abstractmethod\n",
+    "from typing import Union, Tuple, List\n",
+    "\n",
+    "from ddopai.envs.base import BaseEnvironment\n",
+    "from ddopai.utils import Parameter, MDPInfo\n",
+    "from ddopai.dataloaders.base import BaseDataLoader\n",
+    "from ddopai.loss_functions import pinball_loss\n",
+    "\n",
+    "import gymnasium as gym\n",
+    "\n",
+    "import numpy as np\n",
+    "import time"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
+   "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#| export\n",
+    "class BasePricingEnv(BaseEnvironment):\n",
+    "    \"\"\"\n",
+    "    Base class for pricing environments. This class inherits from BaseEnvironment.\n",
+    "\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self,\n",
+    "\n",
+    "        ## Parameters for Base env:\n",
+    "        mdp_info: MDPInfo, #\n",
+    "        postprocessors: list[object] | None = None, # default is empty list\n",
+    "        mode: str = \"online\", # additional mode for the pricing environment TODO: add online mode to training loop\n",
+    "        return_truncation: str = True, # whether to return a truncated condition in step function\n",
+    "        dataloader: BaseDataLoader = None, # dataloader for the environment\n",
+    "\n",
+    "        alpha: Union[float, np.ndarray] = 1, # market size parameter\n",
+    "        beta: Union[float, np.ndarray] = 1, # price sensitivity parameter\n",
+    "        horizon_train: int = 100 # horizon for the online learning TODO: check if it can be renamed to horizon\n",
+    "        ) -> None:\n",
+    "\n",
+    "        self.dataloader = dataloader\n",
+    "\n",
+    "        self.set_param(\"alpha\", alpha, shape=(self.num_SKUs[0],), new=True)\n",
+    "        self.set_param(\"beta\", beta, shape=(self.num_SKUs[0],), new=True)\n",
+    "\n",
+    "        # TODO: check in the base env if train_horizon is needed\n",
+    "        super().__init__(mdp_info=mdp_info, postprocessors=postprocessors, mode=mode, return_truncation=return_truncation, horizon_train=horizon_train)\n",
+    "\n",
+    "    def set_observation_space(self,\n",
+    "            shape: tuple, # shape of the dataloader features\n",
+    "            low: Union[np.ndarray, float] = -np.inf, # lower bound of the observation space\n",
+    "            high: Union[np.ndarray, float] = np.inf, # upper bound of the observation space\n",
+    "            samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
+    "            ) -> None:\n",
+    "\n",
+    "        '''\n",
+    "        Set the observation space of the environment.\n",
+    "        This is a standard function for simple observation spaces. For more complex observation spaces,\n",
+    "        this function should be overwritten. Note that it is assumed that the first dimension\n",
+    "        is n_samples, which is not relevant for the observation space.\n",
+    "\n",
+    "        '''\n",
+    "\n",
+    "        # To handle cases when no external information is available (e.g., parametric NV)\n",
+    "\n",
+    "        if shape is None:\n",
+    "            self.observation_space = None\n",
+    "\n",
+    "        else:\n",
+    "            if not isinstance(shape, tuple):\n",
+    "                raise ValueError(\"Shape must be a tuple.\")\n",
+    "\n",
+    "            if samples_dim_included:\n",
+    "                shape = shape[1:] # assumed that the first dimension is the number of samples\n",
+    "\n",
+    "            self.observation_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
+    "\n",
+    "    def set_action_space(self,\n",
+    "            shape: tuple, # shape of the dataloader target\n",
+    "            low: Union[np.ndarray, float] = -np.inf, # lower bound of the action space\n",
+    "            high: Union[np.ndarray, float] = np.inf, # upper bound of the action space\n",
+    "            samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
+    "            ) -> None:\n",
+    "\n",
+    "        '''\n",
+    "        Set the action space of the environment.\n",
+    "        This is a standard function for simple action spaces. For more complex action spaces,\n",
+    "        this function should be overwritten. Note that it is assumed that the first dimension\n",
+    "        is n_samples, which is not relevant for the action space.\n",
+    "        '''\n",
+    "\n",
+    "        if not isinstance(shape, tuple):\n",
+    "            raise ValueError(\"Shape must be a tuple.\")\n",
+    "\n",
+    "        if samples_dim_included:\n",
+    "            shape = shape[1:] # assumed that the first dimension is the number of samples\n",
+    "\n",
+    "        self.action_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
+    "\n",
+    "    def get_observation(self):\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Return the current observation. In the online learning case it returns only the state;\n",
+    "        for other cases, this function should be overwritten.\n",
+    "\n",
+    "        \"\"\"\n",
+    "\n",
+    "        X_item, = self.dataloader[self.index]\n",
+    "\n",
+    "        return X_item\n",
+    "\n",
+    "    def get_demand_response(self, action):\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Return the demand and the reward for the current action. This function should be overwritten.\n",
+    "        TODO: add the tuple call to the pricing dataloader\n",
+    "        \"\"\"\n",
+    "        Y_item, epsilon = self.dataloader[self.index, action]\n",
+    "        return Y_item, epsilon\n",
+    "    def reset(self,\n",
+    "            start_index: int | str = None, # index to start from\n",
+    "            state: np.ndarray = None # initial state\n",
+    "            ) -> Tuple[np.ndarray, bool]:\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Reset function for the pricing problem. It will return the first observation.\n",
+    "        For val and test modes, it will by default reset to 0, while for the train mode it depends\n",
+    "        on the parameter \"horizon_train\" whether a random point in the training data or 0 is selected.\n",
+    "        \"\"\"\n",
+    "\n",
+    "        truncated = self.reset_index(start_index)\n",
+    "\n",
+    "\n",
+    "\n",
+    "        observation = self.get_observation()\n",
+    "\n",
+    "        return observation\n"
+   ]
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "ddop",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.0"
   }
  },
  "nbformat": 4,
diff --git a/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb b/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb
new file mode 100644
index 0000000..e8a1b0a
--- /dev/null
+++ b/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dynamic Pricing Env\n",
+    "\n",
+    "> Static dynamic pricing environment where a decision only affects the next period"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "from nbdev.showdoc import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "from abc import ABC, abstractmethod\n",
+    "from typing import Union, Tuple, Literal\n",
+    "\n",
+    "from ddopai.utils import Parameter, MDPInfo\n",
+    "from ddopai.dataloaders.base import BaseDataLoader\n",
+    "from ddopai.loss_functions import pinball_loss, quantile_loss\n",
+    "from ddopai.envs.pricing.base import BasePricingEnv\n",
+    "\n",
+    "import gymnasium as gym\n",
+    "\n",
+    "import numpy as np\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "class DynamicPricingEnv(BasePricingEnv):\n",
+    "    \"\"\"\n",
+    "    Class implementing the dynamic pricing and learning problem, working for the single- and multi-item case.\n",
+    "    If alpha and beta are scalars and there are multiple SKUs, then the same parameters are used for all SKUs.\n",
+    "    If alpha and beta are arrays, then they should have the same length as the number of SKUs.\n",
+    "    Num_SKUs can be set as a parameter or inferred from the DataLoader.\n",
+    "    \"\"\"\n",
+    "    def __init__(self,\n",
+    "        alpha: Union[np.ndarray, Parameter, int, float] = 1.0, # market size per SKU\n",
+    "        beta: Union[np.ndarray, Parameter, int, float] = 0.5, # price elasticity per SKU\n",
+    "        p_bound_low: Union[np.ndarray, Parameter, int, float] = 0.0, # lower price bound per SKU\n",
+    "        p_bound_high: Union[np.ndarray, Parameter, int, float] = 1.0, # upper price bound per SKU\n",
+    "        dataloader: BaseDataLoader = None, # dataloader TODO: replace with pricing dataloader\n",
+    "        num_SKUs: Union[np.ndarray, Parameter, int, float] = None, # number of SKUs\n",
+    "        gamma: float = 1, # discount factor\n",
+    "        horizon_train: int | str = \"use_all_data\", # if \"use_all_data\" then horizon is inferred from the DataLoader\n",
+    "        postprocessors: list[object] | None = None, # default is empty list\n",
+    "        mode: str = \"online\", # TODO: add online to relevant modes\n",
+    "        return_truncation: str = True # TODO: Why is this a string?\n",
+    "        ) -> None:\n",
+    "\n",
+    "        self.print = False\n",
+    "\n",
+    "        num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs\n",
+    "\n",
+    "        if not isinstance(num_SKUs, int):\n",
+    "            raise ValueError(\"num_SKUs should be an integer.\")\n",
+    "\n",
+    "        self.set_param(\"num_SKUs\", num_SKUs, shape=(1,), new=True)\n",
+    "\n",
+    "        self.set_param(\"p_bound_low\", p_bound_low, shape=(num_SKUs,), new=True)\n",
+    "        self.set_param(\"p_bound_high\", p_bound_high, shape=(num_SKUs,), new=True)\n",
+    "\n",
+    "        self.set_observation_space(dataloader.X_shape)\n",
+    "        self.set_action_space(dataloader.Y_shape, low=self.p_bound_low, high=self.p_bound_high)\n",
+    "\n",
+    "        mdp_info = MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train)\n",
+    "\n",
+    "        super().__init__(mdp_info=mdp_info,\n",
+    "                         postprocessors=postprocessors,\n",
+    "                         mode=mode, return_truncation=return_truncation,\n",
+    "                         alpha=alpha,\n",
+    "                         beta=beta,\n",
+    "                         dataloader=dataloader,\n",
+    "                         horizon_train=horizon_train)\n",
+    "\n",
+    "    def step_(self,\n",
+    "            action: np.ndarray # prices\n",
+    "            ) -> Tuple[np.ndarray, float, bool, bool, dict]:\n",
+    "        return observation, reward, terminated, truncated, info"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ddop",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}