Skip to content

Commit

Permalink
pricing env, main TODOs: mode and dataloader.
Browse files Browse the repository at this point in the history
miTTimmiTTim committed Nov 11, 2024
1 parent c065b70 commit 940cf74
Showing 2 changed files with 284 additions and 8 deletions.
168 changes: 160 additions & 8 deletions nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
Original file line number Diff line number Diff line change
@@ -10,27 +10,179 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from nbdev.showdoc import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"from abc import ABC, abstractmethod\n",
"from typing import Union, Tuple, List\n",
"\n",
"from ddopai.envs.base import BaseEnvironment\n",
"from ddopai.utils import Parameter, MDPInfo\n",
"from ddopai.dataloaders.base import BaseDataLoader\n",
"from ddopai.loss_functions import pinball_loss\n",
"\n",
"import gymnasium as gym\n",
"\n",
"import numpy as np\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"metadata": {},
"outputs": [],
"source": []
"source": [
"#| export\n",
"class BasePricingEnv(BaseEnvironment):\n",
" \"\"\"\n",
" Base class for inventory management environments. This class inherits from BaseEnvironment.\n",
" \n",
" \"\"\"\n",
"\n",
" def __init__(self, \n",
"\n",
" ## Parameters for Base env:\n",
" mdp_info: MDPInfo, #\n",
" postprocessors: list[object] | None = None, # default is empty list\n",
" mode: str = \"online\", # additional mode for the pricing environment TODO: add online mode to training loop\n",
" return_truncation: str = True, # whether to return a truncated condition in step function\n",
" dataloader: BaseDataLoader = None, # dataloader for the environment\n",
" \n",
" alpha: Union[float, np.ndarray] = 1, # market size parameter\n",
" beta: Union[float, np.ndarray] = 1, # price sensitivity parameter\n",
" horizon_train: int = 100 # horizon for the online learning TODO: check if it can be renamed to horizon\n",
" ) -> None:\n",
"\n",
" self.dataloader = dataloader\n",
" \n",
" self.set_param(\"alpha\", alpha, shape=(self.nun_SKUs[0],), new=True)\n",
" self.set_param(\"beta\", beta, shape=(self.nun_SKUs[0],), new=True)\n",
" \n",
" # TODO: check in the base env if train_horizon is needed \n",
" super().__init__(mdp_info=mdp_info, postprocessors = postprocessors, mode = mode, return_truncation=return_truncation, horizon_train=horizon_train)\n",
" \n",
" def set_observation_space(self,\n",
" shape: tuple, # shape of the dataloader features\n",
" low: Union[np.ndarray, float] = -np.inf, # lower bound of the observation space\n",
" high: Union[np.ndarray, float] = np.inf, # upper bound of the observation space\n",
" samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
" ) -> None:\n",
" \n",
" '''\n",
" Set the observation space of the environment.\n",
" This is a standard function for simple observation spaces. For more complex observation spaces,\n",
" this function should be overwritten. Note that it is assumped that the first dimension\n",
" is n_samples that is not relevant for the observation space.\n",
"\n",
" '''\n",
"\n",
" # To handle cases when no external information is available (e.g., parametric NV)\n",
" \n",
" if shape is None:\n",
" self.observation_space = None\n",
"\n",
" else:\n",
" if not isinstance(shape, tuple):\n",
" raise ValueError(\"Shape must be a tuple.\")\n",
" \n",
" if samples_dim_included:\n",
" shape = shape[1:] # assumed that the first dimension is the number of samples\n",
"\n",
" self.observation_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
"\n",
" def set_action_space(self,\n",
" shape: tuple, # shape of the dataloader target\n",
" low: Union[np.ndarray, float] = -np.inf, # lower bound of the observation space\n",
" high: Union[np.ndarray, float] = np.inf, # upper bound of the observation space\n",
" samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
" ) -> None:\n",
" \n",
" '''\n",
" Set the action space of the environment.\n",
" This is a standard function for simple action spaces. For more complex action spaces,\n",
" this function should be overwritten. Note that it is assumped that the first dimension\n",
" is n_samples that is not relevant for the action space.\n",
" '''\n",
"\n",
" if not isinstance(shape, tuple):\n",
" raise ValueError(\"Shape must be a tuple.\")\n",
" \n",
" if samples_dim_included:\n",
" shape = shape[1:] # assumed that the first dimension is the number of samples\n",
"\n",
" self.action_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
" \n",
" def get_observation(self):\n",
" \n",
" \"\"\"\n",
" Return the current observation. This function is for the online learning case it will return only the state,\n",
" this function should be overwritten.\n",
"\n",
" \"\"\"\n",
"\n",
" X_item, = self.dataloader[self.index]\n",
"\n",
" return X_item\n",
" \n",
" def get_demand_response(self, action):\n",
" \n",
" \"\"\"\n",
" Return the demand and the reward for the current action. This function should be overwritten.\n",
" TODO: add the tuple call to the pricing dataloader\n",
" \"\"\"\n",
" Y_item, epsilon = self.dataloader[self.index, action]\n",
" return Y_item, epsilon\n",
" def reset(self,\n",
" start_index: int | str = None, # index to start from\n",
" state: np.ndarray = None # initial state\n",
" ) -> Tuple[np.ndarray, bool]:\n",
"\n",
" \"\"\"\n",
" Reset function for the Newsvendor problem. It will return the first observation and demand.\n",
" For val and test modes, it will by default reset to 0, while for the train mode it depends\n",
" on the paramter \"horizon_train\" whether a random point in the training data is selected or 0\n",
" \"\"\"\n",
"\n",
" truncated = self.reset_index(start_index)\n",
"\n",
"\n",
"\n",
" observation, self.demand = self.get_observation()\n",
" \n",
" return observation\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ddop",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
124 changes: 124 additions & 0 deletions nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dynamic Pricing Env\n",
"\n",
"> Static dynamic pricing environment where a decision only affects the next period "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"from nbdev.showdoc import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"from abc import ABC, abstractmethod\n",
"from typing import Union, Tuple, Literal\n",
"\n",
"from ddopai.utils import Parameter, MDPInfo\n",
"from ddopai.dataloaders.base import BaseDataLoader\n",
"from ddopai.loss_functions import pinball_loss, quantile_loss\n",
"from ddopai.envs.pricing.base import BasePricingEnv\n",
"\n",
"import gymnasium as gym\n",
"\n",
"import numpy as np\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# | export\n",
"class DynamicPricingEnv(BasePricingEnv):\n",
" \"\"\"\n",
" Class implementing the dynamic pricing and learning problem, working for the single- and multi-item case.\n",
" If alpha and beta are scalars and they are multiple SKUs, then the same parameters are used for all SKUs.\n",
" If alpha and beta are arrays, then they should have the same length as the number of SKUs.\n",
" Num_SKUs can be set as parameter or inferrred from the DataLoader.\n",
" \"\"\"\n",
" def __init__(self,\n",
" alpha: Union[np.ndarray, Parameter, int, float] = 1.0, # market size per SKUs\n",
" beta: Union[np.ndarray, Parameter, int, float] = 0.5, # price elasticity per SKUs\n",
" p_bound_low: Union[np.ndarray, Parameter, int, float] = 0.0, # lower price bound per SKUs\n",
" p_bound_high: Union[np.ndarray, Parameter, int, float] = 1.0, # upper price bound per SKUs\n",
" dataloader: BaseDataLoader = None, # dataloader TODO: replace with pricing dataloader\n",
" num_SKUs: Union[np.ndarray, Parameter, int, float] = None, # number of SKUs\n",
" gamma: float = 1, # discount factor\n",
" horizon_train: int | str = \"use_all_data\", # if \"use_all_data\" then horizon is inferred from the DataLoader\n",
" postprocessors: list[object] | None = None, # default is empty list \n",
" mode: str = \"online\", # TODO: add online to relevant modes\n",
" return_truncation: str = True # TODO:Why is this a string?\n",
" ) -> None:\n",
"\n",
" self.print=False\n",
" \n",
" num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs\n",
" \n",
" if not isinstance(num_SKUs, int):\n",
" raise ValueError(\"num_SKUs should be an integer.\")\n",
" \n",
" self.set_param(\"num_SKUs\", num_SKUs, shape=(1,), new=True)\n",
" \n",
" self.set_param(\"p_bound_low\", p_bound_low, shape=(num_SKUs,), new=True)\n",
" self.set_param(\"p_bound_high\", p_bound_high, shape=(num_SKUs,), new=True)\n",
" \n",
" self.set_observation_space(dataloader.X_shape)\n",
" self.set_action_space(dataloader.Y_shape, low = self.p_bound_low, high = self.p_bound_high)\n",
" \n",
" mdp_info = MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train)\n",
" \n",
" super().__init__(mdp_info=mdp_info,\n",
" postprocessors=postprocessors,\n",
" mode=mode, return_truncation=return_truncation,\n",
" alpha=alpha,\n",
" beta=beta,\n",
" dataloader=dataloader,\n",
" horizon_train=horizon_train)\n",
" \n",
" def step_(self,\n",
" action: np.ndarray # prices)\n",
" ) -> Tuple[np.ndarray, float, bool, bool, dict]:\n",
" return observation, reward, terminated, truncated, info"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ddop",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 940cf74

Please sign in to comment.