From 940cf74e0d9d5bd2dde94c914ec4f6350a8538fb Mon Sep 17 00:00:00 2001
From: Tim Lachner
Date: Mon, 11 Nov 2024 08:47:59 +0100
Subject: [PATCH] pricing env, main TODOs: mode and dataloader.

---
 .../22_envs_pricing/10_base_pricing_env.ipynb | 168 +++++++++++++++++-
 .../20_dynamic_pricing_env.ipynb              | 124 +++++++++++++
 2 files changed, 284 insertions(+), 8 deletions(-)
 create mode 100644 nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb

diff --git a/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb b/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
index 278e9aa..c3d8404 100644
--- a/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
+++ b/nbs/20_environments/22_envs_pricing/10_base_pricing_env.ipynb
@@ -10,27 +10,179 @@
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
+   "outputs": [],
    "source": [
     "from nbdev.showdoc import *"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "from abc import ABC, abstractmethod\n",
+    "from typing import Union, Tuple, List\n",
+    "\n",
+    "from ddopai.envs.base import BaseEnvironment\n",
+    "from ddopai.utils import Parameter, MDPInfo\n",
+    "from ddopai.dataloaders.base import BaseDataLoader\n",
+    "from ddopai.loss_functions import pinball_loss\n",
+    "\n",
+    "import gymnasium as gym\n",
+    "\n",
+    "import numpy as np\n",
+    "import time"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
+   "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#| export\n",
+    "class BasePricingEnv(BaseEnvironment):\n",
+    "    \"\"\"\n",
+    "    Base class for pricing environments. This class inherits from BaseEnvironment.\n",
+    "\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self,\n",
+    "\n",
+    "        ## Parameters for Base env:\n",
+    "        mdp_info: MDPInfo, #\n",
+    "        postprocessors: list[object] | None = None, # default is empty list\n",
+    "        mode: str = \"online\", # additional mode for the pricing environment TODO: add online mode to training loop\n",
+    "        return_truncation: str = True, # whether to return a truncated condition in step function\n",
+    "        dataloader: BaseDataLoader = None, # dataloader for the environment\n",
+    "\n",
+    "        alpha: Union[float, np.ndarray] = 1, # market size parameter\n",
+    "        beta: Union[float, np.ndarray] = 1, # price sensitivity parameter\n",
+    "        horizon_train: int = 100 # horizon for the online learning TODO: check if it can be renamed to horizon\n",
+    "        ) -> None:\n",
+    "\n",
+    "        self.dataloader = dataloader\n",
+    "\n",
+    "        self.set_param(\"alpha\", alpha, shape=(self.num_SKUs[0],), new=True)\n",
+    "        self.set_param(\"beta\", beta, shape=(self.num_SKUs[0],), new=True)\n",
+    "\n",
+    "        # TODO: check in the base env if train_horizon is needed\n",
+    "        super().__init__(mdp_info=mdp_info, postprocessors=postprocessors, mode=mode, return_truncation=return_truncation, horizon_train=horizon_train)\n",
+    "\n",
+    "    def set_observation_space(self,\n",
+    "            shape: tuple, # shape of the dataloader features\n",
+    "            low: Union[np.ndarray, float] = -np.inf, # lower bound of the observation space\n",
+    "            high: Union[np.ndarray, float] = np.inf, # upper bound of the observation space\n",
+    "            samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
+    "            ) -> None:\n",
+    "\n",
+    "        '''\n",
+    "        Set the observation space of the environment.\n",
+    "        This is a standard function for simple observation spaces. For more complex observation spaces,\n",
+    "        this function should be overwritten. Note that it is assumed that the first dimension\n",
+    "        is n_samples, which is not relevant for the observation space.\n",
+    "\n",
+    "        '''\n",
+    "\n",
+    "        # To handle cases when no external information is available (e.g., parametric NV)\n",
+    "\n",
+    "        if shape is None:\n",
+    "            self.observation_space = None\n",
+    "\n",
+    "        else:\n",
+    "            if not isinstance(shape, tuple):\n",
+    "                raise ValueError(\"Shape must be a tuple.\")\n",
+    "\n",
+    "            if samples_dim_included:\n",
+    "                shape = shape[1:] # assumed that the first dimension is the number of samples\n",
+    "\n",
+    "            self.observation_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
+    "\n",
+    "    def set_action_space(self,\n",
+    "            shape: tuple, # shape of the dataloader target\n",
+    "            low: Union[np.ndarray, float] = -np.inf, # lower bound of the action space\n",
+    "            high: Union[np.ndarray, float] = np.inf, # upper bound of the action space\n",
+    "            samples_dim_included = True # whether the first dimension of the shape input is the number of samples\n",
+    "            ) -> None:\n",
+    "\n",
+    "        '''\n",
+    "        Set the action space of the environment.\n",
+    "        This is a standard function for simple action spaces. For more complex action spaces,\n",
+    "        this function should be overwritten. Note that it is assumed that the first dimension\n",
+    "        is n_samples, which is not relevant for the action space.\n",
+    "        '''\n",
+    "\n",
+    "        if not isinstance(shape, tuple):\n",
+    "            raise ValueError(\"Shape must be a tuple.\")\n",
+    "\n",
+    "        if samples_dim_included:\n",
+    "            shape = shape[1:] # assumed that the first dimension is the number of samples\n",
+    "\n",
+    "        self.action_space = gym.spaces.Box(low=low, high=high, shape=shape, dtype=np.float32)\n",
+    "\n",
+    "    def get_observation(self):\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Return the current observation. In the online learning case it returns only the state;\n",
+    "        for other cases, this function should be overwritten.\n",
+    "\n",
+    "        \"\"\"\n",
+    "\n",
+    "        X_item, = self.dataloader[self.index]\n",
+    "\n",
+    "        return X_item\n",
+    "\n",
+    "    def get_demand_response(self, action):\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Return the demand and the reward for the current action. This function should be overwritten.\n",
+    "        TODO: add the tuple call to the pricing dataloader\n",
+    "        \"\"\"\n",
+    "        Y_item, epsilon = self.dataloader[self.index, action]\n",
+    "        return Y_item, epsilon\n",
+    "    def reset(self,\n",
+    "            start_index: int | str = None, # index to start from\n",
+    "            state: np.ndarray = None # initial state\n",
+    "            ) -> Tuple[np.ndarray, bool]:\n",
+    "\n",
+    "        \"\"\"\n",
+    "        Reset function for the pricing problem. It will return the first observation.\n",
+    "        For val and test modes, it will by default reset to 0, while for the train mode it depends\n",
+    "        on the parameter \"horizon_train\" whether a random point in the training data or 0 is selected.\n",
+    "        \"\"\"\n",
+    "\n",
+    "        truncated = self.reset_index(start_index)\n",
+    "\n",
+    "\n",
+    "\n",
+    "        observation = self.get_observation()\n",
+    "\n",
+    "        return observation\n"
+   ]
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "ddop",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.0"
   }
  },
  "nbformat": 4,
diff --git a/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb b/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb
new file mode 100644
index 0000000..e8a1b0a
--- /dev/null
+++ b/nbs/20_environments/22_envs_pricing/20_dynamic_pricing_env.ipynb
@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dynamic Pricing Env\n",
+    "\n",
+    "> Static dynamic pricing environment where a decision only affects the next period"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "from nbdev.showdoc import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "from abc import ABC, abstractmethod\n",
+    "from typing import Union, Tuple, Literal\n",
+    "\n",
+    "from ddopai.utils import Parameter, MDPInfo\n",
+    "from ddopai.dataloaders.base import BaseDataLoader\n",
+    "from ddopai.loss_functions import pinball_loss, quantile_loss\n",
+    "from ddopai.envs.pricing.base import BasePricingEnv\n",
+    "\n",
+    "import gymnasium as gym\n",
+    "\n",
+    "import numpy as np\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "class DynamicPricingEnv(BasePricingEnv):\n",
+    "    \"\"\"\n",
+    "    Class implementing the dynamic pricing and learning problem, working for the single- and multi-item case.\n",
+    "    If alpha and beta are scalars and there are multiple SKUs, then the same parameters are used for all SKUs.\n",
+    "    If alpha and beta are arrays, then they should have the same length as the number of SKUs.\n",
+    "    Num_SKUs can be set as a parameter or inferred from the DataLoader.\n",
+    "    \"\"\"\n",
+    "    def __init__(self,\n",
+    "        alpha: Union[np.ndarray, Parameter, int, float] = 1.0, # market size per SKU\n",
+    "        beta: Union[np.ndarray, Parameter, int, float] = 0.5, # price elasticity per SKU\n",
+    "        p_bound_low: Union[np.ndarray, Parameter, int, float] = 0.0, # lower price bound per SKU\n",
+    "        p_bound_high: Union[np.ndarray, Parameter, int, float] = 1.0, # upper price bound per SKU\n",
+    "        dataloader: BaseDataLoader = None, # dataloader TODO: replace with pricing dataloader\n",
+    "        num_SKUs: Union[np.ndarray, Parameter, int, float] = None, # number of SKUs\n",
+    "        gamma: float = 1, # discount factor\n",
+    "        horizon_train: int | str = \"use_all_data\", # if \"use_all_data\" then horizon is inferred from the DataLoader\n",
+    "        postprocessors: list[object] | None = None, # default is empty list\n",
+    "        mode: str = \"online\", # TODO: add online to relevant modes\n",
+    "        return_truncation: str = True # TODO: Why is this a string?\n",
+    "        ) -> None:\n",
+    "\n",
+    "        self.print = False\n",
+    "\n",
+    "        num_SKUs = dataloader.num_units if num_SKUs is None else num_SKUs\n",
+    "\n",
+    "        if not isinstance(num_SKUs, int):\n",
+    "            raise ValueError(\"num_SKUs should be an integer.\")\n",
+    "\n",
+    "        self.set_param(\"num_SKUs\", num_SKUs, shape=(1,), new=True)\n",
+    "\n",
+    "        self.set_param(\"p_bound_low\", p_bound_low, shape=(num_SKUs,), new=True)\n",
+    "        self.set_param(\"p_bound_high\", p_bound_high, shape=(num_SKUs,), new=True)\n",
+    "\n",
+    "        self.set_observation_space(dataloader.X_shape)\n",
+    "        self.set_action_space(dataloader.Y_shape, low=self.p_bound_low, high=self.p_bound_high)\n",
+    "\n",
+    "        mdp_info = MDPInfo(self.observation_space, self.action_space, gamma=gamma, horizon=horizon_train)\n",
+    "\n",
+    "        super().__init__(mdp_info=mdp_info,\n",
+    "                         postprocessors=postprocessors,\n",
+    "                         mode=mode, return_truncation=return_truncation,\n",
+    "                         alpha=alpha,\n",
+    "                         beta=beta,\n",
+    "                         dataloader=dataloader,\n",
+    "                         horizon_train=horizon_train)\n",
+    "\n",
+    "    def step_(self,\n",
+    "            action: np.ndarray # prices\n",
+    "            ) -> Tuple[np.ndarray, float, bool, bool, dict]:\n",
+    "        return observation, reward, terminated, truncated, info"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ddop",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}