Added base online dataloader and pricing env
miTTimmiTTim committed Nov 26, 2024
1 parent c714d85 commit 64e2176
Showing 7 changed files with 752 additions and 1,313 deletions.
106 changes: 27 additions & 79 deletions ddopai/_modidx.py

Large diffs are not rendered by default.

1,448 changes: 238 additions & 1,210 deletions ddopai/dataloaders/tabular.py

Large diffs are not rendered by default.

12 changes: 2 additions & 10 deletions ddopai/envs/pricing/base.py
@@ -107,18 +107,10 @@ def get_observation(self):
         """

-        X_item, = self.dataloader[self.index]
+        X_item, Y_item = self.dataloader[self.index]

-        return X_item
+        return X_item, Y_item

-    def get_demand_response(self, action):
-
-        """
-        Return the demand and the reward for the current action. This function should be overwritten.
-        TODO: add the tuple call to the pricing dataloader
-        """
-        Y_item, epsilon = self.dataloader[self.index, action]
-        return Y_item, epsilon
     def reset(self,
                 start_index: int | str = None, # index to start from
                 state: np.ndarray = None # initial state
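The change above moves demand retrieval into the dataloader: get_observation now unpacks a (features, demand function) tuple, which is why the separate get_demand_response helper is deleted. Below is a minimal, hypothetical sketch of a dataloader satisfying that indexing contract; the class and attribute names are assumptions for illustration, not code from this commit.

    import numpy as np

    class OnlinePricingDataLoaderSketch:
        """Toy loader whose indexing matches the new get_observation contract."""

        def __init__(self, X, demand_functions):
            self.X = X                                # features, shape (num_periods, num_features)
            self.demand_functions = demand_functions  # one demand callable per period

        def __len__(self):
            return len(self.X)

        def __getitem__(self, idx):
            # Returns a tuple so X_item, Y_item = loader[idx] unpacks into
            # features and that period's demand function.
            return self.X[idx], self.demand_functions[idx]

    # Usage: mirrors X_item, Y_item = self.dataloader[self.index] in the env.
    loader = OnlinePricingDataLoaderSketch(np.zeros((5, 3)), [lambda obs, price: 1.0] * 5)
    X_item, Y_item = loader[0]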
45 changes: 43 additions & 2 deletions ddopai/envs/pricing/dynamic.py
@@ -37,7 +37,7 @@ def __init__(self,
                 gamma: float = 1, # discount factor
                 horizon_train: int | str = "use_all_data", # if "use_all_data" then horizon is inferred from the DataLoader
                 postprocessors: list[object] | None = None, # default is empty list
-                mode: str = "online", # TODO: add online to relevant modes
+                mode: str = "train",
                 return_truncation: str = True # TODO: Why is this a string?
             ) -> None:
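For context on the changed default, here is a hedged, runnable sketch of the signature as shown in this hunk; DynamicPricingEnvStub is a hypothetical stand-in, and only the parameters visible above are reproduced.

    class DynamicPricingEnvStub:
        def __init__(self,
                     gamma: float = 1,                            # discount factor
                     horizon_train: int | str = "use_all_data",   # infer horizon from the DataLoader
                     postprocessors: list[object] | None = None,  # default is empty list
                     mode: str = "train",                         # new default (was "online")
                     return_truncation: str = True                # annotation questioned by the TODO above
                     ) -> None:
            self.mode = mode
            self.postprocessors = postprocessors if postprocessors is not None else []

    env = DynamicPricingEnvStub()  # picks up the new default
    print(env.mode)                # train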

@@ -69,4 +69,45 @@ def step_(self,
     def step_(self,
             action: np.ndarray # prices
             ) -> Tuple[np.ndarray, float, bool, bool, dict]:
-        return observation, reward, terminated, truncated, info
+        """
+        Step function implementing the dynamic pricing and learning problem. Note that the dataloader will return an observation and a demand function.
+        """
+        if action.ndim == 2 and action.shape[0] == 1:
+            action = np.squeeze(action, axis=0)
+
+        terminated = False
+        observation, reward_function = self.get_observation()
+        reward_function_call = np.vectorize(lambda reward_function, observation, action: reward_function(observation, action))
+        demand_per_SKU = reward_function_call(reward_function, observation, action)
+
+        reward_per_SKU = demand_per_SKU * action
+        reward = np.sum(reward_per_SKU)
+        info = dict(
+            demand=demand_per_SKU.copy(),
+            action=action.copy(),
+            reward_per_SKU=reward_per_SKU.copy()
+        )
+
+        truncated = self.set_index()
+
+        if truncated:
+
+            if self.mode == "test" or self.mode == "val":
+                observation, self.demand = None, None
+            else:
+                observation, self.demand = self.get_observation()
+
+            return observation, reward, terminated, truncated, info
+
+        else:
+
+            if self.print:
+                print("next_period:", self.index+1)
+                print("next observation:", observation)
+                print("next demand:", demand_per_SKU)
+                time.sleep(3)
+
+            return observation, reward, terminated, truncated, info
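The np.vectorize call in step_ broadcasts an object array of per-SKU demand callables against the observation and price arrays, applying each function elementwise. A standalone sketch of that pattern, with made-up linear demand functions, shapes, and values purely for illustration:

    import numpy as np

    def make_demand_fn(intercept, slope):
        # Toy linear demand: falls in price, ignores the observation.
        return lambda observation, price: max(intercept - slope * price, 0.0)

    reward_function = np.array([make_demand_fn(10.0, 2.0), make_demand_fn(8.0, 1.5)])  # one callable per SKU
    observation = np.zeros(2)      # placeholder features, one entry per SKU
    action = np.array([3.0, 4.0])  # prices per SKU

    # Broadcasts the three arrays and calls each function elementwise,
    # matching reward_function_call in step_ above.
    call = np.vectorize(lambda f, obs, a: f(obs, a))
    demand_per_SKU = call(reward_function, observation, action)

    reward_per_SKU = demand_per_SKU * action  # revenue per SKU
    reward = np.sum(reward_per_SKU)
    print(demand_per_SKU, reward)             # [4. 2.] 20.0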
