Added base online dataloader and pricing env
miTTimmiTTim committed Nov 26, 2024
1 parent c714d85 commit 64e2176
Showing 7 changed files with 752 additions and 1,313 deletions.
106 changes: 27 additions & 79 deletions ddopai/_modidx.py

Large diffs are not rendered by default.

1,448 changes: 238 additions & 1,210 deletions ddopai/dataloaders/tabular.py

Large diffs are not rendered by default.

12 changes: 2 additions & 10 deletions ddopai/envs/pricing/base.py
@@ -107,18 +107,10 @@ def get_observation(self):
         """

-        X_item, = self.dataloader[self.index]
+        X_item, Y_item = self.dataloader[self.index]

-        return X_item
+        return X_item, Y_item

-    def get_demand_response(self, action):
-
-        """
-        Return the demand and the reward for the current action. This function should be overwritten.
-        TODO: add the tuple call to the pricing dataloader
-        """
-        Y_item, epsilon = self.dataloader[self.index, action]
-        return Y_item, epsilon
     def reset(self,
                 start_index: int | str = None, # index to start from
                 state: np.ndarray = None # initial state
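The change above moves demand retrieval into the dataloader: get_observation now unpacks a (features, demand function) tuple, which is why the separate get_demand_response helper is deleted. Below is a minimal, hypothetical sketch of a dataloader satisfying that indexing contract; the class and attribute names are assumptions for illustration, not code from this commit.

    import numpy as np

    class OnlinePricingDataLoaderSketch:
        """Toy loader whose indexing matches the new get_observation contract."""

        def __init__(self, X, demand_functions):
            self.X = X                                # features, shape (num_periods, num_features)
            self.demand_functions = demand_functions  # one demand callable per period

        def __len__(self):
            return len(self.X)

        def __getitem__(self, idx):
            # Returns a tuple so X_item, Y_item = loader[idx] unpacks into
            # features and that period's demand function.
            return self.X[idx], self.demand_functions[idx]

    # Usage: mirrors X_item, Y_item = self.dataloader[self.index] in the env.
    loader = OnlinePricingDataLoaderSketch(np.zeros((5, 3)), [lambda obs, price: 1.0] * 5)
    X_item, Y_item = loader[0]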
45 changes: 43 additions & 2 deletions ddopai/envs/pricing/dynamic.py
@@ -37,7 +37,7 @@ def __init__(self,
                 gamma: float = 1, # discount factor
                 horizon_train: int | str = "use_all_data", # if "use_all_data" then horizon is inferred from the DataLoader
                 postprocessors: list[object] | None = None, # default is empty list
-                mode: str = "online", # TODO: add online to relevant modes
+                mode: str = "train",
                 return_truncation: str = True # TODO: Why is this a string?
             ) -> None:
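For context on the changed default, here is a hedged, runnable sketch of the signature as shown in this hunk; DynamicPricingEnvStub is a hypothetical stand-in, and only the parameters visible above are reproduced.

    class DynamicPricingEnvStub:
        def __init__(self,
                     gamma: float = 1,                            # discount factor
                     horizon_train: int | str = "use_all_data",   # infer horizon from the DataLoader
                     postprocessors: list[object] | None = None,  # default is empty list
                     mode: str = "train",                         # new default (was "online")
                     return_truncation: str = True                # annotation questioned by the TODO above
                     ) -> None:
            self.mode = mode
            self.postprocessors = postprocessors if postprocessors is not None else []

    env = DynamicPricingEnvStub()  # picks up the new default
    print(env.mode)                # train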

@@ -69,4 +69,45 @@ def step_(self,
     def step_(self,
             action: np.ndarray # prices
             ) -> Tuple[np.ndarray, float, bool, bool, dict]:
-        return observation, reward, terminated, truncated, info
+        """
+        Step function implementing the dynamic pricing and learning problem. Note that the dataloader will return an observation and a demand function.
+        """
+        if action.ndim == 2 and action.shape[0] == 1:
+            action = np.squeeze(action, axis=0)
+
+        terminated = False
+        observation, reward_function = self.get_observation()
+        reward_function_call = np.vectorize(lambda reward_function, observation, action: reward_function(observation, action))
+        demand_per_SKU = reward_function_call(reward_function, observation, action)
+
+        reward_per_SKU = demand_per_SKU * action
+        reward = np.sum(reward_per_SKU)
+        info = dict(
+            demand=demand_per_SKU.copy(),
+            action=action.copy(),
+            reward_per_SKU=reward_per_SKU.copy()
+        )
+
+        truncated = self.set_index()
+
+        if truncated:
+
+            if self.mode == "test" or self.mode == "val":
+                observation, self.demand = None, None
+            else:
+                observation, self.demand = self.get_observation()
+
+            return observation, reward, terminated, truncated, info
+
+        else:
+
+            if self.print:
+                print("next_period:", self.index+1)
+                print("next observation:", observation)
+                print("next demand:", demand_per_SKU)
+                time.sleep(3)
+
+            return observation, reward, terminated, truncated, info
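The np.vectorize call in step_ broadcasts an object array of per-SKU demand callables against the observation and price arrays, applying each function elementwise. A standalone sketch of that pattern, with made-up linear demand functions, shapes, and values purely for illustration:

    import numpy as np

    def make_demand_fn(intercept, slope):
        # Toy linear demand: falls in price, ignores the observation.
        return lambda observation, price: max(intercept - slope * price, 0.0)

    reward_function = np.array([make_demand_fn(10.0, 2.0), make_demand_fn(8.0, 1.5)])  # one callable per SKU
    observation = np.zeros(2)      # placeholder features, one entry per SKU
    action = np.array([3.0, 4.0])  # prices per SKU

    # Broadcasts the three arrays and calls each function elementwise,
    # matching reward_function_call in step_ above.
    call = np.vectorize(lambda f, obs, a: f(obs, a))
    demand_per_SKU = call(reward_function, observation, action)

    reward_per_SKU = demand_per_SKU * action  # revenue per SKU
    reward = np.sum(reward_per_SKU)
    print(demand_per_SKU, reward)             # [4. 2.] 20.0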
