Skip to content

Commit

Permalink
Fixed error in normalization of out-of-sample (OOS) SKUs
Browse files Browse the repository at this point in the history
  • Loading branch information
majoma7 committed Sep 4, 2024
1 parent cf9de2f commit 7289171
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 62 deletions.
11 changes: 3 additions & 8 deletions ddopnew/dataloaders/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,13 @@ def normalize_demand_and_features_out_of_sample(self,
# Normalize demand targets
if self.demand_normalization != 'no_normalization':
# Normalizing per SKU on time dimension

self.scaler_out_of_sample_test_demand.fit(self.demand_out_of_sample_test[:self.train_index_end+1])
transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_lag_out_of_sample_test)
transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_out_of_sample_test)
self.demand_out_of_sample_test.iloc[:,:] = transformed_demand

self.scaler_out_of_sample_val_demand.fit(self.demand_out_of_sample_val[:self.train_index_end+1])
transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_lag_out_of_sample_val)
transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_out_of_sample_val)
self.demand_out_of_sample_val.iloc[:,:] = transformed_demand

# Set unit size for demand targets
Expand Down Expand Up @@ -932,12 +933,6 @@ def normalize_demand_and_features_out_of_sample(self,

self.normalized_out_of_sample_SKUs = True

print(self.demand_out_of_sample_test)
print(self.demand_out_of_sample_val)

print(self.demand_lag_out_of_sample_test)
print(self.demand_lag_out_of_sample_val)

else:
raise NotImplementedError('Training data can only normalized during initialization - later normlization not implemented yet')

Expand Down
115 changes: 61 additions & 54 deletions nbs/10_dataloaders/12_tabular_dataloaders.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"sample: [ 0.48934242 -1.5249851 ] [-4.20898833]\n",
"sample: [-0.93587536 0.27089274] [-0.56211375]\n",
"sample shape Y: (1,)\n",
"length: 100\n"
]
Expand Down Expand Up @@ -627,28 +627,28 @@
"length train: 6 length val: 2 length test: 2\n",
"\n",
"### Data from train set ###\n",
"idx: 0 data: [-0.03919358 -1.81524105] [-5.45285373]\n",
"idx: 1 data: [-0.33238759 -0.25239502] [-2.28257421]\n",
"idx: 2 data: [-0.28088596 1.04119928] [2.99245932]\n",
"idx: 3 data: [-1.41942608 -0.14294466] [-3.4857788]\n",
"idx: 4 data: [-0.94715851 -0.90926992] [-3.88762596]\n",
"idx: 5 data: [ 0.83067309 -1.60634229] [-2.1943678]\n",
"idx: 0 data: [0.76296415 0.56616799] [2.60420305]\n",
"idx: 1 data: [-0.49101782 0.87470328] [1.75651234]\n",
"idx: 2 data: [-1.06513362 -1.69496869] [-8.10126442]\n",
"idx: 3 data: [ 1.00620518 -1.75971785] [-2.53973768]\n",
"idx: 4 data: [ 0.30594514 -0.94143423] [-0.2292405]\n",
"idx: 5 data: [-0.9155726 0.20242375] [-2.83371802]\n",
"\n",
"### Data from val set ###\n",
"idx: 0 data: [0.31728831 0.1915907 ] [0.61714501]\n",
"idx: 1 data: [0.19605744 0.15569143] [-0.31029128]\n",
"idx: 0 data: [-1.96882192 -1.13342709] [-6.54580775]\n",
"idx: 1 data: [-0.62914049 0.1169921 ] [-1.07399925]\n",
"\n",
"### Data from test set ###\n",
"idx: 0 data: [ 0.60736842 -1.62492312] [-1.52729382]\n",
"idx: 1 data: [-0.33198421 0.91780232] [2.45557787]\n",
"idx: 0 data: [1.51584383 0.34358182] [5.4066298]\n",
"idx: 1 data: [-0.34475418 -1.16769798] [-4.39100845]\n",
"\n",
"### Data from train set again ###\n",
"idx: 0 data: [-0.03919358 -1.81524105] [-5.45285373]\n",
"idx: 1 data: [-0.33238759 -0.25239502] [-2.28257421]\n",
"idx: 2 data: [-0.28088596 1.04119928] [2.99245932]\n",
"idx: 3 data: [-1.41942608 -0.14294466] [-3.4857788]\n",
"idx: 4 data: [-0.94715851 -0.90926992] [-3.88762596]\n",
"idx: 5 data: [ 0.83067309 -1.60634229] [-2.1943678]\n"
"idx: 0 data: [0.76296415 0.56616799] [2.60420305]\n",
"idx: 1 data: [-0.49101782 0.87470328] [1.75651234]\n",
"idx: 2 data: [-1.06513362 -1.69496869] [-8.10126442]\n",
"idx: 3 data: [ 1.00620518 -1.75971785] [-2.53973768]\n",
"idx: 4 data: [ 0.30594514 -0.94143423] [-0.2292405]\n",
"idx: 5 data: [-0.9155726 0.20242375] [-2.83371802]\n"
]
}
],
Expand Down Expand Up @@ -702,8 +702,8 @@
{
"data": {
"text/plain": [
"array([[ 0.60736842, -1.62492312],\n",
" [-0.33198421, 0.91780232]])"
"array([[ 1.51584383, 0.34358182],\n",
" [-0.34475418, -1.16769798]])"
]
},
"execution_count": null,
Expand All @@ -727,8 +727,8 @@
{
"data": {
"text/plain": [
"array([[-1.52729382],\n",
" [ 2.45557787]])"
"array([[ 5.4066298 ],\n",
" [-4.39100845]])"
]
},
"execution_count": null,
Expand Down Expand Up @@ -764,36 +764,36 @@
"length train: 4 length val: 2 length test: 2\n",
"\n",
"### Data from train set ###\n",
"idx: 0 data: [[ 1.31636985 0.7897913 -0.75019741]\n",
" [-1.0363052 -0.33563099 5.30310363]] [-1.73725544]\n",
"idx: 1 data: [[-1.0363052 -0.33563099 5.30310363]\n",
" [ 0.61748301 0.12942096 -1.73725544]] [0.75991095]\n",
"idx: 2 data: [[ 0.61748301 0.12942096 -1.73725544]\n",
" [ 0.97507757 0.60012032 0.75991095]] [3.60080094]\n",
"idx: 3 data: [[0.97507757 0.60012032 0.75991095]\n",
" [0.05424665 0.05414227 3.60080094]] [1.06903634]\n",
"idx: 0 data: [[-0.19678927 -0.21825206 2.22286561]\n",
" [-0.08527385 -0.67115734 -0.15445037]] [-1.29696916]\n",
"idx: 1 data: [[-0.08527385 -0.67115734 -0.15445037]\n",
" [ 0.62316456 1.42684226 -1.29696916]] [7.0530133]\n",
"idx: 2 data: [[ 0.62316456 1.42684226 -1.29696916]\n",
" [-0.01915829 0.22782379 7.0530133 ]] [-0.15306061]\n",
"idx: 3 data: [[-0.01915829 0.22782379 7.0530133 ]\n",
" [-0.72957298 1.26745062 -0.15306061]] [3.34648077]\n",
"\n",
"### Data from val set ###\n",
"idx: 0 data: [[ 0.05424665 0.05414227 3.60080094]\n",
" [-0.19670518 2.25039121 1.06903634]] [6.53359576]\n",
"idx: 1 data: [[-0.19670518 2.25039121 1.06903634]\n",
" [-1.84005742 -0.24281547 6.53359576]] [-3.96123686]\n",
"idx: 0 data: [[-0.72957298 1.26745062 -0.15306061]\n",
" [ 1.14548309 -0.42060564 3.34648077]] [0.55794246]\n",
"idx: 1 data: [[ 1.14548309 -0.42060564 3.34648077]\n",
" [-0.11049003 -0.14022136 0.55794246]] [-0.38509046]\n",
"\n",
"### Data from test set ###\n",
"idx: 0 data: [[-1.84005742 -0.24281547 6.53359576]\n",
" [ 0.53974671 1.48055778 -3.96123686]] [5.10164607]\n",
"idx: 1 data: [[ 0.53974671 1.48055778 -3.96123686]\n",
" [ 0.0885949 1.45853039 5.10164607]] [5.47333133]\n",
"idx: 0 data: [[-0.11049003 -0.14022136 0.55794246]\n",
" [ 0.95929229 0.74463911 -0.38509046]] [4.0910728]\n",
"idx: 1 data: [[ 0.95929229 0.74463911 -0.38509046]\n",
" [ 1.20704249 -0.41622404 4.0910728 ]] [0.82957573]\n",
"\n",
"### Data from train set again ###\n",
"idx: 0 data: [[ 1.31636985 0.7897913 -0.75019741]\n",
" [-1.0363052 -0.33563099 5.30310363]] [-1.73725544]\n",
"idx: 1 data: [[-1.0363052 -0.33563099 5.30310363]\n",
" [ 0.61748301 0.12942096 -1.73725544]] [0.75991095]\n",
"idx: 2 data: [[ 0.61748301 0.12942096 -1.73725544]\n",
" [ 0.97507757 0.60012032 0.75991095]] [3.60080094]\n",
"idx: 3 data: [[0.97507757 0.60012032 0.75991095]\n",
" [0.05424665 0.05414227 3.60080094]] [1.06903634]\n"
"idx: 0 data: [[-0.19678927 -0.21825206 2.22286561]\n",
" [-0.08527385 -0.67115734 -0.15445037]] [-1.29696916]\n",
"idx: 1 data: [[-0.08527385 -0.67115734 -0.15445037]\n",
" [ 0.62316456 1.42684226 -1.29696916]] [7.0530133]\n",
"idx: 2 data: [[ 0.62316456 1.42684226 -1.29696916]\n",
" [-0.01915829 0.22782379 7.0530133 ]] [-0.15306061]\n",
"idx: 3 data: [[-0.01915829 0.22782379 7.0530133 ]\n",
" [-0.72957298 1.26745062 -0.15306061]] [3.34648077]\n"
]
}
],
Expand Down Expand Up @@ -845,7 +845,19 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'BaseDataLoader' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#| export\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mMultiShapeLoader\u001b[39;00m(\u001b[43mBaseDataLoader\u001b[49m):\n\u001b[1;32m 4\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;03m A class designed for comlex datasets with mutlipe feature types. The class is more\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;03m memory-efficient than the XYDataLoader, as it separate the storeage of SKU-specific\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124;03m specific SKU.\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# mandatory data\u001b[39;00m\n\u001b[1;32m 16\u001b[0m demand: pd\u001b[38;5;241m.\u001b[39mDataFrame, \u001b[38;5;66;03m# Demand data of shape time x SKU\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 40\u001b[0m provide_additional_target: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;66;03m# follows ICL convention by providing actual demand to token, with the last token receiving 0\u001b[39;00m\n\u001b[1;32m 41\u001b[0m ):\n",
"\u001b[0;31mNameError\u001b[0m: name 'BaseDataLoader' is not defined"
]
}
],
"source": [
"#| export\n",
"class MultiShapeLoader(BaseDataLoader):\n",
Expand Down Expand Up @@ -1444,12 +1456,13 @@
" # Normalize demand targets\n",
" if self.demand_normalization != 'no_normalization':\n",
" # Normalizing per SKU on time dimension\n",
"\n",
" self.scaler_out_of_sample_test_demand.fit(self.demand_out_of_sample_test[:self.train_index_end+1])\n",
" transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_lag_out_of_sample_test)\n",
" transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_out_of_sample_test)\n",
" self.demand_out_of_sample_test.iloc[:,:] = transformed_demand\n",
"\n",
" self.scaler_out_of_sample_val_demand.fit(self.demand_out_of_sample_val[:self.train_index_end+1])\n",
" transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_lag_out_of_sample_val)\n",
" transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_out_of_sample_val)\n",
" self.demand_out_of_sample_val.iloc[:,:] = transformed_demand\n",
" \n",
" # Set unit size for demand targets\n",
Expand Down Expand Up @@ -1512,12 +1525,6 @@
" \n",
" self.normalized_out_of_sample_SKUs = True\n",
"\n",
" print(self.demand_out_of_sample_test)\n",
" print(self.demand_out_of_sample_val)\n",
"\n",
" print(self.demand_lag_out_of_sample_test)\n",
" print(self.demand_lag_out_of_sample_val)\n",
"\n",
" else:\n",
" raise NotImplementedError('Training data can only normalized during initialization - later normlization not implemented yet')\n",
"\n",
Expand Down

0 comments on commit 7289171

Please sign in to comment.