Skip to content

Commit

Permalink
Fixed error in normalization of out-of-sample (OOS) SKUs
Browse files Browse the repository at this point in the history
  • Loading branch information
majoma7 committed Sep 4, 2024
1 parent cf9de2f commit 7289171
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 62 deletions.
11 changes: 3 additions & 8 deletions ddopnew/dataloaders/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,13 @@ def normalize_demand_and_features_out_of_sample(self,
# Normalize demand targets
if self.demand_normalization != 'no_normalization':
# Normalizing per SKU on time dimension

self.scaler_out_of_sample_test_demand.fit(self.demand_out_of_sample_test[:self.train_index_end+1])
transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_lag_out_of_sample_test)
transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_out_of_sample_test)
self.demand_out_of_sample_test.iloc[:,:] = transformed_demand

self.scaler_out_of_sample_val_demand.fit(self.demand_out_of_sample_val[:self.train_index_end+1])
transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_lag_out_of_sample_val)
transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_out_of_sample_val)
self.demand_out_of_sample_val.iloc[:,:] = transformed_demand

# Set unit size for demand targets
Expand Down Expand Up @@ -932,12 +933,6 @@ def normalize_demand_and_features_out_of_sample(self,

self.normalized_out_of_sample_SKUs = True

print(self.demand_out_of_sample_test)
print(self.demand_out_of_sample_val)

print(self.demand_lag_out_of_sample_test)
print(self.demand_lag_out_of_sample_val)

else:
raise NotImplementedError('Training data can only normalized during initialization - later normlization not implemented yet')

Expand Down
115 changes: 61 additions & 54 deletions nbs/10_dataloaders/12_tabular_dataloaders.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"sample: [ 0.48934242 -1.5249851 ] [-4.20898833]\n",
"sample: [-0.93587536 0.27089274] [-0.56211375]\n",
"sample shape Y: (1,)\n",
"length: 100\n"
]
Expand Down Expand Up @@ -627,28 +627,28 @@
"length train: 6 length val: 2 length test: 2\n",
"\n",
"### Data from train set ###\n",
"idx: 0 data: [-0.03919358 -1.81524105] [-5.45285373]\n",
"idx: 1 data: [-0.33238759 -0.25239502] [-2.28257421]\n",
"idx: 2 data: [-0.28088596 1.04119928] [2.99245932]\n",
"idx: 3 data: [-1.41942608 -0.14294466] [-3.4857788]\n",
"idx: 4 data: [-0.94715851 -0.90926992] [-3.88762596]\n",
"idx: 5 data: [ 0.83067309 -1.60634229] [-2.1943678]\n",
"idx: 0 data: [0.76296415 0.56616799] [2.60420305]\n",
"idx: 1 data: [-0.49101782 0.87470328] [1.75651234]\n",
"idx: 2 data: [-1.06513362 -1.69496869] [-8.10126442]\n",
"idx: 3 data: [ 1.00620518 -1.75971785] [-2.53973768]\n",
"idx: 4 data: [ 0.30594514 -0.94143423] [-0.2292405]\n",
"idx: 5 data: [-0.9155726 0.20242375] [-2.83371802]\n",
"\n",
"### Data from val set ###\n",
"idx: 0 data: [0.31728831 0.1915907 ] [0.61714501]\n",
"idx: 1 data: [0.19605744 0.15569143] [-0.31029128]\n",
"idx: 0 data: [-1.96882192 -1.13342709] [-6.54580775]\n",
"idx: 1 data: [-0.62914049 0.1169921 ] [-1.07399925]\n",
"\n",
"### Data from test set ###\n",
"idx: 0 data: [ 0.60736842 -1.62492312] [-1.52729382]\n",
"idx: 1 data: [-0.33198421 0.91780232] [2.45557787]\n",
"idx: 0 data: [1.51584383 0.34358182] [5.4066298]\n",
"idx: 1 data: [-0.34475418 -1.16769798] [-4.39100845]\n",
"\n",
"### Data from train set again ###\n",
"idx: 0 data: [-0.03919358 -1.81524105] [-5.45285373]\n",
"idx: 1 data: [-0.33238759 -0.25239502] [-2.28257421]\n",
"idx: 2 data: [-0.28088596 1.04119928] [2.99245932]\n",
"idx: 3 data: [-1.41942608 -0.14294466] [-3.4857788]\n",
"idx: 4 data: [-0.94715851 -0.90926992] [-3.88762596]\n",
"idx: 5 data: [ 0.83067309 -1.60634229] [-2.1943678]\n"
"idx: 0 data: [0.76296415 0.56616799] [2.60420305]\n",
"idx: 1 data: [-0.49101782 0.87470328] [1.75651234]\n",
"idx: 2 data: [-1.06513362 -1.69496869] [-8.10126442]\n",
"idx: 3 data: [ 1.00620518 -1.75971785] [-2.53973768]\n",
"idx: 4 data: [ 0.30594514 -0.94143423] [-0.2292405]\n",
"idx: 5 data: [-0.9155726 0.20242375] [-2.83371802]\n"
]
}
],
Expand Down Expand Up @@ -702,8 +702,8 @@
{
"data": {
"text/plain": [
"array([[ 0.60736842, -1.62492312],\n",
" [-0.33198421, 0.91780232]])"
"array([[ 1.51584383, 0.34358182],\n",
" [-0.34475418, -1.16769798]])"
]
},
"execution_count": null,
Expand All @@ -727,8 +727,8 @@
{
"data": {
"text/plain": [
"array([[-1.52729382],\n",
" [ 2.45557787]])"
"array([[ 5.4066298 ],\n",
" [-4.39100845]])"
]
},
"execution_count": null,
Expand Down Expand Up @@ -764,36 +764,36 @@
"length train: 4 length val: 2 length test: 2\n",
"\n",
"### Data from train set ###\n",
"idx: 0 data: [[ 1.31636985 0.7897913 -0.75019741]\n",
" [-1.0363052 -0.33563099 5.30310363]] [-1.73725544]\n",
"idx: 1 data: [[-1.0363052 -0.33563099 5.30310363]\n",
" [ 0.61748301 0.12942096 -1.73725544]] [0.75991095]\n",
"idx: 2 data: [[ 0.61748301 0.12942096 -1.73725544]\n",
" [ 0.97507757 0.60012032 0.75991095]] [3.60080094]\n",
"idx: 3 data: [[0.97507757 0.60012032 0.75991095]\n",
" [0.05424665 0.05414227 3.60080094]] [1.06903634]\n",
"idx: 0 data: [[-0.19678927 -0.21825206 2.22286561]\n",
" [-0.08527385 -0.67115734 -0.15445037]] [-1.29696916]\n",
"idx: 1 data: [[-0.08527385 -0.67115734 -0.15445037]\n",
" [ 0.62316456 1.42684226 -1.29696916]] [7.0530133]\n",
"idx: 2 data: [[ 0.62316456 1.42684226 -1.29696916]\n",
" [-0.01915829 0.22782379 7.0530133 ]] [-0.15306061]\n",
"idx: 3 data: [[-0.01915829 0.22782379 7.0530133 ]\n",
" [-0.72957298 1.26745062 -0.15306061]] [3.34648077]\n",
"\n",
"### Data from val set ###\n",
"idx: 0 data: [[ 0.05424665 0.05414227 3.60080094]\n",
" [-0.19670518 2.25039121 1.06903634]] [6.53359576]\n",
"idx: 1 data: [[-0.19670518 2.25039121 1.06903634]\n",
" [-1.84005742 -0.24281547 6.53359576]] [-3.96123686]\n",
"idx: 0 data: [[-0.72957298 1.26745062 -0.15306061]\n",
" [ 1.14548309 -0.42060564 3.34648077]] [0.55794246]\n",
"idx: 1 data: [[ 1.14548309 -0.42060564 3.34648077]\n",
" [-0.11049003 -0.14022136 0.55794246]] [-0.38509046]\n",
"\n",
"### Data from test set ###\n",
"idx: 0 data: [[-1.84005742 -0.24281547 6.53359576]\n",
" [ 0.53974671 1.48055778 -3.96123686]] [5.10164607]\n",
"idx: 1 data: [[ 0.53974671 1.48055778 -3.96123686]\n",
" [ 0.0885949 1.45853039 5.10164607]] [5.47333133]\n",
"idx: 0 data: [[-0.11049003 -0.14022136 0.55794246]\n",
" [ 0.95929229 0.74463911 -0.38509046]] [4.0910728]\n",
"idx: 1 data: [[ 0.95929229 0.74463911 -0.38509046]\n",
" [ 1.20704249 -0.41622404 4.0910728 ]] [0.82957573]\n",
"\n",
"### Data from train set again ###\n",
"idx: 0 data: [[ 1.31636985 0.7897913 -0.75019741]\n",
" [-1.0363052 -0.33563099 5.30310363]] [-1.73725544]\n",
"idx: 1 data: [[-1.0363052 -0.33563099 5.30310363]\n",
" [ 0.61748301 0.12942096 -1.73725544]] [0.75991095]\n",
"idx: 2 data: [[ 0.61748301 0.12942096 -1.73725544]\n",
" [ 0.97507757 0.60012032 0.75991095]] [3.60080094]\n",
"idx: 3 data: [[0.97507757 0.60012032 0.75991095]\n",
" [0.05424665 0.05414227 3.60080094]] [1.06903634]\n"
"idx: 0 data: [[-0.19678927 -0.21825206 2.22286561]\n",
" [-0.08527385 -0.67115734 -0.15445037]] [-1.29696916]\n",
"idx: 1 data: [[-0.08527385 -0.67115734 -0.15445037]\n",
" [ 0.62316456 1.42684226 -1.29696916]] [7.0530133]\n",
"idx: 2 data: [[ 0.62316456 1.42684226 -1.29696916]\n",
" [-0.01915829 0.22782379 7.0530133 ]] [-0.15306061]\n",
"idx: 3 data: [[-0.01915829 0.22782379 7.0530133 ]\n",
" [-0.72957298 1.26745062 -0.15306061]] [3.34648077]\n"
]
}
],
Expand Down Expand Up @@ -845,7 +845,19 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'BaseDataLoader' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#| export\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mMultiShapeLoader\u001b[39;00m(\u001b[43mBaseDataLoader\u001b[49m):\n\u001b[1;32m 4\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;03m A class designed for comlex datasets with mutlipe feature types. The class is more\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;03m memory-efficient than the XYDataLoader, as it separate the storeage of SKU-specific\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124;03m specific SKU.\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# mandatory data\u001b[39;00m\n\u001b[1;32m 16\u001b[0m demand: pd\u001b[38;5;241m.\u001b[39mDataFrame, \u001b[38;5;66;03m# Demand data of shape time x SKU\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 40\u001b[0m provide_additional_target: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;66;03m# follows ICL convention by providing actual demand to token, with the last token receiving 0\u001b[39;00m\n\u001b[1;32m 41\u001b[0m ):\n",
"\u001b[0;31mNameError\u001b[0m: name 'BaseDataLoader' is not defined"
]
}
],
"source": [
"#| export\n",
"class MultiShapeLoader(BaseDataLoader):\n",
Expand Down Expand Up @@ -1444,12 +1456,13 @@
" # Normalize demand targets\n",
" if self.demand_normalization != 'no_normalization':\n",
" # Normalizing per SKU on time dimension\n",
"\n",
" self.scaler_out_of_sample_test_demand.fit(self.demand_out_of_sample_test[:self.train_index_end+1])\n",
" transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_lag_out_of_sample_test)\n",
" transformed_demand = self.scaler_out_of_sample_test_demand.transform(self.demand_out_of_sample_test)\n",
" self.demand_out_of_sample_test.iloc[:,:] = transformed_demand\n",
"\n",
" self.scaler_out_of_sample_val_demand.fit(self.demand_out_of_sample_val[:self.train_index_end+1])\n",
" transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_lag_out_of_sample_val)\n",
" transformed_demand = self.scaler_out_of_sample_val_demand.transform(self.demand_out_of_sample_val)\n",
" self.demand_out_of_sample_val.iloc[:,:] = transformed_demand\n",
" \n",
" # Set unit size for demand targets\n",
Expand Down Expand Up @@ -1512,12 +1525,6 @@
" \n",
" self.normalized_out_of_sample_SKUs = True\n",
"\n",
" print(self.demand_out_of_sample_test)\n",
" print(self.demand_out_of_sample_val)\n",
"\n",
" print(self.demand_lag_out_of_sample_test)\n",
" print(self.demand_lag_out_of_sample_val)\n",
"\n",
" else:\n",
" raise NotImplementedError('Training data can only normalized during initialization - later normlization not implemented yet')\n",
"\n",
Expand Down

0 comments on commit 7289171

Please sign in to comment.