diff --git a/README.md b/README.md
index 23d4907..70cf408 100644
--- a/README.md
+++ b/README.md
@@ -20,19 +20,33 @@ This notebook performs Exploratory Data Analysis (EDA) on the cleaned dataset. T
 - **Correlation Analysis**: Understanding relationships between numerical features.
 - **Outlier Detection**: Using box plots to identify potential outliers.
 
+
+### 3. `Feature_Engineering.ipynb`
+This notebook focuses on feature engineering to enhance the dataset for modeling. Key tasks include:
+- **Aggregate Features**: Creating new features such as total transaction amount, average transaction amount, transaction count, and standard deviation of transaction amounts for each customer.
+- **Time-Based Features**: Extracting features from the transaction timestamp (hour, day, month, year).
+- **Encoding Categorical Variables**: Applying Weight of Evidence (WOE) transformation to categorical features for better model interpretability.
+- **Handling Missing Values**: Implementing strategies for filling or removing missing values in the dataset.
+- **Normalization/Standardization**: Scaling numerical features to ensure they are on a similar scale, improving model performance.
+
+
 ## Requirements
 To run the notebooks, you will need:
 - Python 3.x
 - Pandas
+- NumPy
 - Matplotlib
 - Seaborn
+- Scikit-learn
+- Scorecardpy
 
 
 ## Getting Started
 1. Clone the repository or download the project files.
 2. Install the required packages listed in `requirements.txt`.
 3. Open the notebooks in Jupyter Notebook or any compatible IDE.
-4. Execute the cells in the order provided to complete the data cleaning and EDA.
+4. Execute the cells in the order provided to complete the data cleaning, EDA, and feature engineering processes.
+
 
 ## Conclusion
 The outputs from the EDA and feature engineering notebooks will be utilized in subsequent modeling tasks to develop a robust credit scoring model. Your contributions and feedback are welcome!
diff --git a/notebooks/Feature_Engineering.ipynb b/notebooks/Feature_Engineering.ipynb
new file mode 100644
index 0000000..1cf8b14
--- /dev/null
+++ b/notebooks/Feature_Engineering.ipynb
@@ -0,0 +1,610 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Feature Engineering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "import scorecardpy as sc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load your dataset (replace 'your_file.csv' with your actual file path)\n",
+    "df = pd.read_csv('C:/Users/Administrator/Documents/kifiya/Week_6/cleaned_data.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['TransactionId', 'BatchId', 'AccountId', 'SubscriptionId', 'CustomerId',\n",
+      "       'CurrencyCode', 'CountryCode', 'ProviderId', 'ProductId',\n",
+      "       'ProductCategory', 'ChannelId', 'Amount', 'Value',\n",
+      "       'TransactionStartTime', 'PricingStrategy', 'FraudResult'],\n",
+      "      dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df.columns)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Create Aggregate Features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Total Transaction Amount per customer\n",
+    "df['TotalTransactionAmount'] = df.groupby('CustomerId')['Amount'].transform('sum')\n",
+    "\n",
+    "# Average Transaction Amount per customer\n",
+    "df['AverageTransactionAmount'] = df.groupby('CustomerId')['Amount'].transform('mean')\n",
+    "\n",
+    "# Transaction Count per customer\n",
+    "df['TransactionCount'] = df.groupby('CustomerId')['TransactionId'].transform('count')\n",
+    "\n",
+    "# Standard Deviation of Transaction Amounts per customer\n",
+    "df['TransactionAmountStd'] = df.groupby('CustomerId')['Amount'].transform('std').fillna(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Extract Time-Based Features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['TransactionStartTime'] = pd.to_datetime(df['TransactionStartTime'], errors='coerce')\n",
+    "\n",
+    "df['TransactionHour'] = df['TransactionStartTime'].dt.hour\n",
+    "df['TransactionDay'] = df['TransactionStartTime'].dt.day\n",
+    "df['TransactionMonth'] = df['TransactionStartTime'].dt.month\n",
+    "df['TransactionYear'] = df['TransactionStartTime'].dt.year"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Encode Categorical Variables using WOE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[INFO] creating woe binning ...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_datetime without passing `errors` and catch exceptions explicitly instead\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_datetime without passing `errors` and catch exceptions explicitly instead\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_datetime without passing `errors` and catch exceptions explicitly instead\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\condition_fun.py:40: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "  datetime_cols = dat.apply(pd.to_numeric,errors='ignore').select_dtypes(object).apply(pd.to_datetime,errors='ignore').select_dtypes('datetime64').columns.tolist()\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:361: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  init_bin = init_bin.groupby('brkp', group_keys=False).agg({\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:410: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  total_iv_all_brks = pd.melt(\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n",
+      "c:\\Users\\Administrator\\miniconda3\\envs\\jojo\\lib\\site-packages\\scorecardpy\\woebin.py:446: FutureWarning: The provided callable <built-in function sum> is currently using SeriesGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
+      "  binning_1bst_brk = binning_1bst_brk.groupby(['variable', 'bstbin'], group_keys=False)\\\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[INFO] converting into woe values ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Assuming 'FraudResult' is the target variable and 'ProductCategory', 'ProviderId', 'ChannelId' are the features\n",
+    "features = ['ProductCategory', 'ProviderId', 'ChannelId']\n",
+    "\n",
+    "# Calculate the WOE and IV for each feature\n",
+    "bins = sc.woebin(df, y='FraudResult', x=features)\n",
+    "\n",
+    "# Apply the WOE transformation to the dataset\n",
+    "df_woe = sc.woebin_ply(df, bins)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['TransactionDay', 'TransactionHour', 'BatchId', 'TransactionId',\n",
+      "       'Value', 'TransactionYear', 'ProductId', 'CurrencyCode',\n",
+      "       'TotalTransactionAmount', 'TransactionCount', 'Amount',\n",
+      "       'SubscriptionId', 'TransactionMonth', 'PricingStrategy', 'AccountId',\n",
+      "       'FraudResult', 'AverageTransactionAmount', 'TransactionStartTime',\n",
+      "       'CountryCode', 'TransactionAmountStd', 'CustomerId', 'ProviderId_woe',\n",
+      "       'ProductCategory_woe', 'ChannelId_woe'],\n",
+      "      dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df_woe.columns)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Handle Missing Values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_15060\\1868837317.py:7: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df_woe[col].fillna(df_woe[col].mode()[0], inplace=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fill missing numerical columns with median\n",
+    "for col in ['Amount', 'Value', 'TotalTransactionAmount', 'AverageTransactionAmount', 'TransactionAmountStd']:\n",
+    "    df_woe[col].fillna(df_woe[col].median(), inplace=True)\n",
+    "\n",
+    "# Handle missing values for categorical WOE columns\n",
+    "for col in ['ProductCategory_woe', 'CurrencyCode', 'ProviderId_woe', 'ChannelId_woe']:\n",
+    "    df_woe[col].fillna(df_woe[col].mode()[0], inplace=True)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Normalize/Standardize Numerical Features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standardize (mean=0, std=1) the numerical features\n",
+    "scaler = StandardScaler()\n",
+    "df_woe[['Amount', 'Value', 'TotalTransactionAmount', 'AverageTransactionAmount', 'TransactionAmountStd']] = scaler.fit_transform(\n",
+    "    df_woe[['Amount', 'Value', 'TotalTransactionAmount', 'AverageTransactionAmount', 'TransactionAmountStd']]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>TransactionDay</th>\n",
+       "      <th>TransactionHour</th>\n",
+       "      <th>BatchId</th>\n",
+       "      <th>TransactionId</th>\n",
+       "      <th>Value</th>\n",
+       "      <th>TransactionYear</th>\n",
+       "      <th>ProductId</th>\n",
+       "      <th>CurrencyCode</th>\n",
+       "      <th>TotalTransactionAmount</th>\n",
+       "      <th>TransactionCount</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>FraudResult</th>\n",
+       "      <th>AverageTransactionAmount</th>\n",
+       "      <th>TransactionStartTime</th>\n",
+       "      <th>CountryCode</th>\n",
+       "      <th>TransactionAmountStd</th>\n",
+       "      <th>CustomerId</th>\n",
+       "      <th>ProviderId_woe</th>\n",
+       "      <th>ProductCategory_woe</th>\n",
+       "      <th>ChannelId_woe</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>15</td>\n",
+       "      <td>2</td>\n",
+       "      <td>BatchId_36123</td>\n",
+       "      <td>TransactionId_76871</td>\n",
+       "      <td>-0.072291</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>ProductId_10</td>\n",
+       "      <td>UGX</td>\n",
+       "      <td>0.170118</td>\n",
+       "      <td>119</td>\n",
+       "      <td>...</td>\n",
+       "      <td>AccountId_3957</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-0.067623</td>\n",
+       "      <td>2018-11-15 02:18:49+00:00</td>\n",
+       "      <td>256</td>\n",
+       "      <td>-0.167016</td>\n",
+       "      <td>CustomerId_4406</td>\n",
+       "      <td>-2.906446</td>\n",
+       "      <td>-1.690824</td>\n",
+       "      <td>0.484515</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15</td>\n",
+       "      <td>2</td>\n",
+       "      <td>BatchId_15642</td>\n",
+       "      <td>TransactionId_73770</td>\n",
+       "      <td>-0.080251</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>ProductId_6</td>\n",
+       "      <td>UGX</td>\n",
+       "      <td>0.170118</td>\n",
+       "      <td>119</td>\n",
+       "      <td>...</td>\n",
+       "      <td>AccountId_4841</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-0.067623</td>\n",
+       "      <td>2018-11-15 02:19:08+00:00</td>\n",
+       "      <td>256</td>\n",
+       "      <td>-0.167016</td>\n",
+       "      <td>CustomerId_4406</td>\n",
+       "      <td>-2.906446</td>\n",
+       "      <td>0.607033</td>\n",
+       "      <td>-2.736867</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>15</td>\n",
+       "      <td>2</td>\n",
+       "      <td>BatchId_53941</td>\n",
+       "      <td>TransactionId_26203</td>\n",
+       "      <td>-0.076352</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>ProductId_1</td>\n",
+       "      <td>UGX</td>\n",
+       "      <td>0.165122</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>AccountId_4229</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-0.072568</td>\n",
+       "      <td>2018-11-15 02:44:21+00:00</td>\n",
+       "      <td>256</td>\n",
+       "      <td>-0.201209</td>\n",
+       "      <td>CustomerId_4683</td>\n",
+       "      <td>-2.906446</td>\n",
+       "      <td>-1.690824</td>\n",
+       "      <td>0.484515</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>15</td>\n",
+       "      <td>3</td>\n",
+       "      <td>BatchId_102363</td>\n",
+       "      <td>TransactionId_380</td>\n",
+       "      <td>0.096648</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>ProductId_21</td>\n",
+       "      <td>UGX</td>\n",
+       "      <td>0.175567</td>\n",
+       "      <td>38</td>\n",
+       "      <td>...</td>\n",
+       "      <td>AccountId_648</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-0.008155</td>\n",
+       "      <td>2018-11-15 03:32:55+00:00</td>\n",
+       "      <td>256</td>\n",
+       "      <td>-0.008243</td>\n",
+       "      <td>CustomerId_988</td>\n",
+       "      <td>1.939442</td>\n",
+       "      <td>0.607033</td>\n",
+       "      <td>0.484515</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>15</td>\n",
+       "      <td>3</td>\n",
+       "      <td>BatchId_38780</td>\n",
+       "      <td>TransactionId_28195</td>\n",
+       "      <td>-0.075183</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>ProductId_6</td>\n",
+       "      <td>UGX</td>\n",
+       "      <td>0.175567</td>\n",
+       "      <td>38</td>\n",
+       "      <td>...</td>\n",
+       "      <td>AccountId_4841</td>\n",
+       "      <td>0</td>\n",
+       "      <td>-0.008155</td>\n",
+       "      <td>2018-11-15 03:34:21+00:00</td>\n",
+       "      <td>256</td>\n",
+       "      <td>-0.008243</td>\n",
+       "      <td>CustomerId_988</td>\n",
+       "      <td>-2.906446</td>\n",
+       "      <td>0.607033</td>\n",
+       "      <td>-2.736867</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 24 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   TransactionDay  TransactionHour         BatchId        TransactionId  \\\n",
+       "0              15                2   BatchId_36123  TransactionId_76871   \n",
+       "1              15                2   BatchId_15642  TransactionId_73770   \n",
+       "2              15                2   BatchId_53941  TransactionId_26203   \n",
+       "3              15                3  BatchId_102363    TransactionId_380   \n",
+       "4              15                3   BatchId_38780  TransactionId_28195   \n",
+       "\n",
+       "      Value  TransactionYear     ProductId CurrencyCode  \\\n",
+       "0 -0.072291             2018  ProductId_10          UGX   \n",
+       "1 -0.080251             2018   ProductId_6          UGX   \n",
+       "2 -0.076352             2018   ProductId_1          UGX   \n",
+       "3  0.096648             2018  ProductId_21          UGX   \n",
+       "4 -0.075183             2018   ProductId_6          UGX   \n",
+       "\n",
+       "   TotalTransactionAmount  TransactionCount  ...       AccountId FraudResult  \\\n",
+       "0                0.170118               119  ...  AccountId_3957           0   \n",
+       "1                0.170118               119  ...  AccountId_4841           0   \n",
+       "2                0.165122                 2  ...  AccountId_4229           0   \n",
+       "3                0.175567                38  ...   AccountId_648           0   \n",
+       "4                0.175567                38  ...  AccountId_4841           0   \n",
+       "\n",
+       "   AverageTransactionAmount      TransactionStartTime CountryCode  \\\n",
+       "0                 -0.067623 2018-11-15 02:18:49+00:00         256   \n",
+       "1                 -0.067623 2018-11-15 02:19:08+00:00         256   \n",
+       "2                 -0.072568 2018-11-15 02:44:21+00:00         256   \n",
+       "3                 -0.008155 2018-11-15 03:32:55+00:00         256   \n",
+       "4                 -0.008155 2018-11-15 03:34:21+00:00         256   \n",
+       "\n",
+       "   TransactionAmountStd       CustomerId ProviderId_woe  ProductCategory_woe  \\\n",
+       "0             -0.167016  CustomerId_4406      -2.906446            -1.690824   \n",
+       "1             -0.167016  CustomerId_4406      -2.906446             0.607033   \n",
+       "2             -0.201209  CustomerId_4683      -2.906446            -1.690824   \n",
+       "3             -0.008243   CustomerId_988       1.939442             0.607033   \n",
+       "4             -0.008243   CustomerId_988      -2.906446             0.607033   \n",
+       "\n",
+       "   ChannelId_woe  \n",
+       "0       0.484515  \n",
+       "1      -2.736867  \n",
+       "2       0.484515  \n",
+       "3       0.484515  \n",
+       "4      -2.736867  \n",
+       "\n",
+       "[5 rows x 24 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Final check of the dataset\n",
+    "display(df_woe.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the processed dataset to CSV \n",
+    "df_woe.to_csv('C:/Users/Administrator/Documents/kifiya/Week_6/processed_data.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "jojo",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.20"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
index 3b9a24f..bfd7d91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,7 @@
 pandas
 matplotlib
-seaborn
\ No newline at end of file
+seaborn
+scorecardpy
+Scikit-learn
+
+