feat: add date-range filtering to merge script

bcgov · Jan 10, 2025 · ca39e08 · ca39e08
1 parent b113986
commit ca39e08
Showing 1 changed file with 18 additions and 2 deletions.
diff --git a/exports/merge_exports.py b/exports/merge_exports.py
@@ -9,22 +9,38 @@
 import pandas as pd
 
 def main():
-    # Define filenames
+    # Define constants
     complaint_file = "complaints.csv"
     case_file = "cases.csv"
     output_file = "NatCom_Export.csv"
     merge_column = "Record ID" # CEEB = "Record ID" COS = "Complaint Identifier"
+    complaint_date_column = "Date Received"
+    case_date_column = "Date Action Taken"
+
+    # Define the date range for filtering
+    start_date = pd.to_datetime("2024-10-01")  # Example start date
+    end_date = pd.to_datetime("2024-12-31")  # Example end date
 
     try:
         # Load data from both files
         complaint_df = pd.read_csv(complaint_file)
         case_df = pd.read_csv(case_file)
 
+        # Convert the date columns to datetime 
+        complaint_df[complaint_date_column] = pd.to_datetime(complaint_df[complaint_date_column], errors='coerce')
+        case_df[case_date_column] = pd.to_datetime(case_df[case_date_column], errors='coerce')
+
         # Merge data on 'Record ID' with validation
         combined_df = pd.merge(complaint_df, case_df, on=merge_column, how="outer", validate="many_to_many")
 
+        # Filter the data: keep rows whose complaint date or case date falls within the date range (inclusive)
+        filtered_df = combined_df[
+            ((combined_df[complaint_date_column] >= start_date) & (combined_df[complaint_date_column] <= end_date)) |
+            ((combined_df[case_date_column] >= start_date) & (combined_df[case_date_column] <= end_date))
+        ]
+
         # Save the merged data to a new CSV file
-        combined_df.to_csv(output_file, index=False, encoding='utf-8-sig')
+        filtered_df.to_csv(output_file, index=False, encoding='utf-8-sig')
         print(f"Data successfully merged into {output_file}")
 
     except FileNotFoundError as e: