Skip to content

Commit

Permalink
feat: add date logic in merge script
Browse files (browse the repository at this point in the history)
  • Loading branch information
afwilcox committed Jan 10, 2025
1 parent b113986 commit ca39e08
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions exports/merge_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,38 @@
import pandas as pd

def main():
# Define filenames
# Define constants
complaint_file = "complaints.csv"
case_file = "cases.csv"
output_file = "NatCom_Export.csv"
merge_column = "Record ID" # CEEB = "Record ID" COS = "Complaint Identifier"
complaint_date_column = "Date Received"
case_date_column = "Date Action Taken"

# Define the date range for filtering
start_date = pd.to_datetime("2024-10-01") # Example start date
end_date = pd.to_datetime("2024-12-31") # Example end date

try:
# Load data from both files
complaint_df = pd.read_csv(complaint_file)
case_df = pd.read_csv(case_file)

# Convert the date columns to datetime
complaint_df[complaint_date_column] = pd.to_datetime(complaint_df[complaint_date_column], errors='coerce')
case_df[case_date_column] = pd.to_datetime(case_df[case_date_column], errors='coerce')

# Merge data on 'Record ID' with validation
combined_df = pd.merge(complaint_df, case_df, on=merge_column, how="outer", validate="many_to_many")

# Filter the data based on the date range for both complaint and case dates
filtered_df = combined_df[
((combined_df[complaint_date_column] >= start_date) & (combined_df[complaint_date_column] <= end_date)) |
((combined_df[case_date_column] >= start_date) & (combined_df[case_date_column] <= end_date))
]

# Save the merged data to a new CSV file
combined_df.to_csv(output_file, index=False, encoding='utf-8-sig')
filtered_df.to_csv(output_file, index=False, encoding='utf-8-sig')
print(f"Data successfully merged into {output_file}")

except FileNotFoundError as e:
Expand Down

0 comments on commit ca39e08

Please sign in to comment.