Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #3851] Delete historical data from our tables #3954

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions api/bin/sql/delete_historical_data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
-- Purpose: delete historical opportunity_summary rows (those with a non-null
-- revision_number) together with the link-table rows that reference them.
--
-- Safety: the script ends with ROLLBACK, so running it as-is changes nothing.
-- To apply the deletion, review the counts it prints and replace the final
-- ROLLBACK with COMMIT.

-- Start transaction
BEGIN;

-- Snapshot the ids of the rows that will be deleted. Once the
-- opportunity_summary rows are gone they can no longer be joined against, so
-- the post-deletion checks on the link tables use this snapshot instead.
-- ON COMMIT DROP removes the table at COMMIT; a ROLLBACK undoes its creation.
CREATE TEMP TABLE historical_opportunity_summary_ids ON COMMIT DROP AS
SELECT opportunity_summary_id
FROM api.opportunity_summary
WHERE revision_number IS NOT NULL;

-- First verify counts of what we'll delete - for validation
SELECT 'Link Funding Instrument Historical Records' as table_name, COUNT(*) as count_to_delete
FROM api.link_opportunity_summary_funding_instrument li
JOIN api.opportunity_summary os ON li.opportunity_summary_id = os.opportunity_summary_id
WHERE os.revision_number IS NOT NULL;

SELECT 'Link Funding Category Historical Records' as table_name, COUNT(*) as count_to_delete
FROM api.link_opportunity_summary_funding_category lc
JOIN api.opportunity_summary os ON lc.opportunity_summary_id = os.opportunity_summary_id
WHERE os.revision_number IS NOT NULL;

SELECT 'Link Applicant Type Historical Records' as table_name, COUNT(*) as count_to_delete
FROM api.link_opportunity_summary_applicant_type la
JOIN api.opportunity_summary os ON la.opportunity_summary_id = os.opportunity_summary_id
WHERE os.revision_number IS NOT NULL;

SELECT 'Opportunity Summary Historical Records' as table_name, COUNT(*) as count_to_delete
FROM api.opportunity_summary
WHERE revision_number IS NOT NULL;

-- Delete from link tables first - they reference opportunity_summary, so their
-- rows must go before the opportunity_summary rows they point at
DELETE FROM api.link_opportunity_summary_funding_instrument li
USING api.opportunity_summary os
WHERE li.opportunity_summary_id = os.opportunity_summary_id
AND os.revision_number IS NOT NULL;

DELETE FROM api.link_opportunity_summary_funding_category lc
USING api.opportunity_summary os
WHERE lc.opportunity_summary_id = os.opportunity_summary_id
AND os.revision_number IS NOT NULL;

DELETE FROM api.link_opportunity_summary_applicant_type la
USING api.opportunity_summary os
WHERE la.opportunity_summary_id = os.opportunity_summary_id
AND os.revision_number IS NOT NULL;

-- Then delete from opportunity_summary
DELETE FROM api.opportunity_summary
WHERE revision_number IS NOT NULL;

-- Verify counts after deletion (should all be 0).
-- The link-table checks join against the id snapshot taken above rather than
-- api.opportunity_summary: the historical summary rows no longer exist at this
-- point, so a join against them would report 0 no matter what was left behind.
SELECT 'Remaining Link Funding Instrument Historical Records' as table_name, COUNT(*) as remaining_count
FROM api.link_opportunity_summary_funding_instrument li
JOIN historical_opportunity_summary_ids h ON li.opportunity_summary_id = h.opportunity_summary_id;

SELECT 'Remaining Link Funding Category Historical Records' as table_name, COUNT(*) as remaining_count
FROM api.link_opportunity_summary_funding_category lc
JOIN historical_opportunity_summary_ids h ON lc.opportunity_summary_id = h.opportunity_summary_id;

SELECT 'Remaining Link Applicant Type Historical Records' as table_name, COUNT(*) as remaining_count
FROM api.link_opportunity_summary_applicant_type la
JOIN historical_opportunity_summary_ids h ON la.opportunity_summary_id = h.opportunity_summary_id;

SELECT 'Remaining Opportunity Summary Historical Records' as table_name, COUNT(*) as remaining_count
FROM api.opportunity_summary
WHERE revision_number IS NOT NULL;

-- If the counts above look right, end the transaction with COMMIT.
-- Otherwise end it with ROLLBACK.
--
-- ROLLBACK is the default so that an accidental run deletes nothing;
-- to apply the deletion, comment out ROLLBACK and uncomment COMMIT.
-- COMMIT;
ROLLBACK;
75 changes: 56 additions & 19 deletions api/tests/lib/seed_local_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,74 @@


def _build_opportunities(db_session: db.Session, iterations: int) -> None:
# Just create a variety of opportunities for local testing
# we can eventually look into creating more specific scenarios
for i in range(iterations):
logger.info(f"Creating opportunity batch number {i}")
# Create a few opportunities in various scenarios
factories.OpportunityFactory.create_batch(size=5, is_forecasted_summary=True)
factories.OpportunityFactory.create_batch(
size=5, is_posted_summary=True, has_attachments=True

# Create regular (non-historical) opportunities
opportunity1 = factories.OpportunityFactory.create(
no_current_summary=True, opportunity_assistance_listings=[]
)

# Create current summaries - only one per is_forecast value
factories.OpportunitySummaryFactory.create(
is_forecasted_summary=True,
revision_number=None, # Current records
opportunity=opportunity1,
)
factories.OpportunityFactory.create_batch(size=5, is_closed_summary=True)
factories.OpportunityFactory.create_batch(size=5, is_archived_non_forecast_summary=True)
factories.OpportunityFactory.create_batch(size=5, is_archived_forecast_summary=True)
factories.OpportunityFactory.create_batch(size=5, no_current_summary=True)
factories.OpportunityFactory.create_batch(
size=2, is_posted_summary=True, has_long_descriptions=True
factories.OpportunitySummaryFactory.create(
is_posted_summary=True,
revision_number=None, # Current records
opportunity=opportunity1,
)

# generate a few opportunities with mostly null values
# Create historical summaries with unique revision numbers
historical_summaries = []

# Forecast historical records
for rev_num in range(1, 4): # Revisions 1, 2, 3
summary = factories.OpportunitySummaryFactory.create(
is_forecasted_summary=True, revision_number=rev_num, opportunity=opportunity1
)
historical_summaries.append(summary)

# Posted historical records
for rev_num in range(4, 7): # Revisions 4, 5, 6
summary = factories.OpportunitySummaryFactory.create(
is_posted_summary=True, revision_number=rev_num, opportunity=opportunity1
)
historical_summaries.append(summary)

all_null_opportunities = factories.OpportunityFactory.create_batch(
size=5, all_fields_null=True
size=2, all_fields_null=True
)
for all_null_opportunity in all_null_opportunities:
summary = factories.OpportunitySummaryFactory.create(
# We set post_date to something so that running the set-current-opportunities logic
# won't get rid of it for having a null post date
for idx, all_null_opportunity in enumerate(all_null_opportunities):
# Current summary
current_summary = factories.OpportunitySummaryFactory.create(
all_fields_null=True,
opportunity=all_null_opportunity,
post_date=datetime_util.get_now_us_eastern_date(),
revision_number=None, # Current record
is_forecasted_summary=bool(idx % 2), # Alternate between forecast and posted
)
factories.CurrentOpportunitySummaryFactory.create(
opportunity=all_null_opportunity, opportunity_summary=summary
opportunity=all_null_opportunity, opportunity_summary=current_summary
)

# Historical summary - use unique revision numbers
historical_summary = factories.OpportunitySummaryFactory.create(
all_fields_null=True,
opportunity=all_null_opportunity,
post_date=datetime_util.get_now_us_eastern_date(),
revision_number=10 + idx, # Unique revision numbers (10, 11)
is_forecasted_summary=bool(idx % 2), # Alternate between forecast and posted
)

factories.LinkOpportunitySummaryFundingCategoryFactory.create(
opportunity_summary=historical_summary
)

factories.LinkOpportunitySummaryApplicantTypeFactory.create(
opportunity_summary=historical_summary
)

logger.info("Finished creating opportunities")
Expand Down