Skip to content

Commit

Permalink
(PC-33771)[API] script: clean offer with ean inside title (part1)
Browse files Browse the repository at this point in the history
First step: clean books, CDs and vinyls. If an offer has an EAN
inside its name, find the matching product and update the offer's
information from it. If the EAN is unknown or if the product is not
allowed by the GCU, reject the offer.
  • Loading branch information
jbaudet-pass committed Jan 16, 2025
1 parent 1c75be1 commit 2c77634
Show file tree
Hide file tree
Showing 2 changed files with 520 additions and 0 deletions.
242 changes: 242 additions & 0 deletions api/src/pcapi/scripts/clean_offer_titles_with_eans/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
from dataclasses import dataclass
from datetime import datetime
from datetime import timezone as tz
import functools
import json
import logging
from typing import Callable
from typing import Collection
from typing import cast

import sqlalchemy as sa

from pcapi.core.bookings import api as bookings_api
from pcapi.core.mails import transactional as transactional_mails
from pcapi.core.offers.models import GcuCompatibilityType
from pcapi.core.offers.models import Offer
from pcapi.core.offers.models import OfferValidationStatus
from pcapi.flask_app import app
from pcapi.models import db
from pcapi.models.offer_mixin import OfferValidationType
from pcapi.repository import atomic
from pcapi.repository import on_commit
from pcapi.utils.chunks import get_chunks


logger = logging.getLogger(__name__)

# Mandatory since this module uses atomic() which needs an application context.
# The context is pushed at import time (so merely importing this module has a
# side effect) and is never popped in this file.
app.app_context().push()


# Raw SQL returning at most 10000 rows, one per (offer, matching product) pair.
#
# Inner query: offers that are not REJECTED, belong to the book / CD / vinyl
# subcategories and whose name contains 13 consecutive digits; that 13-digit
# run is extracted as `ean`.
# Outer query: LEFT JOIN on product through jsonData->>'ean', so offers whose
# EAN matches no product are still returned, with `exists` false and NULL
# product columns.
#
# The column order matters: get_offers_with_ean_inside_title() reads the
# columns by position.
#
# Note that this is not a raw Python string: '\\d' reaches the database as '\d'.
# NOTE(review): there is no ORDER BY, so which 10000 rows come back is not
# specified — confirm this is acceptable for the batching done in run().
BOOKS_CDS_VINYLES_QUERY = """
SELECT
sub.id,
sub.ean,
sub.name,
sub."subcategoryId",
sub."isActive",
p.id is not null as exists,
p.id as product_id,
p.name as product_name,
p."jsonData" as product_json_data,
p."gcuCompatibilityType"
FROM (
SELECT
id,
substring("name" similar '%#"[[:digit:]]{13}#"%' escape '#') as ean,
name,
"subcategoryId",
"isActive"
FROM
offer
WHERE
"name" similar to '%\\d{13}%'
and "validation" != 'REJECTED'
and "subcategoryId" in (
'LIVRE_PAPIER',
'SUPPORT_PHYSIQUE_MUSIQUE_CD',
'SUPPORT_PHYSIQUE_MUSIQUE_VINYLE'
)
) sub
LEFT JOIN
product p on p."jsonData"->>'ean' = sub.ean
LIMIT
10000
"""


@dataclass(frozen=True)
class OfferEanQueryRow:
    """One result row of ``BOOKS_CDS_VINYLES_QUERY``.

    The dataclass is frozen, which makes instances hashable: ``parse_offers``
    stores them in sets.
    """

    # Offer columns.
    id: int
    # 13-digit run extracted from the offer name by the query.
    ean: str
    name: str
    subcategory: str
    is_active: bool
    # True when a product matched the extracted EAN.
    exists: bool
    # Product columns: all None when no product matched (LEFT JOIN).
    product_id: int | None
    product_name: str | None
    # Product "jsonData" re-serialized as a JSON string by
    # get_offers_with_ean_inside_title().
    product_json_data: str | None
    # Product "gcuCompatibilityType"; parse_offers compares it to
    # GcuCompatibilityType.COMPATIBLE.value.
    gcu_compatibility: str | None


def get_offers_with_ean_inside_title() -> Collection[OfferEanQueryRow]:
    """Run ``BOOKS_CDS_VINYLES_QUERY`` and wrap every result row.

    Columns are read by position, in the order of the query's SELECT list.
    The product JSON data is serialized to a string (or kept as ``None``) so
    that the resulting rows only hold hashable values.

    Returns:
        A list of ``OfferEanQueryRow``, empty when the query matches nothing.
    """
    result = db.session.execute(sa.text(BOOKS_CDS_VINYLES_QUERY))

    return [
        OfferEanQueryRow(
            id=offer_id,
            ean=ean,
            name=offer_name,
            subcategory=subcategory,
            is_active=is_active,
            exists=product_exists,
            product_id=product_id,
            product_name=product_name,
            product_json_data=None if json_data is None else json.dumps(json_data),
            gcu_compatibility=gcu_compatibility,
        )
        for (
            offer_id,
            ean,
            offer_name,
            subcategory,
            is_active,
            product_exists,
            product_id,
            product_name,
            json_data,
            gcu_compatibility,
        ) in result
    ]


def run() -> None:
    """Clean, batch after batch, every offer whose name contains an EAN.

    Each iteration fetches a batch with ``get_offers_with_ean_inside_title``
    and hands the offers not handled yet to ``parse_offers``.

    An offer that was handled can still match the query afterwards (its
    processing failed and was only logged, or its new name still contains
    13 digits). To guarantee termination, every offer is handled at most once
    per run and the loop stops as soon as a batch brings no new offer.
    Since the query is limited in size, offers hidden behind a full batch of
    already-handled ones are left for a later run; this is logged.
    """
    handled_offer_ids: set[int] = set()

    while True:
        rows = get_offers_with_ean_inside_title()
        pending_rows = [row for row in rows if row.id not in handled_offer_ids]

        if not pending_rows:
            if rows:
                # Nothing new, yet the query still matches: stop instead of
                # looping forever on the same offers.
                logger.info(
                    "%s offer row(s) still match after being handled, stopping",
                    len(rows),
                )
            break

        handled_offer_ids.update(row.id for row in pending_rows)
        parse_offers(pending_rows)


def parse_offers(rows: Collection[OfferEanQueryRow]) -> None:
    """Sort rows into three groups, chunk by chunk, and process each group.

    For every chunk of 2000 rows:
      * rows without a matching product go to ``update_unknown_offers``;
      * rows whose product is not GCU compatible go to
        ``update_gcu_incompatible_offers``;
      * all other rows go to ``update_legit_offers``.

    The three handlers are always called, even with an empty group.
    """
    compatible = GcuCompatibilityType.COMPATIBLE.value

    for chunk in get_chunks(rows, chunk_size=2_000):
        unknown: set[OfferEanQueryRow] = set()
        incompatible: set[OfferEanQueryRow] = set()
        legit: set[OfferEanQueryRow] = set()

        for row in chunk:
            if row.exists and row.gcu_compatibility == compatible:
                bucket = legit
            elif row.exists:
                bucket = incompatible
            else:
                bucket = unknown
            bucket.add(row)

        update_unknown_offers(unknown)
        update_gcu_incompatible_offers(incompatible)
        update_legit_offers(legit)


def retry_and_log(func: Callable) -> Callable:
    """Decorate a bulk handler so that a failure is retried on smaller chunks.

    The wrapped handler is first called with all the rows. If it raises, the
    rows are split in (about) five chunks and each chunk is retried on its
    own; a failing chunk is split again, recursively, until the failing rows
    are isolated. A row that fails on its own is logged and skipped.

    Every attempt, including the first one, runs inside its own ``atomic()``
    block so that a failed attempt does not leave partial changes behind
    before the next one starts (this relies on ``atomic()`` undoing the work
    of the block it wraps when an exception goes through it — confirm against
    ``pcapi.repository.atomic``).

    This function is deliberately not wrapped by ``atomic()`` itself: it only
    runs once, at decoration time, and touches no data.

    Args:
        func: handler taking a collection of ``OfferEanQueryRow``.

    Returns:
        A wrapper with the same argument that returns ``True`` when the first
        attempt succeeded and ``False`` when it had to fall back to chunked
        retries. It never propagates an ``Exception`` raised by ``func``.
    """

    def retry_one_chunk_at_a_time(offer_rows: Collection[OfferEanQueryRow]) -> None:
        chunk_size = max(len(offer_rows) // 5, 1)

        for chunk in get_chunks(offer_rows, chunk_size=chunk_size):
            try:
                with atomic():
                    func(chunk)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                if len(chunk) <= 1:
                    # Cannot split any further: report the culprit and move on.
                    # (A trailing chunk can hold a single row even when
                    # chunk_size is bigger, no need to retry it once more.)
                    row = chunk[0]
                    msg = "[%s][%s] could not handle offer #%s with product #%s (ean: %s)"
                    logger.info(msg, str(exc), func.__name__, row.id, row.product_id, row.ean)
                else:
                    retry_one_chunk_at_a_time(chunk)

    @functools.wraps(func)
    def inner(offer_rows: Collection[OfferEanQueryRow]) -> bool:
        try:
            # Same isolation as the retries: without it, whatever the failed
            # attempt did would leak into the retries' transaction.
            with atomic():
                func(offer_rows)
        except Exception:  # pylint: disable=broad-exception-caught
            retry_one_chunk_at_a_time(offer_rows)
            return False
        return True

    return inner


@atomic()
@retry_and_log
def update_unknown_offers(offer_rows: Collection[OfferEanQueryRow]) -> None:
    """Reject the offers whose EAN matches no product.

    ``parse_offers`` calls this with the rows whose ``exists`` flag is false.
    The call goes through ``retry_and_log`` (chunked retries on failure) and
    the whole thing is wrapped by ``atomic()``.
    """
    reject_offers(offer_rows)


@atomic()
@retry_and_log
def update_gcu_incompatible_offers(offer_rows: Collection[OfferEanQueryRow]) -> None:
    """Reject the offers whose product is not GCU compatible.

    ``parse_offers`` calls this with the rows that have a matching product
    whose compatibility is not ``GcuCompatibilityType.COMPATIBLE``.
    The call goes through ``retry_and_log`` (chunked retries on failure) and
    the whole thing is wrapped by ``atomic()``.
    """
    reject_offers(offer_rows)


@atomic()
@retry_and_log
def update_legit_offers(offer_rows: Collection[OfferEanQueryRow]) -> None:
    """Overwrite offers' name (and extra data) with their product's.

    For every offer targeted by ``offer_rows``, the name is replaced by the
    product name. When the row carries product JSON data, the offer's
    ``extraData`` is replaced by the decoded data.
    """
    rows_by_offer_id = {row.id: row for row in offer_rows}
    offers = Offer.query.filter(Offer.id.in_(set(rows_by_offer_id)))

    with atomic():
        for offer in offers:
            row = rows_by_offer_id[offer.id]
            offer.name = row.product_name

            if row.product_json_data:
                # The JSON data travels as a string inside the row.
                offer.extraData = json.loads(cast(str, row.product_json_data))

            db.session.add(offer)


def reject_offers(offer_rows: Collection[OfferEanQueryRow]) -> None:
    """Reject the offers targeted by ``offer_rows``.

    For every targeted offer that is not rejected yet:
      * its bookings are cancelled and one cancellation email per cancelled
        booking is handed to ``on_commit``;
      * a validation status update email is handed to ``on_commit``, for the
        venue's booking email or, failing that, for the users of the managing
        offerer.

    The offers are then rejected and deactivated with a single bulk UPDATE.

    NOTE(review): the filter compares ``Offer.status`` with an
    ``OfferValidationStatus`` value whereas the UPDATE writes ``validation``
    — confirm both always agree on "REJECTED".
    """

    def cancel_booking(offer: Offer) -> None:
        for cancelled in bookings_api.cancel_bookings_from_rejected_offer(offer):
            send_cancellation = functools.partial(
                transactional_mails.send_booking_cancellation_by_pro_to_beneficiary_email,
                cancelled,
                rejected_by_fraud_action=True,
            )
            on_commit(send_cancellation)

    def notify_offerer(offer: Offer) -> None:
        if offer.venue.bookingEmail:
            recipients = [offer.venue.bookingEmail]
        else:
            user_offerers = offer.venue.managingOfferer.UserOfferers
            recipients = [user_offerer.user.email for user_offerer in user_offerers]

        # Built before the bulk UPDATE: offer.validation is still the
        # previous validation status at this point.
        email_data = transactional_mails.get_email_data_from_offer(
            offer, offer.validation, OfferValidationStatus.REJECTED
        )
        send_status_update = functools.partial(
            transactional_mails.send_offer_validation_status_update_email,
            email_data,
            recipients,
        )
        on_commit(send_status_update)

    offers_to_reject = Offer.query.filter(
        Offer.id.in_({row.id for row in offer_rows}),
        Offer.status != OfferValidationStatus.REJECTED.value,
    )

    for offer in offers_to_reject:
        cancel_booking(offer)
        notify_offerer(offer)

    rejection_values = {
        "validation": OfferValidationStatus.REJECTED.value,
        "lastValidationDate": datetime.now(tz.utc),  # pylint: disable=datetime-now
        "lastValidationType": OfferValidationType.AUTO.value,
        "lastValidationAuthorUserId": None,
        "isActive": False,
    }
    offers_to_reject.update(rejection_values, synchronize_session=False)
Loading

0 comments on commit 2c77634

Please sign in to comment.