From 3c15d4eac842614a64d705870faa0c729bea7670 Mon Sep 17 00:00:00 2001 From: proffapt Date: Fri, 29 Nov 2024 19:05:36 +0530 Subject: [PATCH] refactor: shortlist logic (notice.py -> shortlist.py) --- mftp/mftp.py | 9 +++++---- mftp/notice.py | 25 +++---------------------- mftp/shortlist.py | 39 +++++++++++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/mftp/mftp.py b/mftp/mftp.py index e5de152..479ff93 100644 --- a/mftp/mftp.py +++ b/mftp/mftp.py @@ -5,6 +5,7 @@ import ntfy import notice import company +import shortlist import logging import requests @@ -101,16 +102,16 @@ ) notice_db.connect() - notices, shortlists = notice.fetch(headers, session, ssoToken, notice_db) - if shortlists: - if args.gmail_api or args.smtp: - pass + notices = notice.fetch(headers, session, ssoToken, notice_db) if notices: if args.ntfy: notifications = ntfy.format_notices(notices) if notifications: ntfy.send_notices(notifications, notice_db) else: + shortlists = shortlist.search(notices) + if shortlists: + pass mails = mail.format_notices(notices) if mails: mail.send_notices(mails, args.smtp, args.gmail_api, notice_db) diff --git a/mftp/notice.py b/mftp/notice.py index dd4f6a5..a2e1976 100644 --- a/mftp/notice.py +++ b/mftp/notice.py @@ -3,7 +3,6 @@ from bs4 import BeautifulSoup as bs from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL, NOTICE_CONTENT_URL -import shortlist LAST_NOTICES_CHECK_COUNT = 30 @@ -26,7 +25,6 @@ def fetch(headers, session, ssoToken, notice_db): logging.error(f" Failed to extract data from Noticeboard ~ {str(e)}") return [] - shortlists = [] latest_X_notices = [] for i, row in enumerate(root.findall('row')): if i >= LAST_NOTICES_CHECK_COUNT: @@ -50,15 +48,6 @@ def fetch(headers, session, ssoToken, notice_db): logging.error(f" Failed to parse notice body ~ {str(e)}") break - # Handling Shortists in Body - try: - body_shortlists = shortlist.from_notice_body(notice) - if body_shortlists: - shortlists.append(body_shortlists) - except Exception as e: - logging.error(f" Failed to parse shortlists from notice body ~ {str(e)}") - continue - # Handling attachment try: attachment = parse_attachment(session, year, id_) @@ -68,16 +57,6 @@ def fetch(headers, session, ssoToken, notice_db): logging.error(f" Failed to parse attachment ~ {str(e)}") break - # Handling Shortlist in attachment - try: - if 'Attachment' in notice: - attachment_shortlists = shortlist.from_attachment(notice) - if attachment_shortlists: - shortlists.append(attachment_shortlists) - except Exception as e: - logging.error(f" Failed to parse shortlists from attachment ~ {str(e)}") - continue - latest_X_notices.append(notice) # This is done to reduce DB queries @@ -94,7 +73,9 @@ def fetch(headers, session, ssoToken, notice_db): for notice in modified_notices: logging.info(f" [MODIFIED NOTICE]: #{notice['UID'].split('_')[0]} | {notice['Type']} | {notice['Subject']} | {notice['Company']} | {notice['Time']}") - return new_notices + modified_notices, shortlists + notices = new_notices + modified_notices + + return notices def parse_body_data(session, year, id_): diff --git a/mftp/shortlist.py b/mftp/shortlist.py index 55cf031..21c29c0 100644 --- a/mftp/shortlist.py +++ b/mftp/shortlist.py @@ -6,7 +6,34 @@ from env import HOSTER_INTERESTED_ROLLS, ROLL_MAIL, ROLL_NAME -def from_notice_body(notice): +def search(notices): + print('[SEARCHING SHORTLISTS]', flush=True) + + shortlists = [] + for notice in notices: + # Handling Shortists in Body + try: + body_shortlists = search_body(notice) + if body_shortlists: + shortlists.append(body_shortlists) + except Exception as e: + logging.error(f" Failed to parse shortlists from notice body ~ {str(e)}") + continue + + # Handling Shortlist in attachment + try: + if 'Attachment' in notice: + attachment_shortlists = search_attachment(notice) + if attachment_shortlists: + shortlists.append(attachment_shortlists) + except Exception as e: + logging.error(f" Failed to parse shortlists from attachment ~ {str(e)}") + continue + + return shortlists + + +def search_body(notice): shortlists = defaultdict(dict) body_data = notice["BodyData"] body = body_data.decode_contents(formatter="html") @@ -27,18 +54,18 @@ def from_notice_body(notice): "mails": mails, } logging.info( - f" [SHORTLIST (noticebody)] {name} ({count}) -> {company} (#{id_})" + f" [NOTICEBODY] {name} ({count}) -> {company} (#{id_})" ) return shortlists -def from_attachment(notice): +def search_attachment(notice): shortlists = defaultdict(dict) attachment = notice["Attachment"] for roll in HOSTER_INTERESTED_ROLLS: - count = search_pdf_bytes(attachment, roll) + count = parse_pdf_bytes(attachment, roll) if count > 0: id_ = notice["UID"].split("_")[0] company = notice["Company"] @@ -53,13 +80,13 @@ def from_attachment(notice): "mails": mails, } logging.info( - f" [SHORTLIST (attachment)] {name} ({count}) -> {company} (#{id_})" + f" [ATTACHMENT] {name} ({count}) -> {company} (#{id_})" ) return shortlists -def search_pdf_bytes(pdf_bytes, search_string): +def parse_pdf_bytes(pdf_bytes, search_string): try: pdf_file = io.BytesIO(pdf_bytes) pdf_reader = PyPDF2.PdfReader(pdf_file)