Skip to content

Commit

Permalink
refactor: shortlist logic (notice.py -> shortlist.py)
Browse files Browse the repository at this point in the history
  • Loading branch information
proffapt committed Nov 30, 2024
1 parent bf24f8f commit 3c15d4e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 32 deletions.
9 changes: 5 additions & 4 deletions mftp/mftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ntfy
import notice
import company
import shortlist

import logging
import requests
Expand Down Expand Up @@ -101,16 +102,16 @@
)
notice_db.connect()

notices, shortlists = notice.fetch(headers, session, ssoToken, notice_db)
if shortlists:
if args.gmail_api or args.smtp:
pass
notices = notice.fetch(headers, session, ssoToken, notice_db)
if notices:
if args.ntfy:
notifications = ntfy.format_notices(notices)
if notifications:
ntfy.send_notices(notifications, notice_db)
else:
shortlists = shortlist.search(notices)
if shortlists:
pass
mails = mail.format_notices(notices)
if mails:
mail.send_notices(mails, args.smtp, args.gmail_api, notice_db)
Expand Down
25 changes: 3 additions & 22 deletions mftp/notice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from bs4 import BeautifulSoup as bs
from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL, NOTICE_CONTENT_URL

import shortlist

LAST_NOTICES_CHECK_COUNT = 30

Expand All @@ -26,7 +25,6 @@ def fetch(headers, session, ssoToken, notice_db):
logging.error(f" Failed to extract data from Noticeboard ~ {str(e)}")
return []

shortlists = []
latest_X_notices = []
for i, row in enumerate(root.findall('row')):
if i >= LAST_NOTICES_CHECK_COUNT:
Expand All @@ -50,15 +48,6 @@ def fetch(headers, session, ssoToken, notice_db):
logging.error(f" Failed to parse notice body ~ {str(e)}")
break

# Handling Shortists in Body
try:
body_shortlists = shortlist.from_notice_body(notice)
if body_shortlists:
shortlists.append(body_shortlists)
except Exception as e:
logging.error(f" Failed to parse shortlists from notice body ~ {str(e)}")
continue

# Handling attachment
try:
attachment = parse_attachment(session, year, id_)
Expand All @@ -68,16 +57,6 @@ def fetch(headers, session, ssoToken, notice_db):
logging.error(f" Failed to parse attachment ~ {str(e)}")
break

# Handling Shortlist in attachment
try:
if 'Attachment' in notice:
attachment_shortlists = shortlist.from_attachment(notice)
if attachment_shortlists:
shortlists.append(attachment_shortlists)
except Exception as e:
logging.error(f" Failed to parse shortlists from attachment ~ {str(e)}")
continue

latest_X_notices.append(notice)

# This is done to reduce DB queries
Expand All @@ -94,7 +73,9 @@ def fetch(headers, session, ssoToken, notice_db):
for notice in modified_notices:
logging.info(f" [MODIFIED NOTICE]: #{notice['UID'].split('_')[0]} | {notice['Type']} | {notice['Subject']} | {notice['Company']} | {notice['Time']}")

return new_notices + modified_notices, shortlists
notices = new_notices + modified_notices

return notices


def parse_body_data(session, year, id_):
Expand Down
39 changes: 33 additions & 6 deletions mftp/shortlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,34 @@
from env import HOSTER_INTERESTED_ROLLS, ROLL_MAIL, ROLL_NAME


def from_notice_body(notice):
def search(notices):
    """Collect shortlist matches from the body and attachment of each notice.

    Args:
        notices: Iterable of notice mappings. Every notice is passed to
            ``search_body``; a notice that has an ``'Attachment'`` key is
            additionally passed to ``search_attachment``.

    Returns:
        list: Every non-empty result returned by ``search_body`` and
        ``search_attachment``, in notice order (body result before the
        attachment result of the same notice). Empty when ``notices`` is
        empty or nothing matched.
    """
    print('[SEARCHING SHORTLISTS]', flush=True)

    shortlists = []
    for notice in notices:
        # Handling Shortlists in Body
        try:
            body_shortlists = search_body(notice)
            if body_shortlists:
                shortlists.append(body_shortlists)
        except Exception as e:
            # Best effort: log and fall through. The body and the attachment
            # are searched independently, so a failure here must not stop the
            # attachment of the same notice from being searched below.
            logging.error(f" Failed to parse shortlists from notice body ~ {str(e)}")

        # Handling Shortlist in attachment
        try:
            if 'Attachment' in notice:
                attachment_shortlists = search_attachment(notice)
                if attachment_shortlists:
                    shortlists.append(attachment_shortlists)
        except Exception as e:
            # Best effort: log and move on to the next notice.
            logging.error(f" Failed to parse shortlists from attachment ~ {str(e)}")

    return shortlists


def search_body(notice):
shortlists = defaultdict(dict)
body_data = notice["BodyData"]
body = body_data.decode_contents(formatter="html")
Expand All @@ -27,18 +54,18 @@ def from_notice_body(notice):
"mails": mails,
}
logging.info(
f" [SHORTLIST (noticebody)] {name} ({count}) -> {company} (#{id_})"
f" [NOTICEBODY] {name} ({count}) -> {company} (#{id_})"
)

return shortlists


def from_attachment(notice):
def search_attachment(notice):
shortlists = defaultdict(dict)
attachment = notice["Attachment"]

for roll in HOSTER_INTERESTED_ROLLS:
count = search_pdf_bytes(attachment, roll)
count = parse_pdf_bytes(attachment, roll)
if count > 0:
id_ = notice["UID"].split("_")[0]
company = notice["Company"]
Expand All @@ -53,13 +80,13 @@ def from_attachment(notice):
"mails": mails,
}
logging.info(
f" [SHORTLIST (attachment)] {name} ({count}) -> {company} (#{id_})"
f" [ATTACHMENT] {name} ({count}) -> {company} (#{id_})"
)

return shortlists


def search_pdf_bytes(pdf_bytes, search_string):
def parse_pdf_bytes(pdf_bytes, search_string):
try:
pdf_file = io.BytesIO(pdf_bytes)
pdf_reader = PyPDF2.PdfReader(pdf_file)
Expand Down

0 comments on commit 3c15d4e

Please sign in to comment.