-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Calendar Performance Update #241
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import datetime | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
from django.core.management.base import BaseCommand | ||
from django.utils import timezone | ||
|
||
from penndata.models import CalendarEvent | ||
|
||
|
||
UPENN_ALMANAC_WEBSITE = "https://almanac.upenn.edu/penn-academic-calendar" | ||
|
||
|
||
class Command(BaseCommand):
    """Refresh ``CalendarEvent`` rows from the Penn Almanac academic calendar page."""

    help = "Scrapes the Penn Almanac academic calendar and stores upcoming events."

    @staticmethod
    def _parse_start_date(date_info, year):
        """
        Return the timezone-aware start date encoded in ``date_info``, or None.

        Works for the date formats seen on the page; always yields the begin date:
        - Range date in same month: August 1-3
        - Range date across months: August 1-September 1
        - Single date: August 1
        """
        parts = date_info.split(" ")
        if len(parts) < 2:
            # No "<month> <day>" pair to read (e.g. empty or one-word cell).
            return None

        month = parts[0]
        day = parts[1].split("-")[0]
        try:
            # NOTE(review): the fixed -04:00 offset matches US Eastern daylight
            # time only; kept as-is so stored values do not shift - confirm
            # whether the project timezone should be used instead.
            return datetime.datetime.strptime(f"{month}{day}{year}-04:00", "%B%d%Y%z")
        except ValueError:
            return None

    def handle(self, *args, **kwargs):
        """
        Scrape the Almanac calendar table and replace the stored events.

        Existing rows are only cleared once the page has been fetched and the
        calendar table located, so a failed scrape leaves the previous events
        untouched. Returns None in all cases.
        """
        # Local import keeps this block self-contained; Django is already a
        # dependency of this module.
        from django.db import transaction

        # requests raises its own exception hierarchy (RequestException), which
        # the builtin ConnectionError does not cover.
        try:
            resp = requests.get(UPENN_ALMANAC_WEBSITE, timeout=30)
        except requests.exceptions.RequestException as exc:
            self.stderr.write(f"Could not reach the Almanac website: {exc}")
            return None

        soup = BeautifulSoup(resp.content.decode("utf8"), "html5lib")

        # Relevant Table class
        table = soup.find(
            "table",
            {
                "class": (
                    "table table-bordered table-striped "
                    "table-condensed table-responsive calendar-table"
                )
            },
        )
        if table is None:
            self.stderr.write("Could not find the calendar table on the Almanac website.")
            return None

        current_time = timezone.localtime()
        current_year = current_time.year
        row_year = None
        upcoming = []

        for row in table.find_all("tr"):
            header = row.find_all("th")

            # Header rows carry the year that applies to the rows below them.
            if header:
                try:
                    row_year = int(header[0].get_text().split(" ")[0])
                except ValueError:
                    row_year = None
                continue

            # Only get data from relevant year
            if row_year != current_year:
                continue

            cells = row.find_all("td")
            if len(cells) < 2:
                continue

            event = cells[0].get_text()
            date_info = cells[1].get_text()

            date = self._parse_start_date(date_info, current_year)
            if date is not None and date >= current_time:
                upcoming.append((event, date_info, date))

        # Swap old rows for new ones atomically so readers never observe an
        # empty table mid-refresh.
        with transaction.atomic():
            CalendarEvent.objects.all().delete()
            for event, date_info, date in upcoming:
                CalendarEvent.objects.get_or_create(event=event, date=date_info, date_obj=date)

        self.stdout.write("Uploaded Calendar Events!")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Generated by Django 4.2.9 on 2024-02-04 23:53 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the ``CalendarEvent`` model's table in the ``penndata`` app."""

    # Must be applied after penndata migration 0007.
    dependencies = [
        ("penndata", "0007_fitnessroom_image_url"),
    ]

    operations = [
        migrations.CreateModel(
            name="CalendarEvent",
            fields=[
                # Auto-created integer primary key.
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Event title.
                ("event", models.CharField(max_length=255)),
                # Date kept as free text; optional.
                ("date", models.CharField(blank=True, max_length=50, null=True)),
                # Date kept as a datetime value; optional.
                ("date_obj", models.DateTimeField(blank=True, null=True)),
            ],
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,3 +58,14 @@ | |
|
||
def __str__(self): | ||
return f"{self.cell_type}-{self.user.username}" | ||
|
||
|
||
class CalendarEvent(models.Model):
    """A calendar entry with a title, a display date string, and a datetime."""

    # Title of the event.
    event = models.CharField(max_length=255)

    # Date as a string; this is the field frontends use.
    date = models.CharField(max_length=50, null=True, blank=True)

    # NOTE: Keeping the date twice (string + datetime) is bad practice, though
    # it is necessary for the time being since frontends use the string field.
    # TODO (from code review): replace with a start date and a nullable end
    # date, because some calendar events are ranges.
    date_obj = models.DateTimeField(null=True, blank=True)

    def __str__(self):
        """Return ``"<date>-<event>"`` for display."""
        return f"{self.date}-{self.event}"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import datetime | ||
from datetime import timedelta | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
|
@@ -10,9 +11,17 @@ | |
from rest_framework.response import Response | ||
from rest_framework.views import APIView | ||
|
||
from penndata.models import AnalyticsEvent, Event, FitnessRoom, FitnessSnapshot, HomePageOrder | ||
from penndata.models import ( | ||
AnalyticsEvent, | ||
CalendarEvent, | ||
Event, | ||
FitnessRoom, | ||
FitnessSnapshot, | ||
HomePageOrder, | ||
) | ||
from penndata.serializers import ( | ||
AnalyticsEventSerializer, | ||
CalendarEventSerializer, | ||
EventSerializer, | ||
FitnessRoomSerializer, | ||
HomePageOrderSerializer, | ||
|
@@ -70,80 +79,19 @@ def get(self, request): | |
return Response({"error": "Site could not be reached or could not be parsed."}) | ||
|
||
|
||
class Calendar(APIView): | ||
class Calendar(generics.ListAPIView): | ||
""" | ||
GET: Returns upcoming university events (within 2 weeks away) | ||
list: Returns upcoming university events (within 2 weeks away) | ||
""" | ||
|
||
def get_calendar(self): | ||
# scapes almanac from upenn website | ||
try: | ||
resp = requests.get("https://almanac.upenn.edu/penn-academic-calendar") | ||
except ConnectionError: | ||
return None | ||
soup = BeautifulSoup(resp.content.decode("utf8"), "html5lib") | ||
# finds table with all information and gets the rows | ||
table = soup.find( | ||
"table", | ||
{ | ||
"class": ( | ||
"table table-bordered table-striped " | ||
"table-condensed table-responsive calendar-table" | ||
) | ||
}, | ||
) | ||
rows = table.find_all("tr") | ||
calendar = [] | ||
current_year = timezone.localtime().year | ||
row_year = 0 | ||
|
||
# collect end dates on all events and filter based on that | ||
for row in rows: | ||
header = row.find_all("th") | ||
if len(header) > 0: | ||
row_year = header[0].get_text().split(" ")[0] | ||
# skips calculation if years don't align | ||
if int(row_year) != current_year: | ||
continue | ||
if len(header) == 0: | ||
data = row.find_all("td") | ||
date_info = data[1].get_text() | ||
date = None | ||
try: | ||
# handles case for date ex. August 31 | ||
date = datetime.datetime.strptime( | ||
date_info + str(current_year) + "-04:00", "%B %d%Y%z" | ||
) | ||
except ValueError: | ||
try: | ||
# handles case for date ex. August 1-3 | ||
month = date_info.split(" ")[0] | ||
day = date_info.split("-")[1] | ||
date = datetime.datetime.strptime( | ||
month + day + str(current_year) + "-04:00", "%B%d%Y%z" | ||
) | ||
except (ValueError, IndexError): | ||
try: | ||
# handles case for date ex. August 1-September 31 | ||
last_date = date_info.split("-")[0].split(" ") | ||
month = last_date[0] | ||
day = last_date[1] | ||
date = datetime.datetime.strptime( | ||
month + day + str(current_year) + "-04:00", "%B%d%Y%z" | ||
) | ||
except (ValueError, IndexError): | ||
pass | ||
|
||
# TODO: add this: and date < timezone.localtime() + datetime.timedelta(days=14) | ||
if date and date > timezone.localtime(): | ||
calendar.append({"event": data[0].get_text(), "date": data[1].get_text()}) | ||
# only returns the 3 most recent events | ||
if len(calendar) == 3: | ||
break | ||
return calendar | ||
permission_classes = [AllowAny] | ||
serializer_class = CalendarEventSerializer | ||
|
||
def get(self, request): | ||
return Response(self.get_calendar()) | ||
def get_queryset(self): | ||
return CalendarEvent.objects.filter( | ||
date_obj__gte=timezone.localtime(), | ||
date_obj__lte=timezone.localtime() + timedelta(days=30), | ||
Review comment: For now we need this because we never clear the database. @ashleyzhang01 what do you think? Do you think the […] (remainder of sentence lost in extraction)
Reply: We don't call the manage.py command often enough for it to delete all previous events. I had something in my Calendar method in views.py for this. Can you move it in too?
Reply: Thanks, just did.
Reply: Wait, also: if it is only called once a month, then by the end of the month there won't be many future events left in the database, so we wouldn't really have any calendar events?
Reply: Ah yes, good point. I'll change the […] (remainder of sentence lost in extraction)
||
) | ||
|
||
|
||
class Events(generics.ListAPIView): | ||
|
@@ -237,7 +185,7 @@ def get(self, request): | |
cells.append(self.Cell("new-version-released", None, 10000)) | ||
|
||
# adds events up to 2 weeks | ||
cells.append(self.Cell("calendar", {"calendar": Calendar.get_calendar(self)}, 40)) | ||
# cells.append(self.Cell("calendar", {"calendar": Calendar.get_calendar(self)}, 40)) | ||
|
||
# adds front page article of DP | ||
cells.append(self.Cell("news", {"article": News.get_article(self)}, 50)) | ||
|
Review comment: I know a lot of this code was just moved, but if you understand it, could we break this function down into more modular components? The deeply nested try-except statements are a bit hard to read.
Reply: Agreed. Just cleaned it up to make it more readable.