Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calendar Performance Update #241

Merged
merged 5 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion backend/penndata/admin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from django.contrib import admin
from django.utils.html import escape, mark_safe

from penndata.models import AnalyticsEvent, Event, FitnessRoom, FitnessSnapshot, HomePageOrder
from penndata.models import (
AnalyticsEvent,
CalendarEvent,
Event,
FitnessRoom,
FitnessSnapshot,
HomePageOrder,
)


class FitnessRoomAdmin(admin.ModelAdmin):
Expand All @@ -13,6 +20,7 @@ def image_tag(self, instance):


admin.site.register(Event)
admin.site.register(CalendarEvent)
admin.site.register(HomePageOrder)
admin.site.register(FitnessRoom, FitnessRoomAdmin)
admin.site.register(FitnessSnapshot)
Expand Down
77 changes: 77 additions & 0 deletions backend/penndata/management/commands/get_calendar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import datetime

import requests
from bs4 import BeautifulSoup
from django.core.management.base import BaseCommand
from django.utils import timezone

from penndata.models import CalendarEvent


UPENN_ALMANAC_WEBSITE = "https://almanac.upenn.edu/penn-academic-calendar"


class Command(BaseCommand):
def handle(self, *args, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i know a lot of this code was just moved but if u understand it, could we break this function down into more modular components? the super nested try-except statements are a bit hard to read.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed. just cleaned it up to make it more readable.


# Clears out previous CalendarEvents
CalendarEvent.objects.all().delete()

# Scrapes UPenn Almanac
try:
resp = requests.get(UPENN_ALMANAC_WEBSITE)
except ConnectionError:
return None

Check warning on line 24 in backend/penndata/management/commands/get_calendar.py

View check run for this annotation

Codecov / codecov/patch

backend/penndata/management/commands/get_calendar.py#L23-L24

Added lines #L23 - L24 were not covered by tests

soup = BeautifulSoup(resp.content.decode("utf8"), "html5lib")

# Relevant Table class
table = soup.find(
"table",
{
"class": (
"table table-bordered table-striped "
"table-condensed table-responsive calendar-table"
)
},
)

rows = table.find_all("tr")
current_time = timezone.localtime()
current_year = current_time.year
row_year = 0

for row in rows:
header = row.find_all("th")

# Gets data from header
if len(header) > 0:
row_year = header[0].get_text().split(" ")[0]
continue

# Only get data from relevant year
if int(row_year) != current_year:
continue

data = row.find_all("td")
event = data[0].get_text()
date_info = data[1].get_text()

"""
Match works for different date types; always matches begin date:
- Range date in same month: August 1-3
- Range date across months: August 1-September 1
- Single date: August 1
"""
try:
month = date_info.split(" ")[0]
day = date_info.split(" ")[1].split("-")[0]
date = datetime.datetime.strptime(
month + day + str(current_year) + "-04:00", "%B%d%Y%z"
)
if date and date >= timezone.localtime():
CalendarEvent.objects.get_or_create(event=event, date=date_info, date_obj=date)
except ValueError:
continue

self.stdout.write("Uploaded Calendar Events!")
27 changes: 27 additions & 0 deletions backend/penndata/migrations/0008_calendarevent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 4.2.9 on 2024-02-04 23:53

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("penndata", "0007_fitnessroom_image_url"),
]

operations = [
migrations.CreateModel(
name="CalendarEvent",
fields=[
(
"id",
models.AutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
("event", models.CharField(max_length=255)),
("date", models.CharField(blank=True, max_length=50, null=True)),
("date_obj", models.DateTimeField(blank=True, null=True)),
],
),
]
11 changes: 11 additions & 0 deletions backend/penndata/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,14 @@

def __str__(self):
return f"{self.cell_type}-{self.user.username}"


class CalendarEvent(models.Model):
event = models.CharField(max_length=255)
date = models.CharField(max_length=50, null=True, blank=True)
# NOTE: This is bad practice, though is necessary for the time being
# since frontends use the string date field
date_obj = models.DateTimeField(null=True, blank=True)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need to change this in the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should have fields for a start date and a nullable end date bc some calendar events are ranges.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the reminder. I will leave this for you to do in a future events PR you create so we can group relevant code


def __str__(self):
return f"{self.date}-{self.event}"

Check warning on line 71 in backend/penndata/models.py

View check run for this annotation

Codecov / codecov/patch

backend/penndata/models.py#L71

Added line #L71 was not covered by tests
15 changes: 14 additions & 1 deletion backend/penndata/serializers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from rest_framework import serializers

from penndata.models import AnalyticsEvent, Event, FitnessRoom, FitnessSnapshot, HomePageOrder
from penndata.models import (
AnalyticsEvent,
CalendarEvent,
Event,
FitnessRoom,
FitnessSnapshot,
HomePageOrder,
)


class EventSerializer(serializers.ModelSerializer):
Expand All @@ -19,6 +26,12 @@ class Meta:
)


class CalendarEventSerializer(serializers.ModelSerializer):
class Meta:
model = CalendarEvent
fields = ("event", "date")


class HomePageOrderSerializer(serializers.ModelSerializer):
class Meta:
model = HomePageOrder
Expand Down
92 changes: 20 additions & 72 deletions backend/penndata/views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from datetime import timedelta

import requests
from bs4 import BeautifulSoup
Expand All @@ -10,9 +11,17 @@
from rest_framework.response import Response
from rest_framework.views import APIView

from penndata.models import AnalyticsEvent, Event, FitnessRoom, FitnessSnapshot, HomePageOrder
from penndata.models import (
AnalyticsEvent,
CalendarEvent,
Event,
FitnessRoom,
FitnessSnapshot,
HomePageOrder,
)
from penndata.serializers import (
AnalyticsEventSerializer,
CalendarEventSerializer,
EventSerializer,
FitnessRoomSerializer,
HomePageOrderSerializer,
Expand Down Expand Up @@ -70,80 +79,19 @@ def get(self, request):
return Response({"error": "Site could not be reached or could not be parsed."})


class Calendar(APIView):
class Calendar(generics.ListAPIView):
"""
GET: Returns upcoming university events (within 2 weeks away)
list: Returns upcoming university events (within 2 weeks away)
"""

def get_calendar(self):
# scapes almanac from upenn website
try:
resp = requests.get("https://almanac.upenn.edu/penn-academic-calendar")
except ConnectionError:
return None
soup = BeautifulSoup(resp.content.decode("utf8"), "html5lib")
# finds table with all information and gets the rows
table = soup.find(
"table",
{
"class": (
"table table-bordered table-striped "
"table-condensed table-responsive calendar-table"
)
},
)
rows = table.find_all("tr")
calendar = []
current_year = timezone.localtime().year
row_year = 0

# collect end dates on all events and filter based on that
for row in rows:
header = row.find_all("th")
if len(header) > 0:
row_year = header[0].get_text().split(" ")[0]
# skips calculation if years don't align
if int(row_year) != current_year:
continue
if len(header) == 0:
data = row.find_all("td")
date_info = data[1].get_text()
date = None
try:
# handles case for date ex. August 31
date = datetime.datetime.strptime(
date_info + str(current_year) + "-04:00", "%B %d%Y%z"
)
except ValueError:
try:
# handles case for date ex. August 1-3
month = date_info.split(" ")[0]
day = date_info.split("-")[1]
date = datetime.datetime.strptime(
month + day + str(current_year) + "-04:00", "%B%d%Y%z"
)
except (ValueError, IndexError):
try:
# handles case for date ex. August 1-September 31
last_date = date_info.split("-")[0].split(" ")
month = last_date[0]
day = last_date[1]
date = datetime.datetime.strptime(
month + day + str(current_year) + "-04:00", "%B%d%Y%z"
)
except (ValueError, IndexError):
pass

# TODO: add this: and date < timezone.localtime() + datetime.timedelta(days=14)
if date and date > timezone.localtime():
calendar.append({"event": data[0].get_text(), "date": data[1].get_text()})
# only returns the 3 most recent events
if len(calendar) == 3:
break
return calendar
permission_classes = [AllowAny]
serializer_class = CalendarEventSerializer

def get(self, request):
return Response(self.get_calendar())
def get_queryset(self):
return CalendarEvent.objects.filter(
date_obj__gte=timezone.localtime(),
date_obj__lte=timezone.localtime() + timedelta(days=30),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now we need this because we never clear the database. @ashleyzhang01 what do you think? Do you think the manage.py command should delete all previous CalendarEvents that way we don't have to re-do this filtering for events?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't call the manage.py thing often enough that it deletes all previous. I had something in my Calendar method in the views.py for this-- can you move it in too?
Basically, it deletes any previous events when called each time, so we don't have to do the manage.py as often and don't have to filter through the time range as often.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, just did

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait also if it is only called once a month, then by the end of the month there won't be many future events left in the db, so we wouldn't really have any calendar events?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes good point. I'll change the get_calendar to create CalendarEvents that exceed 30 days as well. Then we will leave this view as is.

)


class Events(generics.ListAPIView):
Expand Down Expand Up @@ -237,7 +185,7 @@ def get(self, request):
cells.append(self.Cell("new-version-released", None, 10000))

# adds events up to 2 weeks
cells.append(self.Cell("calendar", {"calendar": Calendar.get_calendar(self)}, 40))
# cells.append(self.Cell("calendar", {"calendar": Calendar.get_calendar(self)}, 40))

# adds front page article of DP
cells.append(self.Cell("news", {"article": News.get_article(self)}, 50))
Expand Down
10 changes: 6 additions & 4 deletions backend/tests/penndata/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ def test_response(self):


class TestCalender(TestCase):
def setUp(self):
call_command("get_calendar")

def test_response(self):
response = self.client.get(reverse("calendar"))
res_json = json.loads(response.content)
Expand Down Expand Up @@ -102,8 +105,8 @@ def test_first_response(self):
self.assertIn(1442, dining_info)
self.assertIn(636, dining_info)

self.assertEqual(res_json[3]["type"], "laundry")
self.assertEqual(res_json[3]["info"]["room_id"], 0)
self.assertEqual(res_json[2]["type"], "laundry")
self.assertEqual(res_json[2]["info"]["room_id"], 0)

self.test_user.profile.dining_preferences.add(Venue.objects.get(venue_id=747))
self.test_user.profile.dining_preferences.add(Venue.objects.get(venue_id=1733))
Expand All @@ -122,10 +125,9 @@ def test_first_response(self):
self.assertIn(1733, new_dining_info)
self.assertIn(638, new_dining_info)

self.assertEqual(new_res_json[3]["info"]["room_id"], 3)
self.assertEqual(new_res_json[2]["info"]["room_id"], 3)

self.assertEqual(new_res_json[1]["type"], "news")
self.assertEqual(new_res_json[2]["type"], "calendar")


class TestGetRecentFitness(TestCase):
Expand Down
8 changes: 8 additions & 0 deletions k8s/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ export class MyChart extends PennLabsChart {
cmd: ["python", "manage.py", "load_target_populations"],
env: [{ name: "DJANGO_SETTINGS_MODULE", value: "pennmobile.settings.production" }]
});

new CronJob(this, 'get-calendar', {
schedule: '0 3 1 * *', // This expression means run at 3 AM on the 1st day of every month
image: backendImage,
secret,
cmd: ["python", "manage.py", "get_calendar"],
env: [{ name: "DJANGO_SETTINGS_MODULE", value: "pennmobile.settings.production" }]
});
}
}

Expand Down
Loading