Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to Write Prereqs #872

Open
wants to merge 11 commits into
base: andrew--parse
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ langchain = "*"
openai = "*"
numpy = "*"
parsy = "*"
firebase-admin = "*"

[requires]
python_version = "3.8"
1,174 changes: 837 additions & 337 deletions python/Pipfile.lock

Large diffs are not rendered by default.

62 changes: 62 additions & 0 deletions python/firebase_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import firebase_admin
from firebase_admin import credentials, firestore
import os
import json

SERVICE_ACCOUNT_PROD = "serviceAccountProd.json"
SERVICE_ACCOUNT_DEV = "serviceAccount.json"
DATABASE_URL_PROD = "https://cornell-courseplan.firebaseio.com"
DATABASE_URL_DEV = "https://cornelldti-courseplan-dev.firebaseio.com"


def get_database(service_account: dict, databaseURL: str, app_name=None):
    """
    Initializes a Firebase app and returns a Firestore client bound to it.

    service_account: parsed service-account data, passed to
        credentials.Certificate.
    databaseURL: value stored under the "databaseURL" app option.
    app_name: optional app name; when None, initialize_app is called without
        a name argument.
    """
    init_kwargs = {
        "credential": credentials.Certificate(service_account),
        "options": {"databaseURL": databaseURL},
    }
    # Only forward a name when the caller supplied one.
    if app_name is not None:
        init_kwargs["name"] = app_name
    firebase_app = firebase_admin.initialize_app(**init_kwargs)
    return firestore.client(firebase_app)


# PROD=true selects the production service account and database URL;
# anything else selects the development ones.
is_prod = os.environ.get("PROD") == "true"
service_account_filename = SERVICE_ACCOUNT_PROD if is_prod else SERVICE_ACCOUNT_DEV

# Credentials come from the SERVICE_ACCOUNT environment variable when it is
# set; otherwise they are read from the JSON file one directory above the
# current working directory.
service_account_unparsed = os.environ.get("SERVICE_ACCOUNT")
if service_account_unparsed is None:
    with open(os.path.join(os.getcwd(), "..", service_account_filename)) as f:
        service_account_unparsed = f.read()

service_account = json.loads(service_account_unparsed)
databaseURL = DATABASE_URL_PROD if is_prod else DATABASE_URL_DEV
db = get_database(service_account, databaseURL)

# Short key -> Firestore collection name for each per-user collection.
user_collections = {
    "name": "user-name",
    "semesters": "user-semesters",
    "toggleable": "user-toggleable-requirement-choices",
    "overridden": "user-overridden-fulfillment-choices",
    "colors": "user-subject-colors",
    "unique": "user-unique-incrementer",
    "onboarding": "user-onboarding-data",
}

# Live view over the collection names above.
user_collection_names = user_collections.values()

# Collection handles for the per-user collections.
username_collection = db.collection(user_collections["name"])
semesters_collection = db.collection(user_collections["semesters"])
toggleable_requirement_choices_collection = db.collection(
    user_collections["toggleable"]
)
overridden_fulfillment_choices_collection = db.collection(
    user_collections["overridden"]
)
subject_colors_collection = db.collection(user_collections["colors"])
unique_incrementer_collection = db.collection(user_collections["unique"])
onboarding_data_collection = db.collection(user_collections["onboarding"])

# Collection handles that are not keyed through user_collections.
track_users_collection = db.collection("track-users")
courses_collection = db.collection("courses")
available_rosters_for_course_collection = db.collection(
    "available-rosters-for-course"
)
crse_id_to_catalog_nbr_collection = db.collection("crseid-to-catalognbr")
48 changes: 44 additions & 4 deletions python/prereq_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
llm = ChatOpenAI(openai_api_key=OPEN_AI_API_KEY, temperature=0, model=MODEL)


def get_raw_prereqs_and_coreqs(verbose=False) -> SequentialChain:
def _get_raw_prereqs_and_coreqs(verbose=False) -> SequentialChain:
"""
Returns a chain that takes in a course description and returns the prerequisites and corequisites as two separate lists.
"""
Expand Down Expand Up @@ -89,11 +89,11 @@ def parse_prerequisites_corequisites(inputs: Dict[str, str]):
return raw_prereqs_coreqs_chain


def get_prereqs_coreqs(course_desc: str, verbose=False) -> Tuple[str]:
def _get_prereqs_coreqs(course_desc: str, verbose=False) -> Tuple[str]:
"""
Takes in a course description and returns a boolean expression with the course names representing the prerequisites and corequisites.
"""
raw_prereqs_coreqs_chain = get_raw_prereqs_and_coreqs(verbose)
raw_prereqs_coreqs_chain = _get_raw_prereqs_and_coreqs(verbose)

template = """
You are given the course description for a course.
Expand Down Expand Up @@ -198,7 +198,7 @@ def parse_prerequisites_corequisites(inputs: Dict[str, str]):
return (prereqs_response, coreqs_response)


def parse_boolean_string(raw_output: str):
def _parse_boolean_string(raw_output: str):
if raw_output == "":
return {}
course = regex("[A-Z]{2,6} \d{4}").map(lambda x: {"type": "ATOM", "exprs": x})
Expand All @@ -217,3 +217,43 @@ def parse_boolean_string(raw_output: str):
except:
output = expr.parse(f"({raw_output})")
return output


def parse_prereq_coreq_string(course_desc):
    """
    Converts a raw prerequisite/corequisite string into two parsed
    dictionaries and returns them as a tuple (prerequisites, corequisites).

    The string is first turned into a pair of boolean expressions by
    _get_prereqs_coreqs, and each expression is then parsed by
    _parse_boolean_string.

    e.g.
    input: 'Prerequisite: general chemistry (CHEM 1560, CHEM 2070, and/or CHEM 2080), organic chemistry (CHEM 1570, CHEM 3570, and/or CHEM 3580), and Food Chemistry I (FDSC 4170).'
    output: (
        {
            "type": "AND",
            "exprs": [
                {
                    "type": "OR",
                    "exprs": [
                        {"type": "ATOM", "exprs": "CHEM 1560"},
                        {"type": "ATOM", "exprs": "CHEM 2070"},
                        {"type": "ATOM", "exprs": "CHEM 2080"},
                    ],
                },
                {
                    "type": "OR",
                    "exprs": [
                        {"type": "ATOM", "exprs": "CHEM 1570"},
                        {"type": "ATOM", "exprs": "CHEM 3570"},
                        {"type": "ATOM", "exprs": "CHEM 3580"},
                    ],
                },
                {"type": "ATOM", "exprs": "FDSC 4170"},
            ],
        },
        {}
    )
    """
    raw_prereqs, raw_coreqs = _get_prereqs_coreqs(course_desc)
    # Prerequisites are parsed before corequisites, matching the tuple order.
    return (_parse_boolean_string(raw_prereqs), _parse_boolean_string(raw_coreqs))
74 changes: 74 additions & 0 deletions python/read_prereqs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import requests
import json
from typing import List

prefix = "https://classes.cornell.edu/api/2.0"


def get_semesters():
    """
    Returns the slug of every roster (semester) listed by the roster API,
    e.g. "FA23".

    Raises requests.HTTPError if the API responds with an error status.
    """
    response = requests.get(f"{prefix}/config/rosters.json", timeout=30)
    # Fail loudly on an HTTP error instead of on a confusing KeyError below.
    response.raise_for_status()
    rosters = response.json()["data"]["rosters"]
    # Each roster object identifies itself under the singular key "slug".
    return [roster["slug"] for roster in rosters]


def _get_subjects(semester: str):
    """
    Gets the subject codes offered in a semester (like MATH or PE).

    semester: roster slug sent as the "roster" query parameter.

    Raises requests.HTTPError if the API responds with an error status.
    """
    response = requests.get(
        f"{prefix}/config/subjects.json",
        # Let requests build and URL-encode the query string.
        params={"roster": semester},
        timeout=30,
    )
    response.raise_for_status()
    subjects = response.json()["data"]["subjects"]
    return [subject["value"] for subject in subjects]


def _get_classes_by_subject(semester: str, subject: str):
    """
    Gets the courses for a subject in a semester (like MATH 2940 or PE 1510).

    semester: roster slug sent as the "roster" query parameter.
    subject: subject code sent as the "subject" query parameter.

    Returns the value stored under data.classes in the API response.
    Raises requests.HTTPError if the API responds with an error status.
    """
    response = requests.get(
        f"{prefix}/search/classes.json",
        # Let requests build and URL-encode the query string.
        params={"roster": semester, "subject": subject},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["data"]["classes"]


def _get_req_str_dict_from_courses(courses: List[dict]):
    """
    Gets the prerequisite/corequisite string from a list of course
    dictionaries and puts it in a dictionary with keys being the course
    subject + catalog number and values being the string.

    Courses whose "catalogPrereqCoreq" field is missing, None or empty are
    left out of the result.

    e.g.
    {
        'MATH 2940': 'MATH 1920 or equivalent'
    }
    """
    req_strs = {}
    for course in courses:
        # .get tolerates course records that omit the field entirely.
        req_str = course.get("catalogPrereqCoreq")
        if not req_str:
            continue
        req_strs[f"{course['subject']} {course['catalogNbr']}"] = req_str
    return req_strs


def get_all_classes_req_strs_from_semester(semester):
    """
    Builds the prerequisite/corequisite dictionary for a whole semester: the
    courses of every subject in the semester are collected, and the result
    maps course name + ID to its prerequisite/corequisite string.
    """
    collected_courses = []
    for subject_code in _get_subjects(semester):
        collected_courses.extend(_get_classes_by_subject(semester, subject_code))
    return _get_req_str_dict_from_courses(collected_courses)
12 changes: 6 additions & 6 deletions python/test_prereq_parse.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from prereq_parse import (
get_raw_prereqs_and_coreqs,
get_prereqs_coreqs,
parse_boolean_string,
_get_raw_prereqs_and_coreqs,
_get_prereqs_coreqs,
_parse_boolean_string,
)
import os
import pytest
Expand Down Expand Up @@ -86,7 +86,7 @@ def test_raw_prereqs_coreqs(verbose=False):
]
for test in test_cases:
(course_desc, answer) = test
response = get_raw_prereqs_and_coreqs(course_desc)
response = _get_raw_prereqs_and_coreqs(course_desc)
assert response == answer


Expand Down Expand Up @@ -154,7 +154,7 @@ def test_prereqs_coreqs(index=None, shorten=False, verbose=False, hard=False):
]
for test in test_cases:
(course_desc, answer) = test
response = get_prereqs_coreqs(course_desc)
response = _get_prereqs_coreqs(course_desc)
assert response == answer


Expand Down Expand Up @@ -308,7 +308,7 @@ def test_parse_boolean_string(verbose=False):

for test in test_cases:
(boolean_string, answer) = test
response = parse_boolean_string(boolean_string)
response = _parse_boolean_string(boolean_string)
assert response == answer


Expand Down
38 changes: 38 additions & 0 deletions python/write_prereqs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from read_prereqs import get_all_classes_req_strs_from_semester
from prereq_parse import parse_prereq_coreq_string
from firebase_config import (
courses_collection,
)


def write_prereqs(semester):
    """
    Parses the prerequisite/corequisite string of every course in a semester
    and writes the parsed results to Firestore.

    For each course "SUBJECT NUMBER" the results are merged into the document
    courses/<semester>/<SUBJECT>/<NUMBER> under course.prereqs and
    course.coreqs.

    semester: roster slug used both to fetch the courses and as the document
        id under the courses collection.
    """
    req_str_dict = get_all_classes_req_strs_from_semester(semester)
    for course, raw_str in req_str_dict.items():
        prereqs, coreqs = parse_prereq_coreq_string(raw_str)
        # Keys have the form "<subject> <catalog number>".
        subject, course_number = course.split(" ", 1)
        course_ref = (
            # Write under the requested semester rather than a fixed roster.
            courses_collection.document(semester)
            .collection(subject)
            .document(course_number)
        )
        # One merged write sets both fields without touching other data.
        course_ref.set(
            {"course": {"prereqs": prereqs, "coreqs": coreqs}}, merge=True
        )


def write_cs_4787():
    """
    Merges the placeholder strings "Prereqs" and "Coreqs" into the FA23
    document for CS 4787, one write per field.
    """
    subject, course_number = "CS 4787".split(" ")
    doc_ref = (
        courses_collection.document("FA23")
        .collection(subject)
        .document(course_number)
    )
    # Same order as before: prereqs first, then coreqs.
    for field, placeholder in (("prereqs", "Prereqs"), ("coreqs", "Coreqs")):
        doc_ref.set({"course": {field: placeholder}}, merge=True)


# When run as a script, only the single-course placeholder write is executed.
if __name__ == "__main__":
    write_cs_4787()
Loading