From f66791d0fb46ff88b5b1073d01853b7eae5f9c3b Mon Sep 17 00:00:00 2001 From: Raymond Pulsipher Date: Fri, 4 Jun 2021 12:46:04 -0700 Subject: [PATCH] - Fix json import issue when re-scanning documents/media from imported tool. - Work on tagging media/documents based on what classes they are used in. --- web2py/applications/smc/controllers/media.py | 174 +++++++++++++++++- .../applications/smc/models/b_media_code.py | 13 ++ web2py/applications/smc/models/menu.py | 1 + web2py/applications/smc/models/x_scheduler.py | 172 ++++++++++++++++- 4 files changed, 356 insertions(+), 4 deletions(-) diff --git a/web2py/applications/smc/controllers/media.py b/web2py/applications/smc/controllers/media.py index 2c12a88c..7e0bf248 100644 --- a/web2py/applications/smc/controllers/media.py +++ b/web2py/applications/smc/controllers/media.py @@ -20,6 +20,174 @@ from bs4 import BeautifulSoup as bs + +@auth.requires(auth.has_membership('Faculty') or auth.has_membership('Administrators')) +def tag_resources_by_class_status(): + running_query = ( + (db_scheduler.scheduler_task.task_name=="canvas_tag_smc_resources") & + (db_scheduler.scheduler_task.status=="RUNNING") + ) + + completed_query = ( + (db_scheduler.scheduler_task.task_name=="canvas_tag_smc_resources") & + (db_scheduler.scheduler_task.status=="COMPLETED") + ) + + queued_query = ( + (db_scheduler.scheduler_task.task_name=="canvas_tag_smc_resources") & + (db_scheduler.scheduler_task.status=="QUEUED") + ) + + failed_query = ( + (db_scheduler.scheduler_task.task_name=="canvas_tag_smc_resources") & + (db_scheduler.scheduler_task.status=="FAILED") + ) + + last_run_query = ( + (db_scheduler.scheduler_task.task_name=="canvas_tag_smc_resources") & + ((db_scheduler.scheduler_task.status=="FAILED") | (db_scheduler.scheduler_task.status=="COMPLETED")) + ) + + running_count = db_scheduler(running_query).count() + completed_count = db_scheduler(completed_query).count() + queued_count = db_scheduler(queued_query).count() + failed_count = db_scheduler(failed_query).count() + + last_task_record = db_scheduler(last_run_query).select(orderby=~db_scheduler.scheduler_task.id).first() + # Pull the output + last_run_output = "" + last_run_traceback = "" + last_run_status = "" + if last_task_record: + last_run_status = last_task_record.status + for r in last_task_record.scheduler_run.select(): + last_run_output = r['run_output'] + last_run_traceback = r['traceback'] + + if last_run_output is None: + last_run_output = "" + if last_run_traceback is None: + last_run_traceback = "" + + last_run_output = "" + str(last_run_status) + "\n" + last_run_output + "\n" + last_run_traceback + last_run_output = last_run_output.replace("\n", "
") + + # Make a table for the results + t = TABLE( + TR( + TH("Job Stats"), + TH("") + + ), + TR( + TD("Queued:", _style="font-size: small;"), + TD(queued_count, _style="font-size: small;") + ), + TR( + TD("Running:", _style="font-size: small;"), + TD(running_count, _style="font-size: small;") + ), + TR( + TD("Completed:", _style="font-size: small;"), + TD(completed_count, _style="font-size: small;") + ), + TR( + TD("Failed:", _style="font-size: small;"), + TD(failed_count, _style="font-size: small;") + ), + TR( + TD(" "), + TD(" ") + ), + TR( + TH("Last Run Output:") + ), + TR( + TD( + XML(last_run_output), + _colspan=2, + _style="font-size: x-small" + ) + ), + _style='width: 400px; margin-left: 25px; margin-right: 25px;', + ) + + return XML(t) + +@auth.requires(auth.has_membership('Faculty') or auth.has_membership('Administrators')) +def tag_resources_by_class(): + ret = { + 'form': None + } + + Canvas.Close() + Canvas.Init() + course_list = [] + course_dict = dict() + + if Canvas._canvas_integration_enabled is not True: + form = "Canvas Integration needs to be Enabled in the admin menu before this tool will work." + return dict(form=XML(form)) + + courses = Canvas.get_courses_for_faculty(auth.user.username) + + sorted_course_dict = dict() + for c in courses: + sorted_course_dict[courses[c]] = str(c) + course_dict[str(c)] = courses[c] + # Sort the keys and add them to the select list + course_list.append(OPTION("ALL COURSES", _vaule="ALL COURSES")) + for k in sorted(sorted_course_dict.keys()): + course_list.append(OPTION(str(k), _value=str(sorted_course_dict[k]))) + + course_select = SELECT(course_list, _name="current_course", _id="current_course", _style="width: 600px;") + + form = FORM(TABLE(TR("Choose a course: ", course_select), + TR("", INPUT(_type="submit", _value="Next"))), _name="fr_step1").process(formname="fr_step1", + keepvalues=True) + ret['form'] = form + + if form.accepted: + selected_course = form.vars.current_course + # print(selected_course) + # print(courses) + selected_courses = dict() + if selected_course == "ALL COURSES": + # print("ALL COURSES") + selected_courses = courses + else: + # Convert to ID + selected_course = int(selected_course) + if selected_course in courses: + # print("Adding Course: " + str(selected_course)) + selected_courses[selected_course] = courses[selected_course] + else: + # print("Not in courses: " + str(selected_course)) + pass + + if len(selected_courses.keys()) < 1: + response.flash = "No courses selected!" + else: + job_count = 0 + # Loop through the course list and create jobs for each one. + for c in selected_courses.keys(): + class_id = c + class_name = selected_courses[c] + result = scheduler.queue_task( + 'canvas_tag_smc_resources', + pvars=dict(class_id=c, class_name=class_name), + timeout=3600, + immediate=True, + sync_output=1, + group_name="misc" + ) + # print(c) + job_count += 1 + + response.flash = str(job_count) + " job(s) scheduled." + + return ret + def media_list(): response.view = "default.json" ret = list() @@ -421,11 +589,11 @@ def dl_document(): document_file = db(db.document_files.document_guid == document_id).select().first() media_type = "" if document_file is not None: - title = document_file.title - description = document_file.description + title = str(document_file.title) + description = str(document_file.description) tags = ",".join(document_file.tags) views = document_file.views - original_file_name = document_file.original_file_name + original_file_name = str(document_file.original_file_name) pass p, media_type = os.path.splitext(original_file_name) mimetypes.init() diff --git a/web2py/applications/smc/models/b_media_code.py b/web2py/applications/smc/models/b_media_code.py index 3e0a1185..f5daa34b 100644 --- a/web2py/applications/smc/models/b_media_code.py +++ b/web2py/applications/smc/models/b_media_code.py @@ -15,6 +15,7 @@ import requests from langcodes import * import webvtt +import traceback from ednet.canvas import Canvas @@ -236,6 +237,10 @@ def load_media_file_json(file_guid): else: meta[f] = "" + if type(meta['tags']) is list: + # Need to convert to string for later + meta['tags'] = dumps(meta['tags']) + # See if the item is in the database item = db.media_files(media_guid=meta['media_guid']) if item is None: @@ -328,6 +333,10 @@ def load_document_file_json(file_guid): else: meta[f] = "" + if type(meta['tags']) is list: + # Need to convert to string for later + meta['tags'] = dumps(meta['tags']) + # See if the item is in the database item = db.document_files(document_guid=meta['document_guid']) if item is None: @@ -346,8 +355,11 @@ def load_document_file_json(file_guid): db.commit() except Exception as ex: print("Error processing document file: ", json_file, str(ex)) + traceback.print_exc() # db.rollback() + # Make sure to release the lock on this db + db.commit() return True @@ -483,3 +495,4 @@ def getPDFURLS(txt): if 'pdf' in r: ret.append(r) return ret + diff --git a/web2py/applications/smc/models/menu.py b/web2py/applications/smc/models/menu.py index c063a7f1..0c118266 100644 --- a/web2py/applications/smc/models/menu.py +++ b/web2py/applications/smc/models/menu.py @@ -40,6 +40,7 @@ (T(u'Media Utilities \u2bc8 Scan Media Files'), False, URL('media', 'scan_media_files')), (T(u'Media Utilities \u2bc8 Re-try failed YouTube Videos'), False, URL('media', 'yt_requeue')), (T(u'Media Utilities \u2bc8 Refresh Google Docs'), False, URL('media', 'refresh_google_docs')), + (T(u'Media Utilities \u2bc8 Tag Resouces By Class'), False, URL('media', 'tag_resources_by_class')), (T(u'Media Utilities \u2bc8 WAMAP Import'), False, URL('media', 'wamap_import')), #]), diff --git a/web2py/applications/smc/models/x_scheduler.py b/web2py/applications/smc/models/x_scheduler.py index 11f7b52b..1391fa06 100644 --- a/web2py/applications/smc/models/x_scheduler.py +++ b/web2py/applications/smc/models/x_scheduler.py @@ -1057,7 +1057,176 @@ def flush_redis_keys(): print("Error flushing redis keys! \n" + str(ex)) # Slight pause - let scheduler grab output time.sleep(5) - return true + return True + +def tag_media_in_class(media_id, class_name): + row = db(db.media_files.media_guid==media_id).select().first() + if row is None: + # print("Invalid Media ID: " + str(media_id)) + return False + + # Add course name to tags + tags = row['tags'] + if tags is None: + tags = list() + if class_name not in tags: + tags.append(class_name) + row.update_record(tags=tags) + db.commit() + save_media_file_json(media_id) + return True + +def tag_document_in_class(document_id, class_name): + row = db(db.document_files.document_guid==document_id).select().first() + if row is None: + # print("Invalid Document ID: " + str(document_id)) + return False + + # Add course name to tags + tags = row['tags'] + if tags is None: + tags = list() + if class_name not in tags: + tags.append(class_name) + row.update_record(tags=tags) + db.commit() + save_document_file_json(document_id) + return True + +def find_smc_media_in_text(class_id, class_name, search_text): + import re + links_found = 0 + if search_text is None: + return 0 + + # Regular expression to find google docs + media_find_str = r'''(/static/media/[a-zA-Z0-9]{2}/|/media/player(\.load){0,1}/)([a-zA-Z0-9]+)(\?){0,1}''' + document_find_str = r'''(/media/dl_document/)([a-zA-Z0-9]+)(\?){0,1}''' + + # Match examples + # + # + # https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4 + # https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4?autoplay=true + # https://smc.ed/media/player/24bf1a954e3640f1bdcda6804f7d99c4 + # + # https://smc.ed/smc/static/media/24/24bf1a954e3640f1bdcda6804f7d99c4.mp4 + + # + # https://smc.ed/media/dl_document/3fa5529ded38433ebebe6e1cc41398e9 + + # Find media matches + matches = re.finditer(media_find_str, search_text) + for m in matches: + links_found += 1 + + # ID should be in group 3 + media_id = m.group(3) + # print("Found Media ID: " + media_id) + # Tag media w course info + tag_media_in_class(media_id, class_name) + + + # Find document matches + matches = re.finditer(document_find_str, search_text) + for m in matches: + links_found += 1 + + # ID should be in group 2 + document_id = m.group(2) + # print("Found Document ID: " + document_id) + # Tag it + tag_document_in_class(document_id, class_name) + + return links_found + +def canvas_tag_smc_resources(class_id, class_name): + # print("canvas_tag_smc_resources " + str(class_id) + "/" + str(class_name)) + print("Processing " + str(class_name) + "/" + str(class_id)) + + log_txt = "" + + # === Pull all pages and extract links === + items = Canvas.get_page_list_for_course(class_id) + total_pages = len(items) + total_pages_links = 0 + for i in items: + orig_text = items[i] + + log_txt += "\n\nWorking on Page: " + str(i) + links_found = find_smc_media_in_text(class_id, class_name, orig_text) + total_pages_links += links_found + log_txt += "\n - " + str(links_found) + " links found in text" + + + # === Pull all quizzes and extract links === + items = Canvas.get_quiz_list_for_course(class_id) + total_quizzes = len(items) + total_quizzes_links = 0 + total_questions_links = 0 + for i in items: + orig_text = items[i] + log_txt += "\n\nWorking on Quiz: " + str(i) + + links_found = find_smc_media_in_text(class_id, class_name, orig_text) + total_quizzes_links += links_found + log_txt += "\n - " + str(links_found) + " links found in text" + + quiz_id = i + # === Pull all questions and extract links === + q_items = Canvas.get_quiz_questions_for_quiz(class_id, quiz_id) + total_questions = len(q_items) + for q in q_items: + q_orig_text = q_items[q] + log_txt += "\n\n    Working on question: " + str(q) + + links_found = find_smc_media_in_text(class_id, class_name, q_orig_text) + total_questions_links += links_found + log_txt += "\n - " + str(links_found) + " links found in text" + + + # === Pull all discussion topics and extract links === + items = Canvas.get_discussion_list_for_course(class_id) + total_discussions = len(items) + total_discussions_links = 0 + for i in items: + orig_text = items[i] + log_txt += "\n\nWorking on Discussion: " + str(i) + + links_found = find_smc_media_in_text(class_id, class_name, orig_text) + total_discussions_links += links_found + log_txt += "\n - " + str(links_found) + " links found in text" + + + # === Pull all assignments and extract links === + items = Canvas.get_assignment_list_for_course(class_id) + total_assignments = len(items) + total_assignments_links = 0 + for i in items: + orig_text = items[i] + log_txt += "\n\nWorking on Assignment: " + str(i) + + links_found = find_smc_media_in_text(class_id, class_name, orig_text) + total_assignments_links += links_found + log_txt += "\n - " + str(links_found) + " links found in text" + + total_all_links = total_pages_links + total_quizzes_links + total_questions_links + total_discussions_links + total_assignments_links + print( + "SMC Links Found\n" + + "-----------------------------\n" + + "Page Links {0}\n".format(total_pages_links) + + "Quizz Links {0}\n".format(total_quizzes_links) + + " Quizz Question Links {0}\n".format(total_questions_links) + + "Discussion Links {0}\n".format(total_discussions_links) + + "Assignment Links {0}\n".format(total_assignments_links) + + "-----------------------------\n" + + "Total Links {0}\n".format(total_all_links) + ) + + print(log_txt) + # Slight pause so that output gets sent out + time.sleep(2) + return True # Enable the scheduler from gluon.scheduler import Scheduler @@ -1076,6 +1245,7 @@ def flush_redis_keys(): update_document_database_from_json_files=update_document_database_from_json_files, flush_redis_keys=flush_redis_keys, pull_youtube_caption=pull_youtube_caption, + canvas_tag_smc_resources=canvas_tag_smc_resources, )) current.scheduler = scheduler