From f66791d0fb46ff88b5b1073d01853b7eae5f9c3b Mon Sep 17 00:00:00 2001
From: Raymond Pulsipher <ray@cmagic.biz>
Date: Fri, 4 Jun 2021 12:46:04 -0700
Subject: [PATCH] - Fix json import issue when re-scanning documents/media from
 imported tool. - Work on tagging media/documents based on what classes they
 are used in.

---
 web2py/applications/smc/controllers/media.py  | 174 +++++++++++++++++-
 .../applications/smc/models/b_media_code.py   |  13 ++
 web2py/applications/smc/models/menu.py        |   1 +
 web2py/applications/smc/models/x_scheduler.py | 172 ++++++++++++++++-
 4 files changed, 356 insertions(+), 4 deletions(-)
diff --git a/web2py/applications/smc/controllers/media.py b/web2py/applications/smc/controllers/media.py
index 2c12a88c..7e0bf248 100644
--- a/web2py/applications/smc/controllers/media.py
+++ b/web2py/applications/smc/controllers/media.py
@@ -20,6 +20,174 @@
 from bs4 import BeautifulSoup as bs
 
 
+
+@auth.requires(lambda: auth.has_membership('Faculty') or auth.has_membership('Administrators'))
+def tag_resources_by_class_status():
+    """Render an HTML status table for the canvas_tag_smc_resources jobs.
+
+    Shows how many of those scheduler tasks are queued, running, completed
+    and failed, followed by the status, output and traceback of the most
+    recent task that finished (COMPLETED or FAILED).
+
+    Returns:
+        The table markup wrapped in XML().
+    """
+    tasks = db_scheduler.scheduler_task
+    is_tag_task = (tasks.task_name == "canvas_tag_smc_resources")
+
+    def count_with_status(status):
+        # Number of tagging tasks currently in the given scheduler status.
+        return db_scheduler(is_tag_task & (tasks.status == status)).count()
+
+    running_count = count_with_status("RUNNING")
+    completed_count = count_with_status("COMPLETED")
+    queued_count = count_with_status("QUEUED")
+    failed_count = count_with_status("FAILED")
+
+    # Newest task that has finished, whether it succeeded or not.
+    last_run_query = is_tag_task & ((tasks.status == "FAILED") | (tasks.status == "COMPLETED"))
+    last_task_record = db_scheduler(last_run_query).select(orderby=~tasks.id).first()
+
+    last_run_output = ""
+    last_run_traceback = ""
+    last_run_status = ""
+    if last_task_record:
+        last_run_status = last_task_record.status
+        # With several run records, the last one iterated wins.
+        for r in last_task_record.scheduler_run.select():
+            last_run_output = r['run_output']
+            last_run_traceback = r['traceback']
+
+    if last_run_output is None:
+        last_run_output = ""
+    if last_run_traceback is None:
+        last_run_traceback = ""
+
+    # Tracebacks are plain text: escape them so fragments such as "<module>"
+    # are shown instead of being parsed as unknown HTML tags.
+    last_run_traceback = (
+        last_run_traceback.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+    )
+
+    # NOTE(review): the run output itself is inserted unescaped; confirm it
+    # can only ever contain trusted markup.
+    last_run_output = "<b style='color: blue'>" + str(last_run_status) + "</b>\n" + last_run_output + "\n" + last_run_traceback
+    last_run_output = last_run_output.replace("\n", "<br>")
+
+    # Make a table for the results
+    small = "font-size: small;"
+    t = TABLE(
+        TR(
+            TH("Job Stats"),
+            TH("")
+        ),
+        TR(
+            TD("Queued:", _style=small),
+            TD(queued_count, _style=small)
+        ),
+        TR(
+            TD("Running:", _style=small),
+            TD(running_count, _style=small)
+        ),
+        TR(
+            TD("Completed:", _style=small),
+            TD(completed_count, _style=small)
+        ),
+        TR(
+            TD("Failed:", _style=small),
+            TD(failed_count, _style=small)
+        ),
+        TR(
+            TD(" "),
+            TD(" ")
+        ),
+        TR(TH("Last Run Output:")),
+        TR(
+            TD(
+                XML(last_run_output),
+                _colspan=2,
+                _style="font-size: x-small"
+            )
+        ),
+        _style='width: 400px; margin-left: 25px; margin-right: 25px;',
+    )
+
+    return XML(t)
+
+@auth.requires(lambda: auth.has_membership('Faculty') or auth.has_membership('Administrators'))
+def tag_resources_by_class():
+    """Let faculty queue jobs that tag SMC media/documents by Canvas course.
+
+    Builds a course picker from Canvas.get_courses_for_faculty() for the
+    logged-in user. When the form is accepted, one canvas_tag_smc_resources
+    scheduler task is queued per chosen course ("ALL COURSES" queues one
+    for every course returned).
+
+    Returns:
+        dict with a 'form' entry: the picker form, or a red notice when
+        Canvas integration is not enabled.
+    """
+    ret = {
+        'form': None
+    }
+
+    Canvas.Close()
+    Canvas.Init()
+
+    if Canvas._canvas_integration_enabled is not True:
+        form = "<b style='color: red; font-size: 48px;'>Canvas Integration needs to be Enabled in the admin menu before this tool will work.</b>"
+        return dict(form=XML(form))
+
+    courses = Canvas.get_courses_for_faculty(auth.user.username)
+
+    # List the courses sorted by name. Sorting the (id, name) pairs keeps
+    # courses that share a name as separate options.
+    course_list = [OPTION("ALL COURSES", _value="ALL COURSES")]
+    for course_id, course_name in sorted(courses.items(), key=lambda item: (str(item[1]), str(item[0]))):
+        course_list.append(OPTION(str(course_name), _value=str(course_id)))
+
+    course_select = SELECT(course_list, _name="current_course", _id="current_course", _style="width: 600px;")
+
+    form = FORM(TABLE(TR("Choose a course: ", course_select),
+                      TR("", INPUT(_type="submit", _value="Next"))), _name="fr_step1").process(formname="fr_step1",
+                                                                                               keepvalues=True)
+    ret['form'] = form
+
+    if form.accepted:
+        selected_course = form.vars.current_course
+        selected_courses = dict()
+        if selected_course == "ALL COURSES":
+            selected_courses = courses
+        else:
+            # Option values are course IDs; anything non-numeric (e.g. a
+            # hand-edited POST) selects nothing instead of raising.
+            try:
+                selected_id = int(selected_course)
+            except (TypeError, ValueError):
+                selected_id = None
+            if selected_id is not None and selected_id in courses:
+                selected_courses[selected_id] = courses[selected_id]
+
+        if not selected_courses:
+            response.flash = "No courses selected!"
+        else:
+            job_count = 0
+            # Queue one tagging job per selected course.
+            for class_id, class_name in selected_courses.items():
+                scheduler.queue_task(
+                    'canvas_tag_smc_resources',
+                    pvars=dict(class_id=class_id, class_name=class_name),
+                    timeout=3600,
+                    immediate=True,
+                    sync_output=1,
+                    group_name="misc"
+                )
+                job_count += 1
+
+            response.flash = str(job_count) + " job(s) scheduled."
+
+    return ret
+
 def media_list():
     response.view = "default.json"
     ret = list()
@@ -421,11 +589,11 @@ def dl_document():
         document_file = db(db.document_files.document_guid == document_id).select().first()
         media_type = ""
         if document_file is not None:
-            title = document_file.title
-            description = document_file.description
+            title = str(document_file.title)
+            description = str(document_file.description)
             tags = ",".join(document_file.tags)
             views = document_file.views
-            original_file_name = document_file.original_file_name
+            original_file_name = str(document_file.original_file_name)
         pass
         p, media_type = os.path.splitext(original_file_name)
         mimetypes.init()
diff --git a/web2py/applications/smc/models/b_media_code.py b/web2py/applications/smc/models/b_media_code.py
index 3e0a1185..f5daa34b 100644
--- a/web2py/applications/smc/models/b_media_code.py
+++ b/web2py/applications/smc/models/b_media_code.py
@@ -15,6 +15,7 @@
 import requests
 from langcodes import *
 import webvtt
+import traceback
 
 from ednet.canvas import Canvas
 
@@ -236,6 +237,10 @@ def load_media_file_json(file_guid):
                 else:
                     meta[f] = ""
         
+        if type(meta['tags']) is list:
+            # Need to convert to string for later
+            meta['tags'] = dumps(meta['tags'])
+            
         # See if the item is in the database
         item = db.media_files(media_guid=meta['media_guid'])
         if item is None:
@@ -328,6 +333,10 @@ def load_document_file_json(file_guid):
                 else:
                     meta[f] = ""
         
+        if type(meta['tags']) is list:
+            # Need to convert to string for later
+            meta['tags'] = dumps(meta['tags'])
+
         # See if the item is in the database
         item = db.document_files(document_guid=meta['document_guid'])
         if item is None:
@@ -346,8 +355,11 @@ def load_document_file_json(file_guid):
             db.commit()
     except Exception as ex:
         print("Error processing document file: ", json_file, str(ex))
+        traceback.print_exc()
         # db.rollback()
 
+    # Make sure to release the lock on this db
+    db.commit()
     return True
 
 
@@ -483,3 +495,4 @@ def getPDFURLS(txt):
         if 'pdf' in r:
             ret.append(r)
     return ret
+
diff --git a/web2py/applications/smc/models/menu.py b/web2py/applications/smc/models/menu.py
index c063a7f1..0c118266 100644
--- a/web2py/applications/smc/models/menu.py
+++ b/web2py/applications/smc/models/menu.py
@@ -40,6 +40,7 @@
             (T(u'Media Utilities \u2bc8 Scan Media Files'), False, URL('media', 'scan_media_files')),
             (T(u'Media Utilities \u2bc8 Re-try failed YouTube Videos'), False, URL('media', 'yt_requeue')),
             (T(u'Media Utilities \u2bc8 Refresh Google Docs'), False, URL('media', 'refresh_google_docs')),
+            (T(u'Media Utilities \u2bc8 Tag Resouces By Class'), False, URL('media', 'tag_resources_by_class')),
             (T(u'Media Utilities \u2bc8 WAMAP Import'), False, URL('media', 'wamap_import')),
         #]),
 
diff --git a/web2py/applications/smc/models/x_scheduler.py b/web2py/applications/smc/models/x_scheduler.py
index 11f7b52b..1391fa06 100644
--- a/web2py/applications/smc/models/x_scheduler.py
+++ b/web2py/applications/smc/models/x_scheduler.py
@@ -1057,7 +1057,176 @@ def flush_redis_keys():
         print("Error flushing redis keys! \n" + str(ex))
     # Slight pause - let scheduler grab output
     time.sleep(5)
-    return true
+    return True
+
+def tag_media_in_class(media_id, class_name):
+    """Tag one media file with a class name; False if media_id is unknown."""
+    media_row = db(db.media_files.media_guid == media_id).select().first()
+    if media_row is None:
+        return False
+
+    # A missing (None) tags value is treated as an empty list.
+    current_tags = list() if media_row['tags'] is None else media_row['tags']
+    if class_name in current_tags:
+        return True
+    # New tag: store it, commit, then hand off to save_media_file_json.
+    current_tags.append(class_name)
+    media_row.update_record(tags=current_tags)
+    db.commit()
+    save_media_file_json(media_id)
+    return True
+
+def tag_document_in_class(document_id, class_name):
+    """Tag one document with a class name; False if document_id is unknown."""
+    doc_row = db(db.document_files.document_guid == document_id).select().first()
+    if doc_row is None:
+        return False
+
+    # A missing (None) tags value is treated as an empty list.
+    current_tags = list() if doc_row['tags'] is None else doc_row['tags']
+    if class_name in current_tags:
+        return True
+    # New tag: store it, commit, then hand off to save_document_file_json.
+    current_tags.append(class_name)
+    doc_row.update_record(tags=current_tags)
+    db.commit()
+    save_document_file_json(document_id)
+    return True
+
+def find_smc_media_in_text(class_id, class_name, search_text):
+    """Find SMC media/document links in search_text and tag each target.
+
+    Every regex match counts as one link; the matched ID is passed along
+    with class_name to tag_media_in_class() or tag_document_in_class().
+    class_id is accepted but not used in this function.
+
+    Returns:
+        The number of links matched (0 when search_text is None).
+    """
+    import re
+    link_total = 0
+    if search_text is None:
+        return 0
+
+    # Patterns for links to SMC media and to SMC documents
+    media_find_str = r'''(/static/media/[a-zA-Z0-9]{2}/|/media/player(\.load){0,1}/)([a-zA-Z0-9]+)(\?){0,1}'''
+    document_find_str = r'''(/media/dl_document/)([a-zA-Z0-9]+)(\?){0,1}'''
+
+    # Media link examples
+    # <iframe width="650" height="405" src="https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4" frameborder="0" allowfullscreen></iframe>
+    # <iframe width="650" height="405" src="https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4?autoplay=true" frameborder="0" allowfullscreen></iframe>
+    # https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4
+    # https://smc.ed/media/player.load/24bf1a954e3640f1bdcda6804f7d99c4?autoplay=true
+    # https://smc.ed/media/player/24bf1a954e3640f1bdcda6804f7d99c4
+    # <iframe width="650" height="405" src="https://smc.ed/media/player/24bf1a954e3640f1bdcda6804f7d99c4" frameborder="0" allowfullscreen></iframe>
+    # https://smc.ed/smc/static/media/24/24bf1a954e3640f1bdcda6804f7d99c4.mp4
+
+    # Document link examples
+    # <iframe src="https://smc.ed/smc/static/ViewerJS/index.html#/media/dl_document/3fa5529ded38433ebebe6e1cc41398e9" width="100%" height="720" allowfullscreen="allowfullscreen" webkitallowfullscreen="webkitallowfullscreen"></iframe>
+    # https://smc.ed/media/dl_document/3fa5529ded38433ebebe6e1cc41398e9
+
+    # Media links: the ID is capture group 3
+    # (group 2 is the optional ".load" part of the player URL)
+    for media_match in re.finditer(media_find_str, search_text):
+        link_total += 1
+        media_id = media_match.group(3)
+        tag_media_in_class(media_id, class_name)
+
+    # Document links: the ID is capture group 2
+    for document_match in re.finditer(document_find_str, search_text):
+        link_total += 1
+        document_id = document_match.group(2)
+        tag_document_in_class(document_id, class_name)
+
+    return link_total
+
+def canvas_tag_smc_resources(class_id, class_name):
+    """Scheduler task: tag SMC resources linked from one Canvas course.
+
+    Scans the text of the course's pages, quizzes (and each quiz's
+    questions), discussion topics and assignments with
+    find_smc_media_in_text(), then prints a link-count summary followed by
+    a per-item log.
+
+    Each Canvas list helper's result is used here as a mapping of item key
+    to the text to scan (inferred from how it is indexed below; confirm
+    against the Canvas helpers).
+
+    Args:
+        class_id: Course ID passed to the Canvas list helpers.
+        class_name: Course name passed on as the tag to apply.
+
+    Returns:
+        True (after printing the report).
+    """
+    print("Processing " + str(class_name) + "/" + str(class_id))
+
+    # Log fragments are collected in a list and joined once at the end
+    # instead of growing one string with repeated "+=".
+    log_parts = []
+
+    def scan_item(heading, text):
+        # Log the heading, tag the links found in text, log the count and
+        # return it so the caller can add it to the right total.
+        log_parts.append("\n\n" + heading)
+        links_found = find_smc_media_in_text(class_id, class_name, text)
+        log_parts.append("\n - " + str(links_found) + " links found in text")
+        return links_found
+
+    # === Pull all pages and extract links ===
+    items = Canvas.get_page_list_for_course(class_id)
+    total_pages_links = 0
+    for i in items:
+        total_pages_links += scan_item("Working on Page: " + str(i), items[i])
+
+    # === Pull all quizzes and extract links ===
+    items = Canvas.get_quiz_list_for_course(class_id)
+    total_quizzes_links = 0
+    total_questions_links = 0
+    for quiz_id in items:
+        total_quizzes_links += scan_item("Working on Quiz: " + str(quiz_id), items[quiz_id])
+
+        # === Pull all questions and extract links ===
+        # Question headings are indented with &nbsp; entities.
+        q_items = Canvas.get_quiz_questions_for_quiz(class_id, quiz_id)
+        for q in q_items:
+            total_questions_links += scan_item(
+                "&nbsp;&nbsp;&nbsp;&nbsp;Working on question: " + str(q), q_items[q]
+            )
+
+    # === Pull all discussion topics and extract links ===
+    items = Canvas.get_discussion_list_for_course(class_id)
+    total_discussions_links = 0
+    for i in items:
+        total_discussions_links += scan_item("Working on Discussion: " + str(i), items[i])
+
+    # === Pull all assignments and extract links ===
+    items = Canvas.get_assignment_list_for_course(class_id)
+    total_assignments_links = 0
+    for i in items:
+        total_assignments_links += scan_item("Working on Assignment: " + str(i), items[i])
+
+    # Summary first, then the detailed per-item log.
+    total_all_links = (
+        total_pages_links + total_quizzes_links + total_questions_links
+        + total_discussions_links + total_assignments_links
+    )
+    print(
+        "<b>SMC Links Found</b>\n" +
+        "-----------------------------\n" +
+        "Page Links                 {0}\n".format(total_pages_links) +
+        "Quizz Links                {0}\n".format(total_quizzes_links) +
+        " Quizz Question Links      {0}\n".format(total_questions_links) +
+        "Discussion Links           {0}\n".format(total_discussions_links) +
+        "Assignment Links           {0}\n".format(total_assignments_links) +
+        "-----------------------------\n" +
+        "Total Links                    {0}\n".format(total_all_links)
+    )
+
+    print("".join(log_parts))
+    # Slight pause so that output gets sent out
+    time.sleep(2)
+    return True
 
 # Enable the scheduler
 from gluon.scheduler import Scheduler
@@ -1076,6 +1245,7 @@ def flush_redis_keys():
                                  update_document_database_from_json_files=update_document_database_from_json_files,
                                  flush_redis_keys=flush_redis_keys,
                                  pull_youtube_caption=pull_youtube_caption,
+                                 canvas_tag_smc_resources=canvas_tag_smc_resources,
                                  ))
 current.scheduler = scheduler