From c9a3424330b8794c3d54b0dee2d4b2fe2aa1fe32 Mon Sep 17 00:00:00 2001
From: dj346 <djgaming346@gmail.com>
Date: Thu, 23 Dec 2021 22:33:02 -0800
Subject: [PATCH] Begin adding support for singlefile: add id/URL fields to the
 view classes and a new cookies-path input. Also clean up the code a bit.

---
 .gitignore |   2 +
 export.py  | 222 ++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 153 insertions(+), 71 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6f331e9..a77f76c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 .vscode
 output/
 node_modules/
+
 credentials.yaml
+cookies.txt
diff --git a/export.py b/export.py
index a339540..8f3910e 100644
--- a/export.py
+++ b/export.py
@@ -5,8 +5,8 @@ import string
 
 # external
 from canvasapi import Canvas
-from canvasapi.exceptions import ResourceDoesNotExist
-from canvasapi.exceptions import Unauthorized
+from canvasapi.exceptions import ResourceDoesNotExist, Unauthorized
+
 import dateutil.parser
 import jsonpickle
 import requests
@@ -22,10 +22,13 @@ except OSError:
     API_KEY = ""
     # My Canvas User ID
     USER_ID = 0000000
+    # Browser Cookies File
+    COOKIES_PATH = ""
 else:
     API_URL = credentials["API_URL"]
     API_KEY = credentials["API_KEY"]
     USER_ID = credentials["USER_ID"]
+    COOKIES_PATH = credentials["COOKIES_PATH"]
 
 # Directory in which to download course information to (will be created if not
 # present)
@@ -37,12 +40,18 @@ DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
 
 
 class moduleItemView():
+    id = 0
+    
     title = ""
     content_type = ""
+    
+    url = ""
     external_url = ""
 
 
 class moduleView():
+    id = 0
+
     name = ""
     items = []
 
@@ -51,6 +60,8 @@ class moduleView():
 
 
 class pageView():
+    id = 0
+
     title = ""
     body = ""
     created_date = ""
@@ -58,12 +69,16 @@ class pageView():
 
 
 class topicReplyView():
+    id = 0
+
     author = ""
     posted_date = ""
     body = ""
 
 
 class topicEntryView():
+    id = 0
+
     author = ""
     posted_date = ""
     body = ""
@@ -74,70 +89,82 @@ class topicEntryView():
 
 
 class discussionView():
+    id = 0
+
     title = ""
     author = ""
     posted_date = ""
     body = ""
     topic_entries = []
 
+    url = ""
+    amount_pages = 0
+
     def __init__(self):
         self.topic_entries = []
 
 
 class submissionView():
+    id = 0
+
     attachments = []
     grade = ""
     raw_score = ""
     submission_comments = ""
     total_possible_points = ""
+    attempt = 0
     user_id = "no-id"
 
+    preview_url = ""
+    ext_url = ""
+
     def __init__(self):
         self.attachments = []
-        self.grade = ""
-        self.raw_score = ""
-        self.submission_comments = ""
-        self.total_possible_points = ""
-        self.user_id = None  # integer
-
 
 class attachmentView():
-    filename = ""
     id = 0
+
+    filename = ""
     url = ""
 
-    def __init__(self):
-        self.filename = ""
-        self.id = 0
-        self.url = ""
-
-
 class assignmentView():
+    id = 0
+
     title = ""
     description = ""
     assigned_date = ""
     due_date = ""
     submissions = []
 
+    html_url = ""
+    ext_url = ""
+    updated_url = ""
+    
     def __init__(self):
         self.submissions = []
 
 
 class courseView():
+    course_id = 0
+    
     term = ""
     course_code = ""
     name = ""
     assignments = []
     announcements = []
     discussions = []
+    modules = []
 
     def __init__(self):
         self.assignments = []
         self.announcements = []
         self.discussions = []
-
+        self.modules = []
 
 def makeValidFilename(input_str):
+    if(not input_str):
+        return input_str
+
     # Remove invalid characters
     valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
     input_str = input_str.replace("+"," ") # Canvas default for spaces
@@ -148,6 +175,9 @@ def makeValidFilename(input_str):
     # Remove leading and trailing whitespace
     input_str = input_str.lstrip().rstrip()
 
+    ## Truncates strings to prevent extremely long names (currently disabled)
+    #input_str=input_str[:40]
+
     return input_str
 
 def makeValidFolderPath(input_str):
@@ -162,7 +192,10 @@ def makeValidFolderPath(input_str):
 
     # Replace path separators with OS default
     input_str=input_str.replace("/",os.sep)
-    
+
+    ## Truncates strings to prevent extremely long names (currently disabled)
+    #input_str=input_str[:40]
+
     return input_str
 
 def findCourseModules(course, course_view):
@@ -181,6 +214,9 @@ def findCourseModules(course, course_view):
         for module in modules:
             module_view = moduleView()
 
+            # ID
+            module_view.id = module.id if hasattr(module, "id") else ""
+
             # Name
             module_view.name = str(module.name) if hasattr(module, "name") else ""
 
@@ -191,17 +227,23 @@ def findCourseModules(course, course_view):
                 for module_item in module_items:
                     module_item_view = moduleItemView()
 
+                    # ID
+                    module_item_view.id = module_item.id if hasattr(module_item, "id") else 0
+
                     # Title
                     module_item_view.title = str(module_item.title) if hasattr(module_item, "title") else ""
-
                     # Type
                     module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
 
+                    # URL
+                    module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else ""
                     # External URL
                     module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
 
                     if module_item_view.content_type == "File":
-                        module_dir = modules_dir + "/" + makeValidFilename(str(module.name))
+                        # The module directory is named after module.id rather than module.name to keep pathnames short.
+                        # NOTE(review): downloadCourseModulePages(api_url, course_view, cookies_path) presumably must use the same layout - confirm.
+                        module_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id)), "files") 
 
                         try:
                             # Create directory for current module if not present
@@ -212,7 +254,7 @@ def findCourseModules(course, course_view):
                             module_file = course.get_file(str(module_item.content_id))
 
                             # Create path for module file download
-                            module_file_path = module_dir + "/" + makeValidFilename(str(module_file.display_name))
+                            module_file_path = os.path.join(module_dir, makeValidFilename(str(module_file.display_name)))
 
                             # Download file if it doesn't already exist
                             if not os.path.exists(module_file_path):
@@ -250,13 +292,12 @@ def downloadCourseFiles(course, course_view):
         for file in files:
             file_folder=course.get_folder(file.folder_id)
             
-            folder_dl_dir=os.path.join(dl_dir,makeValidFolderPath(file_folder.full_name))
+            folder_dl_dir=os.path.join(dl_dir, makeValidFolderPath(file_folder.full_name))
             
             if not os.path.exists(folder_dl_dir):
                 os.makedirs(folder_dl_dir)
         
-            dl_path = os.path.join(folder_dl_dir,
-                                   makeValidFilename(str(file.display_name)))
+            dl_path = os.path.join(folder_dl_dir, makeValidFilename(str(file.display_name)))
 
             # Download file if it doesn't already exist
             if not os.path.exists(dl_path):
@@ -324,22 +365,19 @@ def findCoursePages(course):
 
             page_view = pageView()
 
+            # ID
+            page_view.id = page.id if hasattr(page, "id") else 0
+
             # Title
             page_view.title = str(page.title) if hasattr(page, "title") else ""
             # Body
             page_view.body = str(page.body) if hasattr(page, "body") else ""
             # Date created
-            if hasattr(page, "created_at"):
-                page_view.created_date = dateutil.parser.parse(
-                    page.created_at).strftime(DATE_TEMPLATE)
-            else:
-                page_view.created_date = ""
+            page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE) if \
+                hasattr(page, "created_at") else ""
             # Date last updated
-            if hasattr(page, "updated_at"):
-                page_view.last_updated_date = dateutil.parser.parse(
-                    page.updated_at).strftime(DATE_TEMPLATE)
-            else:
-                page_view.last_updated_date = ""
+            page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE) if \
+                hasattr(page, "updated_at") else ""
 
             page_views.append(page_view)
     except Exception as e:
@@ -360,26 +398,33 @@ def findCourseAssignments(course):
             # Create a new assignment view
             assignment_view = assignmentView()
 
+            #ID
+            assignment_view.id = assignment.id if \
+                hasattr(assignment, "id") else ""
+
             # Title
-            if hasattr(assignment, "name"):
-                assignment_view.title = makeValidFilename(str(assignment.name))
-            else:
-                assignment_view.title = ""
+            assignment_view.title = makeValidFilename(str(assignment.name)) if \
+                hasattr(assignment, "name") else ""
             # Description
-            if hasattr(assignment, "description"):
-                assignment_view.description = str(assignment.description)
-            else:
-                assignment_view.description = ""
+            assignment_view.description = str(assignment.description) if \
+                hasattr(assignment, "description") else ""
+            
             # Assigned date
-            if hasattr(assignment, "created_at_date"):
-                assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE)
-            else:
-                assignment_view.assigned_date = ""
+            assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if \
+                hasattr(assignment, "created_at_date") else ""
             # Due date
-            if hasattr(assignment, "due_at_date"):
-                assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE)
-            else:
-                assignment_view.due_date = ""
+            assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if \
+                hasattr(assignment, "due_at_date") else ""    
+
+            # HTML Url
+            assignment_view.html_url = assignment.html_url if \
+                hasattr(assignment, "html_url") else ""   
+            # External URL
+            assignment_view.ext_url = str(assignment.url) if \
+                hasattr(assignment, "url") else ""
+            # Other URL (more up-to-date)
+            assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if \
+                hasattr(assignment, "submissions_download_url") else ""
 
             try:
                 try: # Download all submissions for entire class
@@ -401,31 +446,35 @@ def findCourseAssignments(course):
 
                         sub_view = submissionView()
 
+                        # Submission ID
+                        sub_view.id = submission.id if \
+                            hasattr(submission, "id") else 0
+                            
                         # My grade
-                        if hasattr(submission, "grade"):
-                            sub_view.grade = str(submission.grade)
-                        else:
-                            sub_view.grade = ""
+                        sub_view.grade = str(submission.grade) if \
+                            hasattr(submission, "grade") else ""
                         # My raw score
-                        if hasattr(submission, "score"):
-                            sub_view.raw_score = str(submission.score)
-                        else:
-                            sub_view.raw_score = ""
+                        sub_view.raw_score = str(submission.score) if \
+                            hasattr(submission, "score") else ""
                         # Total possible score
-                        if hasattr(assignment, "points_possible"):
-                            sub_view.total_possible_points = str(assignment.points_possible)
-                        else:
-                            sub_view.total_possible_points = ""
+                        sub_view.total_possible_points = str(assignment.points_possible) if \
+                            hasattr(assignment, "points_possible") else ""
                         # Submission comments
-                        if hasattr(submission, "submission_comments"):
-                            sub_view.submission_comments = str(submission.submission_comments)
-                        else:
-                            sub_view.submission_comments = ""
-
-                        if hasattr(submission, "user_id"):
-                            sub_view.user_id = str(submission.user_id)
-                        else:
-                            sub_view.user_id = "no-id"
+                        sub_view.submission_comments = str(submission.submission_comments) if \
+                            hasattr(submission, "submission_comments") else ""
+                        # Attempt
+                        sub_view.attempt = submission.attempt if \
+                            hasattr(submission, "attempt") else 0
+                        # User ID
+                        sub_view.user_id = str(submission.user_id) if \
+                            hasattr(submission, "user_id") else ""
+                        
+                        # Submission URL
+                        sub_view.preview_url = str(submission.preview_url) if \
+                            hasattr(submission, "preview_url") else ""
+                        #   External URL
+                        sub_view.ext_url = str(submission.url) if \
+                            hasattr(submission, "url") else ""
 
                         try:
                             submission.attachments
@@ -472,6 +521,9 @@ def getDiscussionView(discussion_topic):
     # Create discussion view
     discussion_view = discussionView()
 
+    #ID
+    discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
+
     # Title
     discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else ""
     # Author
@@ -480,6 +532,13 @@ def getDiscussionView(discussion_topic):
     discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else ""
     # Body
     discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
+
+    # URL
+    discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else ""
+    
+    # Keeps track of how many topic_entries there are.
+    topic_entries_counter = 0
+
     # Topic entries
     if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
         # Need to get replies to entries recursively?
@@ -488,9 +547,13 @@ def getDiscussionView(discussion_topic):
 
         try:
             for topic_entry in discussion_topic_entries:
+                topic_entries_counter += 1
+                
                 # Create new discussion view for the topic_entry
                 topic_entry_view = topicEntryView()
 
+                # ID
+                topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
                 # Author
                 topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
                 # Posted date
@@ -505,6 +568,9 @@ def getDiscussionView(discussion_topic):
                     for topic_reply in topic_entry_replies:
                         # Create new topic reply view
                         topic_reply_view = topicReplyView()
+                        
+                        # ID
+                        topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
 
                         # Author
                         topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
@@ -522,7 +588,10 @@ def getDiscussionView(discussion_topic):
         except Exception as e:
             print("Tried to enumerate discussion topic entries but received the following error:")
             print(e)
-
+        
+    # Number of pages. NOTE(review): this yields one page too many when the entry count is an exact multiple of 50.
+    discussion_view.amount_pages = int(topic_entries_counter/50) + 1 # Typically 50 topic entries are stored on a page before it creates another page.
+    
     return discussion_view
 
 
@@ -547,6 +616,9 @@ def findCourseDiscussions(course):
 def getCourseView(course):
     course_view = courseView()
 
+    # Course ID
+    course_view.course_id = course.id if hasattr(course, "id") else 0
+
     # Course term
     course_view.term = makeValidFilename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
 
@@ -616,6 +688,14 @@ if __name__ == "__main__":
               "logging in to canvas and then going to this URL in the same "
               "browser {yourCanvasBaseUrl}/api/v1/users/self")
         USER_ID = input("Enter your Canvas User ID: ")
+    
+    if COOKIES_PATH == "": 
+        # Cookies path
+        print("\nWe will need your browsers cookies file. This needs to be "
+              "exported using another tool. This needs to be a path to a file "
+              "formatted in the NetScape format. This can be left blank if an html "
+              "images aren't wanted. ")
+        COOKIES_PATH = input("Enter your cookies path: ")
 
     print("\nConnecting to canvas\n")