From c9a3424330b8794c3d54b0dee2d4b2fe2aa1fe32 Mon Sep 17 00:00:00 2001 From: dj346 Date: Thu, 23 Dec 2021 22:33:02 -0800 Subject: [PATCH] Begun adding support for singlefile, added to classes and created new input. Also cleaned up code a bit. --- .gitignore | 2 + export.py | 222 ++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 153 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 6f331e9..a77f76c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .vscode output/ node_modules/ + credentials.yaml +cookies.txt diff --git a/export.py b/export.py index a339540..8f3910e 100644 --- a/export.py +++ b/export.py @@ -5,8 +5,8 @@ import string # external from canvasapi import Canvas -from canvasapi.exceptions import ResourceDoesNotExist -from canvasapi.exceptions import Unauthorized +from canvasapi.exceptions import ResourceDoesNotExist, Unauthorized + import dateutil.parser import jsonpickle import requests @@ -22,10 +22,13 @@ except OSError: API_KEY = "" # My Canvas User ID USER_ID = 0000000 + # Browser Cookies File + COOKIES_PATH = "" else: API_URL = credentials["API_URL"] API_KEY = credentials["API_KEY"] USER_ID = credentials["USER_ID"] + COOKIES_PATH = credentials["COOKIES_PATH"] # Directory in which to download course information to (will be created if not # present) @@ -37,12 +40,18 @@ DATE_TEMPLATE = "%B %d, %Y %I:%M %p" class moduleItemView(): + id = 0 + title = "" content_type = "" + + url = "" external_url = "" class moduleView(): + id = 0 + name = "" items = [] @@ -51,6 +60,8 @@ class moduleView(): class pageView(): + id = 0 + title = "" body = "" created_date = "" @@ -58,12 +69,16 @@ class pageView(): class topicReplyView(): + id = 0 + author = "" posted_date = "" body = "" class topicEntryView(): + id = 0 + author = "" posted_date = "" body = "" @@ -74,70 +89,82 @@ class topicEntryView(): class discussionView(): + id = 0 + title = "" author = "" posted_date = "" body = "" topic_entries = [] + url = "" + amount_pages = 0 + def __init__(self): self.topic_entries = [] class submissionView(): + id = 0 + attachments = [] grade = "" raw_score = "" submission_comments = "" total_possible_points = "" + attempt = 0 user_id = "no-id" + preview_url = "" + ext_url = "" + def __init__(self): self.attachments = [] - self.grade = "" - self.raw_score = "" - self.submission_comments = "" - self.total_possible_points = "" - self.user_id = None # integer - class attachmentView(): - filename = "" id = 0 + + filename = "" url = "" - def __init__(self): - self.filename = "" - self.id = 0 - self.url = "" - - class assignmentView(): + id = 0 + title = "" description = "" assigned_date = "" due_date = "" submissions = [] + html_url = "" + ext_url = "" + updated_url = "" + def __init__(self): self.submissions = [] class courseView(): + course_id = 0 + term = "" course_code = "" name = "" assignments = [] announcements = [] discussions = [] + modules = [] def __init__(self): self.assignments = [] self.announcements = [] self.discussions = [] - + self.modules = [] def makeValidFilename(input_str): + if(not input_str): + return input_str + # Remove invalid characters valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) input_str = input_str.replace("+"," ") # Canvas default for spaces @@ -148,6 +175,9 @@ def makeValidFilename(input_str): # Remove leading and trailing whitespace input_str = input_str.lstrip().rstrip() + ##Splits strings to prevent extremely long names + #input_str=input_str[:40] + return input_str def makeValidFolderPath(input_str): @@ -162,7 +192,10 @@ def makeValidFolderPath(input_str): # Replace path separators with OS default input_str=input_str.replace("/",os.sep) - + + ##Splits strings to prevent extremely long names + #input_str=input_str[:40] + return input_str def findCourseModules(course, course_view): @@ -181,6 +214,9 @@ def findCourseModules(course, course_view): for module in modules: module_view = moduleView() + # ID + module_view.id = module.id if hasattr(module, "id") else "" + # Name module_view.name = str(module.name) if hasattr(module, "name") else "" @@ -191,17 +227,23 @@ def findCourseModules(course, course_view): for module_item in module_items: module_item_view = moduleItemView() + # ID + module_item_view.id = module_item.id if hasattr(module_item, "id") else 0 + # Title module_item_view.title = str(module_item.title) if hasattr(module_item, "title") else "" - # Type module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else "" + # URL + module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else "" # External URL module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else "" if module_item_view.content_type == "File": - module_dir = modules_dir + "/" + makeValidFilename(str(module.name)) + # If problems arise due to long pathnames, changing module.name to module.id might help + # A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path) + module_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id)), "files") try: # Create directory for current module if not present @@ -212,7 +254,7 @@ def findCourseModules(course, course_view): module_file = course.get_file(str(module_item.content_id)) # Create path for module file download - module_file_path = module_dir + "/" + makeValidFilename(str(module_file.display_name)) + module_file_path = os.path.join(module_dir, makeValidFilename(str(module_file.display_name))) # Download file if it doesn't already exist if not os.path.exists(module_file_path): @@ -250,13 +292,12 @@ def downloadCourseFiles(course, course_view): for file in files: file_folder=course.get_folder(file.folder_id) - folder_dl_dir=os.path.join(dl_dir,makeValidFolderPath(file_folder.full_name)) + folder_dl_dir=os.path.join(dl_dir, makeValidFolderPath(file_folder.full_name)) if not os.path.exists(folder_dl_dir): os.makedirs(folder_dl_dir) - dl_path = os.path.join(folder_dl_dir, - makeValidFilename(str(file.display_name))) + dl_path = os.path.join(folder_dl_dir, makeValidFilename(str(file.display_name))) # Download file if it doesn't already exist if not os.path.exists(dl_path): @@ -324,22 +365,19 @@ def findCoursePages(course): page_view = pageView() + # ID + page_view.id = page.id if hasattr(page, "id") else 0 + # Title page_view.title = str(page.title) if hasattr(page, "title") else "" # Body page_view.body = str(page.body) if hasattr(page, "body") else "" # Date created - if hasattr(page, "created_at"): - page_view.created_date = dateutil.parser.parse( - page.created_at).strftime(DATE_TEMPLATE) - else: - page_view.created_date = "" + page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE) if \ + hasattr(page, "created_at") else "" # Date last updated - if hasattr(page, "updated_at"): - page_view.last_updated_date = dateutil.parser.parse( - page.updated_at).strftime(DATE_TEMPLATE) - else: - page_view.last_updated_date = "" + page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE) if \ + hasattr(page, "updated_at") else "" page_views.append(page_view) except Exception as e: @@ -360,26 +398,33 @@ def findCourseAssignments(course): # Create a new assignment view assignment_view = assignmentView() + #ID + assignment_view.id = assignment.id if \ + hasattr(assignment, "id") else "" + # Title - if hasattr(assignment, "name"): - assignment_view.title = makeValidFilename(str(assignment.name)) - else: - assignment_view.title = "" + assignment_view.title = makeValidFilename(str(assignment.name)) if \ + hasattr(assignment, "name") else "" # Description - if hasattr(assignment, "description"): - assignment_view.description = str(assignment.description) - else: - assignment_view.description = "" + assignment_view.description = str(assignment.description) if \ + hasattr(assignment, "description") else "" + # Assigned date - if hasattr(assignment, "created_at_date"): - assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) - else: - assignment_view.assigned_date = "" + assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if \ + hasattr(assignment, "created_at_date") else "" # Due date - if hasattr(assignment, "due_at_date"): - assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) - else: - assignment_view.due_date = "" + assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if \ + hasattr(assignment, "due_at_date") else "" + + # HTML Url + assignment_view.html_url = assignment.html_url if \ + hasattr(assignment, "html_url") else "" + # External URL + assignment_view.ext_url = str(assignment.url) if \ + hasattr(assignment, "url") else "" + # Other URL (more up-to-date) + assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if \ + hasattr(assignment, "submissions_download_url") else "" try: try: # Download all submissions for entire class @@ -401,31 +446,35 @@ def findCourseAssignments(course): sub_view = submissionView() + # Submission ID + sub_view.id = submission.id if \ + hasattr(submission, "id") else 0 + # My grade - if hasattr(submission, "grade"): - sub_view.grade = str(submission.grade) - else: - sub_view.grade = "" + sub_view.grade = str(submission.grade) if \ + hasattr(submission, "grade") else "" # My raw score - if hasattr(submission, "score"): - sub_view.raw_score = str(submission.score) - else: - sub_view.raw_score = "" + sub_view.raw_score = str(submission.score) if \ + hasattr(submission, "score") else "" # Total possible score - if hasattr(assignment, "points_possible"): - sub_view.total_possible_points = str(assignment.points_possible) - else: - sub_view.total_possible_points = "" + sub_view.total_possible_points = str(assignment.points_possible) if \ + hasattr(assignment, "points_possible") else "" # Submission comments - if hasattr(submission, "submission_comments"): - sub_view.submission_comments = str(submission.submission_comments) - else: - sub_view.submission_comments = "" - - if hasattr(submission, "user_id"): - sub_view.user_id = str(submission.user_id) - else: - sub_view.user_id = "no-id" + sub_view.submission_comments = str(submission.submission_comments) if \ + hasattr(submission, "submission_comments") else "" + # Attempt + sub_view.attempt = submission.attempt if \ + hasattr(submission, "attempt") else 0 + # User ID + sub_view.user_id = str(submission.user_id) if \ + hasattr(submission, "user_id") else "" + + # Submission URL + sub_view.preview_url = str(submission.preview_url) if \ + hasattr(submission, "preview_url") else "" + # External URL + sub_view.ext_url = str(submission.url) if \ + hasattr(submission, "url") else "" try: submission.attachments @@ -472,6 +521,9 @@ def getDiscussionView(discussion_topic): # Create discussion view discussion_view = discussionView() + #ID + discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0 + # Title discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else "" # Author @@ -480,6 +532,13 @@ def getDiscussionView(discussion_topic): discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else "" # Body discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else "" + + # URL + discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else "" + + # Keeps track of how many topic_entries there are. + topic_entries_counter = 0 + # Topic entries if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0: # Need to get replies to entries recursively? @@ -488,9 +547,13 @@ def getDiscussionView(discussion_topic): try: for topic_entry in discussion_topic_entries: + topic_entries_counter += 1 + # Create new discussion view for the topic_entry topic_entry_view = topicEntryView() + # ID + topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0 # Author topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else "" # Posted date @@ -505,6 +568,9 @@ def getDiscussionView(discussion_topic): for topic_reply in topic_entry_replies: # Create new topic reply view topic_reply_view = topicReplyView() + + # ID + topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0 # Author topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else "" @@ -522,7 +588,10 @@ def getDiscussionView(discussion_topic): except Exception as e: print("Tried to enumerate discussion topic entries but received the following error:") print(e) - + + # Amount of pages + discussion_view.amount_pages = int(topic_entries_counter/50) + 1 # Typically 50 topic entries are stored on a page before it creates another page. + return discussion_view @@ -547,6 +616,9 @@ def findCourseDiscussions(course): def getCourseView(course): course_view = courseView() + # Course ID + course_view.course_id = course.id if hasattr(course, "id") else 0 + # Course term course_view.term = makeValidFilename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "") @@ -616,6 +688,14 @@ if __name__ == "__main__": "logging in to canvas and then going to this URL in the same " "browser {yourCanvasBaseUrl}/api/v1/users/self") USER_ID = input("Enter your Canvas User ID: ") + + if COOKIES_PATH == "": + # Cookies path + print("\nWe will need your browsers cookies file. This needs to be " + "exported using another tool. This needs to be a path to a file " + "formatted in the NetScape format. This can be left blank if an html " + "images aren't wanted. ") + COOKIES_PATH = input("Enter your cookies path: ") print("\nConnecting to canvas\n")