From 2040415d1a551ecee4e19227ecff63bcbbb12dfe Mon Sep 17 00:00:00 2001 From: 17acres Date: Wed, 5 May 2021 16:33:36 -0400 Subject: [PATCH 01/10] Fix valid filename crashes, organize files more 1. Fixes issues where the program crashes because things like assignment names aren't valid filenames. Makes the following improvements: 2. Stores assignment files in an "assignments" subfolder. 3. Doesn't make attachment directory if there are no attachments. 4. Organizes course files inside subfolders as created on Canvas (requiring the makeValidFolderPath function to sanitize the path but leave slashes) --- export.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/export.py b/export.py index d293194..22056be 100644 --- a/export.py +++ b/export.py @@ -148,6 +148,18 @@ def makeValidFilename(input_str): return input_str +def makeValidFolderPath(input_str): + # Remove invalid characters + valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits) + input_str = "".join(c for c in input_str if c in valid_chars) + + # Remove leading and trailing whitespace + input_str = input_str.lstrip().rstrip().strip("/").strip("\\") + + # Replace path separators with OS default + input_str=input_str.replace("/",os.sep) + + return input_str def findCourseModules(course, course_view): modules_dir = os.path.join(DL_LOCATION, course_view.term, @@ -220,8 +232,9 @@ def findCourseModules(course, course_view): def downloadCourseFiles(course, course_view): + # file full_name starts with "course files" dl_dir = os.path.join(DL_LOCATION, course_view.term, - course_view.course_code, "files") + course_view.course_code) # Create directory if not present if not os.path.exists(dl_dir): @@ -231,7 +244,14 @@ def downloadCourseFiles(course, course_view): files = course.get_files() for file in files: - dl_path = os.path.join(dl_dir, + file_folder=course.get_folder(file.folder_id) + + folder_dl_dir=os.path.join(dl_dir,makeValidFolderPath(file_folder.full_name)) + + if not os.path.exists(folder_dl_dir): + os.makedirs(folder_dl_dir) + + dl_path = os.path.join(folder_dl_dir, makeValidFilename(str(file.display_name))) # Download file if it doesn't already exist @@ -253,13 +273,13 @@ def download_submission_attachments(course, course_view): for assignment in course_view.assignments: for submission in assignment.submissions: - attachment_dir = os.path.join(course_dir, assignment.title, + attachment_dir = os.path.join(course_dir, "assignments", assignment.title, str(submission.user_id)) - if not os.path.exists(attachment_dir): + if (not os.path.exists(attachment_dir)) and (submission.attachments): os.makedirs(attachment_dir) for attachment in submission.attachments: - filepath = os.path.join(attachment_dir, str(attachment.id) + - "_" + attachment.filename) + filepath = os.path.join(attachment_dir, makeValidFilename(str(attachment.id) + + "_" + attachment.filename)) if not os.path.exists(filepath): print('Downloading attachment: {}'.format(filepath)) r = requests.get(attachment.url, allow_redirects=True) @@ -337,7 +357,7 @@ def findCourseAssignments(course): # Title if hasattr(assignment, "name"): - assignment_view.title = str(assignment.name) + assignment_view.title = makeValidFilename(str(assignment.name)) else: assignment_view.title = "" # Description @@ -534,10 +554,10 @@ def getCourseView(course): course_view = courseView() # Course term - course_view.term = course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "" + course_view.term = makeValidFilename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "") # Course code - course_view.course_code = course.course_code if hasattr(course, "course_code") else "" + course_view.course_code = makeValidFilename(course.course_code if hasattr(course, "course_code") else "") # Course name course_view.name = course.name if hasattr(course, "name") else "" From ad174fcbb5d2b17232429576ee7ae4696d16dda0 Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 02:19:01 -0400 Subject: [PATCH 02/10] Handle student-mode submissions and teacher-mode submissions the same Treat submissions from the list of submissions from every student the same as submissions from a single individual user (student) so they are downloaded the same and processed. Before, it seems like it just didn't care to download submission attachments for single-student-account-only individual submissions. Now, handle both types the same. This means that there are a lot of single-element lists of submissions in the final output if the user is a student, but it makes the code simpler. Could clean up those lists later. Also, display the different types of errors in fetching submissions. --- export.py | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/export.py b/export.py index 22056be..713e473 100644 --- a/export.py +++ b/export.py @@ -6,6 +6,7 @@ import string # external from canvasapi import Canvas from canvasapi.exceptions import ResourceDoesNotExist +from canvasapi.exceptions import Unauthorized import dateutil.parser import jsonpickle import requests @@ -116,11 +117,9 @@ class assignmentView(): description = "" assigned_date = "" due_date = "" - submission = None submissions = [] def __init__(self): - self.submission = submissionView() self.submissions = [] @@ -376,12 +375,20 @@ def findCourseAssignments(course): else: assignment_view.due_date = "" - # Download all submissions try: - submissions = assignment.get_submissions() - # TODO : Figure out the exact error raised - except: - print("Got no submissions for this assignment") + try: # Download all submissions for entire class + submissions = assignment.get_submissions() + submissions[0] # Trigger Unauthorized if not allowed + except Unauthorized: + print("Not authorized to download entire class submissions for this assignment") + # Download submission for this user only + submissions = [assignment.get_submission(USER_ID)] + submissions[0] #throw error if no submissions found at all but without error + except (ResourceDoesNotExist, NameError, IndexError): + print('Got no submissions from either class or user: {}'.format(USER_ID)) + except Exception as e: + print("Failed to retrieve submissions for this assignment") + print(e.__class__.__name__) else: try: for submission in submissions: @@ -430,25 +437,6 @@ def findCourseAssignments(course): print("Skipping submission that gave the following error:") print(e) - # The following is only useful if you are a student in the class. - # Get my user"s submission object - try: - submission = assignment.get_submission(USER_ID) - except ResourceDoesNotExist: - print('No submission for user: {}'.format(USER_ID)) - else: - # Create a new submission view - assignment_view.submission = submissionView() - - # My grade - assignment_view.submission.grade = str(submission.grade) if hasattr(submission, "grade") else "" - # My raw score - assignment_view.submission.raw_score = str(submission.score) if hasattr(submission, "score") else "" - # Total possible score - assignment_view.submission.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else "" - # Submission comments - assignment_view.submission.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else "" - assignment_views.append(assignment_view) except Exception as e: print("Skipping course assignments that gave the following error:") @@ -570,11 +558,11 @@ def getCourseView(course): # Course announcements print(" Getting announcements") - course_view.announcements = findCourseAnnouncements(course) + #course_view.announcements = findCourseAnnouncements(course) # Course discussions print(" Getting discussions") - course_view.discussions = findCourseDiscussions(course) + #course_view.discussions = findCourseDiscussions(course) # Course pages print(" Getting pages") @@ -649,7 +637,7 @@ if __name__ == "__main__": all_courses_views.append(course_view) print(" Downloading all files") - downloadCourseFiles(course, course_view) + #downloadCourseFiles(course, course_view) print(" Downloading submission attachments") download_submission_attachments(course, course_view) From bcf362c46aa6e7120ce59ea0e98b695e4bf897f8 Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 12:09:29 -0400 Subject: [PATCH 03/10] Fix accidental commenting-out of features --- export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/export.py b/export.py index 713e473..8b742b1 100644 --- a/export.py +++ b/export.py @@ -558,11 +558,11 @@ def getCourseView(course): # Course announcements print(" Getting announcements") - #course_view.announcements = findCourseAnnouncements(course) + course_view.announcements = findCourseAnnouncements(course) # Course discussions print(" Getting discussions") - #course_view.discussions = findCourseDiscussions(course) + course_view.discussions = findCourseDiscussions(course) # Course pages print(" Getting pages") @@ -637,7 +637,7 @@ if __name__ == "__main__": all_courses_views.append(course_view) print(" Downloading all files") - #downloadCourseFiles(course, course_view) + downloadCourseFiles(course, course_view) print(" Downloading submission attachments") download_submission_attachments(course, course_view) From bb0633a8e747daeb52a0eda16c85deacd081bcef Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 12:46:17 -0400 Subject: [PATCH 04/10] Gitignore default output directory --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 600d2d3..e718a0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.vscode \ No newline at end of file +.vscode +output/ \ No newline at end of file From 66696c57333e33918bf92354ca429fb17d8aa683 Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 13:09:56 -0400 Subject: [PATCH 05/10] Replace important invalid characters in filename strings with useful separators --- export.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/export.py b/export.py index 8b742b1..e12b84d 100644 --- a/export.py +++ b/export.py @@ -140,6 +140,9 @@ class courseView(): def makeValidFilename(input_str): # Remove invalid characters valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) + input_str = input_str.replace("+"," ") # Canvas default for spaces + input_str = input_str.replace(":","-") + input_str = input_str.replace("/","-") input_str = "".join(c for c in input_str if c in valid_chars) # Remove leading and trailing whitespace @@ -150,9 +153,12 @@ def makeValidFilename(input_str): def makeValidFolderPath(input_str): # Remove invalid characters valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits) + input_str = input_str.replace("+"," ") # Canvas default for spaces + input_str = input_str.replace(":","-") + input_str = input_str.replace("/","-") input_str = "".join(c for c in input_str if c in valid_chars) - # Remove leading and trailing whitespace + # Remove leading and trailing whitespace, separators input_str = input_str.lstrip().rstrip().strip("/").strip("\\") # Replace path separators with OS default From 63151c3075ea43668331499ebb592b94b2df5d6a Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 13:18:02 -0400 Subject: [PATCH 06/10] Don't create user id subfolder for submissions if only 1 exists --- export.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/export.py b/export.py index e12b84d..29f9995 100644 --- a/export.py +++ b/export.py @@ -278,8 +278,9 @@ def download_submission_attachments(course, course_view): for assignment in course_view.assignments: for submission in assignment.submissions: - attachment_dir = os.path.join(course_dir, "assignments", assignment.title, - str(submission.user_id)) + attachment_dir = os.path.join(course_dir, "assignments", assignment.title) + if(len(assignment.submissions)!=1): + attachment_dir = os.path.join(attachment_dir,str(submission.user_id)) if (not os.path.exists(attachment_dir)) and (submission.attachments): os.makedirs(attachment_dir) for attachment in submission.attachments: From d9b6c66c33d09b061bf2290e0706c05e79b1f3e7 Mon Sep 17 00:00:00 2001 From: 17acres Date: Thu, 6 May 2021 13:18:55 -0400 Subject: [PATCH 07/10] Fix 66696c5 so folder paths work properly --- export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/export.py b/export.py index 29f9995..7b5f74e 100644 --- a/export.py +++ b/export.py @@ -155,7 +155,6 @@ def makeValidFolderPath(input_str): valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits) input_str = input_str.replace("+"," ") # Canvas default for spaces input_str = input_str.replace(":","-") - input_str = input_str.replace("/","-") input_str = "".join(c for c in input_str if c in valid_chars) # Remove leading and trailing whitespace, separators From 114f8e95eaa3b7983dc22cef4ab10f497326e4aa Mon Sep 17 00:00:00 2001 From: 17acres Date: Sat, 3 Jul 2021 12:44:56 -0400 Subject: [PATCH 08/10] Add missing pyyaml dependency to lists --- docs/README.md | 2 ++ requirements.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/README.md b/docs/README.md index 1e06bf7..7377b67 100644 --- a/docs/README.md +++ b/docs/README.md @@ -35,12 +35,14 @@ Example: - all_output.json # Getting Started + ## Dependencies To run the program, you will need the following dependencies: `pip install requests` `pip install jsonpickle` `pip install canvasapi` `pip install python-dateutil` +`pip install PyYAML` You can install these dependencies using `pip install -r requirements.txt` diff --git a/requirements.txt b/requirements.txt index aa336e9..0a6a4fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests jsonpickle canvasapi python-dateutil +PyYAML From 02887beffe8e46ef4c19c8d8e728857a813aec24 Mon Sep 17 00:00:00 2001 From: 17acres Date: Sat, 3 Jul 2021 12:46:24 -0400 Subject: [PATCH 09/10] Update README to match change of directory names --- docs/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index 7377b67..bafccfc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -14,11 +14,11 @@ The tool will export your data in JSON format, and will organize it nicely into Example: - Fall 2013 - Econ 101 - - files + - course files - modules - Econ 101.json - English 101 - - files + - course files - modules - English 101.json - Fall 2014 From 16911fb8fb7aac224b53e0c236d20ebe5ef0248e Mon Sep 17 00:00:00 2001 From: 17acres Date: Sat, 3 Jul 2021 12:48:12 -0400 Subject: [PATCH 10/10] Fix formatting --- docs/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index bafccfc..fd27e78 100644 --- a/docs/README.md +++ b/docs/README.md @@ -40,8 +40,8 @@ Example: To run the program, you will need the following dependencies: `pip install requests` `pip install jsonpickle` -`pip install canvasapi` -`pip install python-dateutil` +`pip install canvasapi` +`pip install python-dateutil` `pip install PyYAML` You can install these dependencies using