Added support to download canvas webpages #7

Merged
dj346 merged 16 commits from master into master 2022-01-22 10:21:05 -07:00
2 changed files with 17 additions and 9 deletions
Showing only changes of commit a7e05ec674 - Show all commits

3
.gitignore vendored
View File

@ -1,6 +1,7 @@
.vscode
output/
__pycache__/
node_modules/
output/
credentials.yaml
cookies.txt

View File

@ -177,6 +177,9 @@ def makeValidFilename(input_str):
# Remove leading and trailing whitespace
input_str = input_str.lstrip().rstrip()
# Remove trailing periods
input_str = input_str.rstrip(".")
## Truncates the string to prevent extremely long names
#input_str=input_str[:40]
@ -192,6 +195,9 @@ def makeValidFolderPath(input_str):
# Remove leading and trailing whitespace, separators
input_str = input_str.lstrip().rstrip().strip("/").strip("\\")
# Remove trailing periods
input_str = input_str.rstrip(".")
# Replace path separators with OS default
input_str=input_str.replace("/",os.sep)
@ -246,7 +252,7 @@ def findCourseModules(course, course_view):
if module_item_view.content_type == "File":
# If problems arise due to long pathnames, changing module.name to module.id might help
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
module_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id)), "files")
module_dir = os.path.join(modules_dir, makeValidFilename(str(module.id)), "files")
try:
# Create directory for current module if not present
@ -295,7 +301,7 @@ def downloadCourseFiles(course, course_view):
for file in files:
file_folder=course.get_folder(file.folder_id)
folder_dl_dir=os.path.join(dl_dir, makeValidFolderPath(file_folder.full_name))
folder_dl_dir=os.path.join(dl_dir, makeValidFilename(file_folder.full_name))
if not os.path.exists(folder_dl_dir):
os.makedirs(folder_dl_dir)
@ -689,7 +695,7 @@ def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
# file full_name starts with "course files"
dl_dir = os.path.join(DL_LOCATION, course_view.term,
course_view.course_code)
course_view.course_code)
# Create directory if not present
if not os.path.exists(dl_dir):
@ -719,7 +725,7 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
for assignment in course_view.assignments:
assign_dir = os.path.join(base_assign_dir, makeValidFolderPath(assignment.title))
assign_dir = os.path.join(base_assign_dir, makeValidFilename(assignment.title))
# Download an html image of each assignment (includes assignment instructions and other stuff).
# Currently, this will only download the main assignment page and not external pages, this is
@ -768,7 +774,7 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
submission_page_attempt_dir = os.path.join(submission_dir, filename)
if not os.path.exists(submission_page_attempt_dir):
download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, filename)
download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, submission_dir, filename)
def downloadCourseModulePages(api_url, course_view, cookies_path):
if(cookies_path == "" or len(course_view.modules) == 0):
@ -791,7 +797,7 @@ def downloadCourseModulePages(api_url, course_view, cookies_path):
for item in module.items:
# If problems arise due to long pathnames, changing module.title to module.id might help; this can also be done with item.title
# A change would also have to be made in findCourseModules(course, course_view)
items_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id)))
items_dir = os.path.join(modules_dir, makeValidFilename(str(module.id)))
# Create modules directory if not present
if item.url != "":
@ -823,7 +829,7 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
for announcements in course_view.announcements:
announce_dir = os.path.join(base_announce_dir, makeValidFolderPath(announcements.title))
announce_dir = os.path.join(base_announce_dir, makeValidFilename(announcements.title))
if announcements.url == "":
continue
@ -857,7 +863,7 @@ def downloadCourseDicussionPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
for discussion in course_view.discussions:
dicussion_dir = os.path.join(base_discussion_dir, makeValidFolderPath(discussion.title))
dicussion_dir = os.path.join(base_discussion_dir, makeValidFilename(discussion.title))
if discussion.url == "":
continue
@ -921,6 +927,7 @@ if __name__ == "__main__":
skip = set(COURSES_TO_SKIP)
if (COOKIES_PATH):
print(" Downloading course list page")
downloadCourseHTML(API_URL, COOKIES_PATH)