Fixed a bug with trailing periods, fixed a folder naming bug, and fixed a bug where pages were being downloaded to the cookies folder.

This commit is contained in:
dj346 2021-12-24 07:46:22 -08:00
parent cdb15e251a
commit a7e05ec674
2 changed files with 17 additions and 9 deletions

3
.gitignore vendored
View File

@@ -1,6 +1,7 @@
.vscode .vscode
output/ __pycache__/
node_modules/ node_modules/
output/
credentials.yaml credentials.yaml
cookies.txt cookies.txt

View File

@@ -177,6 +177,9 @@ def makeValidFilename(input_str):
# Remove leading and trailing whitespace # Remove leading and trailing whitespace
input_str = input_str.lstrip().rstrip() input_str = input_str.lstrip().rstrip()
# Remove trailing periods
input_str = input_str.rstrip(".")
##Splits strings to prevent extremely long names ##Splits strings to prevent extremely long names
#input_str=input_str[:40] #input_str=input_str[:40]
@@ -192,6 +195,9 @@ def makeValidFolderPath(input_str):
# Remove leading and trailing whitespace, separators # Remove leading and trailing whitespace, separators
input_str = input_str.lstrip().rstrip().strip("/").strip("\\") input_str = input_str.lstrip().rstrip().strip("/").strip("\\")
# Remove trailing periods
input_str = input_str.rstrip(".")
# Replace path separators with OS default # Replace path separators with OS default
input_str=input_str.replace("/",os.sep) input_str=input_str.replace("/",os.sep)
@@ -246,7 +252,7 @@ def findCourseModules(course, course_view):
if module_item_view.content_type == "File": if module_item_view.content_type == "File":
# If problems arise due to long pathnames, changing module.name to module.id might help # If problems arise due to long pathnames, changing module.name to module.id might help
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path) # A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
module_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id)), "files") module_dir = os.path.join(modules_dir, makeValidFilename(str(module.id)), "files")
try: try:
# Create directory for current module if not present # Create directory for current module if not present
@@ -295,7 +301,7 @@ def downloadCourseFiles(course, course_view):
for file in files: for file in files:
file_folder=course.get_folder(file.folder_id) file_folder=course.get_folder(file.folder_id)
folder_dl_dir=os.path.join(dl_dir, makeValidFolderPath(file_folder.full_name)) folder_dl_dir=os.path.join(dl_dir, makeValidFilename(file_folder.full_name))
if not os.path.exists(folder_dl_dir): if not os.path.exists(folder_dl_dir):
os.makedirs(folder_dl_dir) os.makedirs(folder_dl_dir)
@@ -689,7 +695,7 @@ def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
# file full_name starts with "course files" # file full_name starts with "course files"
dl_dir = os.path.join(DL_LOCATION, course_view.term, dl_dir = os.path.join(DL_LOCATION, course_view.term,
course_view.course_code) course_view.course_code)
# Create directory if not present # Create directory if not present
if not os.path.exists(dl_dir): if not os.path.exists(dl_dir):
@@ -719,7 +725,7 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
for assignment in course_view.assignments: for assignment in course_view.assignments:
assign_dir = os.path.join(base_assign_dir, makeValidFolderPath(assignment.title)) assign_dir = os.path.join(base_assign_dir, makeValidFilename(assignment.title))
# Download an html image of each assignment (includes assignment instructions and other stuff). # Download an html image of each assignment (includes assignment instructions and other stuff).
# Currently, this will only download the main assignment page and not external pages, this is # Currently, this will only download the main assignment page and not external pages, this is
@@ -768,7 +774,7 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
submission_page_attempt_dir = os.path.join(submission_dir, filename) submission_page_attempt_dir = os.path.join(submission_dir, filename)
if not os.path.exists(submission_page_attempt_dir): if not os.path.exists(submission_page_attempt_dir):
download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, filename) download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, submission_dir, filename)
def downloadCourseModulePages(api_url, course_view, cookies_path): def downloadCourseModulePages(api_url, course_view, cookies_path):
if(cookies_path == "" or len(course_view.modules) == 0): if(cookies_path == "" or len(course_view.modules) == 0):
@@ -791,7 +797,7 @@ def downloadCourseModulePages(api_url, course_view, cookies_path):
for item in module.items: for item in module.items:
# If problems arise due to long pathnames, changing module.title to module.id might help, this can also be done with item.title # If problems arise due to long pathnames, changing module.title to module.id might help, this can also be done with item.title
# A change would also have to be made in findCourseModules(course, course_view) # A change would also have to be made in findCourseModules(course, course_view)
items_dir = os.path.join(modules_dir, makeValidFolderPath(str(module.id))) items_dir = os.path.join(modules_dir, makeValidFilename(str(module.id)))
# Create modules directory if not present # Create modules directory if not present
if item.url != "": if item.url != "":
@@ -823,7 +829,7 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
for announcements in course_view.announcements: for announcements in course_view.announcements:
announce_dir = os.path.join(base_announce_dir, makeValidFolderPath(announcements.title)) announce_dir = os.path.join(base_announce_dir, makeValidFilename(announcements.title))
if announcements.url == "": if announcements.url == "":
continue continue
@@ -857,7 +863,7 @@ def downloadCourseDicussionPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
for discussion in course_view.discussions: for discussion in course_view.discussions:
dicussion_dir = os.path.join(base_discussion_dir, makeValidFolderPath(discussion.title)) dicussion_dir = os.path.join(base_discussion_dir, makeValidFilename(discussion.title))
if discussion.url == "": if discussion.url == "":
continue continue
@@ -921,6 +927,7 @@ if __name__ == "__main__":
skip = set(COURSES_TO_SKIP) skip = set(COURSES_TO_SKIP)
if (COOKIES_PATH): if (COOKIES_PATH):
print(" Downloading course list page") print(" Downloading course list page")
downloadCourseHTML(API_URL, COOKIES_PATH) downloadCourseHTML(API_URL, COOKIES_PATH)