Add a max folder name limit to prevent too long path names (#13)

* implement a max folder name size limit

* fix dicussion spelling
This commit is contained in:
Nafee Jan 2023-05-10 17:36:11 -04:00 committed by GitHub
parent a03d88b742
commit 0f34d8148f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 46 additions and 24 deletions

View File

@ -40,6 +40,10 @@ COURSES_TO_SKIP = [288290, 512033]
DATE_TEMPLATE = "%B %d, %Y %I:%M %p" DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
# Windows limits a full path to 260 characters. 70 is an estimated cap for a single folder name, chosen to reduce the chance of reaching that limit
# Applies to module, assignment, announcement, and discussion folders
# If a folder name exceeds this limit, it is truncated and a "-" is appended to indicate it was shortened ("..." cannot be used because trailing periods are not allowed)
MAX_FOLDER_NAME_SIZE = 70
class moduleItemView(): class moduleItemView():
id = 0 id = 0
@ -180,9 +184,6 @@ def makeValidFilename(input_str):
# Remove trailing periods # Remove trailing periods
input_str = input_str.rstrip(".") input_str = input_str.rstrip(".")
##Splits strings to prevent extremely long names
#input_str=input_str[:40]
return input_str return input_str
def makeValidFolderPath(input_str): def makeValidFolderPath(input_str):
@ -201,11 +202,20 @@ def makeValidFolderPath(input_str):
# Replace path separators with OS default # Replace path separators with OS default
input_str=input_str.replace("/",os.sep) input_str=input_str.replace("/",os.sep)
##Splits strings to prevent extremely long names
#input_str=input_str[:40]
return input_str return input_str
def shortenFileName(string: str, shorten_by: int) -> str:
    """Shorten a file or folder name and mark it as truncated with a trailing "-".

    One character more than ``shorten_by`` is removed from the end to make
    room for the "-" marker, so the result is normally exactly ``shorten_by``
    characters shorter than the input (or less, if trailing whitespace,
    periods, or dashes had to be stripped before the marker was added).

    Args:
        string: The name to shorten.
        shorten_by: Number of characters the name should shrink by. Values
            less than or equal to zero mean "no shortening needed".

    Returns:
        ``string`` unchanged when it is empty or ``shorten_by <= 0``;
        otherwise the truncated name ending in "-". If ``shorten_by`` is
        large enough to consume the whole name, only "-" is returned.
    """
    if not string or shorten_by <= 0:
        return string

    # Reserve one extra character for the "-" marker. Clamp at zero so an
    # oversized shorten_by cannot turn into a negative slice index, which
    # would wrap around and keep most of the name instead of removing it.
    keep = max(len(string) - (shorten_by + 1), 0)
    shortened = string[:keep]

    # Trailing whitespace and periods are not allowed at the end of a name,
    # and stripping dashes avoids ending up with a doubled "--" marker.
    shortened = shortened.rstrip().rstrip(".").rstrip("-")

    return shortened + "-"
def findCourseModules(course, course_view): def findCourseModules(course, course_view):
modules_dir = os.path.join(DL_LOCATION, course_view.term, modules_dir = os.path.join(DL_LOCATION, course_view.term,
@ -252,7 +262,9 @@ def findCourseModules(course, course_view):
if module_item_view.content_type == "File": if module_item_view.content_type == "File":
# If problems arise due to long pathnames, changing module.name to module.id might help # If problems arise due to long pathnames, changing module.name to module.id might help
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path) # A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
module_dir = os.path.join(modules_dir, makeValidFilename(str(module.name)), "files") module_name = makeValidFilename(str(module.name))
module_name = shortenFileName(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
module_dir = os.path.join(modules_dir, module_name, "files")
try: try:
# Create directory for current module if not present # Create directory for current module if not present
@ -327,7 +339,9 @@ def download_submission_attachments(course, course_view):
for assignment in course_view.assignments: for assignment in course_view.assignments:
for submission in assignment.submissions: for submission in assignment.submissions:
attachment_dir = os.path.join(course_dir, "assignments", assignment.title) assignment_title = makeValidFilename(str(assignment.title))
assignment_title = shortenFileName(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
attachment_dir = os.path.join(course_dir, "assignments", assignment_title)
if(len(assignment.submissions)!=1): if(len(assignment.submissions)!=1):
attachment_dir = os.path.join(attachment_dir,str(submission.user_id)) attachment_dir = os.path.join(attachment_dir,str(submission.user_id))
if (not os.path.exists(attachment_dir)) and (submission.attachments): if (not os.path.exists(attachment_dir)) and (submission.attachments):
@ -723,8 +737,10 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
if not os.path.exists(assignment_list_path): if not os.path.exists(assignment_list_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
for assignment in course_view.assignments: for assignment in course_view.assignments:
assign_dir = os.path.join(base_assign_dir, makeValidFilename(assignment.title)) assignment_title = makeValidFilename(str(assignment.title))
assignment_title = shortenFileName(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
assign_dir = os.path.join(base_assign_dir, assignment_title)
# Download an html image of each assignment (includes assignment instructions and other stuff). # Download an html image of each assignment (includes assignment instructions and other stuff).
# Currently, this will only download the main assignment page and not external pages, this is # Currently, this will only download the main assignment page and not external pages, this is
@ -796,7 +812,9 @@ def downloadCourseModulePages(api_url, course_view, cookies_path):
for item in module.items: for item in module.items:
# If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
# A change would also have to be made in findCourseModules(course, course_view) # A change would also have to be made in findCourseModules(course, course_view)
items_dir = os.path.join(modules_dir, makeValidFilename(str(module.name))) module_name = makeValidFilename(str(module.name))
module_name = shortenFileName(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
items_dir = os.path.join(modules_dir, module_name)
# Create modules directory if not present # Create modules directory if not present
if item.url != "": if item.url != "":
@ -828,7 +846,9 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
for announcements in course_view.announcements: for announcements in course_view.announcements:
announce_dir = os.path.join(base_announce_dir, makeValidFilename(announcements.title)) announcements_title = makeValidFilename(str(announcements.title))
announcements_title = shortenFileName(announcements_title, len(announcements_title) - MAX_FOLDER_NAME_SIZE)
announce_dir = os.path.join(base_announce_dir, announcements_title)
if announcements.url == "": if announcements.url == "":
continue continue
@ -845,7 +865,7 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
if not os.path.exists(announcement_page_dir): if not os.path.exists(announcement_page_dir):
download_page(announcements.url + "/page-" + str(i+1), cookies_path, announce_dir, filename) download_page(announcements.url + "/page-" + str(i+1), cookies_path, announce_dir, filename)
def downloadCourseDicussionPages(api_url, course_view, cookies_path): def downloadCourseDiscussionPages(api_url, course_view, cookies_path):
if(cookies_path == "" or len(course_view.discussions) == 0): if(cookies_path == "" or len(course_view.discussions) == 0):
return return
@ -856,29 +876,31 @@ def downloadCourseDicussionPages(api_url, course_view, cookies_path):
if not os.path.exists(base_discussion_dir): if not os.path.exists(base_discussion_dir):
os.makedirs(base_discussion_dir) os.makedirs(base_discussion_dir)
dicussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html") discussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html")
# Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled) # Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
if not os.path.exists(dicussion_list_dir): if not os.path.exists(discussion_list_dir):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html") download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
for discussion in course_view.discussions: for discussion in course_view.discussions:
dicussion_dir = os.path.join(base_discussion_dir, makeValidFilename(discussion.title)) discussion_title = makeValidFilename(str(discussion.title))
discussion_title = shortenFileName(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE)
discussion_dir = os.path.join(base_discussion_dir, discussion_title)
if discussion.url == "": if discussion.url == "":
continue continue
if not os.path.exists(dicussion_dir): if not os.path.exists(discussion_dir):
os.makedirs(dicussion_dir) os.makedirs(discussion_dir)
# Downloads each page that a discussion takes. # Downloads each page that a discussion takes.
for i in range(discussion.amount_pages): for i in range(discussion.amount_pages):
filename = "dicussion_" + str(i+1) + ".html" filename = "discussion_" + str(i+1) + ".html"
dicussion_page_dir = os.path.join(dicussion_dir, filename) discussion_page_dir = os.path.join(discussion_dir, filename)
# Download assignment page, this usually has instructions and etc. # Download assignment page, this usually has instructions and etc.
if not os.path.exists(dicussion_page_dir): if not os.path.exists(discussion_page_dir):
download_page(discussion.url + "/page-" + str(i+1), cookies_path, dicussion_dir, filename) download_page(discussion.url + "/page-" + str(i+1), cookies_path, discussion_dir, filename)
if __name__ == "__main__": if __name__ == "__main__":
@ -963,8 +985,8 @@ if __name__ == "__main__":
print(" Downloading course announcements pages") print(" Downloading course announcements pages")
downloadCourseAnnouncementPages(API_URL, course_view, COOKIES_PATH) downloadCourseAnnouncementPages(API_URL, course_view, COOKIES_PATH)
print(" Downloading course dicussion pages") print(" Downloading course discussion pages")
downloadCourseDicussionPages(API_URL, course_view, COOKIES_PATH) downloadCourseDiscussionPages(API_URL, course_view, COOKIES_PATH)
print(" Exporting all course data") print(" Exporting all course data")
exportAllCourseData(course_view) exportAllCourseData(course_view)