Added checks to prevent redownloading of a page.
This commit is contained in:
parent
97b02f3f5b
commit
3dfe0b191e
56
export.py
56
export.py
|
@ -677,7 +677,10 @@ def downloadCourseHTML(api_url, cookies_path):
|
||||||
if not os.path.exists(course_dir):
|
if not os.path.exists(course_dir):
|
||||||
os.makedirs(course_dir)
|
os.makedirs(course_dir)
|
||||||
|
|
||||||
|
course_list_path = os.path.join(course_dir, "course_list.html")
|
||||||
|
|
||||||
# Downloads the course list.
|
# Downloads the course list.
|
||||||
|
if not os.path.exists(course_list_path):
|
||||||
download_page(api_url + "/courses/", cookies_path, course_dir, "course_list.html")
|
download_page(api_url + "/courses/", cookies_path, course_dir, "course_list.html")
|
||||||
|
|
||||||
def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
|
def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
|
||||||
|
@ -692,7 +695,10 @@ def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(dl_dir):
|
if not os.path.exists(dl_dir):
|
||||||
os.makedirs(dl_dir)
|
os.makedirs(dl_dir)
|
||||||
|
|
||||||
|
homepage_path = os.path.join(dl_dir, "homepage.html")
|
||||||
|
|
||||||
# Downloads the course home page.
|
# Downloads the course home page.
|
||||||
|
if not os.path.exists(homepage_path):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id), cookies_path, dl_dir, "homepage.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id), cookies_path, dl_dir, "homepage.html")
|
||||||
|
|
||||||
def downloadAssignmentPages(api_url, course_view, cookies_path):
|
def downloadAssignmentPages(api_url, course_view, cookies_path):
|
||||||
|
@ -706,7 +712,10 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(base_assign_dir):
|
if not os.path.exists(base_assign_dir):
|
||||||
os.makedirs(base_assign_dir)
|
os.makedirs(base_assign_dir)
|
||||||
|
|
||||||
|
assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html")
|
||||||
|
|
||||||
# Download assignment list (there's a chance this might be the course homepage if the course has the assignments page disabled)
|
# Download assignment list (there's a chance this might be the course homepage if the course has the assignments page disabled)
|
||||||
|
if not os.path.exists(assignment_list_path):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
|
||||||
|
|
||||||
for assignment in course_view.assignments:
|
for assignment in course_view.assignments:
|
||||||
|
@ -720,7 +729,10 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(assign_dir):
|
if not os.path.exists(assign_dir):
|
||||||
os.makedirs(assign_dir)
|
os.makedirs(assign_dir)
|
||||||
|
|
||||||
|
assignment_page_path = os.path.join(assign_dir, "assignment.html")
|
||||||
|
|
||||||
# Download the assignment page; this usually has the instructions, etc.
|
# Download the assignment page; this usually has the instructions, etc.
|
||||||
|
if not os.path.exists(assignment_page_path):
|
||||||
download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
|
download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
|
||||||
|
|
||||||
for submission in assignment.submissions:
|
for submission in assignment.submissions:
|
||||||
|
@ -734,7 +746,10 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(submission_dir):
|
if not os.path.exists(submission_dir):
|
||||||
os.makedirs(submission_dir)
|
os.makedirs(submission_dir)
|
||||||
|
|
||||||
|
submission_page_dir = os.path.join(submission_dir, "submission.html")
|
||||||
|
|
||||||
# Download submission url, this is typically a more focused page
|
# Download submission url, this is typically a more focused page
|
||||||
|
if not os.path.exists(submission_page_dir):
|
||||||
download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
|
download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
|
||||||
|
|
||||||
# If there's more than 1 attempt, save each attempt in attempts folder
|
# If there's more than 1 attempt, save each attempt in attempts folder
|
||||||
|
@ -749,7 +764,11 @@ def downloadAssignmentPages(api_url, course_view, cookies_path):
|
||||||
# different ID's however, as I wasn't able to find out what the url
|
# different ID's however, as I wasn't able to find out what the url
|
||||||
# for the specific id's attempts would be.
|
# for the specific id's attempts would be.
|
||||||
for i in range(submission.attempt):
|
for i in range(submission.attempt):
|
||||||
download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, submission_dir, "attempt_" + str(i+1) + ".html")
|
filename = "attempt_" + str(i+1) + ".html"
|
||||||
|
submission_page_attempt_dir = os.path.join(submission_dir, filename)
|
||||||
|
|
||||||
|
if not os.path.exists(submission_page_attempt_dir):
|
||||||
|
download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, filename)
|
||||||
|
|
||||||
def downloadCourseModulePages(api_url, course_view, cookies_path):
|
def downloadCourseModulePages(api_url, course_view, cookies_path):
|
||||||
if(cookies_path == "" or len(course_view.modules) == 0):
|
if(cookies_path == "" or len(course_view.modules) == 0):
|
||||||
|
@ -762,7 +781,10 @@ def downloadCourseModulePages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(modules_dir):
|
if not os.path.exists(modules_dir):
|
||||||
os.makedirs(modules_dir)
|
os.makedirs(modules_dir)
|
||||||
|
|
||||||
|
module_list_dir = os.path.join(modules_dir, "modules_list.html")
|
||||||
|
|
||||||
# Downloads the modules page (possible this is disabled by the teacher)
|
# Downloads the modules page (possible this is disabled by the teacher)
|
||||||
|
if not os.path.exists(module_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", COOKIES_PATH, modules_dir, "modules_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", COOKIES_PATH, modules_dir, "modules_list.html")
|
||||||
|
|
||||||
for module in course_view.modules:
|
for module in course_view.modules:
|
||||||
|
@ -776,8 +798,12 @@ def downloadCourseModulePages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(items_dir):
|
if not os.path.exists(items_dir):
|
||||||
os.makedirs(items_dir)
|
os.makedirs(items_dir)
|
||||||
|
|
||||||
|
filename = makeValidFilename(str(item.title)) + ".html"
|
||||||
|
module_item_dir = os.path.join(items_dir, filename)
|
||||||
|
|
||||||
# Download the module page.
|
# Download the module page.
|
||||||
download_page(item.url, cookies_path, items_dir, makeValidFilename(str(item.title)) + ".html")
|
if not os.path.exists(module_item_dir):
|
||||||
|
download_page(item.url, cookies_path, items_dir, filename)
|
||||||
|
|
||||||
def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
|
def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
|
||||||
if(cookies_path == "" or len(course_view.announcements) == 0):
|
if(cookies_path == "" or len(course_view.announcements) == 0):
|
||||||
|
@ -790,7 +816,10 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(base_announce_dir):
|
if not os.path.exists(base_announce_dir):
|
||||||
os.makedirs(base_announce_dir)
|
os.makedirs(base_announce_dir)
|
||||||
|
|
||||||
|
announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
|
||||||
|
|
||||||
# Download announcement list (there's a chance this might be the course homepage if the course has the announcements page disabled)
|
# Download announcement list (there's a chance this might be the course homepage if the course has the announcements page disabled)
|
||||||
|
if not os.path.exists(announcement_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
|
||||||
|
|
||||||
for announcements in course_view.announcements:
|
for announcements in course_view.announcements:
|
||||||
|
@ -803,8 +832,12 @@ def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
|
||||||
os.makedirs(announce_dir)
|
os.makedirs(announce_dir)
|
||||||
|
|
||||||
for i in range(announcements.amount_pages):
|
for i in range(announcements.amount_pages):
|
||||||
|
filename = "announcement_" + str(i+1) + ".html"
|
||||||
|
announcement_page_dir = os.path.join(announce_dir, filename)
|
||||||
|
|
||||||
# Download one page of the announcement.
|
# Download one page of the announcement.
|
||||||
download_page(announcements.url + "/page-" + str(i+1), cookies_path, announce_dir, "announcement_" + str(i+1) + ".html")
|
if not os.path.exists(announcement_page_dir):
|
||||||
|
download_page(announcements.url + "/page-" + str(i+1), cookies_path, announce_dir, filename)
|
||||||
|
|
||||||
def downloadCourseDicussionPages(api_url, course_view, cookies_path):
|
def downloadCourseDicussionPages(api_url, course_view, cookies_path):
|
||||||
if(cookies_path == "" or len(course_view.discussions) == 0):
|
if(cookies_path == "" or len(course_view.discussions) == 0):
|
||||||
|
@ -817,7 +850,10 @@ def downloadCourseDicussionPages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(base_discussion_dir):
|
if not os.path.exists(base_discussion_dir):
|
||||||
os.makedirs(base_discussion_dir)
|
os.makedirs(base_discussion_dir)
|
||||||
|
|
||||||
|
dicussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html")
|
||||||
|
|
||||||
# Download discussion list (there's a chance this might be the course homepage if the course has the discussions page disabled)
|
# Download discussion list (there's a chance this might be the course homepage if the course has the discussions page disabled)
|
||||||
|
if not os.path.exists(dicussion_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
|
||||||
|
|
||||||
for discussion in course_view.discussions:
|
for discussion in course_view.discussions:
|
||||||
|
@ -830,8 +866,12 @@ def downloadCourseDicussionPages(api_url, course_view, cookies_path):
|
||||||
os.makedirs(dicussion_dir)
|
os.makedirs(dicussion_dir)
|
||||||
|
|
||||||
for i in range(discussion.amount_pages):
|
for i in range(discussion.amount_pages):
|
||||||
|
filename = "dicussion_" + str(i+1) + ".html"
|
||||||
|
dicussion_page_dir = os.path.join(dicussion_dir, filename)
|
||||||
|
|
||||||
# Download one page of the discussion.
|
# Download one page of the discussion.
|
||||||
download_page(discussion.url + "/page-" + str(i+1), cookies_path, dicussion_dir, "dicussion_" + str(i+1) + ".html")
|
if not os.path.exists(dicussion_page_dir):
|
||||||
|
download_page(discussion.url + "/page-" + str(i+1), cookies_path, dicussion_dir, filename)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
@ -881,6 +921,10 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
skip = set(COURSES_TO_SKIP)
|
skip = set(COURSES_TO_SKIP)
|
||||||
|
|
||||||
|
if (COOKIES_PATH):
|
||||||
|
print(" Downloading course list page")
|
||||||
|
downloadCourseHTML(API_URL, COOKIES_PATH)
|
||||||
|
|
||||||
for course in courses:
|
for course in courses:
|
||||||
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
||||||
continue
|
continue
|
||||||
|
@ -899,9 +943,6 @@ if __name__ == "__main__":
|
||||||
course_view.modules = findCourseModules(course, course_view)
|
course_view.modules = findCourseModules(course, course_view)
|
||||||
|
|
||||||
if(COOKIES_PATH):
|
if(COOKIES_PATH):
|
||||||
print(" Downloading course list page")
|
|
||||||
downloadCourseHTML(API_URL, COOKIES_PATH)
|
|
||||||
|
|
||||||
print(" Downloading course home page")
|
print(" Downloading course home page")
|
||||||
downloadCourseHomePageHTML(API_URL, course_view, COOKIES_PATH)
|
downloadCourseHomePageHTML(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
@ -917,7 +958,6 @@ if __name__ == "__main__":
|
||||||
print(" Downloading course dicussion pages")
|
print(" Downloading course dicussion pages")
|
||||||
downloadCourseDicussionPages(API_URL, course_view, COOKIES_PATH)
|
downloadCourseDicussionPages(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
|
||||||
print(" Exporting all course data")
|
print(" Exporting all course data")
|
||||||
exportAllCourseData(course_view)
|
exportAllCourseData(course_view)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue