Download files embedded in modules; add argparse options for term filtering and user-file downloads
This commit is contained in:
parent
96c63e6c65
commit
796500e954
50
export.py
50
export.py
|
@ -1,3 +1,4 @@
|
||||||
|
import argparse
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
@ -9,10 +10,10 @@ import requests
|
||||||
import yaml
|
import yaml
|
||||||
from canvasapi import Canvas
|
from canvasapi import Canvas
|
||||||
|
|
||||||
from module.const import COURSES_TO_SKIP, DL_LOCATION
|
from module.const import COURSES_TO_SKIP, OUTPUT_LOCATION
|
||||||
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_html, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html
|
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html, download_course_html
|
||||||
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
|
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
|
||||||
from module.items import CourseView
|
from module.items import CanvasCourse
|
||||||
from module.user_files import download_user_files
|
from module.user_files import download_user_files
|
||||||
|
|
||||||
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
|
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
@ -20,7 +21,7 @@ SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
def export_all_course_data(c):
|
def export_all_course_data(c):
|
||||||
json_data = json.dumps(json.loads(jsonpickle.encode(c, unpicklable=False)), indent=4)
|
json_data = json.dumps(json.loads(jsonpickle.encode(c, unpicklable=False)), indent=4)
|
||||||
course_output_dir = os.path.join(DL_LOCATION, c.term, c.name)
|
course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
|
||||||
if not os.path.exists(course_output_dir):
|
if not os.path.exists(course_output_dir):
|
||||||
os.makedirs(course_output_dir)
|
os.makedirs(course_output_dir)
|
||||||
course_output_path = os.path.join(course_output_dir, c.name + ".json")
|
course_output_path = os.path.join(course_output_dir, c.name + ".json")
|
||||||
|
@ -29,6 +30,15 @@ def export_all_course_data(c):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description='')
|
||||||
|
parser.add_argument('--output', default='./output', help='Output location. If it does not exist, it will be created.')
|
||||||
|
parser.add_argument('--term', default=None, help='Only download this term.')
|
||||||
|
parser.add_argument('--user-files', action='store_true', help="Download the user files.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
OUTPUT_LOCATION = Path(args.output).resolve().expanduser().absolute()
|
||||||
|
OUTPUT_LOCATION.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Startup checks.
|
# Startup checks.
|
||||||
creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
|
creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
|
||||||
if not creds_file.is_file():
|
if not creds_file.is_file():
|
||||||
|
@ -54,21 +64,16 @@ if __name__ == "__main__":
|
||||||
# Initialization
|
# Initialization
|
||||||
|
|
||||||
print("Welcome to the Canvas Student Data Export Tool")
|
print("Welcome to the Canvas Student Data Export Tool")
|
||||||
if not os.path.exists(DL_LOCATION):
|
if not os.path.exists(OUTPUT_LOCATION):
|
||||||
print("Creating output directory:", DL_LOCATION)
|
print("Creating output directory:", OUTPUT_LOCATION)
|
||||||
os.makedirs(DL_LOCATION)
|
os.makedirs(OUTPUT_LOCATION)
|
||||||
|
|
||||||
if COOKIES_PATH:
|
if COOKIES_PATH:
|
||||||
|
# Test the cookies.
|
||||||
print("Authenticating with Canvas frontend...")
|
print("Authenticating with Canvas frontend...")
|
||||||
|
|
||||||
# Test the cookies.
|
|
||||||
cookies = MozillaCookieJar(COOKIES_PATH)
|
|
||||||
cookies.load(ignore_discard=True, ignore_expires=True)
|
|
||||||
|
|
||||||
# Requests takes a dict, not the MozillaCookieJar object.
|
# Requests takes a dict, not the MozillaCookieJar object.
|
||||||
request_cookies = {}
|
request_cookies = {c.name: c.value for c in COOKIE_JAR}
|
||||||
for cookie in cookies:
|
|
||||||
request_cookies[cookie.name] = cookie.value
|
|
||||||
|
|
||||||
r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
|
r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
|
@ -79,6 +84,7 @@ if __name__ == "__main__":
|
||||||
print(r.url)
|
print(r.url)
|
||||||
quit(1)
|
quit(1)
|
||||||
if 'profileContent__Block' not in r.text:
|
if 'profileContent__Block' not in r.text:
|
||||||
|
# TODO: add an arg to skip this check.
|
||||||
print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
|
print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
|
||||||
quit(1)
|
quit(1)
|
||||||
|
|
||||||
|
@ -110,8 +116,10 @@ if __name__ == "__main__":
|
||||||
print("Downloading courses page...")
|
print("Downloading courses page...")
|
||||||
download_course_html(API_URL, COOKIES_PATH)
|
download_course_html(API_URL, COOKIES_PATH)
|
||||||
|
|
||||||
print('Downloading user files...')
|
if not args.user_files:
|
||||||
download_user_files(canvas, DL_LOCATION / 'User Files')
|
print('Downloading user files...')
|
||||||
|
download_user_files(canvas, OUTPUT_LOCATION / 'User Files')
|
||||||
|
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
all_courses_views = []
|
all_courses_views = []
|
||||||
|
@ -120,7 +128,12 @@ if __name__ == "__main__":
|
||||||
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
course_view = CourseView(course)
|
course_view = CanvasCourse(course)
|
||||||
|
|
||||||
|
if args.term and args.term != course_view.term:
|
||||||
|
print('Skipping term:', course_view.term, '\n')
|
||||||
|
continue
|
||||||
|
|
||||||
print(f"=== {course_view.term}: {course_view.name} ===")
|
print(f"=== {course_view.term}: {course_view.name} ===")
|
||||||
|
|
||||||
valid, r = course_view.test_course(API_URL, COOKIE_JAR)
|
valid, r = course_view.test_course(API_URL, COOKIE_JAR)
|
||||||
|
@ -136,6 +149,7 @@ if __name__ == "__main__":
|
||||||
course_view.discussions = find_course_discussions(course)
|
course_view.discussions = find_course_discussions(course)
|
||||||
course_view.pages = find_course_pages(course)
|
course_view.pages = find_course_pages(course)
|
||||||
course_view.modules = find_course_modules(course, course_view)
|
course_view.modules = find_course_modules(course, course_view)
|
||||||
|
|
||||||
all_courses_views.append(course_view)
|
all_courses_views.append(course_view)
|
||||||
|
|
||||||
print('Downloading course home page...')
|
print('Downloading course home page...')
|
||||||
|
@ -165,7 +179,7 @@ if __name__ == "__main__":
|
||||||
# Remove elements from the course objects that can't be JSON serialized, then format it.
|
# Remove elements from the course objects that can't be JSON serialized, then format it.
|
||||||
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
|
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
|
||||||
|
|
||||||
all_output_path = os.path.join(DL_LOCATION, "all_output.json")
|
all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
|
||||||
with open(all_output_path, "w") as out_file:
|
with open(all_output_path, "w") as out_file:
|
||||||
out_file.write(json_str)
|
out_file.write(json_str)
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Directory into which course information is downloaded (created if not present)
|
# Directory into which course information is downloaded (created if not present)
|
||||||
DL_LOCATION = Path("./output").resolve().expanduser().absolute()
|
OUTPUT_LOCATION = Path("./output").resolve().expanduser().absolute()
|
||||||
|
|
||||||
# List of Course IDs that should be skipped (need to be integers)
|
# List of Course IDs that should be skipped (need to be integers)
|
||||||
COURSES_TO_SKIP = [288290, 512033]
|
COURSES_TO_SKIP = [288290, 512033]
|
||||||
|
|
|
@ -8,7 +8,7 @@ import canvasapi
|
||||||
import requests
|
import requests
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module.const import DL_LOCATION, MAX_FOLDER_NAME_SIZE
|
from module.const import OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE
|
||||||
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
||||||
from module.singlefile import download_page
|
from module.singlefile import download_page
|
||||||
from module.threading import download_assignment, download_module_item
|
from module.threading import download_assignment, download_module_item
|
||||||
|
@ -16,7 +16,7 @@ from module.threading import download_assignment, download_module_item
|
||||||
|
|
||||||
def download_course_files(course, course_view):
|
def download_course_files(course, course_view):
|
||||||
# file full_name starts with "course files"
|
# file full_name starts with "course files"
|
||||||
dl_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
|
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
||||||
|
|
||||||
# Create directory if not present
|
# Create directory if not present
|
||||||
if not os.path.exists(dl_dir):
|
if not os.path.exists(dl_dir):
|
||||||
|
@ -51,7 +51,7 @@ def download_course_discussion_pages(api_url, course_view, cookies_path):
|
||||||
if cookies_path == "" or len(course_view.discussions) == 0:
|
if cookies_path == "" or len(course_view.discussions) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
base_discussion_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "discussions")
|
base_discussion_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "discussions")
|
||||||
if not os.path.exists(base_discussion_dir):
|
if not os.path.exists(base_discussion_dir):
|
||||||
os.makedirs(base_discussion_dir)
|
os.makedirs(base_discussion_dir)
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: Mo
|
||||||
if cookies_path == "" or len(course_view.assignments) == 0:
|
if cookies_path == "" or len(course_view.assignments) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
base_assign_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "assignments")
|
base_assign_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "assignments")
|
||||||
if not os.path.exists(base_assign_dir):
|
if not os.path.exists(base_assign_dir):
|
||||||
os.makedirs(base_assign_dir)
|
os.makedirs(base_assign_dir)
|
||||||
|
|
||||||
|
@ -114,7 +114,7 @@ def download_course_announcement_pages(api_url, course_view, cookies_path):
|
||||||
if cookies_path == "" or len(course_view.announcements) == 0:
|
if cookies_path == "" or len(course_view.announcements) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
base_announce_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "announcements")
|
base_announce_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "announcements")
|
||||||
if not os.path.exists(base_announce_dir):
|
if not os.path.exists(base_announce_dir):
|
||||||
os.makedirs(base_announce_dir)
|
os.makedirs(base_announce_dir)
|
||||||
announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
|
announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
|
||||||
|
@ -143,7 +143,7 @@ def download_course_announcement_pages(api_url, course_view, cookies_path):
|
||||||
|
|
||||||
|
|
||||||
def download_submission_attachments(course, course_view):
|
def download_submission_attachments(course, course_view):
|
||||||
course_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
|
course_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
||||||
|
|
||||||
# Create directory if not present
|
# Create directory if not present
|
||||||
if not os.path.exists(course_dir):
|
if not os.path.exists(course_dir):
|
||||||
|
@ -173,7 +173,7 @@ def download_course_html(api_url, cookies_path):
|
||||||
if cookies_path == "":
|
if cookies_path == "":
|
||||||
return
|
return
|
||||||
|
|
||||||
course_dir = DL_LOCATION
|
course_dir = OUTPUT_LOCATION
|
||||||
|
|
||||||
if not os.path.exists(course_dir):
|
if not os.path.exists(course_dir):
|
||||||
os.makedirs(course_dir)
|
os.makedirs(course_dir)
|
||||||
|
@ -189,7 +189,7 @@ def download_course_home_page_html(api_url, course_view, cookies_path):
|
||||||
if cookies_path == "":
|
if cookies_path == "":
|
||||||
return
|
return
|
||||||
|
|
||||||
dl_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
|
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
||||||
if not os.path.exists(dl_dir):
|
if not os.path.exists(dl_dir):
|
||||||
os.makedirs(dl_dir)
|
os.makedirs(dl_dir)
|
||||||
|
|
||||||
|
@ -204,18 +204,17 @@ def download_course_module_pages(api_url, course_view, cookies_path):
|
||||||
if cookies_path == "" or len(course_view.modules) == 0:
|
if cookies_path == "" or len(course_view.modules) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
|
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
|
||||||
if not os.path.exists(modules_dir):
|
if not os.path.exists(modules_dir):
|
||||||
os.makedirs(modules_dir)
|
os.makedirs(modules_dir)
|
||||||
|
|
||||||
module_list_dir = os.path.join(modules_dir, "modules_list.html")
|
|
||||||
|
|
||||||
# Download the modules page (the teacher may have disabled it)
|
# Download the modules page (the teacher may have disabled it)
|
||||||
|
module_list_dir = Path(str(modules_dir), "modules_list.html")
|
||||||
if not os.path.exists(module_list_dir):
|
if not os.path.exists(module_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
|
for module in tqdm(list(course_view.modules), desc='Downloading Modules'):
|
||||||
bar = tqdm(list(module.items), leave=False, desc=module.name)
|
bar = tqdm(list(module.items), leave=False, desc=module.name)
|
||||||
futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items]
|
futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items]
|
||||||
for _ in as_completed(futures):
|
for _ in as_completed(futures):
|
||||||
|
@ -227,7 +226,7 @@ def download_course_grades_page(api_url, course_view, cookies_path):
|
||||||
if cookies_path == "":
|
if cookies_path == "":
|
||||||
return
|
return
|
||||||
|
|
||||||
dl_dir = Path(DL_LOCATION, course_view.term, course_view.name)
|
dl_dir = Path(OUTPUT_LOCATION, course_view.term, course_view.name)
|
||||||
dl_dir.mkdir(parents=True, exist_ok=True)
|
dl_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# TODO: command line arg to prohibit overwrite. Default should overwrite
|
# TODO: command line arg to prohibit overwrite. Default should overwrite
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
|
@ -6,13 +7,16 @@ import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module.const import DATE_TEMPLATE, DL_LOCATION, MAX_FOLDER_NAME_SIZE
|
from module.const import DATE_TEMPLATE, OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE
|
||||||
from module.helpers import make_valid_filename, shorten_file_name
|
from module.helpers import make_valid_filename, shorten_file_name
|
||||||
from module.items import AssignmentView, AttachmentView, DiscussionView, ModuleItemView, ModuleView, PageView, SubmissionView, TopicEntryView, TopicReplyView
|
from module.items import AssignmentView, AttachmentView, DiscussionView, CanvasModuleItem, CanvasModule, PageView, SubmissionView, TopicEntryView, TopicReplyView
|
||||||
|
|
||||||
|
MODULE_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint="(.*?)" .*?>')
|
||||||
|
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')
|
||||||
|
|
||||||
|
|
||||||
def find_course_modules(course, course_view):
|
def find_course_modules(course, course_view):
|
||||||
modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
|
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
|
||||||
|
|
||||||
# Create modules directory if not present
|
# Create modules directory if not present
|
||||||
if not os.path.exists(modules_dir):
|
if not os.path.exists(modules_dir):
|
||||||
|
@ -23,24 +27,22 @@ def find_course_modules(course, course_view):
|
||||||
try:
|
try:
|
||||||
modules = list(course.get_modules())
|
modules = list(course.get_modules())
|
||||||
|
|
||||||
for module in tqdm(modules, desc='Downloading Module Files'):
|
for module in tqdm(modules, desc='Fetching Modules'):
|
||||||
module_view = ModuleView()
|
module_view = CanvasModule()
|
||||||
module_view.id = module.id if hasattr(module, "id") else ""
|
module_view.id = module.id if hasattr(module, "id") else ""
|
||||||
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get module items
|
# Get items for each module
|
||||||
module_items = module.get_module_items()
|
for item in module.get_module_items():
|
||||||
|
module_item = CanvasModuleItem()
|
||||||
|
module_item.id = item.id if hasattr(item, "id") else 0
|
||||||
|
module_item.title = str(item.title).replace(' ', ' ') if hasattr(item, "title") else ""
|
||||||
|
module_item.content_type = str(item.type) if hasattr(item, "type") else ""
|
||||||
|
module_item.url = str(item.html_url) if hasattr(item, "html_url") else ""
|
||||||
|
module_item.external_url = str(item.external_url) if hasattr(item, "external_url") else ""
|
||||||
|
|
||||||
for module_item in module_items:
|
if module_item.content_type == "File":
|
||||||
module_item_view = ModuleItemView()
|
|
||||||
module_item_view.id = module_item.id if hasattr(module_item, "id") else 0
|
|
||||||
module_item_view.title = str(module_item.title).replace(' ', ' ') if hasattr(module_item, "title") else ""
|
|
||||||
module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
|
|
||||||
module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else ""
|
|
||||||
module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
|
|
||||||
|
|
||||||
if module_item_view.content_type == "File":
|
|
||||||
# If problems arise due to long pathnames, changing module.name to module.id might help
|
# If problems arise due to long pathnames, changing module.name to module.id might help
|
||||||
# A change would also have to be made in download_course_module_pages(api_url, course_view, cookies_path)
|
# A change would also have to be made in download_course_module_pages(api_url, course_view, cookies_path)
|
||||||
module_name = make_valid_filename(str(module.name))
|
module_name = make_valid_filename(str(module.name))
|
||||||
|
@ -53,7 +55,7 @@ def find_course_modules(course, course_view):
|
||||||
os.makedirs(module_dir)
|
os.makedirs(module_dir)
|
||||||
|
|
||||||
# Get the file object
|
# Get the file object
|
||||||
module_file = course.get_file(str(module_item.content_id))
|
module_file = course.get_file(str(item.content_id))
|
||||||
|
|
||||||
# Create path for module file download
|
# Create path for module file download
|
||||||
module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))
|
module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))
|
||||||
|
@ -62,9 +64,21 @@ def find_course_modules(course, course_view):
|
||||||
if not os.path.exists(module_file_path):
|
if not os.path.exists(module_file_path):
|
||||||
module_file.download(module_file_path)
|
module_file.download(module_file_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping module file download that gave the following error: {e} - {module_item}")
|
tqdm.write(f"Skipping module file download that gave the following error: {e} - {item}")
|
||||||
|
|
||||||
module_view.items.append(module_item_view)
|
elif item.type == 'Page':
|
||||||
|
page = course.get_page(item.page_url)
|
||||||
|
if hasattr(page, 'body'):
|
||||||
|
# Extract the attached files from the item's HTML.
|
||||||
|
file_matches = re.findall(MODULE_ITEM_ATTACHED_FILE_RE, page.body)
|
||||||
|
for match in file_matches:
|
||||||
|
file_id = re.match(CANVAS_API_FILE_ID_RE, match)
|
||||||
|
if file_id:
|
||||||
|
# Grab the metadata from the API.
|
||||||
|
canvas_file = course.get_file(file_id.group(1))
|
||||||
|
module_item.attached_files.add(canvas_file)
|
||||||
|
|
||||||
|
module_view.items.append(module_item)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,22 @@
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from canvasapi.file import File
|
||||||
|
|
||||||
from module.helpers import make_valid_filename
|
from module.helpers import make_valid_filename
|
||||||
|
|
||||||
|
|
||||||
class ModuleItemView:
|
class CanvasModuleItem:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.title = ""
|
self.title = ""
|
||||||
self.content_type = ""
|
self.content_type = ""
|
||||||
self.url = ""
|
self.url = ""
|
||||||
self.external_url = ""
|
self.external_url = ""
|
||||||
|
self.attached_files: set[File] = set()
|
||||||
|
|
||||||
|
|
||||||
class ModuleView:
|
class CanvasModule:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.name = ""
|
self.name = ""
|
||||||
|
@ -94,7 +96,7 @@ class AssignmentView:
|
||||||
self.updated_url = ""
|
self.updated_url = ""
|
||||||
|
|
||||||
|
|
||||||
class CourseView:
|
class CanvasCourse:
|
||||||
def __init__(self, course):
|
def __init__(self, course):
|
||||||
self.course_id = course.id if hasattr(course, "id") else 0
|
self.course_id = course.id if hasattr(course, "id") else 0
|
||||||
self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
||||||
|
|
|
@ -1,30 +1,36 @@
|
||||||
import os
|
import os
|
||||||
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from module.singlefile import download_page
|
|
||||||
from module.const import MAX_FOLDER_NAME_SIZE
|
from module.const import MAX_FOLDER_NAME_SIZE
|
||||||
from module.download import download_file
|
from module.download import download_file
|
||||||
from module.get_canvas import get_extra_assignment_files
|
from module.get_canvas import get_extra_assignment_files
|
||||||
from module.helpers import make_valid_filename, shorten_file_name
|
from module.helpers import make_valid_filename, shorten_file_name
|
||||||
|
from module.items import CanvasModuleItem, CanvasModule
|
||||||
|
from module.singlefile import download_page
|
||||||
|
|
||||||
|
|
||||||
def download_module_item(module, item, modules_dir, cookies_path):
|
def download_module_item(module: CanvasModule, item: CanvasModuleItem, modules_dir, cookies_path):
|
||||||
# If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
|
try:
|
||||||
# A change would also have to be made in findCourseModules(course, course_view)
|
module_name = make_valid_filename(str(module.name))
|
||||||
module_name = make_valid_filename(str(module.name))
|
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
|
||||||
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
|
output_dir = Path(modules_dir, module_name)
|
||||||
items_dir = os.path.join(modules_dir, module_name)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if item.url != "":
|
if not item.url:
|
||||||
if not os.path.exists(items_dir):
|
return
|
||||||
os.makedirs(items_dir)
|
|
||||||
|
|
||||||
filename = make_valid_filename(str(item.title)) + ".html"
|
# Download attached files
|
||||||
module_item_dir = os.path.join(items_dir, filename)
|
for file in item.attached_files:
|
||||||
|
file.download(output_dir / file.filename)
|
||||||
|
|
||||||
# Download the module page.
|
# Download the module page.
|
||||||
if not os.path.exists(module_item_dir):
|
html_filename = make_valid_filename(str(item.title)) + ".html"
|
||||||
download_page(item.url, cookies_path, items_dir, filename)
|
if not (output_dir / html_filename).exists():
|
||||||
|
download_page(item.url, cookies_path, output_dir, html_filename)
|
||||||
|
except:
|
||||||
|
# TODO: wrap all threaded funcs in this try/catch
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment):
|
def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment):
|
||||||
|
|
|
@ -12,8 +12,7 @@ def do_download(task):
|
||||||
task[0].download(task[1])
|
task[0].download(task[1])
|
||||||
|
|
||||||
|
|
||||||
def download_user_files(canvas: canvasapi.Canvas, base_path: str):
|
def download_user_files(canvas: canvasapi.Canvas, base_path: Path):
|
||||||
base_path = Path(base_path)
|
|
||||||
user = canvas.get_current_user()
|
user = canvas.get_current_user()
|
||||||
folders = []
|
folders = []
|
||||||
for folder in user.get_folders():
|
for folder in user.get_folders():
|
||||||
|
|
Loading…
Reference in New Issue