refactor structure

This commit is contained in:
Cyberes 2024-01-26 08:55:17 -07:00
parent 796500e954
commit 55c2532075
9 changed files with 315 additions and 485 deletions

View File

@ -5,22 +5,22 @@ from http.cookiejar import MozillaCookieJar
from pathlib import Path from pathlib import Path
import canvasapi import canvasapi
import jsonpickle
import requests import requests
import yaml import yaml
from canvasapi import Canvas from canvasapi import Canvas
from module.const import COURSES_TO_SKIP, OUTPUT_LOCATION from module.const import global_consts
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html, download_course_html from module.download_canvas import download_assignments, download_course_modules, download_course_grades_page, download_course_announcement_pages, download_course_home_page_html, download_course_discussion_pages
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages from module.get_canvas import find_course_pages, find_course_modules, find_course_assignments, find_course_announcements, find_course_discussions
from module.items import CanvasCourse from module.items import CanvasCourse, jsonify_anything
from module.singlefile import download_page
from module.user_files import download_user_files from module.user_files import download_user_files
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__)) SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
def export_all_course_data(c): def export_all_course_data(c):
json_data = json.dumps(json.loads(jsonpickle.encode(c, unpicklable=False)), indent=4) json_data = jsonify_anything(c)
course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name) course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
if not os.path.exists(course_output_dir): if not os.path.exists(course_output_dir):
os.makedirs(course_output_dir) os.makedirs(course_output_dir)
@ -48,17 +48,17 @@ if __name__ == "__main__":
with open("credentials.yaml", 'r') as f: with open("credentials.yaml", 'r') as f:
credentials = yaml.full_load(f) credentials = yaml.full_load(f)
API_URL = credentials["API_URL"] global_consts.API_URL = credentials["API_URL"]
API_KEY = credentials["API_KEY"] global_consts.API_KEY = credentials["API_KEY"]
USER_ID = credentials["USER_ID"] global_consts.USER_ID = credentials["USER_ID"]
COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute()) global_consts.COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
if not Path(COOKIES_PATH).is_file(): if not Path(global_consts.COOKIES_PATH).is_file():
print('The cookies file does not exist:', COOKIES_PATH) print('The cookies file does not exist:', global_consts.COOKIES_PATH)
quit(1) quit(1)
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH) global_consts.COOKIE_JAR = MozillaCookieJar(global_consts.COOKIES_PATH)
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True) global_consts.COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
# ================================================================================================================== # ==================================================================================================================
# Initialization # Initialization
@ -68,18 +68,18 @@ if __name__ == "__main__":
print("Creating output directory:", OUTPUT_LOCATION) print("Creating output directory:", OUTPUT_LOCATION)
os.makedirs(OUTPUT_LOCATION) os.makedirs(OUTPUT_LOCATION)
if COOKIES_PATH: if global_consts.COOKIES_PATH:
# Test the cookies. # Test the cookies.
print("Authenticating with Canvas frontend...") print("Authenticating with Canvas frontend...")
# Requests takes a dict, not the MozillaCookieJar object. # Requests takes a dict, not the MozillaCookieJar object.
request_cookies = {c.name: c.value for c in COOKIE_JAR} request_cookies = {c.name: c.value for c in global_consts.COOKIE_JAR}
r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies) r = requests.get(f'{global_consts.API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
if r.status_code != 200: if r.status_code != 200:
print('Failed to fetch Canvas profile: got status code', r.status_code) print('Failed to fetch Canvas profile: got status code', r.status_code)
quit(1) quit(1)
if not r.url.startswith(API_URL): if not r.url.startswith(global_consts.API_URL):
print('Failed to fetch Canvas profile: client was redirected away from Canvas:') print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
print(r.url) print(r.url)
quit(1) quit(1)
@ -93,7 +93,7 @@ if __name__ == "__main__":
print('No cookies file specified! No HTML pages will be saved.') print('No cookies file specified! No HTML pages will be saved.')
print("Authenticating with Canvas API...") print("Authenticating with Canvas API...")
canvas = Canvas(API_URL, API_KEY) canvas = Canvas(global_consts.API_URL, global_consts.API_KEY)
courses = canvas.get_courses(include="term") courses = canvas.get_courses(include="term")
try: try:
course_count = len(list(courses)) course_count = len(list(courses))
@ -108,15 +108,17 @@ if __name__ == "__main__":
print('') print('')
skip = set(COURSES_TO_SKIP) skip = set(global_consts.COURSES_TO_SKIP)
# ================================================================================================================== # ==================================================================================================================
# Exporting # Exporting
print("Downloading courses page...") print("Downloading courses page...")
download_course_html(API_URL, COOKIES_PATH) courses_dict = {v['id']: v for v in json.loads(jsonify_anything(courses))['_elements']}
(global_consts.OUTPUT_LOCATION / 'courses.json').write_text(json.dumps(courses_dict))
download_page(global_consts.API_URL + "/courses/", global_consts.OUTPUT_LOCATION, "courses.html")
if not args.user_files: if args.user_files:
print('Downloading user files...') print('Downloading user files...')
download_user_files(canvas, OUTPUT_LOCATION / 'User Files') download_user_files(canvas, OUTPUT_LOCATION / 'User Files')
@ -128,56 +130,55 @@ if __name__ == "__main__":
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"): if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
continue continue
course_view = CanvasCourse(course) resolved_canvas_course = CanvasCourse(course)
if args.term and args.term != course_view.term: if args.term and args.term != resolved_canvas_course.term:
print('Skipping term:', course_view.term, '\n') print('Skipping term:', resolved_canvas_course.term, '\n')
continue continue
print(f"=== {course_view.term}: {course_view.name} ===") print(f"=== {resolved_canvas_course.term}: {resolved_canvas_course.name} ===")
valid, r = course_view.test_course(API_URL, COOKIE_JAR) valid, r = resolved_canvas_course.test_course(global_consts.API_URL, global_consts.COOKIE_JAR)
if not valid: if not valid:
print(f'Invalid course: {course_view.course_id} - {r} - {r.text}') print(f'Invalid course: {resolved_canvas_course.course_id} - {r} - {r.text}')
if r.status_code == 401: if r.status_code == 401:
# We can't recover from this error. # We can't recover from this error.
quit(1) quit(1)
continue continue
course_view.assignments = find_course_assignments(course, USER_ID) resolved_canvas_course.modules = find_course_modules(course)
course_view.announcements = find_course_announcements(course) resolved_canvas_course.assignments = find_course_assignments(course)
course_view.discussions = find_course_discussions(course) resolved_canvas_course.announcements = find_course_announcements(course)
course_view.pages = find_course_pages(course) resolved_canvas_course.discussions = find_course_discussions(course)
course_view.modules = find_course_modules(course, course_view) resolved_canvas_course.pages = find_course_pages(course)
all_courses_views.append(course_view) all_courses_views.append(resolved_canvas_course)
print('Downloading course home page...') print('Downloading course home page...')
download_course_home_page_html(API_URL, course_view, COOKIES_PATH) download_course_home_page_html(resolved_canvas_course)
print('Downloading grades...') print('Downloading grades...')
download_course_grades_page(API_URL, course_view, COOKIES_PATH) download_course_grades_page(resolved_canvas_course)
download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR) download_assignments(resolved_canvas_course)
download_course_module_pages(API_URL, course_view, COOKIES_PATH) download_course_modules(resolved_canvas_course)
download_course_announcement_pages(API_URL, course_view, COOKIES_PATH) download_course_announcement_pages(resolved_canvas_course)
download_course_discussion_pages(API_URL, course_view, COOKIES_PATH) download_course_discussion_pages(resolved_canvas_course)
download_course_files(course, course_view) # TODO: nothing to test this on
# download_course_files(course)
download_submission_attachments(course, course_view)
print("Exporting course metadata...") print("Exporting course metadata...")
export_all_course_data(course_view) export_all_course_data(resolved_canvas_course)
if course_count > 1: if course_count > 1:
print('') print('')
# Remove elements from the course objects that can't be JSON serialized, then format it. # Remove elements from the course objects that can't be JSON serialized, then format it.
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4) json_str = jsonify_anything(all_courses_views)
all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json") all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
with open(all_output_path, "w") as out_file: with open(all_output_path, "w") as out_file:

0
module/api/__init__.py Normal file
View File

21
module/api/file.py Normal file
View File

@ -0,0 +1,21 @@
import re
import canvasapi
from canvasapi.course import Course
# Captures the value of the ``data-api-endpoint`` attribute of an ``<a>`` tag.
HTML_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint=\"(.*?)\" .*?>')
# Captures whatever follows ``/files/`` in an ``/api/v1/courses/<x>/files/<id>`` URL.
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')


def get_embedded_files(course: "Course", html: str) -> set:
    """Collect the course files linked from a chunk of item HTML.

    Every ``<a>`` tag carrying a ``data-api-endpoint`` attribute is inspected;
    endpoints that look like course-file API URLs are resolved through
    ``course.get_file``.

    :param course: object exposing ``get_file(file_id)``.
    :param html: HTML body to scan. ``None`` or an empty string yields an
        empty result instead of raising.
    :return: set of the objects returned by ``course.get_file``.
    """
    attached_files = set()
    # An item without a body has nothing to scan; re.findall() would raise
    # TypeError on None.
    if not html:
        return attached_files

    for endpoint in HTML_ITEM_ATTACHED_FILE_RE.findall(html):
        file_id = CANVAS_API_FILE_ID_RE.match(endpoint)
        if not file_id:
            # The endpoint points at something other than a course file.
            continue
        try:
            attached_files.add(course.get_file(file_id.group(1)))
        except canvasapi.exceptions.ResourceDoesNotExist:
            # The link references a file that no longer exists; skip it.
            continue
    return attached_files

View File

@ -1,14 +1,28 @@
from http.cookiejar import MozillaCookieJar
from pathlib import Path from pathlib import Path
# Directory in which to download course information to (will be created if not present)
OUTPUT_LOCATION = Path("./output").resolve().expanduser().absolute()
# List of Course IDs that should be skipped (need to be integers) class GlobalConsts:
COURSES_TO_SKIP = [288290, 512033] # Directory in which to download course information to (will be created if not present)
OUTPUT_LOCATION = Path("./output").resolve().expanduser().absolute()
DATE_TEMPLATE = "%B %d, %Y %I:%M %p" # List of Course IDs that should be skipped (need to be integers)
COURSES_TO_SKIP = []
# Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to prevent the chance of reaching the limit DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
# Applies to modules, assignments, announcements, and discussions
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid) # Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to prevent the chance of reaching the limit
MAX_FOLDER_NAME_SIZE = 70 # Applies to modules, assignments, announcements, and discussions
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
MAX_FOLDER_NAME_SIZE = 70
COOKIES_PATH = ""
COOKIE_JAR = MozillaCookieJar()
API_URL = ""
API_KEY = ""
USER_ID = ""
global_consts = GlobalConsts()

View File

@ -1,26 +1,20 @@
import os
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial from functools import partial
from http.cookiejar import MozillaCookieJar
from pathlib import Path
import canvasapi import canvasapi
import requests
from tqdm import tqdm from tqdm import tqdm
from module.const import OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE from module.api.file import get_embedded_files
from module.const import global_consts
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
from module.items import CanvasCourse, jsonify_anything
from module.singlefile import download_page from module.singlefile import download_page
from module.threading import download_assignment, download_module_item from module.threading import download_assignment, download_module_item
def download_course_files(course, course_view): def download_course_files(course, course_view):
# file full_name starts with "course files" dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name) dl_dir.mkdir(parents=True, exist_ok=True)
# Create directory if not present
if not os.path.exists(dl_dir):
os.makedirs(dl_dir)
try: try:
files = list(course.get_files()) files = list(course.get_files())
@ -31,205 +25,109 @@ def download_course_files(course, course_view):
for file in tqdm(files, desc='Downloading Files'): for file in tqdm(files, desc='Downloading Files'):
try: try:
file_folder = course.get_folder(file.folder_id) file_folder = course.get_folder(file.folder_id)
folder_dl_dir = dl_dir / make_valid_folder_path(file_folder.full_name)
folder_dl_dir = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name)) folder_dl_dir.mkdir(parents=True, exist_ok=True)
dl_path = folder_dl_dir / make_valid_filename(str(file.display_name))
if not os.path.exists(folder_dl_dir):
os.makedirs(folder_dl_dir)
dl_path = os.path.join(folder_dl_dir, make_valid_filename(str(file.display_name)))
# Download file if it doesn't already exist
if not os.path.exists(dl_path):
# print('Downloading: {}'.format(dl_path))
file.download(dl_path) file.download(dl_path)
except Exception as e: except Exception as e:
tqdm.write(f"Skipping {file.display_name} - {e}") tqdm.write(f"Skipping {file.display_name} - {e}")
def download_course_discussion_pages(api_url, course_view, cookies_path): def download_course_discussion_pages(resolved_course: CanvasCourse):
if cookies_path == "" or len(course_view.discussions) == 0: if not len(resolved_course.discussions):
return return
base_discussion_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "discussions") base_discussion_dir = global_consts.OUTPUT_LOCATION / resolved_course.term / resolved_course.name / 'discussions'
if not os.path.exists(base_discussion_dir): base_discussion_dir.mkdir(parents=True, exist_ok=True)
os.makedirs(base_discussion_dir)
discussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html") # (base_discussion_dir / 'discussions.json').write_text(jsonify_anything(resolved_course.discussions))
download_page(global_consts.API_URL + "/courses/" + str(resolved_course.course_id) + "/discussion_topics/", base_discussion_dir, "discussions.html")
# Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled) for discussion in tqdm(list(resolved_course.discussions), desc='Downloading Discussions'):
if not os.path.exists(discussion_list_dir):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
for discussion in tqdm(list(course_view.discussions), desc='Downloading Discussions'):
discussion_title = make_valid_filename(str(discussion.title)) discussion_title = make_valid_filename(str(discussion.title))
discussion_title = shorten_file_name(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE) discussion_title = shorten_file_name(discussion_title, len(discussion_title) - global_consts.MAX_FOLDER_NAME_SIZE)
discussion_dir = os.path.join(base_discussion_dir, discussion_title) discussion_dir = base_discussion_dir / discussion_title
if discussion.url == "": if not discussion.url:
continue continue
if not os.path.exists(discussion_dir): discussion_dir.mkdir(parents=True, exist_ok=True)
os.makedirs(discussion_dir)
for file in get_embedded_files(resolved_course.course, discussion.body):
file.download(discussion_dir / file.display_name)
# Downloads each page that a discussion takes.
for i in range(discussion.amount_pages): for i in range(discussion.amount_pages):
filename = "discussion_" + str(i + 1) + ".html" filename = "discussion_" + str(i + 1) + ".html"
discussion_page_dir = os.path.join(discussion_dir, filename) download_page(discussion.url + "/page-" + str(i + 1), discussion_dir, filename)
# Download assignment page, this usually has instructions and etc.
if not os.path.exists(discussion_page_dir):
download_page(discussion.url + "/page-" + str(i + 1), cookies_path, discussion_dir, filename)
def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: MozillaCookieJar): def download_assignments(course_view: CanvasCourse):
if cookies_path == "" or len(course_view.assignments) == 0: if not len(course_view.assignments):
return return
base_assign_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "assignments") base_assign_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name / 'assignments'
if not os.path.exists(base_assign_dir): base_assign_dir.mkdir(parents=True, exist_ok=True)
os.makedirs(base_assign_dir)
assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html") # (base_assign_dir / 'assignments.json').write_text(jsonify_anything(course_view.assignments))
download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id) + "/assignments/", base_assign_dir, "assignments.html")
# Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
if not os.path.exists(assignment_list_path):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
with ThreadPoolExecutor(max_workers=3) as executor: with ThreadPoolExecutor(max_workers=3) as executor:
download_func = partial(download_assignment, cookies_path, cookie_jar, base_assign_dir) download_func = partial(download_assignment, base_assign_dir, course_view.course)
list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments')) list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
def download_course_announcement_pages(api_url, course_view, cookies_path): def download_course_announcement_pages(resolved_course: CanvasCourse):
""" if not len(resolved_course.announcements):
Download assignment list.
There's a chance this might be the course homepage if the course has the assignments page disabled.
:param api_url:
:param course_view:
:param cookies_path:
:return:
"""
if cookies_path == "" or len(course_view.announcements) == 0:
return return
base_announce_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "announcements") base_announce_dir = global_consts.OUTPUT_LOCATION / resolved_course.term / resolved_course.name / 'announcements'
if not os.path.exists(base_announce_dir): base_announce_dir.mkdir(parents=True, exist_ok=True)
os.makedirs(base_announce_dir)
announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
if not os.path.exists(announcement_list_dir):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
for announcements in tqdm(list(course_view.announcements), desc='Downloading Announcements'): # (base_announce_dir / 'announcements.json').write_text(jsonify_anything(resolved_course.announcements))
announcements_title = make_valid_filename(str(announcements.title)) download_page(global_consts.API_URL + "/courses/" + str(resolved_course.course_id) + "/announcements/", base_announce_dir, "announcements.html")
announcements_title = shorten_file_name(announcements_title, len(announcements_title) - MAX_FOLDER_NAME_SIZE)
announce_dir = os.path.join(base_announce_dir, announcements_title)
if announcements.url == "": for announcement in tqdm(list(resolved_course.announcements), desc='Downloading Announcements'):
announcements_title = make_valid_filename(str(announcement.title))
announcements_title = shorten_file_name(announcements_title, len(announcements_title) - global_consts.MAX_FOLDER_NAME_SIZE)
announce_dir = base_announce_dir / announcements_title
if not announcement.url:
continue continue
if not os.path.exists(announce_dir): announce_dir.mkdir(parents=True, exist_ok=True)
os.makedirs(announce_dir)
# Downloads each page that a discussion takes. for file in get_embedded_files(resolved_course.course, announcement.body):
for i in range(announcements.amount_pages): file.download(announce_dir / file.display_name)
for i in range(announcement.amount_pages):
filename = "announcement_" + str(i + 1) + ".html" filename = "announcement_" + str(i + 1) + ".html"
announcement_page_dir = os.path.join(announce_dir, filename) download_page(announcement.url + "/page-" + str(i + 1), announce_dir, filename)
# Download assignment page, this usually has instructions and etc.
if not os.path.exists(announcement_page_dir):
download_page(announcements.url + "/page-" + str(i + 1), cookies_path, announce_dir, filename)
def download_submission_attachments(course, course_view): def download_course_home_page_html(course_view):
course_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name) dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
dl_dir.mkdir(parents=True, exist_ok=True)
# Create directory if not present download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id), dl_dir, "homepage.html")
if not os.path.exists(course_dir):
os.makedirs(course_dir)
for assignment in tqdm(list(course_view.assignments), desc='Downloading Submissions'):
for submission in assignment.submissions:
assignment_title = make_valid_filename(str(assignment.title))
assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
attachment_dir = os.path.join(course_dir, "assignments", assignment_title)
if len(assignment.submissions) != 1:
attachment_dir = os.path.join(attachment_dir, str(submission.user_id))
if not os.path.exists(attachment_dir) and submission.attachments:
os.makedirs(attachment_dir)
for attachment in submission.attachments:
filepath = os.path.join(attachment_dir, make_valid_filename(str(attachment.id) + "_" + attachment.filename))
if not os.path.exists(filepath):
# print('Downloading attachment: {}'.format(filepath))
r = requests.get(attachment.url, allow_redirects=True)
with open(filepath, 'wb') as f:
f.write(r.content)
# else:
# print('File already exists: {}'.format(filepath))
def download_course_html(api_url, cookies_path): def download_course_modules(course_view: CanvasCourse):
if cookies_path == "": modules_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name / 'modules'
return modules_dir.mkdir(parents=True, exist_ok=True)
course_dir = OUTPUT_LOCATION # (modules_dir / 'modules.json').write_text(jsonify_anything(course_view.modules))
download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id) + "/modules/", modules_dir, "modules.html")
if not os.path.exists(course_dir):
os.makedirs(course_dir)
course_list_path = os.path.join(course_dir, "course_list.html")
# Downloads the course list.
if not os.path.exists(course_list_path):
download_page(api_url + "/courses/", cookies_path, course_dir, "course_list.html")
def download_course_home_page_html(api_url, course_view, cookies_path):
if cookies_path == "":
return
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
if not os.path.exists(dl_dir):
os.makedirs(dl_dir)
homepage_path = os.path.join(dl_dir, "homepage.html")
# Downloads the course home page.
if not os.path.exists(homepage_path):
download_page(api_url + "/courses/" + str(course_view.course_id), cookies_path, dl_dir, "homepage.html")
def download_course_module_pages(api_url, course_view, cookies_path):
if cookies_path == "" or len(course_view.modules) == 0:
return
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
if not os.path.exists(modules_dir):
os.makedirs(modules_dir)
# Downloads the modules page (possible this is disabled by the teacher)
module_list_dir = Path(str(modules_dir), "modules_list.html")
if not os.path.exists(module_list_dir):
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
with ThreadPoolExecutor(max_workers=3) as executor: with ThreadPoolExecutor(max_workers=3) as executor:
for module in tqdm(list(course_view.modules), desc='Downloading Modules'): for module in tqdm(list(course_view.modules), desc='Downloading Modules'):
bar = tqdm(list(module.items), leave=False, desc=module.name) bar = tqdm(list(module.items), leave=False, desc=module.module.name)
futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items] futures = [executor.submit(download_module_item, course_view.course, module, item, modules_dir) for item in module.items]
for _ in as_completed(futures): for _ in as_completed(futures):
bar.update() bar.update()
bar.close() bar.close()
def download_course_grades_page(api_url, course_view, cookies_path): def download_course_grades_page(course_view: CanvasCourse):
if cookies_path == "": dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
return
dl_dir = Path(OUTPUT_LOCATION, course_view.term, course_view.name)
dl_dir.mkdir(parents=True, exist_ok=True) dl_dir.mkdir(parents=True, exist_ok=True)
api_target = f'{global_consts.API_URL}/courses/{course_view.course_id}/grades'
# TODO: command line arg to prohibit overwrite. Default should overwrite download_page(api_target, dl_dir, "grades.html")
if not (dl_dir / "grades.html").exists():
api_target = f'{api_url}/courses/{course_view.course_id}/grades'
download_page(api_target, cookies_path, dl_dir, "grades.html")

View File

@ -1,113 +1,51 @@
import os
import re import re
from http.cookiejar import MozillaCookieJar from typing import List
import canvasapi
import dateutil.parser import dateutil.parser
import requests from canvasapi.discussion_topic import DiscussionTopic
from bs4 import BeautifulSoup
from tqdm import tqdm from tqdm import tqdm
from module.const import DATE_TEMPLATE, OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE from module.const import global_consts
from module.helpers import make_valid_filename, shorten_file_name from module.items import CanvasDiscussion, CanvasPage, CanvasTopicEntry, CanvasTopicReply, CanvasModule
from module.items import AssignmentView, AttachmentView, DiscussionView, CanvasModuleItem, CanvasModule, PageView, SubmissionView, TopicEntryView, TopicReplyView
MODULE_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint="(.*?)" .*?>') HTML_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint=\"(.*?)\" .*?>')
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$') CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')
def find_course_modules(course, course_view): def find_course_modules(course) -> List[CanvasModule]:
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules") # modules_dir = os.path.join(global_consts.OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
# Create modules directory if not present results = []
if not os.path.exists(modules_dir):
os.makedirs(modules_dir)
module_views = []
try: try:
modules = list(course.get_modules()) modules = list(course.get_modules())
for module in tqdm(modules, desc='Fetching Modules'): for module in tqdm(modules, desc='Fetching Modules'):
module_view = CanvasModule()
module_view.id = module.id if hasattr(module, "id") else ""
module_view.name = str(module.name) if hasattr(module, "name") else ""
try: try:
# Get items for each module resolved_module = CanvasModule(module)
for item in module.get_module_items(): for item in resolved_module.items:
module_item = CanvasModuleItem() if item.item.type == 'Page':
module_item.id = item.id if hasattr(item, "id") else 0 page = course.get_page(item.item.page_url)
module_item.title = str(item.title).replace(' ', ' ') if hasattr(item, "title") else "" item.page = page
module_item.content_type = str(item.type) if hasattr(item, "type") else ""
module_item.url = str(item.html_url) if hasattr(item, "html_url") else ""
module_item.external_url = str(item.external_url) if hasattr(item, "external_url") else ""
if module_item.content_type == "File":
# If problems arise due to long pathnames, changing module.name to module.id might help
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
module_name = make_valid_filename(str(module.name))
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
module_dir = os.path.join(modules_dir, module_name, "files")
try:
# Create directory for current module if not present
if not os.path.exists(module_dir):
os.makedirs(module_dir)
# Get the file object
module_file = course.get_file(str(item.content_id))
# Create path for module file download
module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))
# Download file if it doesn't already exist
if not os.path.exists(module_file_path):
module_file.download(module_file_path)
except Exception as e:
tqdm.write(f"Skipping module file download that gave the following error: {e} - {item}")
elif item.type == 'Page':
page = course.get_page(item.page_url)
if hasattr(page, 'body'): if hasattr(page, 'body'):
# Extract the attached files from the item's HTML. # Extract the attached files from the item's HTML.
file_matches = re.findall(MODULE_ITEM_ATTACHED_FILE_RE, page.body) file_matches = re.findall(HTML_ITEM_ATTACHED_FILE_RE, page.body)
for match in file_matches: for match in file_matches:
file_id = re.match(CANVAS_API_FILE_ID_RE, match) file_id = re.match(CANVAS_API_FILE_ID_RE, match)
if file_id: if file_id:
try:
# Grab the metadata from the API. # Grab the metadata from the API.
canvas_file = course.get_file(file_id.group(1)) canvas_file = course.get_file(file_id.group(1))
module_item.attached_files.add(canvas_file) item.attached_files.add(canvas_file)
except canvasapi.exceptions.ResourceDoesNotExist:
module_view.items.append(module_item) continue
results.append(resolved_module)
except Exception as e:
tqdm.write(f"Skipping module file download that gave the following error: {e}")
except Exception as e: except Exception as e:
tqdm.write(f"Skipping module file download that gave the following error: {e}") tqdm.write(f"Skipping module file download that gave the following error: {e}")
module_views.append(module_view) return results
except Exception as e:
print("Skipping entire module that gave the following error:")
print(e)
return module_views
def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
    """
    Collect the Canvas files linked from a chunk of HTML.

    Every `<a>` tag carrying `data-api-returntype="File"` is looked up through the URL in
    its `data-api-endpoint` attribute; the JSON answer supplies the file's display name and
    download URL.

    :param html: HTML markup to scan.
    :param cookie_jar: Cookies copied (name and value only) into the HTTP session used for the lookups.
    :return: A list of `(display_name, url)` tuples, one per endpoint that answered with HTTP 200.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # An anchor can be tagged as a file yet lack the endpoint attribute; skip it instead of raising KeyError.
    urls = [
        a['data-api-endpoint']
        for a in soup.find_all('a', {'data-api-returntype': 'File'})
        if a.get('data-api-endpoint')
    ]
    extra_files = []
    # The context manager closes the session (and its pooled connections) even if a request raises.
    with requests.Session() as s:
        for cookie in cookie_jar:
            s.cookies.set(cookie.name, cookie.value)
        for item in urls:
            r = s.get(item)
            if r.status_code != 200:
                # Best effort: endpoints that do not answer 200 are left out of the result.
                continue
            j = r.json()
            extra_files.append((j['display_name'], j['url']))
    return extra_files
def get_course_page_urls(course): def get_course_page_urls(course):
@ -132,18 +70,18 @@ def find_course_pages(course):
for url in tqdm(page_urls, desc='Fetching Pages'): for url in tqdm(page_urls, desc='Fetching Pages'):
page = course.get_page(url) page = course.get_page(url)
page_view = PageView() page_view = CanvasPage()
page_view.id = page.id if hasattr(page, "id") else 0 page_view.id = page.id if hasattr(page, "id") else 0
page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else "" page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else ""
page_view.body = str(page.body) if hasattr(page, "body") else "" page_view.body = str(page.body) if hasattr(page, "body") else ""
if hasattr(page, "created_at"): if hasattr(page, "created_at"):
page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE) page_view.created_date = dateutil.parser.parse(page.created_at).strftime(global_consts.DATE_TEMPLATE)
else: else:
page_view.created_date = '' page_view.created_date = ''
if hasattr(page, "updated_at"): if hasattr(page, "updated_at"):
page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE) page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(global_consts.DATE_TEMPLATE)
else: else:
page_view.last_updated_date = '' page_view.last_updated_date = ''
@ -154,83 +92,31 @@ def find_course_pages(course):
return page_views return page_views
def find_course_assignments(course, user_id): def find_course_assignments(course):
assignment_views = [] results = []
# Get all assignments
assignments = list(course.get_assignments()) assignments = list(course.get_assignments())
for assignment in tqdm(assignments, desc='Fetching Assignments'): for assignment in tqdm(assignments, desc='Fetching Assignments'):
assignment_view = AssignmentView() # Have to re-define the object because the `/api/v1/courses/:course_id/assignments` endpoint is sometimes outdated.
assignment_view.id = assignment.id if hasattr(assignment, "id") else "" # The endpoint `/api/v1/courses/:course_id/assignments/:id` has the most up to date data.
assignment_view.title = make_valid_filename(str(assignment.name).replace(' ', ' ')) if hasattr(assignment, "name") else "" assignment = course.get_assignment(assignment.id)
assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else "" results.append(assignment)
assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "created_at_date") else "" return results
assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "due_at_date") else ""
assignment_view.html_url = assignment.html_url if hasattr(assignment, "html_url") else ""
assignment_view.ext_url = str(assignment.url) if hasattr(assignment, "url") else ""
assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if hasattr(assignment, "submissions_download_url") else ""
# Download submission for this user only
submissions = [assignment.get_submission(user_id)]
if not len(submissions):
raise IndexError(f'No submissions found for assignment: {vars(assignment)}')
try:
for submission in submissions:
sub_view = SubmissionView()
sub_view.id = submission.id if hasattr(submission, "id") else 0
sub_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
sub_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
sub_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
sub_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
sub_view.attempt = submission.attempt if hasattr(submission, "attempt") and submission.attempt is not None else 0
sub_view.user_id = str(submission.user_id) if hasattr(submission, "user_id") else ""
sub_view.preview_url = str(submission.preview_url) if hasattr(submission, "preview_url") else ""
sub_view.ext_url = str(submission.url) if hasattr(submission, "url") else ""
try:
submission.attachments
except AttributeError:
print('No attachments')
else:
for attachment in submission.attachments:
attach_view = AttachmentView()
attach_view.url = attachment.url
attach_view.id = attachment.id
attach_view.filename = attachment.filename
sub_view.attachments.append(attach_view)
assignment_view.submissions.append(sub_view)
except Exception as e:
raise
# print("Skipping submission that gave the following error:")
# print(e)
assignment_views.append(assignment_view)
return assignment_views
def find_course_announcements(course): def find_course_announcements(course):
announcement_views = [] announcement_views = []
announcements: List[DiscussionTopic] = list(course.get_discussion_topics(only_announcements=True))
# try:
announcements = list(course.get_discussion_topics(only_announcements=True))
for announcement in tqdm(announcements, desc='Fetching Announcements'): for announcement in tqdm(announcements, desc='Fetching Announcements'):
discussion_view = get_discussion_view(announcement) discussion_view = get_discussion_view(announcement)
announcement_views.append(discussion_view) announcement_views.append(discussion_view)
# except Exception as e:
# print("Skipping announcement that gave the following error:")
# print(e)
return announcement_views return announcement_views
def get_discussion_view(discussion_topic): def get_discussion_view(discussion_topic):
# Create discussion view # Create discussion view
discussion_view = DiscussionView() discussion_view = CanvasDiscussion(discussion_topic)
discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0 discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else "" discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else ""
discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else "" discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
@ -250,7 +136,7 @@ def get_discussion_view(discussion_topic):
topic_entries_counter += 1 topic_entries_counter += 1
# Create new discussion view for the topic_entry # Create new discussion view for the topic_entry
topic_entry_view = TopicEntryView() topic_entry_view = CanvasTopicEntry()
topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0 topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else "" topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else "" topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else ""
@ -262,7 +148,7 @@ def get_discussion_view(discussion_topic):
try: try:
for topic_reply in topic_entry_replies: for topic_reply in topic_entry_replies:
# Create new topic reply view # Create new topic reply view
topic_reply_view = TopicReplyView() topic_reply_view = CanvasTopicReply()
topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0 topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else "" topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else "" topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else ""
@ -286,15 +172,8 @@ def get_discussion_view(discussion_topic):
def find_course_discussions(course): def find_course_discussions(course):
discussion_views = [] discussion_views = []
# try:
discussion_topics = list(course.get_discussion_topics()) discussion_topics = list(course.get_discussion_topics())
for discussion_topic in tqdm(discussion_topics, desc='Fetching Discussions'): for discussion_topic in tqdm(discussion_topics, desc='Fetching Discussions'):
discussion_view = get_discussion_view(discussion_topic) discussion_view = get_discussion_view(discussion_topic)
discussion_views.append(discussion_view) discussion_views.append(discussion_view)
# except Exception as e:
# print("Skipping discussion that gave the following error:")
# print(e)
return discussion_views return discussion_views

View File

@ -1,29 +1,64 @@
import json
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from typing import List, Any
import requests import requests
from canvasapi.assignment import Assignment
from canvasapi.course import Course
from canvasapi.file import File from canvasapi.file import File
from canvasapi.module import ModuleItem, Module
from canvasapi.page import Page
from module.helpers import make_valid_filename from module.helpers import make_valid_filename
def varsify(item) -> Any:
    """
    Recursively reduce `item` to plain data (dicts, lists and scalars).

    - `None`, `str`, `int`, `float` and `bool` are returned unchanged.
    - Lists, tuples, sets and frozensets become lists of converted elements.
    - Dicts keep their keys; their values are converted.
    - Any other object is replaced by a dict of its instance attributes, skipping every
      attribute whose name starts with an underscore.
    - Objects without a `__dict__`, and objects already being converted further up the
      current path (reference cycles), are returned unchanged.

    :param item: The value to convert.
    :return: The converted structure, or `item` itself when it cannot be broken down.
    """
    return _varsify(item, set())


def _varsify(item, active: set) -> Any:
    """Recursive worker for `varsify`; `active` holds the ids of the objects on the current path."""
    if item is None or isinstance(item, (str, int, float, bool)):
        return item

    marker = id(item)
    if marker in active:
        # Reference cycle: hand the object back untouched rather than recursing until the
        # interpreter's recursion limit is hit.
        return item

    active.add(marker)
    try:
        if isinstance(item, (list, tuple, set, frozenset)):
            return [_varsify(element, active) for element in item]
        if isinstance(item, dict):
            return {key: _varsify(value, active) for key, value in item.items()}
        try:
            attributes = vars(item)
        except TypeError:
            # vars() raises TypeError for objects that have no __dict__; leave those as they are.
            return item
        return {
            name: _varsify(value, active)
            for name, value in attributes.items()
            if not str(name).startswith('_')
        }
    finally:
        # Only the current path is tracked, so an object referenced twice (without a cycle)
        # is still converted both times.
        active.discard(marker)
def jsonify_anything(item):
    """
    Render `item` as a JSON string.

    The value is first passed through `varsify`; the result is dumped with a four-space
    indent and sorted keys, and anything `json` cannot encode natively is written as its
    `str()` form.
    """
    plain = varsify(item)
    return json.dumps(plain, default=str, sort_keys=True, indent=4)
class CanvasModuleItem: class CanvasModuleItem:
def __init__(self): def __init__(self, module_item: ModuleItem):
self.id = 0 self.item = module_item
self.title = ""
self.content_type = ""
self.url = ""
self.external_url = ""
self.attached_files: set[File] = set() self.attached_files: set[File] = set()
self.page: Page
class CanvasModule: class CanvasModule:
def __init__(self): def __init__(self, module: Module):
self.id = 0 self.module = module
self.name = "" self.items: List[CanvasModuleItem] = []
self.items = [] for item in module.get_module_items():
i = self.module.get_module_item(item.id)
self.items.append(CanvasModuleItem(i))
class PageView: class CanvasPage:
def __init__(self): def __init__(self):
self.id = 0 self.id = 0
self.title = "" self.title = ""
@ -32,7 +67,7 @@ class PageView:
self.last_updated_date = "" self.last_updated_date = ""
class TopicReplyView: class CanvasTopicReply:
def __init__(self): def __init__(self):
self.id = 0 self.id = 0
self.author = "" self.author = ""
@ -40,7 +75,7 @@ class TopicReplyView:
self.body = "" self.body = ""
class TopicEntryView: class CanvasTopicEntry:
def __init__(self): def __init__(self):
self.id = 0 self.id = 0
self.author = "" self.author = ""
@ -49,8 +84,9 @@ class TopicEntryView:
self.topic_replies = [] self.topic_replies = []
class DiscussionView: class CanvasDiscussion:
def __init__(self): def __init__(self, discussion):
self.discussion = discussion
self.id = 0 self.id = 0
self.title = "" self.title = ""
self.author = "" self.author = ""
@ -61,7 +97,7 @@ class DiscussionView:
self.amount_pages = 0 self.amount_pages = 0
class SubmissionView: class CanvasSubmission:
def __init__(self): def __init__(self):
self.id = 0 self.id = 0
self.attachments = [] self.attachments = []
@ -75,41 +111,25 @@ class SubmissionView:
self.ext_url = "" self.ext_url = ""
class AttachmentView:
def __init__(self):
self.id = 0
self.filename = ""
self.url = ""
class AssignmentView:
def __init__(self):
self.id = 0
self.title = ""
self.description = ""
self.assigned_date = ""
self.due_date = ""
self.submissions = []
self.html_url = ""
self.ext_url = ""
self.updated_url = ""
class CanvasCourse: class CanvasCourse:
def __init__(self, course): def __init__(self, course):
self.course: Course = course
self.course_id = course.id if hasattr(course, "id") else 0 self.course_id = course.id if hasattr(course, "id") else 0
self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "") self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
self.course_code = make_valid_filename(course.course_code if hasattr(course, "course_code") else "") self.course_code = make_valid_filename(course.course_code if hasattr(course, "course_code") else "")
if hasattr(course, 'original_name'):
self.name = course.original_name
else:
self.name = course.name if hasattr(course, "name") else "" self.name = course.name if hasattr(course, "name") else ""
self.course_code = self.course_code.replace(' ', ' ') self.course_code = self.course_code.replace(' ', ' ')
self.name = self.name.replace(' ', ' ') self.name = self.name.replace(' ', ' ')
self.assignments = [] self.assignments: List[Assignment] = []
self.announcements = [] self.announcements: List[CanvasDiscussion] = []
self.discussions = [] self.discussions: List[CanvasDiscussion] = []
self.modules = [] self.modules: List[CanvasModule] = []
def test_course(self, base_url: str, cookie_jar: MozillaCookieJar): def test_course(self, base_url: str, cookie_jar: MozillaCookieJar):
s = requests.Session() s = requests.Session()

View File

@ -1,6 +1,8 @@
from pathlib import Path from pathlib import Path
from subprocess import run from subprocess import run
from .const import global_consts
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file" SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
# TODO: have this be specified by a required arg. # TODO: have this be specified by a required arg.
@ -11,7 +13,7 @@ def add_quotes(s):
return "\"" + str(s).strip("\"") + "\"" return "\"" + str(s).strip("\"") + "\""
def download_page(url, cookies_path, output_path, output_name_template=""): def download_page(url, output_path, output_name_template=""):
# TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?' # TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?'
if output_name_template and Path(output_path, output_name_template).exists(): if output_name_template and Path(output_path, output_name_template).exists():
@ -21,7 +23,7 @@ def download_page(url, cookies_path, output_path, output_name_template=""):
args = [ args = [
add_quotes(SINGLEFILE_BINARY_PATH), add_quotes(SINGLEFILE_BINARY_PATH),
"--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")), "--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
"--browser-cookies-file=" + add_quotes(cookies_path), "--browser-cookies-file=" + add_quotes(global_consts.COOKIES_PATH),
"--output-directory=" + add_quotes(output_path), "--output-directory=" + add_quotes(output_path),
add_quotes(url) add_quotes(url)
] ]

View File

@ -1,85 +1,80 @@
import os
import traceback import traceback
from pathlib import Path from pathlib import Path
from module.const import MAX_FOLDER_NAME_SIZE from canvasapi.assignment import Assignment
from module.download import download_file from canvasapi.course import Course
from module.get_canvas import get_extra_assignment_files from canvasapi.submission import Submission
from module.api.file import get_embedded_files
from module.const import global_consts
from module.helpers import make_valid_filename, shorten_file_name from module.helpers import make_valid_filename, shorten_file_name
from module.items import CanvasModuleItem, CanvasModule from module.items import CanvasModuleItem, jsonify_anything, CanvasModule
from module.singlefile import download_page from module.singlefile import download_page
def download_module_item(module: CanvasModule, item: CanvasModuleItem, modules_dir, cookies_path): def download_module_item(course: Course, module: CanvasModule, item: CanvasModuleItem, modules_dir: Path):
try: try:
module_name = make_valid_filename(str(module.name)) module_name = make_valid_filename(str(module.module.name))
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE) module_name = shorten_file_name(module_name, len(module_name) - global_consts.MAX_FOLDER_NAME_SIZE)
output_dir = Path(modules_dir, module_name) module_dir = modules_dir / module_name
output_dir.mkdir(parents=True, exist_ok=True)
if not item.url: if not hasattr(item.item, 'url') or not item.item.url:
return return
# Download attached files module_dir.mkdir(parents=True, exist_ok=True)
if item.item.type == "File":
file = course.get_file(item.item.content_id)
module_file_path = module_dir / make_valid_filename(str(file.display_name))
file.download(module_file_path)
else:
# It's a page, so download the attached files.
for file in item.attached_files: for file in item.attached_files:
file.download(output_dir / file.filename) file.download(module_dir / file.filename)
# Download the module page. # Download the module page.
html_filename = make_valid_filename(str(item.title)) + ".html" html_filename = make_valid_filename(str(item.item.title)) + ".html"
if not (output_dir / html_filename).exists(): download_page(item.item.html_url, module_dir, html_filename)
download_page(item.url, cookies_path, output_dir, html_filename)
except: except:
# TODO: wrap all threaded funcs in this try/catch # TODO: wrap all threaded funcs in this try/catch
traceback.print_exc() traceback.print_exc()
def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment): def download_assignment(base_assign_dir: Path, course: Course, assignment: Assignment):
assignment_title = make_valid_filename(str(assignment.title)) try:
assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE) assignment_title = make_valid_filename(str(assignment.name))
assign_dir = os.path.join(base_assign_dir, assignment_title) assignment_title = shorten_file_name(assignment_title, len(assignment_title) - global_consts.MAX_FOLDER_NAME_SIZE)
assign_dir = Path(base_assign_dir, assignment_title)
assign_dir.mkdir(parents=True, exist_ok=True)
if assignment.html_url != "": if assignment.html_url:
if not os.path.exists(assign_dir): download_page(assignment.html_url, assign_dir, "assignment.html")
os.makedirs(assign_dir)
assignment_page_path = os.path.join(assign_dir, "assignment.html") # Download attached files.
if assignment.description:
for file in get_embedded_files(course, assignment.description):
file.download(assign_dir / file.display_name)
if not os.path.exists(assignment_page_path): # Students cannot view their past attempts, but this logic is left if that's ever implemented in Canvas.
download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html") submissions = [assignment.get_submission(global_consts.USER_ID)]
for submission in submissions:
extra_files = get_extra_assignment_files(assignment.description, cookie_jar) download_attempt(submission, assign_dir)
for name, url in extra_files: submission_dir = assign_dir / 'submission' / str(submission.id)
download_file(url, Path(assign_dir, name), cookie_jar) for attachment in submission.attachments:
filepath = submission_dir / attachment.display_name
for submission in assignment.submissions: if not filepath.exists():
download_submission(assignment, submission, assign_dir, cookies_path) attachment.download(filepath)
except:
traceback.print_exc()
def download_submission(assignment, submission, assign_dir, cookies_path): def download_attempt(submission: Submission, assign_dir: Path):
submission_dir = assign_dir try:
submission_dir = assign_dir / 'submission' / str(submission.id)
if len(assignment.submissions) != 1: submission_dir.mkdir(parents=True, exist_ok=True)
submission_dir = os.path.join(assign_dir, str(submission.user_id)) for file in submission.attachments:
file.download(submission_dir / file.display_name)
if submission.preview_url != "": if submission.preview_url:
if not os.path.exists(submission_dir): download_page(submission.preview_url, submission_dir, f'{submission.id}.html')
os.makedirs(submission_dir) except:
traceback.print_exc()
submission_page_dir = os.path.join(submission_dir, "submission.html")
if not os.path.exists(submission_page_dir):
download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
if (submission.attempt != 1 and assignment.updated_url != "" and assignment.html_url != ""
and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
submission_dir = os.path.join(assign_dir, "attempts")
if not os.path.exists(submission_dir):
os.makedirs(submission_dir)
for i in range(submission.attempt):
filename = "attempt_" + str(i + 1) + ".html"
submission_page_attempt_dir = os.path.join(submission_dir, filename)
if not os.path.exists(submission_page_attempt_dir):
download_page(assignment.updated_url + "/history?version=" + str(i + 1), cookies_path, submission_dir, filename)