refactor structure
This commit is contained in:
parent
796500e954
commit
55c2532075
91
export.py
91
export.py
|
@ -5,22 +5,22 @@ from http.cookiejar import MozillaCookieJar
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import canvasapi
|
import canvasapi
|
||||||
import jsonpickle
|
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
from canvasapi import Canvas
|
from canvasapi import Canvas
|
||||||
|
|
||||||
from module.const import COURSES_TO_SKIP, OUTPUT_LOCATION
|
from module.const import global_consts
|
||||||
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html, download_course_html
|
from module.download_canvas import download_assignments, download_course_modules, download_course_grades_page, download_course_announcement_pages, download_course_home_page_html, download_course_discussion_pages
|
||||||
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
|
from module.get_canvas import find_course_pages, find_course_modules, find_course_assignments, find_course_announcements, find_course_discussions
|
||||||
from module.items import CanvasCourse
|
from module.items import CanvasCourse, jsonify_anything
|
||||||
|
from module.singlefile import download_page
|
||||||
from module.user_files import download_user_files
|
from module.user_files import download_user_files
|
||||||
|
|
||||||
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
|
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
|
||||||
def export_all_course_data(c):
|
def export_all_course_data(c):
|
||||||
json_data = json.dumps(json.loads(jsonpickle.encode(c, unpicklable=False)), indent=4)
|
json_data = jsonify_anything(c)
|
||||||
course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
|
course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
|
||||||
if not os.path.exists(course_output_dir):
|
if not os.path.exists(course_output_dir):
|
||||||
os.makedirs(course_output_dir)
|
os.makedirs(course_output_dir)
|
||||||
|
@ -48,17 +48,17 @@ if __name__ == "__main__":
|
||||||
with open("credentials.yaml", 'r') as f:
|
with open("credentials.yaml", 'r') as f:
|
||||||
credentials = yaml.full_load(f)
|
credentials = yaml.full_load(f)
|
||||||
|
|
||||||
API_URL = credentials["API_URL"]
|
global_consts.API_URL = credentials["API_URL"]
|
||||||
API_KEY = credentials["API_KEY"]
|
global_consts.API_KEY = credentials["API_KEY"]
|
||||||
USER_ID = credentials["USER_ID"]
|
global_consts.USER_ID = credentials["USER_ID"]
|
||||||
COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
|
global_consts.COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
|
||||||
|
|
||||||
if not Path(COOKIES_PATH).is_file():
|
if not Path(global_consts.COOKIES_PATH).is_file():
|
||||||
print('The cookies file does not exist:', COOKIES_PATH)
|
print('The cookies file does not exist:', global_consts.COOKIES_PATH)
|
||||||
quit(1)
|
quit(1)
|
||||||
|
|
||||||
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
|
global_consts.COOKIE_JAR = MozillaCookieJar(global_consts.COOKIES_PATH)
|
||||||
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
|
global_consts.COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
# ==================================================================================================================
|
# ==================================================================================================================
|
||||||
# Initialization
|
# Initialization
|
||||||
|
@ -68,18 +68,18 @@ if __name__ == "__main__":
|
||||||
print("Creating output directory:", OUTPUT_LOCATION)
|
print("Creating output directory:", OUTPUT_LOCATION)
|
||||||
os.makedirs(OUTPUT_LOCATION)
|
os.makedirs(OUTPUT_LOCATION)
|
||||||
|
|
||||||
if COOKIES_PATH:
|
if global_consts.COOKIES_PATH:
|
||||||
# Test the cookies.
|
# Test the cookies.
|
||||||
print("Authenticating with Canvas frontend...")
|
print("Authenticating with Canvas frontend...")
|
||||||
|
|
||||||
# Requests takes a dict, not the MozillaCookieJar object.
|
# Requests takes a dict, not the MozillaCookieJar object.
|
||||||
request_cookies = {c.name: c.value for c in COOKIE_JAR}
|
request_cookies = {c.name: c.value for c in global_consts.COOKIE_JAR}
|
||||||
|
|
||||||
r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
|
r = requests.get(f'{global_consts.API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
print('Failed to fetch Canvas profile: got status code', r.status_code)
|
print('Failed to fetch Canvas profile: got status code', r.status_code)
|
||||||
quit(1)
|
quit(1)
|
||||||
if not r.url.startswith(API_URL):
|
if not r.url.startswith(global_consts.API_URL):
|
||||||
print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
|
print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
|
||||||
print(r.url)
|
print(r.url)
|
||||||
quit(1)
|
quit(1)
|
||||||
|
@ -93,7 +93,7 @@ if __name__ == "__main__":
|
||||||
print('No cookies file specified! No HTML pages will be saved.')
|
print('No cookies file specified! No HTML pages will be saved.')
|
||||||
|
|
||||||
print("Authenticating with Canvas API...")
|
print("Authenticating with Canvas API...")
|
||||||
canvas = Canvas(API_URL, API_KEY)
|
canvas = Canvas(global_consts.API_URL, global_consts.API_KEY)
|
||||||
courses = canvas.get_courses(include="term")
|
courses = canvas.get_courses(include="term")
|
||||||
try:
|
try:
|
||||||
course_count = len(list(courses))
|
course_count = len(list(courses))
|
||||||
|
@ -108,15 +108,17 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
skip = set(COURSES_TO_SKIP)
|
skip = set(global_consts.COURSES_TO_SKIP)
|
||||||
|
|
||||||
# ==================================================================================================================
|
# ==================================================================================================================
|
||||||
# Exporting
|
# Exporting
|
||||||
|
|
||||||
print("Downloading courses page...")
|
print("Downloading courses page...")
|
||||||
download_course_html(API_URL, COOKIES_PATH)
|
courses_dict = {v['id']: v for v in json.loads(jsonify_anything(courses))['_elements']}
|
||||||
|
(global_consts.OUTPUT_LOCATION / 'courses.json').write_text(json.dumps(courses_dict))
|
||||||
|
download_page(global_consts.API_URL + "/courses/", global_consts.OUTPUT_LOCATION, "courses.html")
|
||||||
|
|
||||||
if not args.user_files:
|
if args.user_files:
|
||||||
print('Downloading user files...')
|
print('Downloading user files...')
|
||||||
download_user_files(canvas, OUTPUT_LOCATION / 'User Files')
|
download_user_files(canvas, OUTPUT_LOCATION / 'User Files')
|
||||||
|
|
||||||
|
@ -128,56 +130,55 @@ if __name__ == "__main__":
|
||||||
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
course_view = CanvasCourse(course)
|
resolved_canvas_course = CanvasCourse(course)
|
||||||
|
|
||||||
if args.term and args.term != course_view.term:
|
if args.term and args.term != resolved_canvas_course.term:
|
||||||
print('Skipping term:', course_view.term, '\n')
|
print('Skipping term:', resolved_canvas_course.term, '\n')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f"=== {course_view.term}: {course_view.name} ===")
|
print(f"=== {resolved_canvas_course.term}: {resolved_canvas_course.name} ===")
|
||||||
|
|
||||||
valid, r = course_view.test_course(API_URL, COOKIE_JAR)
|
valid, r = resolved_canvas_course.test_course(global_consts.API_URL, global_consts.COOKIE_JAR)
|
||||||
if not valid:
|
if not valid:
|
||||||
print(f'Invalid course: {course_view.course_id} - {r} - {r.text}')
|
print(f'Invalid course: {resolved_canvas_course.course_id} - {r} - {r.text}')
|
||||||
if r.status_code == 401:
|
if r.status_code == 401:
|
||||||
# We can't recover from this error.
|
# We can't recover from this error.
|
||||||
quit(1)
|
quit(1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
course_view.assignments = find_course_assignments(course, USER_ID)
|
resolved_canvas_course.modules = find_course_modules(course)
|
||||||
course_view.announcements = find_course_announcements(course)
|
resolved_canvas_course.assignments = find_course_assignments(course)
|
||||||
course_view.discussions = find_course_discussions(course)
|
resolved_canvas_course.announcements = find_course_announcements(course)
|
||||||
course_view.pages = find_course_pages(course)
|
resolved_canvas_course.discussions = find_course_discussions(course)
|
||||||
course_view.modules = find_course_modules(course, course_view)
|
resolved_canvas_course.pages = find_course_pages(course)
|
||||||
|
|
||||||
all_courses_views.append(course_view)
|
all_courses_views.append(resolved_canvas_course)
|
||||||
|
|
||||||
print('Downloading course home page...')
|
print('Downloading course home page...')
|
||||||
download_course_home_page_html(API_URL, course_view, COOKIES_PATH)
|
download_course_home_page_html(resolved_canvas_course)
|
||||||
|
|
||||||
print('Downloading grades...')
|
print('Downloading grades...')
|
||||||
download_course_grades_page(API_URL, course_view, COOKIES_PATH)
|
download_course_grades_page(resolved_canvas_course)
|
||||||
|
|
||||||
download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR)
|
download_assignments(resolved_canvas_course)
|
||||||
|
|
||||||
download_course_module_pages(API_URL, course_view, COOKIES_PATH)
|
download_course_modules(resolved_canvas_course)
|
||||||
|
|
||||||
download_course_announcement_pages(API_URL, course_view, COOKIES_PATH)
|
download_course_announcement_pages(resolved_canvas_course)
|
||||||
|
|
||||||
download_course_discussion_pages(API_URL, course_view, COOKIES_PATH)
|
download_course_discussion_pages(resolved_canvas_course)
|
||||||
|
|
||||||
download_course_files(course, course_view)
|
# TODO: nothing to test this on
|
||||||
|
# download_course_files(course)
|
||||||
download_submission_attachments(course, course_view)
|
|
||||||
|
|
||||||
print("Exporting course metadata...")
|
print("Exporting course metadata...")
|
||||||
export_all_course_data(course_view)
|
export_all_course_data(resolved_canvas_course)
|
||||||
|
|
||||||
if course_count > 1:
|
if course_count > 1:
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
# Remove elements from the course objects that can't be JSON serialized, then format it.
|
# Remove elements from the course objects that can't be JSON serialized, then format it.
|
||||||
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
|
json_str = jsonify_anything(all_courses_views)
|
||||||
|
|
||||||
all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
|
all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
|
||||||
with open(all_output_path, "w") as out_file:
|
with open(all_output_path, "w") as out_file:
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
import canvasapi
|
||||||
|
from canvasapi.course import Course
|
||||||
|
|
||||||
|
HTML_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint=\"(.*?)\" .*?>')
|
||||||
|
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedded_files(course: Course, html: str):
|
||||||
|
attached_files = set()
|
||||||
|
file_matches = re.findall(HTML_ITEM_ATTACHED_FILE_RE, html)
|
||||||
|
for match in file_matches:
|
||||||
|
file_id = re.match(CANVAS_API_FILE_ID_RE, match)
|
||||||
|
if file_id:
|
||||||
|
try:
|
||||||
|
canvas_file = course.get_file(file_id.group(1))
|
||||||
|
attached_files.add(canvas_file)
|
||||||
|
except canvasapi.exceptions.ResourceDoesNotExist:
|
||||||
|
continue
|
||||||
|
return attached_files
|
|
@ -1,10 +1,13 @@
|
||||||
|
from http.cookiejar import MozillaCookieJar
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalConsts:
|
||||||
# Directory in which to download course information to (will be created if not present)
|
# Directory in which to download course information to (will be created if not present)
|
||||||
OUTPUT_LOCATION = Path("./output").resolve().expanduser().absolute()
|
OUTPUT_LOCATION = Path("./output").resolve().expanduser().absolute()
|
||||||
|
|
||||||
# List of Course IDs that should be skipped (need to be integers)
|
# List of Course IDs that should be skipped (need to be integers)
|
||||||
COURSES_TO_SKIP = [288290, 512033]
|
COURSES_TO_SKIP = []
|
||||||
|
|
||||||
DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
|
DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
|
||||||
|
|
||||||
|
@ -12,3 +15,14 @@ DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
|
||||||
# Applies to modules, assignments, announcements, and discussions
|
# Applies to modules, assignments, announcements, and discussions
|
||||||
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
|
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
|
||||||
MAX_FOLDER_NAME_SIZE = 70
|
MAX_FOLDER_NAME_SIZE = 70
|
||||||
|
|
||||||
|
COOKIES_PATH = ""
|
||||||
|
|
||||||
|
COOKIE_JAR = MozillaCookieJar()
|
||||||
|
|
||||||
|
API_URL = ""
|
||||||
|
API_KEY = ""
|
||||||
|
USER_ID = ""
|
||||||
|
|
||||||
|
|
||||||
|
global_consts = GlobalConsts()
|
||||||
|
|
|
@ -1,26 +1,20 @@
|
||||||
import os
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from http.cookiejar import MozillaCookieJar
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import canvasapi
|
import canvasapi
|
||||||
import requests
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module.const import OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE
|
from module.api.file import get_embedded_files
|
||||||
|
from module.const import global_consts
|
||||||
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
||||||
|
from module.items import CanvasCourse, jsonify_anything
|
||||||
from module.singlefile import download_page
|
from module.singlefile import download_page
|
||||||
from module.threading import download_assignment, download_module_item
|
from module.threading import download_assignment, download_module_item
|
||||||
|
|
||||||
|
|
||||||
def download_course_files(course, course_view):
|
def download_course_files(course, course_view):
|
||||||
# file full_name starts with "course files"
|
dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
|
||||||
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
dl_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Create directory if not present
|
|
||||||
if not os.path.exists(dl_dir):
|
|
||||||
os.makedirs(dl_dir)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
files = list(course.get_files())
|
files = list(course.get_files())
|
||||||
|
@ -31,205 +25,109 @@ def download_course_files(course, course_view):
|
||||||
for file in tqdm(files, desc='Downloading Files'):
|
for file in tqdm(files, desc='Downloading Files'):
|
||||||
try:
|
try:
|
||||||
file_folder = course.get_folder(file.folder_id)
|
file_folder = course.get_folder(file.folder_id)
|
||||||
|
folder_dl_dir = dl_dir / make_valid_folder_path(file_folder.full_name)
|
||||||
folder_dl_dir = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name))
|
folder_dl_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
dl_path = folder_dl_dir / make_valid_filename(str(file.display_name))
|
||||||
if not os.path.exists(folder_dl_dir):
|
|
||||||
os.makedirs(folder_dl_dir)
|
|
||||||
|
|
||||||
dl_path = os.path.join(folder_dl_dir, make_valid_filename(str(file.display_name)))
|
|
||||||
|
|
||||||
# Download file if it doesn't already exist
|
|
||||||
if not os.path.exists(dl_path):
|
|
||||||
# print('Downloading: {}'.format(dl_path))
|
|
||||||
file.download(dl_path)
|
file.download(dl_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping {file.display_name} - {e}")
|
tqdm.write(f"Skipping {file.display_name} - {e}")
|
||||||
|
|
||||||
|
|
||||||
def download_course_discussion_pages(api_url, course_view, cookies_path):
|
def download_course_discussion_pages(resolved_course: CanvasCourse):
|
||||||
if cookies_path == "" or len(course_view.discussions) == 0:
|
if not len(resolved_course.discussions):
|
||||||
return
|
return
|
||||||
|
|
||||||
base_discussion_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "discussions")
|
base_discussion_dir = global_consts.OUTPUT_LOCATION / resolved_course.term / resolved_course.name / 'discussions'
|
||||||
if not os.path.exists(base_discussion_dir):
|
base_discussion_dir.mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(base_discussion_dir)
|
|
||||||
|
|
||||||
discussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html")
|
# (base_discussion_dir / 'discussions.json').write_text(jsonify_anything(resolved_course.discussions))
|
||||||
|
download_page(global_consts.API_URL + "/courses/" + str(resolved_course.course_id) + "/discussion_topics/", base_discussion_dir, "discussions.html")
|
||||||
|
|
||||||
# Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
|
for discussion in tqdm(list(resolved_course.discussions), desc='Downloading Discussions'):
|
||||||
if not os.path.exists(discussion_list_dir):
|
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
|
|
||||||
|
|
||||||
for discussion in tqdm(list(course_view.discussions), desc='Downloading Discussions'):
|
|
||||||
discussion_title = make_valid_filename(str(discussion.title))
|
discussion_title = make_valid_filename(str(discussion.title))
|
||||||
discussion_title = shorten_file_name(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE)
|
discussion_title = shorten_file_name(discussion_title, len(discussion_title) - global_consts.MAX_FOLDER_NAME_SIZE)
|
||||||
discussion_dir = os.path.join(base_discussion_dir, discussion_title)
|
discussion_dir = base_discussion_dir / discussion_title
|
||||||
|
|
||||||
if discussion.url == "":
|
if not discussion.url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not os.path.exists(discussion_dir):
|
discussion_dir.mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(discussion_dir)
|
|
||||||
|
for file in get_embedded_files(resolved_course.course, discussion.body):
|
||||||
|
file.download(discussion_dir / file.display_name)
|
||||||
|
|
||||||
# Downloads each page that a discussion takes.
|
|
||||||
for i in range(discussion.amount_pages):
|
for i in range(discussion.amount_pages):
|
||||||
filename = "discussion_" + str(i + 1) + ".html"
|
filename = "discussion_" + str(i + 1) + ".html"
|
||||||
discussion_page_dir = os.path.join(discussion_dir, filename)
|
download_page(discussion.url + "/page-" + str(i + 1), discussion_dir, filename)
|
||||||
|
|
||||||
# Download assignment page, this usually has instructions and etc.
|
|
||||||
if not os.path.exists(discussion_page_dir):
|
|
||||||
download_page(discussion.url + "/page-" + str(i + 1), cookies_path, discussion_dir, filename)
|
|
||||||
|
|
||||||
|
|
||||||
def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: MozillaCookieJar):
|
def download_assignments(course_view: CanvasCourse):
|
||||||
if cookies_path == "" or len(course_view.assignments) == 0:
|
if not len(course_view.assignments):
|
||||||
return
|
return
|
||||||
|
|
||||||
base_assign_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "assignments")
|
base_assign_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name / 'assignments'
|
||||||
if not os.path.exists(base_assign_dir):
|
base_assign_dir.mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(base_assign_dir)
|
|
||||||
|
|
||||||
assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html")
|
# (base_assign_dir / 'assignments.json').write_text(jsonify_anything(course_view.assignments))
|
||||||
|
download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id) + "/assignments/", base_assign_dir, "assignments.html")
|
||||||
# Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
|
|
||||||
if not os.path.exists(assignment_list_path):
|
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
|
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
download_func = partial(download_assignment, cookies_path, cookie_jar, base_assign_dir)
|
download_func = partial(download_assignment, base_assign_dir, course_view.course)
|
||||||
list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
|
list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
|
||||||
|
|
||||||
|
|
||||||
def download_course_announcement_pages(api_url, course_view, cookies_path):
|
def download_course_announcement_pages(resolved_course: CanvasCourse):
|
||||||
"""
|
if not len(resolved_course.announcements):
|
||||||
Download assignment list.
|
|
||||||
There's a chance this might be the course homepage if the course has the assignments page disabled.
|
|
||||||
:param api_url:
|
|
||||||
:param course_view:
|
|
||||||
:param cookies_path:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
|
|
||||||
if cookies_path == "" or len(course_view.announcements) == 0:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
base_announce_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "announcements")
|
base_announce_dir = global_consts.OUTPUT_LOCATION / resolved_course.term / resolved_course.name / 'announcements'
|
||||||
if not os.path.exists(base_announce_dir):
|
base_announce_dir.mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(base_announce_dir)
|
|
||||||
announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
|
|
||||||
if not os.path.exists(announcement_list_dir):
|
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
|
|
||||||
|
|
||||||
for announcements in tqdm(list(course_view.announcements), desc='Downloading Announcements'):
|
# (base_announce_dir / 'announcements.json').write_text(jsonify_anything(resolved_course.announcements))
|
||||||
announcements_title = make_valid_filename(str(announcements.title))
|
download_page(global_consts.API_URL + "/courses/" + str(resolved_course.course_id) + "/announcements/", base_announce_dir, "announcements.html")
|
||||||
announcements_title = shorten_file_name(announcements_title, len(announcements_title) - MAX_FOLDER_NAME_SIZE)
|
|
||||||
announce_dir = os.path.join(base_announce_dir, announcements_title)
|
|
||||||
|
|
||||||
if announcements.url == "":
|
for announcement in tqdm(list(resolved_course.announcements), desc='Downloading Announcements'):
|
||||||
|
announcements_title = make_valid_filename(str(announcement.title))
|
||||||
|
announcements_title = shorten_file_name(announcements_title, len(announcements_title) - global_consts.MAX_FOLDER_NAME_SIZE)
|
||||||
|
announce_dir = base_announce_dir / announcements_title
|
||||||
|
|
||||||
|
if not announcement.url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not os.path.exists(announce_dir):
|
announce_dir.mkdir(parents=True, exist_ok=True)
|
||||||
os.makedirs(announce_dir)
|
|
||||||
|
|
||||||
# Downloads each page that a discussion takes.
|
for file in get_embedded_files(resolved_course.course, announcement.body):
|
||||||
for i in range(announcements.amount_pages):
|
file.download(announce_dir / file.display_name)
|
||||||
|
|
||||||
|
for i in range(announcement.amount_pages):
|
||||||
filename = "announcement_" + str(i + 1) + ".html"
|
filename = "announcement_" + str(i + 1) + ".html"
|
||||||
announcement_page_dir = os.path.join(announce_dir, filename)
|
download_page(announcement.url + "/page-" + str(i + 1), announce_dir, filename)
|
||||||
|
|
||||||
# Download assignment page, this usually has instructions and etc.
|
|
||||||
if not os.path.exists(announcement_page_dir):
|
|
||||||
download_page(announcements.url + "/page-" + str(i + 1), cookies_path, announce_dir, filename)
|
|
||||||
|
|
||||||
|
|
||||||
def download_submission_attachments(course, course_view):
|
def download_course_home_page_html(course_view):
|
||||||
course_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
|
||||||
|
dl_dir.mkdir(parents=True, exist_ok=True)
|
||||||
# Create directory if not present
|
download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id), dl_dir, "homepage.html")
|
||||||
if not os.path.exists(course_dir):
|
|
||||||
os.makedirs(course_dir)
|
|
||||||
|
|
||||||
for assignment in tqdm(list(course_view.assignments), desc='Downloading Submissions'):
|
|
||||||
for submission in assignment.submissions:
|
|
||||||
assignment_title = make_valid_filename(str(assignment.title))
|
|
||||||
assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
|
|
||||||
attachment_dir = os.path.join(course_dir, "assignments", assignment_title)
|
|
||||||
if len(assignment.submissions) != 1:
|
|
||||||
attachment_dir = os.path.join(attachment_dir, str(submission.user_id))
|
|
||||||
if not os.path.exists(attachment_dir) and submission.attachments:
|
|
||||||
os.makedirs(attachment_dir)
|
|
||||||
for attachment in submission.attachments:
|
|
||||||
filepath = os.path.join(attachment_dir, make_valid_filename(str(attachment.id) + "_" + attachment.filename))
|
|
||||||
if not os.path.exists(filepath):
|
|
||||||
# print('Downloading attachment: {}'.format(filepath))
|
|
||||||
r = requests.get(attachment.url, allow_redirects=True)
|
|
||||||
with open(filepath, 'wb') as f:
|
|
||||||
f.write(r.content)
|
|
||||||
# else:
|
|
||||||
# print('File already exists: {}'.format(filepath))
|
|
||||||
|
|
||||||
|
|
||||||
def download_course_html(api_url, cookies_path):
|
def download_course_modules(course_view: CanvasCourse):
|
||||||
if cookies_path == "":
|
modules_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name / 'modules'
|
||||||
return
|
modules_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
course_dir = OUTPUT_LOCATION
|
# (modules_dir / 'modules.json').write_text(jsonify_anything(course_view.modules))
|
||||||
|
download_page(global_consts.API_URL + "/courses/" + str(course_view.course_id) + "/modules/", modules_dir, "modules.html")
|
||||||
if not os.path.exists(course_dir):
|
|
||||||
os.makedirs(course_dir)
|
|
||||||
|
|
||||||
course_list_path = os.path.join(course_dir, "course_list.html")
|
|
||||||
|
|
||||||
# Downloads the course list.
|
|
||||||
if not os.path.exists(course_list_path):
|
|
||||||
download_page(api_url + "/courses/", cookies_path, course_dir, "course_list.html")
|
|
||||||
|
|
||||||
|
|
||||||
def download_course_home_page_html(api_url, course_view, cookies_path):
|
|
||||||
if cookies_path == "":
|
|
||||||
return
|
|
||||||
|
|
||||||
dl_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name)
|
|
||||||
if not os.path.exists(dl_dir):
|
|
||||||
os.makedirs(dl_dir)
|
|
||||||
|
|
||||||
homepage_path = os.path.join(dl_dir, "homepage.html")
|
|
||||||
|
|
||||||
# Downloads the course home page.
|
|
||||||
if not os.path.exists(homepage_path):
|
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id), cookies_path, dl_dir, "homepage.html")
|
|
||||||
|
|
||||||
|
|
||||||
def download_course_module_pages(api_url, course_view, cookies_path):
|
|
||||||
if cookies_path == "" or len(course_view.modules) == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
|
|
||||||
if not os.path.exists(modules_dir):
|
|
||||||
os.makedirs(modules_dir)
|
|
||||||
|
|
||||||
# Downloads the modules page (possible this is disabled by the teacher)
|
|
||||||
module_list_dir = Path(str(modules_dir), "modules_list.html")
|
|
||||||
if not os.path.exists(module_list_dir):
|
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
|
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
for module in tqdm(list(course_view.modules), desc='Downloading Modules'):
|
for module in tqdm(list(course_view.modules), desc='Downloading Modules'):
|
||||||
bar = tqdm(list(module.items), leave=False, desc=module.name)
|
bar = tqdm(list(module.items), leave=False, desc=module.module.name)
|
||||||
futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items]
|
futures = [executor.submit(download_module_item, course_view.course, module, item, modules_dir) for item in module.items]
|
||||||
for _ in as_completed(futures):
|
for _ in as_completed(futures):
|
||||||
bar.update()
|
bar.update()
|
||||||
bar.close()
|
bar.close()
|
||||||
|
|
||||||
|
|
||||||
def download_course_grades_page(api_url, course_view, cookies_path):
|
def download_course_grades_page(course_view: CanvasCourse):
|
||||||
if cookies_path == "":
|
dl_dir = global_consts.OUTPUT_LOCATION / course_view.term / course_view.name
|
||||||
return
|
|
||||||
|
|
||||||
dl_dir = Path(OUTPUT_LOCATION, course_view.term, course_view.name)
|
|
||||||
dl_dir.mkdir(parents=True, exist_ok=True)
|
dl_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
api_target = f'{global_consts.API_URL}/courses/{course_view.course_id}/grades'
|
||||||
# TODO: command line arg to prohibit overwrite. Default should overwrite
|
download_page(api_target, dl_dir, "grades.html")
|
||||||
if not (dl_dir / "grades.html").exists():
|
|
||||||
api_target = f'{api_url}/courses/{course_view.course_id}/grades'
|
|
||||||
download_page(api_target, cookies_path, dl_dir, "grades.html")
|
|
||||||
|
|
|
@ -1,113 +1,51 @@
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
from http.cookiejar import MozillaCookieJar
|
from typing import List
|
||||||
|
|
||||||
|
import canvasapi
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
import requests
|
from canvasapi.discussion_topic import DiscussionTopic
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module.const import DATE_TEMPLATE, OUTPUT_LOCATION, MAX_FOLDER_NAME_SIZE
|
from module.const import global_consts
|
||||||
from module.helpers import make_valid_filename, shorten_file_name
|
from module.items import CanvasDiscussion, CanvasPage, CanvasTopicEntry, CanvasTopicReply, CanvasModule
|
||||||
from module.items import AssignmentView, AttachmentView, DiscussionView, CanvasModuleItem, CanvasModule, PageView, SubmissionView, TopicEntryView, TopicReplyView
|
|
||||||
|
|
||||||
MODULE_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint="(.*?)" .*?>')
|
HTML_ITEM_ATTACHED_FILE_RE = re.compile(r'<a .*? data-api-endpoint=\"(.*?)\" .*?>')
|
||||||
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')
|
CANVAS_API_FILE_ID_RE = re.compile(r'.*?/api/v1/courses/.*?/files/(.*?)$')
|
||||||
|
|
||||||
|
|
||||||
def find_course_modules(course, course_view):
|
def find_course_modules(course) -> List[CanvasModule]:
|
||||||
modules_dir = os.path.join(OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
|
# modules_dir = os.path.join(global_consts.OUTPUT_LOCATION, course_view.term, course_view.name, "modules")
|
||||||
|
|
||||||
# Create modules directory if not present
|
results = []
|
||||||
if not os.path.exists(modules_dir):
|
|
||||||
os.makedirs(modules_dir)
|
|
||||||
|
|
||||||
module_views = []
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
modules = list(course.get_modules())
|
modules = list(course.get_modules())
|
||||||
|
|
||||||
for module in tqdm(modules, desc='Fetching Modules'):
|
for module in tqdm(modules, desc='Fetching Modules'):
|
||||||
module_view = CanvasModule()
|
|
||||||
module_view.id = module.id if hasattr(module, "id") else ""
|
|
||||||
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get items for each module
|
resolved_module = CanvasModule(module)
|
||||||
for item in module.get_module_items():
|
for item in resolved_module.items:
|
||||||
module_item = CanvasModuleItem()
|
if item.item.type == 'Page':
|
||||||
module_item.id = item.id if hasattr(item, "id") else 0
|
page = course.get_page(item.item.page_url)
|
||||||
module_item.title = str(item.title).replace(' ', ' ') if hasattr(item, "title") else ""
|
item.page = page
|
||||||
module_item.content_type = str(item.type) if hasattr(item, "type") else ""
|
|
||||||
module_item.url = str(item.html_url) if hasattr(item, "html_url") else ""
|
|
||||||
module_item.external_url = str(item.external_url) if hasattr(item, "external_url") else ""
|
|
||||||
|
|
||||||
if module_item.content_type == "File":
|
|
||||||
# If problems arise due to long pathnames, changing module.name to module.id might help
|
|
||||||
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
|
|
||||||
module_name = make_valid_filename(str(module.name))
|
|
||||||
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
|
|
||||||
module_dir = os.path.join(modules_dir, module_name, "files")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Create directory for current module if not present
|
|
||||||
if not os.path.exists(module_dir):
|
|
||||||
os.makedirs(module_dir)
|
|
||||||
|
|
||||||
# Get the file object
|
|
||||||
module_file = course.get_file(str(item.content_id))
|
|
||||||
|
|
||||||
# Create path for module file download
|
|
||||||
module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))
|
|
||||||
|
|
||||||
# Download file if it doesn't already exist
|
|
||||||
if not os.path.exists(module_file_path):
|
|
||||||
module_file.download(module_file_path)
|
|
||||||
except Exception as e:
|
|
||||||
tqdm.write(f"Skipping module file download that gave the following error: {e} - {item}")
|
|
||||||
|
|
||||||
elif item.type == 'Page':
|
|
||||||
page = course.get_page(item.page_url)
|
|
||||||
if hasattr(page, 'body'):
|
if hasattr(page, 'body'):
|
||||||
# Extract the attached files from the item's HTML.
|
# Extract the attached files from the item's HTML.
|
||||||
file_matches = re.findall(MODULE_ITEM_ATTACHED_FILE_RE, page.body)
|
file_matches = re.findall(HTML_ITEM_ATTACHED_FILE_RE, page.body)
|
||||||
for match in file_matches:
|
for match in file_matches:
|
||||||
file_id = re.match(CANVAS_API_FILE_ID_RE, match)
|
file_id = re.match(CANVAS_API_FILE_ID_RE, match)
|
||||||
if file_id:
|
if file_id:
|
||||||
|
try:
|
||||||
# Grab the metadata from the API.
|
# Grab the metadata from the API.
|
||||||
canvas_file = course.get_file(file_id.group(1))
|
canvas_file = course.get_file(file_id.group(1))
|
||||||
module_item.attached_files.add(canvas_file)
|
item.attached_files.add(canvas_file)
|
||||||
|
except canvasapi.exceptions.ResourceDoesNotExist:
|
||||||
module_view.items.append(module_item)
|
continue
|
||||||
|
results.append(resolved_module)
|
||||||
|
except Exception as e:
|
||||||
|
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
||||||
|
|
||||||
module_views.append(module_view)
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print("Skipping entire module that gave the following error:")
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
return module_views
|
|
||||||
|
|
||||||
|
|
||||||
def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
    """Collect the files linked from a chunk of assignment HTML.

    Every ``<a>`` tag carrying ``data-api-returntype="File"`` is looked up
    through its ``data-api-endpoint`` URL, using the cookies from
    *cookie_jar* for the requests.

    :param html: HTML markup to scan. Empty or ``None`` yields no files.
    :param cookie_jar: iterable of cookies (objects with ``name`` and
        ``value``) copied into the HTTP session.
    :return: list of ``(display_name, url)`` tuples, one per endpoint that
        answered with HTTP 200.
    """
    # Nothing to parse: skip the parser and the HTTP session entirely.
    if not html:
        return []

    soup = BeautifulSoup(html, 'html.parser')
    anchors = soup.find_all('a', {'data-api-returntype': 'File'})
    # An anchor may be flagged as a file yet lack the endpoint attribute;
    # skip it rather than failing the whole scan with a KeyError.
    urls = [a.get('data-api-endpoint') for a in anchors if a.get('data-api-endpoint')]

    extra_files = []
    # The context manager closes the session's connections when done.
    with requests.Session() as s:
        for cookie in cookie_jar:
            s.cookies.set(cookie.name, cookie.value)

        for item in urls:
            r = s.get(item)
            if r.status_code != 200:
                # Endpoint unavailable for this file: ignore it.
                continue
            j = r.json()
            extra_files.append((j['display_name'], j['url']))

    return extra_files
|
|
||||||
|
|
||||||
|
|
||||||
def get_course_page_urls(course):
|
def get_course_page_urls(course):
|
||||||
|
@ -132,18 +70,18 @@ def find_course_pages(course):
|
||||||
|
|
||||||
for url in tqdm(page_urls, desc='Fetching Pages'):
|
for url in tqdm(page_urls, desc='Fetching Pages'):
|
||||||
page = course.get_page(url)
|
page = course.get_page(url)
|
||||||
page_view = PageView()
|
page_view = CanvasPage()
|
||||||
page_view.id = page.id if hasattr(page, "id") else 0
|
page_view.id = page.id if hasattr(page, "id") else 0
|
||||||
page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else ""
|
page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else ""
|
||||||
page_view.body = str(page.body) if hasattr(page, "body") else ""
|
page_view.body = str(page.body) if hasattr(page, "body") else ""
|
||||||
|
|
||||||
if hasattr(page, "created_at"):
|
if hasattr(page, "created_at"):
|
||||||
page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE)
|
page_view.created_date = dateutil.parser.parse(page.created_at).strftime(global_consts.DATE_TEMPLATE)
|
||||||
else:
|
else:
|
||||||
page_view.created_date = ''
|
page_view.created_date = ''
|
||||||
|
|
||||||
if hasattr(page, "updated_at"):
|
if hasattr(page, "updated_at"):
|
||||||
page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE)
|
page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(global_consts.DATE_TEMPLATE)
|
||||||
else:
|
else:
|
||||||
page_view.last_updated_date = ''
|
page_view.last_updated_date = ''
|
||||||
|
|
||||||
|
@ -154,83 +92,31 @@ def find_course_pages(course):
|
||||||
return page_views
|
return page_views
|
||||||
|
|
||||||
|
|
||||||
def find_course_assignments(course, user_id):
|
def find_course_assignments(course):
|
||||||
assignment_views = []
|
results = []
|
||||||
|
|
||||||
# Get all assignments
|
|
||||||
assignments = list(course.get_assignments())
|
assignments = list(course.get_assignments())
|
||||||
|
|
||||||
for assignment in tqdm(assignments, desc='Fetching Assignments'):
|
for assignment in tqdm(assignments, desc='Fetching Assignments'):
|
||||||
assignment_view = AssignmentView()
|
# Have to re-define the object because the `/api/v1/courses/:course_id/assignments` endpoint is sometimes outdated.
|
||||||
assignment_view.id = assignment.id if hasattr(assignment, "id") else ""
|
# The endpoint `/api/v1/courses/:course_id/assignments/:id` has the most up to date data.
|
||||||
assignment_view.title = make_valid_filename(str(assignment.name).replace(' ', ' ')) if hasattr(assignment, "name") else ""
|
assignment = course.get_assignment(assignment.id)
|
||||||
assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else ""
|
results.append(assignment)
|
||||||
assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "created_at_date") else ""
|
return results
|
||||||
assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "due_at_date") else ""
|
|
||||||
assignment_view.html_url = assignment.html_url if hasattr(assignment, "html_url") else ""
|
|
||||||
assignment_view.ext_url = str(assignment.url) if hasattr(assignment, "url") else ""
|
|
||||||
assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if hasattr(assignment, "submissions_download_url") else ""
|
|
||||||
|
|
||||||
# Download submission for this user only
|
|
||||||
submissions = [assignment.get_submission(user_id)]
|
|
||||||
if not len(submissions):
|
|
||||||
raise IndexError(f'No submissions found for assignment: {vars(assignment)}')
|
|
||||||
|
|
||||||
try:
|
|
||||||
for submission in submissions:
|
|
||||||
sub_view = SubmissionView()
|
|
||||||
sub_view.id = submission.id if hasattr(submission, "id") else 0
|
|
||||||
sub_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
|
|
||||||
sub_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
|
|
||||||
sub_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
|
|
||||||
sub_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
|
|
||||||
sub_view.attempt = submission.attempt if hasattr(submission, "attempt") and submission.attempt is not None else 0
|
|
||||||
sub_view.user_id = str(submission.user_id) if hasattr(submission, "user_id") else ""
|
|
||||||
sub_view.preview_url = str(submission.preview_url) if hasattr(submission, "preview_url") else ""
|
|
||||||
sub_view.ext_url = str(submission.url) if hasattr(submission, "url") else ""
|
|
||||||
|
|
||||||
try:
|
|
||||||
submission.attachments
|
|
||||||
except AttributeError:
|
|
||||||
print('No attachments')
|
|
||||||
else:
|
|
||||||
for attachment in submission.attachments:
|
|
||||||
attach_view = AttachmentView()
|
|
||||||
attach_view.url = attachment.url
|
|
||||||
attach_view.id = attachment.id
|
|
||||||
attach_view.filename = attachment.filename
|
|
||||||
sub_view.attachments.append(attach_view)
|
|
||||||
assignment_view.submissions.append(sub_view)
|
|
||||||
except Exception as e:
|
|
||||||
raise
|
|
||||||
# print("Skipping submission that gave the following error:")
|
|
||||||
# print(e)
|
|
||||||
|
|
||||||
assignment_views.append(assignment_view)
|
|
||||||
|
|
||||||
return assignment_views
|
|
||||||
|
|
||||||
|
|
||||||
def find_course_announcements(course):
|
def find_course_announcements(course):
|
||||||
announcement_views = []
|
announcement_views = []
|
||||||
|
announcements: List[DiscussionTopic] = list(course.get_discussion_topics(only_announcements=True))
|
||||||
# try:
|
|
||||||
announcements = list(course.get_discussion_topics(only_announcements=True))
|
|
||||||
|
|
||||||
for announcement in tqdm(announcements, desc='Fetching Announcements'):
|
for announcement in tqdm(announcements, desc='Fetching Announcements'):
|
||||||
discussion_view = get_discussion_view(announcement)
|
discussion_view = get_discussion_view(announcement)
|
||||||
|
|
||||||
announcement_views.append(discussion_view)
|
announcement_views.append(discussion_view)
|
||||||
# except Exception as e:
|
|
||||||
# print("Skipping announcement that gave the following error:")
|
|
||||||
# print(e)
|
|
||||||
|
|
||||||
return announcement_views
|
return announcement_views
|
||||||
|
|
||||||
|
|
||||||
def get_discussion_view(discussion_topic):
|
def get_discussion_view(discussion_topic):
|
||||||
# Create discussion view
|
# Create discussion view
|
||||||
discussion_view = DiscussionView()
|
discussion_view = CanvasDiscussion(discussion_topic)
|
||||||
discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
|
discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
|
||||||
discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else ""
|
discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else ""
|
||||||
discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
|
discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
|
||||||
|
@ -250,7 +136,7 @@ def get_discussion_view(discussion_topic):
|
||||||
topic_entries_counter += 1
|
topic_entries_counter += 1
|
||||||
|
|
||||||
# Create new discussion view for the topic_entry
|
# Create new discussion view for the topic_entry
|
||||||
topic_entry_view = TopicEntryView()
|
topic_entry_view = CanvasTopicEntry()
|
||||||
topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
|
topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
|
||||||
topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
|
topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
|
||||||
topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else ""
|
topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else ""
|
||||||
|
@ -262,7 +148,7 @@ def get_discussion_view(discussion_topic):
|
||||||
try:
|
try:
|
||||||
for topic_reply in topic_entry_replies:
|
for topic_reply in topic_entry_replies:
|
||||||
# Create new topic reply view
|
# Create new topic reply view
|
||||||
topic_reply_view = TopicReplyView()
|
topic_reply_view = CanvasTopicReply()
|
||||||
topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
|
topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
|
||||||
topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
|
topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
|
||||||
topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else ""
|
topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else ""
|
||||||
|
@ -286,15 +172,8 @@ def get_discussion_view(discussion_topic):
|
||||||
|
|
||||||
def find_course_discussions(course):
|
def find_course_discussions(course):
|
||||||
discussion_views = []
|
discussion_views = []
|
||||||
|
|
||||||
# try:
|
|
||||||
discussion_topics = list(course.get_discussion_topics())
|
discussion_topics = list(course.get_discussion_topics())
|
||||||
|
|
||||||
for discussion_topic in tqdm(discussion_topics, desc='Fetching Discussions'):
|
for discussion_topic in tqdm(discussion_topics, desc='Fetching Discussions'):
|
||||||
discussion_view = get_discussion_view(discussion_topic)
|
discussion_view = get_discussion_view(discussion_topic)
|
||||||
discussion_views.append(discussion_view)
|
discussion_views.append(discussion_view)
|
||||||
# except Exception as e:
|
|
||||||
# print("Skipping discussion that gave the following error:")
|
|
||||||
# print(e)
|
|
||||||
|
|
||||||
return discussion_views
|
return discussion_views
|
||||||
|
|
102
module/items.py
102
module/items.py
|
@ -1,29 +1,64 @@
|
||||||
|
import json
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
from typing import List, Any
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from canvasapi.assignment import Assignment
|
||||||
|
from canvasapi.course import Course
|
||||||
from canvasapi.file import File
|
from canvasapi.file import File
|
||||||
|
from canvasapi.module import ModuleItem, Module
|
||||||
|
from canvasapi.page import Page
|
||||||
|
|
||||||
from module.helpers import make_valid_filename
|
from module.helpers import make_valid_filename
|
||||||
|
|
||||||
|
|
||||||
|
def varsify(item) -> Any:
    """Recursively turn *item* into plain builtins suitable for ``json.dumps``.

    * ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned as-is.
    * ``list`` and ``set`` become a ``list`` of converted elements.
    * ``dict`` becomes a ``dict`` with the same keys and converted values.
    * Any other object is expanded through ``vars()`` into a ``dict`` of its
      attributes.

    Best-effort: if conversion fails (for example ``vars()`` on an object
    without ``__dict__``), *item* itself is returned unchanged.

    :param item: any value or object.
    :return: the converted structure, or *item* when it cannot be converted.
    """
    try:
        if item is None or isinstance(item, (str, int, float, bool)):
            return item
        if isinstance(item, (list, set)):
            return [varsify(x) for x in item]
        if isinstance(item, dict):
            # vars() does not work on a dict, so walk its values directly.
            return {k: varsify(v) for k, v in item.items()}

        result = {}
        for k, v in vars(item).items():
            if isinstance(v, (dict, list)):
                # NOTE(review): dict/list attributes are kept even when the
                # name starts with '_', matching the original behaviour.
                result[k] = varsify(v)
            elif not k.startswith('_'):
                # Underscore-prefixed attributes are skipped.
                result[k] = varsify(v)
        return result
    except Exception:
        # Deliberately broad to stay best-effort, but no longer a bare
        # except, so KeyboardInterrupt and SystemExit propagate.
        return item
|
||||||
|
|
||||||
|
|
||||||
|
def jsonify_anything(item):
    """Serialize *item* to an indented JSON string with sorted keys.

    The value is first flattened with ``varsify``; anything ``json`` still
    cannot encode is rendered through ``str``.
    """
    plain = varsify(item)
    return json.dumps(plain, default=str, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
class CanvasModuleItem:
|
class CanvasModuleItem:
|
||||||
def __init__(self):
|
def __init__(self, module_item: ModuleItem):
|
||||||
self.id = 0
|
self.item = module_item
|
||||||
self.title = ""
|
|
||||||
self.content_type = ""
|
|
||||||
self.url = ""
|
|
||||||
self.external_url = ""
|
|
||||||
self.attached_files: set[File] = set()
|
self.attached_files: set[File] = set()
|
||||||
|
self.page: Page
|
||||||
|
|
||||||
|
|
||||||
class CanvasModule:
|
class CanvasModule:
|
||||||
def __init__(self):
|
def __init__(self, module: Module):
|
||||||
self.id = 0
|
self.module = module
|
||||||
self.name = ""
|
self.items: List[CanvasModuleItem] = []
|
||||||
self.items = []
|
for item in module.get_module_items():
|
||||||
|
i = self.module.get_module_item(item.id)
|
||||||
|
self.items.append(CanvasModuleItem(i))
|
||||||
|
|
||||||
|
|
||||||
class PageView:
|
class CanvasPage:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.title = ""
|
self.title = ""
|
||||||
|
@ -32,7 +67,7 @@ class PageView:
|
||||||
self.last_updated_date = ""
|
self.last_updated_date = ""
|
||||||
|
|
||||||
|
|
||||||
class TopicReplyView:
|
class CanvasTopicReply:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.author = ""
|
self.author = ""
|
||||||
|
@ -40,7 +75,7 @@ class TopicReplyView:
|
||||||
self.body = ""
|
self.body = ""
|
||||||
|
|
||||||
|
|
||||||
class TopicEntryView:
|
class CanvasTopicEntry:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.author = ""
|
self.author = ""
|
||||||
|
@ -49,8 +84,9 @@ class TopicEntryView:
|
||||||
self.topic_replies = []
|
self.topic_replies = []
|
||||||
|
|
||||||
|
|
||||||
class DiscussionView:
|
class CanvasDiscussion:
|
||||||
def __init__(self):
|
def __init__(self, discussion):
|
||||||
|
self.discussion = discussion
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.title = ""
|
self.title = ""
|
||||||
self.author = ""
|
self.author = ""
|
||||||
|
@ -61,7 +97,7 @@ class DiscussionView:
|
||||||
self.amount_pages = 0
|
self.amount_pages = 0
|
||||||
|
|
||||||
|
|
||||||
class SubmissionView:
|
class CanvasSubmission:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 0
|
self.id = 0
|
||||||
self.attachments = []
|
self.attachments = []
|
||||||
|
@ -75,41 +111,25 @@ class SubmissionView:
|
||||||
self.ext_url = ""
|
self.ext_url = ""
|
||||||
|
|
||||||
|
|
||||||
class AttachmentView:
|
|
||||||
def __init__(self):
|
|
||||||
self.id = 0
|
|
||||||
self.filename = ""
|
|
||||||
self.url = ""
|
|
||||||
|
|
||||||
|
|
||||||
class AssignmentView:
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.id = 0
|
|
||||||
self.title = ""
|
|
||||||
self.description = ""
|
|
||||||
self.assigned_date = ""
|
|
||||||
self.due_date = ""
|
|
||||||
self.submissions = []
|
|
||||||
self.html_url = ""
|
|
||||||
self.ext_url = ""
|
|
||||||
self.updated_url = ""
|
|
||||||
|
|
||||||
|
|
||||||
class CanvasCourse:
|
class CanvasCourse:
|
||||||
def __init__(self, course):
|
def __init__(self, course):
|
||||||
|
self.course: Course = course
|
||||||
self.course_id = course.id if hasattr(course, "id") else 0
|
self.course_id = course.id if hasattr(course, "id") else 0
|
||||||
self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
||||||
self.course_code = make_valid_filename(course.course_code if hasattr(course, "course_code") else "")
|
self.course_code = make_valid_filename(course.course_code if hasattr(course, "course_code") else "")
|
||||||
|
|
||||||
|
if hasattr(course, 'original_name'):
|
||||||
|
self.name = course.original_name
|
||||||
|
else:
|
||||||
self.name = course.name if hasattr(course, "name") else ""
|
self.name = course.name if hasattr(course, "name") else ""
|
||||||
|
|
||||||
self.course_code = self.course_code.replace(' ', ' ')
|
self.course_code = self.course_code.replace(' ', ' ')
|
||||||
self.name = self.name.replace(' ', ' ')
|
self.name = self.name.replace(' ', ' ')
|
||||||
|
|
||||||
self.assignments = []
|
self.assignments: List[Assignment] = []
|
||||||
self.announcements = []
|
self.announcements: List[CanvasDiscussion] = []
|
||||||
self.discussions = []
|
self.discussions: List[CanvasDiscussion] = []
|
||||||
self.modules = []
|
self.modules: List[CanvasModule] = []
|
||||||
|
|
||||||
def test_course(self, base_url: str, cookie_jar: MozillaCookieJar):
|
def test_course(self, base_url: str, cookie_jar: MozillaCookieJar):
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
|
|
||||||
|
from .const import global_consts
|
||||||
|
|
||||||
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
|
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
|
||||||
|
|
||||||
# TODO: have this be specified by a required arg.
|
# TODO: have this be specified by a required arg.
|
||||||
|
@ -11,7 +13,7 @@ def add_quotes(s):
|
||||||
return "\"" + str(s).strip("\"") + "\""
|
return "\"" + str(s).strip("\"") + "\""
|
||||||
|
|
||||||
|
|
||||||
def download_page(url, cookies_path, output_path, output_name_template=""):
|
def download_page(url, output_path, output_name_template=""):
|
||||||
# TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?'
|
# TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?'
|
||||||
|
|
||||||
if output_name_template and Path(output_path, output_name_template).exists():
|
if output_name_template and Path(output_path, output_name_template).exists():
|
||||||
|
@ -21,7 +23,7 @@ def download_page(url, cookies_path, output_path, output_name_template=""):
|
||||||
args = [
|
args = [
|
||||||
add_quotes(SINGLEFILE_BINARY_PATH),
|
add_quotes(SINGLEFILE_BINARY_PATH),
|
||||||
"--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
|
"--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
|
||||||
"--browser-cookies-file=" + add_quotes(cookies_path),
|
"--browser-cookies-file=" + add_quotes(global_consts.COOKIES_PATH),
|
||||||
"--output-directory=" + add_quotes(output_path),
|
"--output-directory=" + add_quotes(output_path),
|
||||||
add_quotes(url)
|
add_quotes(url)
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,85 +1,80 @@
|
||||||
import os
|
|
||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from module.const import MAX_FOLDER_NAME_SIZE
|
from canvasapi.assignment import Assignment
|
||||||
from module.download import download_file
|
from canvasapi.course import Course
|
||||||
from module.get_canvas import get_extra_assignment_files
|
from canvasapi.submission import Submission
|
||||||
|
|
||||||
|
from module.api.file import get_embedded_files
|
||||||
|
from module.const import global_consts
|
||||||
from module.helpers import make_valid_filename, shorten_file_name
|
from module.helpers import make_valid_filename, shorten_file_name
|
||||||
from module.items import CanvasModuleItem, CanvasModule
|
from module.items import CanvasModuleItem, jsonify_anything, CanvasModule
|
||||||
from module.singlefile import download_page
|
from module.singlefile import download_page
|
||||||
|
|
||||||
|
|
||||||
def download_module_item(module: CanvasModule, item: CanvasModuleItem, modules_dir, cookies_path):
|
def download_module_item(course: Course, module: CanvasModule, item: CanvasModuleItem, modules_dir: Path):
|
||||||
try:
|
try:
|
||||||
module_name = make_valid_filename(str(module.name))
|
module_name = make_valid_filename(str(module.module.name))
|
||||||
module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
|
module_name = shorten_file_name(module_name, len(module_name) - global_consts.MAX_FOLDER_NAME_SIZE)
|
||||||
output_dir = Path(modules_dir, module_name)
|
module_dir = modules_dir / module_name
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
if not item.url:
|
if not hasattr(item.item, 'url') or not item.item.url:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Download attached files
|
module_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
if item.item.type == "File":
|
||||||
|
file = course.get_file(item.item.content_id)
|
||||||
|
module_file_path = module_dir / make_valid_filename(str(file.display_name))
|
||||||
|
file.download(module_file_path)
|
||||||
|
else:
|
||||||
|
# It's a page, so download the attached files.
|
||||||
for file in item.attached_files:
|
for file in item.attached_files:
|
||||||
file.download(output_dir / file.filename)
|
file.download(module_dir / file.filename)
|
||||||
|
|
||||||
# Download the module page.
|
# Download the module page.
|
||||||
html_filename = make_valid_filename(str(item.title)) + ".html"
|
html_filename = make_valid_filename(str(item.item.title)) + ".html"
|
||||||
if not (output_dir / html_filename).exists():
|
download_page(item.item.html_url, module_dir, html_filename)
|
||||||
download_page(item.url, cookies_path, output_dir, html_filename)
|
|
||||||
except:
|
except:
|
||||||
# TODO: wrap all threaded funcs in this try/catch
|
# TODO: wrap all threaded funcs in this try/catch
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment):
|
def download_assignment(base_assign_dir: Path, course: Course, assignment: Assignment):
|
||||||
assignment_title = make_valid_filename(str(assignment.title))
|
try:
|
||||||
assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
|
assignment_title = make_valid_filename(str(assignment.name))
|
||||||
assign_dir = os.path.join(base_assign_dir, assignment_title)
|
assignment_title = shorten_file_name(assignment_title, len(assignment_title) - global_consts.MAX_FOLDER_NAME_SIZE)
|
||||||
|
assign_dir = Path(base_assign_dir, assignment_title)
|
||||||
|
assign_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if assignment.html_url != "":
|
if assignment.html_url:
|
||||||
if not os.path.exists(assign_dir):
|
download_page(assignment.html_url, assign_dir, "assignment.html")
|
||||||
os.makedirs(assign_dir)
|
|
||||||
|
|
||||||
assignment_page_path = os.path.join(assign_dir, "assignment.html")
|
# Download attached files.
|
||||||
|
if assignment.description:
|
||||||
|
for file in get_embedded_files(course, assignment.description):
|
||||||
|
file.download(assign_dir / file.display_name)
|
||||||
|
|
||||||
if not os.path.exists(assignment_page_path):
|
# Students cannot view their past attempts, but this logic is left if that's ever implemented in Canvas.
|
||||||
download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
|
submissions = [assignment.get_submission(global_consts.USER_ID)]
|
||||||
|
for submission in submissions:
|
||||||
extra_files = get_extra_assignment_files(assignment.description, cookie_jar)
|
download_attempt(submission, assign_dir)
|
||||||
for name, url in extra_files:
|
submission_dir = assign_dir / 'submission' / str(submission.id)
|
||||||
download_file(url, Path(assign_dir, name), cookie_jar)
|
for attachment in submission.attachments:
|
||||||
|
filepath = submission_dir / attachment.display_name
|
||||||
for submission in assignment.submissions:
|
if not filepath.exists():
|
||||||
download_submission(assignment, submission, assign_dir, cookies_path)
|
attachment.download(filepath)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
def download_submission(assignment, submission, assign_dir, cookies_path):
|
def download_attempt(submission: Submission, assign_dir: Path):
|
||||||
submission_dir = assign_dir
|
try:
|
||||||
|
submission_dir = assign_dir / 'submission' / str(submission.id)
|
||||||
if len(assignment.submissions) != 1:
|
submission_dir.mkdir(parents=True, exist_ok=True)
|
||||||
submission_dir = os.path.join(assign_dir, str(submission.user_id))
|
for file in submission.attachments:
|
||||||
|
file.download(submission_dir / file.display_name)
|
||||||
if submission.preview_url != "":
|
if submission.preview_url:
|
||||||
if not os.path.exists(submission_dir):
|
download_page(submission.preview_url, submission_dir, f'{submission.id}.html')
|
||||||
os.makedirs(submission_dir)
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
submission_page_dir = os.path.join(submission_dir, "submission.html")
|
|
||||||
|
|
||||||
if not os.path.exists(submission_page_dir):
|
|
||||||
download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
|
|
||||||
|
|
||||||
if (submission.attempt != 1 and assignment.updated_url != "" and assignment.html_url != ""
|
|
||||||
and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
|
|
||||||
submission_dir = os.path.join(assign_dir, "attempts")
|
|
||||||
|
|
||||||
if not os.path.exists(submission_dir):
|
|
||||||
os.makedirs(submission_dir)
|
|
||||||
|
|
||||||
for i in range(submission.attempt):
|
|
||||||
filename = "attempt_" + str(i + 1) + ".html"
|
|
||||||
submission_page_attempt_dir = os.path.join(submission_dir, filename)
|
|
||||||
|
|
||||||
if not os.path.exists(submission_page_attempt_dir):
|
|
||||||
download_page(assignment.updated_url + "/history?version=" + str(i + 1), cookies_path, submission_dir, filename)
|
|
||||||
|
|
Loading…
Reference in New Issue