canvas-student-data-export/export.py

188 lines
7.4 KiB
Python
Raw Normal View History

import argparse
2019-08-15 23:38:16 -06:00
import json
import os
2023-10-27 16:24:52 -06:00
from http.cookiejar import MozillaCookieJar
from pathlib import Path
2020-07-08 10:50:20 -06:00
import canvasapi
import requests
import yaml
2023-10-27 16:24:52 -06:00
from canvasapi import Canvas
2024-01-26 08:55:17 -07:00
from module.const import global_consts
from module.download_canvas import download_assignments, download_course_modules, download_course_grades_page, download_course_announcement_pages, download_course_home_page_html, download_course_discussion_pages
from module.get_canvas import find_course_pages, find_course_modules, find_course_assignments, find_course_announcements, find_course_discussions
from module.items import CanvasCourse, jsonify_anything
from module.singlefile import download_page
2023-10-27 18:04:07 -06:00
from module.user_files import download_user_files
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
2019-08-15 23:38:16 -06:00
2023-10-27 16:30:16 -06:00
def export_all_course_data(c):
    """Write one course's serialized data to a JSON file.

    The file is written to ``<OUTPUT_LOCATION>/<c.term>/<c.name>/<c.name>.json``.
    ``OUTPUT_LOCATION`` is a module-level name that is assigned in the
    ``__main__`` section of this script, so this function is only usable
    after that section has run.

    :param c: Course object exposing ``term`` and ``name`` attributes. The
        text written is whatever ``jsonify_anything(c)`` returns.
    """
    json_data = jsonify_anything(c)

    course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(course_output_dir, exist_ok=True)

    course_output_path = os.path.join(course_output_dir, c.name + ".json")
    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding (which can raise UnicodeEncodeError on non-ASCII course data).
    with open(course_output_path, "w", encoding="utf-8") as file:
        file.write(json_data)
2019-08-15 23:38:16 -06:00
if __name__ == "__main__":
    # Script entry point: parse arguments, load credentials and cookies,
    # verify both the cookie-based frontend session and the API token, then
    # export every eligible course plus a combined all_output.json.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--output', default='./output', help='Output location. If it does not exist, it will be created.')
    parser.add_argument('--term', default=None, help='Only download this term.')
    parser.add_argument('--user-files', action='store_true', help="Download the user files.")
    args = parser.parse_args()

    # expanduser() must run before resolve(): once the path has been made
    # absolute, a leading "~" is no longer at the start and is never expanded.
    OUTPUT_LOCATION = Path(args.output).expanduser().resolve()
    # mkdir(parents=True, exist_ok=True) guarantees the directory exists, so
    # no later existence check is needed.
    OUTPUT_LOCATION.mkdir(parents=True, exist_ok=True)
    # The courses-page export below reads global_consts.OUTPUT_LOCATION, so
    # mirror the resolved path there to keep every writer on the same directory.
    global_consts.OUTPUT_LOCATION = OUTPUT_LOCATION

    # Startup checks.
    creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
    if not creds_file.is_file():
        print('The credentials.yaml file does not exist:', creds_file)
        raise SystemExit(1)

    # Open the same path that was just checked (next to this script) instead
    # of a path relative to the current working directory.
    with open(creds_file, 'r', encoding='utf-8') as f:
        # NOTE(review): yaml.full_load can construct more than plain data
        # types; yaml.safe_load is sufficient for a flat credentials mapping.
        # Left unchanged here -- confirm before switching.
        credentials = yaml.full_load(f)

    global_consts.API_URL = credentials["API_URL"]
    global_consts.API_KEY = credentials["API_KEY"]
    global_consts.USER_ID = credentials["USER_ID"]
    global_consts.COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).expanduser().resolve())

    if not Path(global_consts.COOKIES_PATH).is_file():
        print('The cookies file does not exist:', global_consts.COOKIES_PATH)
        raise SystemExit(1)

    global_consts.COOKIE_JAR = MozillaCookieJar(global_consts.COOKIES_PATH)
    global_consts.COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)

    # ==================================================================================================================
    # Initialization

    print("Welcome to the Canvas Student Data Export Tool")

    if global_consts.COOKIES_PATH:
        # Test the cookies.
        print("Authenticating with Canvas frontend...")

        # Requests takes a dict, not the MozillaCookieJar object.
        request_cookies = {c.name: c.value for c in global_consts.COOKIE_JAR}

        r = requests.get(f'{global_consts.API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
        if r.status_code != 200:
            print('Failed to fetch Canvas profile: got status code', r.status_code)
            raise SystemExit(1)

        # r.url is the final URL after any redirects; leaving the Canvas host
        # is treated as a failed login.
        if not r.url.startswith(global_consts.API_URL):
            print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
            print(r.url)
            raise SystemExit(1)
        if 'profileContent__Block' not in r.text:
            # TODO: add an arg to skip this check.
            print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
            raise SystemExit(1)

        # TODO: log debug status success here
    else:
        # NOTE(review): currently unreachable -- the script exits above when
        # the cookies file is missing, and COOKIES_PATH is a non-empty string.
        print('No cookies file specified! No HTML pages will be saved.')

    print("Authenticating with Canvas API...")
    canvas = Canvas(global_consts.API_URL, global_consts.API_KEY)
    courses = canvas.get_courses(include="term")
    try:
        # Materializing the list forces the API request, which is where an
        # invalid token surfaces.
        course_count = len(list(courses))
    except canvasapi.exceptions.InvalidAccessToken as e:
        try:
            msg = e.message[0]['message']
        except (AttributeError, IndexError, KeyError, TypeError):
            # The error payload did not have the expected shape.
            msg = ''
        print('Failed to fetch courses from the Canvas API:', msg)
        raise SystemExit(1)

    print('')

    skip = set(global_consts.COURSES_TO_SKIP)

    # ==================================================================================================================
    # Exporting

    print("Downloading courses page...")
    courses_dict = {v['id']: v for v in json.loads(jsonify_anything(courses))['_elements']}
    (global_consts.OUTPUT_LOCATION / 'courses.json').write_text(json.dumps(courses_dict))
    download_page(global_consts.API_URL + "/courses/", global_consts.OUTPUT_LOCATION, "courses.html")

    if args.user_files:
        print('Downloading user files...')
        download_user_files(canvas, OUTPUT_LOCATION / 'User Files')

    print('')

    all_courses_views = []

    for course in courses:
        # Skip explicitly excluded courses and entries lacking the name/term
        # attributes that the rest of the export relies on.
        if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
            continue

        resolved_canvas_course = CanvasCourse(course)

        if args.term and args.term != resolved_canvas_course.term:
            print('Skipping term:', resolved_canvas_course.term, '\n')
            continue

        print(f"=== {resolved_canvas_course.term}: {resolved_canvas_course.name} ===")

        valid, r = resolved_canvas_course.test_course(global_consts.API_URL, global_consts.COOKIE_JAR)
        if not valid:
            print(f'Invalid course: {resolved_canvas_course.course_id} - {r} - {r.text}')
            if r.status_code == 401:
                # We can't recover from this error.
                raise SystemExit(1)
            continue

        resolved_canvas_course.modules = find_course_modules(course)
        resolved_canvas_course.assignments = find_course_assignments(course)
        resolved_canvas_course.announcements = find_course_announcements(course)
        resolved_canvas_course.discussions = find_course_discussions(course)
        resolved_canvas_course.pages = find_course_pages(course)

        all_courses_views.append(resolved_canvas_course)

        print('Downloading course home page...')
        download_course_home_page_html(resolved_canvas_course)

        print('Downloading grades...')
        download_course_grades_page(resolved_canvas_course)

        download_assignments(resolved_canvas_course)

        download_course_modules(resolved_canvas_course)

        download_course_announcement_pages(resolved_canvas_course)

        download_course_discussion_pages(resolved_canvas_course)

        # TODO: nothing to test this on
        # download_course_files(course)

        print("Exporting course metadata...")
        export_all_course_data(resolved_canvas_course)

        if course_count > 1:
            print('')

    # Remove elements from the course objects that can't be JSON serialized, then format it.
    json_str = jsonify_anything(all_courses_views)

    all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
    # Explicit UTF-8 so the output does not depend on the platform's locale encoding.
    with open(all_output_path, "w", encoding="utf-8") as out_file:
        out_file.write(json_str)

    print("\nProcess complete. All canvas data exported!")