2024-01-25 15:43:35 -07:00
import argparse
2019-08-15 23:38:16 -06:00
import json
import os
2023-10-27 16:24:52 -06:00
from http . cookiejar import MozillaCookieJar
2023-10-27 18:59:58 -06:00
from pathlib import Path
2020-07-08 10:50:20 -06:00
2024-01-11 21:57:36 -07:00
import canvasapi
import requests
2020-07-08 14:51:04 -06:00
import yaml
2023-10-27 16:24:52 -06:00
from canvasapi import Canvas
2020-07-08 14:51:04 -06:00
2024-01-26 08:55:17 -07:00
from module . const import global_consts
from module . download_canvas import download_assignments , download_course_modules , download_course_grades_page , download_course_announcement_pages , download_course_home_page_html , download_course_discussion_pages
from module . get_canvas import find_course_pages , find_course_modules , find_course_assignments , find_course_announcements , find_course_discussions
from module . items import CanvasCourse , jsonify_anything
from module . singlefile import download_page
2023-10-27 18:04:07 -06:00
from module . user_files import download_user_files
2020-07-08 14:51:04 -06:00
2024-01-11 21:57:36 -07:00
# Absolute path of the directory that contains this script file.
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
2019-08-15 23:38:16 -06:00
2023-10-27 16:30:16 -06:00
def export_all_course_data(c):
    """Write the JSON dump of one course to ``<output>/<term>/<name>/<name>.json``.

    Args:
        c: Course object exposing ``term`` and ``name`` attributes. It is
            serialized with ``jsonify_anything``.

    The destination root is the module-level ``OUTPUT_LOCATION`` that the
    ``__main__`` section assigns before this function is called.
    """
    json_data = jsonify_anything(c)

    course_output_dir = os.path.join(OUTPUT_LOCATION, c.term, c.name)
    # exist_ok=True avoids the check-then-create race of a separate exists() test.
    os.makedirs(course_output_dir, exist_ok=True)

    course_output_path = os.path.join(course_output_dir, c.name + ".json")
    # Pin the encoding so the export does not depend on the platform's locale default.
    with open(course_output_path, "w", encoding="utf-8") as file:
        file.write(json_data)
2022-01-22 10:21:05 -07:00
2019-08-15 23:38:16 -06:00
2020-07-08 14:51:04 -06:00
if __name__ == "__main__":
    # Script entry point: parse the CLI, load credentials and cookies, verify the
    # Canvas frontend session and the API token, then export every course.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--output', default='./output', help='Output location. If it does not exist, it will be created.')
    parser.add_argument('--term', default=None, help='Only download this term.')
    parser.add_argument('--user-files', action='store_true', help="Download the user files.")
    args = parser.parse_args()

    # Expand "~" before resolving: resolve() first would turn "~/x" into "<cwd>/~/x",
    # after which expanduser() has nothing left to expand.
    OUTPUT_LOCATION = Path(args.output).expanduser().resolve().absolute()
    OUTPUT_LOCATION.mkdir(parents=True, exist_ok=True)
    # The export section below reads global_consts.OUTPUT_LOCATION, so keep it in
    # sync with the location selected through --output.
    global_consts.OUTPUT_LOCATION = OUTPUT_LOCATION

    # Startup checks.
    creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
    if not creds_file.is_file():
        print('The credentials.yaml file does not exist:', creds_file)
        raise SystemExit(1)
    # Open the same file that was just checked (next to the script), not one
    # resolved relative to the current working directory.
    with open(creds_file, 'r') as f:
        # NOTE(review): yaml.safe_load would be sufficient if this file only holds
        # plain key/value pairs -- confirm before switching loaders.
        credentials = yaml.full_load(f)

    global_consts.API_URL = credentials["API_URL"]
    global_consts.API_KEY = credentials["API_KEY"]
    global_consts.USER_ID = credentials["USER_ID"]
    global_consts.COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).expanduser().resolve().absolute())

    if not Path(global_consts.COOKIES_PATH).is_file():
        print('The cookies file does not exist:', global_consts.COOKIES_PATH)
        raise SystemExit(1)

    global_consts.COOKIE_JAR = MozillaCookieJar(global_consts.COOKIES_PATH)
    global_consts.COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)

    # ==================================================================================================================
    # Initialization

    print("Welcome to the Canvas Student Data Export Tool")
    # The output directory was already created right after argument parsing, so no
    # further existence check is needed here.

    if global_consts.COOKIES_PATH:
        # Test the cookies.
        print("Authenticating with Canvas frontend...")

        # Flatten the cookie jar into a plain name -> value mapping for requests.
        request_cookies = {c.name: c.value for c in global_consts.COOKIE_JAR}

        r = requests.get(f'{global_consts.API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
        if r.status_code != 200:
            print('Failed to fetch Canvas profile: got status code', r.status_code)
            raise SystemExit(1)
        if not r.url.startswith(global_consts.API_URL):
            print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
            print(r.url)
            raise SystemExit(1)
        if 'profileContent__Block' not in r.text:
            # TODO: add an arg to skip this check.
            print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
            raise SystemExit(1)

        # TODO: log debug status success here
    else:
        print('No cookies file specified! No HTML pages will be saved.')

    print("Authenticating with Canvas API...")
    canvas = Canvas(global_consts.API_URL, global_consts.API_KEY)
    courses = canvas.get_courses(include="term")
    try:
        # Materializing the listing is what surfaces an invalid API token.
        course_count = len(list(courses))
    except canvasapi.exceptions.InvalidAccessToken as e:
        try:
            msg = e.message[0]['message']
        except (AttributeError, IndexError, KeyError, TypeError):
            # The error payload did not have the expected shape.
            msg = ''
        print('Failed to fetch courses from the Canvas API:', msg)
        raise SystemExit(1)

    print('')

    skip = set(global_consts.COURSES_TO_SKIP)

    # ==================================================================================================================
    # Exporting

    print("Downloading courses page...")
    courses_dict = {v['id']: v for v in json.loads(jsonify_anything(courses))['_elements']}
    (global_consts.OUTPUT_LOCATION / 'courses.json').write_text(json.dumps(courses_dict))
    download_page(global_consts.API_URL + "/courses/", global_consts.OUTPUT_LOCATION, "courses.html")

    if args.user_files:
        print('Downloading user files...')
        download_user_files(canvas, OUTPUT_LOCATION / 'User Files')

    print('')

    all_courses_views = []

    for course in courses:
        # Skip excluded courses and entries lacking the attributes used below.
        if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
            continue

        resolved_canvas_course = CanvasCourse(course)

        if args.term and args.term != resolved_canvas_course.term:
            print('Skipping term:', resolved_canvas_course.term, '\n')
            continue

        print(f"=== {resolved_canvas_course.term}: {resolved_canvas_course.name} ===")

        valid, r = resolved_canvas_course.test_course(global_consts.API_URL, global_consts.COOKIE_JAR)
        if not valid:
            print(f'Invalid course: {resolved_canvas_course.course_id} - {r} - {r.text}')
            if r.status_code == 401:
                # We can't recover from this error.
                raise SystemExit(1)
            continue

        resolved_canvas_course.modules = find_course_modules(course)
        resolved_canvas_course.assignments = find_course_assignments(course)
        resolved_canvas_course.announcements = find_course_announcements(course)
        resolved_canvas_course.discussions = find_course_discussions(course)
        resolved_canvas_course.pages = find_course_pages(course)

        all_courses_views.append(resolved_canvas_course)

        print('Downloading course home page...')
        download_course_home_page_html(resolved_canvas_course)

        print('Downloading grades...')
        download_course_grades_page(resolved_canvas_course)

        download_assignments(resolved_canvas_course)

        download_course_modules(resolved_canvas_course)

        download_course_announcement_pages(resolved_canvas_course)

        download_course_discussion_pages(resolved_canvas_course)

        # TODO: nothing to test this on
        # download_course_files(course)

        print("Exporting course metadata...")
        export_all_course_data(resolved_canvas_course)

    if course_count > 1:
        print('')

    # Remove elements from the course objects that can't be JSON serialized, then format it.
    json_str = jsonify_anything(all_courses_views)

    all_output_path = os.path.join(OUTPUT_LOCATION, "all_output.json")
    # Pin the encoding so the combined export does not depend on the locale default.
    with open(all_output_path, "w", encoding="utf-8") as out_file:
        out_file.write(json_str)

    print("\nProcess complete. All canvas data exported!")