canvas-student-data-export/export.py

import json
import os
from http.cookiejar import MozillaCookieJar
import jsonpickle
import yaml
from canvasapi import Canvas
from stuff.const import COURSES_TO_SKIP, DL_LOCATION
from stuff.download_canvas import (download_assignment_pages, download_course_announcement_pages,
                                   download_course_discussion_pages, download_course_files,
                                   download_course_home_page_html, download_course_html,
                                   download_course_module_pages, download_submission_attachments)
from stuff.get_canvas import (find_course_announcements, find_course_assignments,
                              find_course_discussions, find_course_modules, find_course_pages)
from stuff.items import CourseView
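
# Export a student's Canvas data: collect assignments, announcements, discussions,
# pages, and modules through the API, download files and rendered HTML pages, and
# write per-course and combined JSON summaries under DL_LOCATION.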
# Read the Canvas URL, API key, user ID, and optional cookies-file path from credentials.yaml
with open("credentials.yaml", 'r') as f:
    credentials = yaml.full_load(f)

API_URL = credentials["API_URL"]
API_KEY = credentials["API_KEY"]
USER_ID = credentials["USER_ID"]
COOKIES_PATH = credentials["COOKIES_PATH"]

# Browser cookies are only needed for the rendered-HTML downloads, so only load
# the jar when a cookies file is actually configured
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
if COOKIES_PATH:
    COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)


def export_all_course_data(course_view):
    """Write one course's collected data as pretty-printed JSON under DL_LOCATION/<term>/<course>/."""
    json_data = json.dumps(json.loads(jsonpickle.encode(course_view, unpicklable=False)), indent=4)

    course_output_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
    if not os.path.exists(course_output_dir):
        os.makedirs(course_output_dir)

    course_output_path = os.path.join(course_output_dir, course_view.name + ".json")
    with open(course_output_path, "w") as file:
        file.write(json_data)


if __name__ == "__main__":
    print("Welcome to the Canvas Student Data Export Tool")

    print("\nConnecting to Canvas...")
    # Initialize a new Canvas object
    canvas = Canvas(API_URL, API_KEY)

    print("Creating output directory:", DL_LOCATION)
    if not os.path.exists(DL_LOCATION):
        os.makedirs(DL_LOCATION)

    all_courses_views = []

    print("Getting list of all courses...")
    courses = canvas.get_courses(include="term")
    skip = set(COURSES_TO_SKIP)

    # The course-list page is rendered HTML, so it is fetched with browser cookies
    if COOKIES_PATH:
        print("Fetching Courses...")
        download_course_html(API_URL, COOKIES_PATH)

    print('')
    for course in courses:
        # Skip courses the user opted out of, plus restricted ones that expose no name or term
        if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
            continue

        course_view = CourseView(course)
        print(f"=== {course_view.term}: {course_view.name} ===")

        # Collect course content through the Canvas API
        course_view.assignments = find_course_assignments(course, USER_ID)
        course_view.announcements = find_course_announcements(course)
        course_view.discussions = find_course_discussions(course)
        course_view.pages = find_course_pages(course)
        course_view.modules = find_course_modules(course, course_view)
        all_courses_views.append(course_view)

        # Download attached files and submissions, then the rendered HTML pages
        download_course_files(course, course_view)
        download_submission_attachments(course, course_view)

        print('Downloading course home page...')
        download_course_home_page_html(API_URL, course_view, COOKIES_PATH)
        download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR)
        download_course_module_pages(API_URL, course_view, COOKIES_PATH)
        download_course_announcement_pages(API_URL, course_view, COOKIES_PATH)
        download_course_discussion_pages(API_URL, course_view, COOKIES_PATH)

        print("Exporting all course data...")
        export_all_course_data(course_view)
print("Exporting data from all courses combined as all_output.json")
# Awful hack to make the JSON pretty. Decode it with Python stdlib json
# module then re-encode with indentation
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
all_output_path = os.path.join(DL_LOCATION, "all_output.json")
with open(all_output_path, "w") as out_file:
out_file.write(json_str)
print("\nProcess complete. All canvas data exported!")