download course grades, validate paths and test authentication on startup, minor code clean up

This commit is contained in:
Cyberes 2024-01-11 21:57:36 -07:00
parent c5724d8b36
commit 96c63e6c65
4 changed files with 120 additions and 37 deletions

View File

@ -24,10 +24,20 @@ The tool exports all of the following data for each course:
- Pages - Pages
- Files - Files
- Modules - Modules
- Single file webpage of the Canvas page for assignments, announcements, discussions, and modules - Single file webpage of the Canvas page for grades, assignments, announcements, discussions, and modules
Additionally, all your files stored on Canvas (such as historic submissions and attachments) will be downloaded. Additionally, all your files stored on Canvas (such as historic submissions and attachments) will be downloaded.
**TO DO LIST**
- [x] Export grades.
- [x] Detect when the cookies and API token are not valid.
- [ ] Use argparse.
- [ ] Add an argument to prohibit overwriting existing files.
- [ ] Have the path to the Chrome binary be specified by a required argument.
- [ ] Use logging.
- [ ] Refactor `download_canvas.py`.
- [ ] Refactor `export.py`.
## Install ## Install
```shell ```shell

127
export.py
View File

@ -3,24 +3,19 @@ import os
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from pathlib import Path from pathlib import Path
import canvasapi
import jsonpickle import jsonpickle
import requests
import yaml import yaml
from canvasapi import Canvas from canvasapi import Canvas
from module.const import COURSES_TO_SKIP, DL_LOCATION from module.const import COURSES_TO_SKIP, DL_LOCATION
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_home_page_html, download_course_html, download_course_module_pages, download_submission_attachments from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_html, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
from module.items import CourseView from module.items import CourseView
from module.user_files import download_user_files from module.user_files import download_user_files
with open("credentials.yaml", 'r') as f: SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
credentials = yaml.full_load(f)
API_URL = credentials["API_URL"]
API_KEY = credentials["API_KEY"]
USER_ID = credentials["USER_ID"]
COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
def export_all_course_data(c): def export_all_course_data(c):
@ -34,32 +29,93 @@ def export_all_course_data(c):
if __name__ == "__main__": if __name__ == "__main__":
# Startup checks.
# credentials.yaml is looked up next to this script, so the tool behaves the
# same no matter which directory it is launched from.
creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
if not creds_file.is_file():
    print('The credentials.yaml file does not exist:', creds_file)
    quit(1)

# Open the exact file that was validated above; a bare "credentials.yaml"
# would be resolved against the current working directory instead.
with open(creds_file, 'r') as f:
    # NOTE(review): full_load accepts more than plain scalars/maps;
    # yaml.safe_load is likely sufficient for this file - confirm.
    credentials = yaml.full_load(f)

API_URL = credentials["API_URL"]
API_KEY = credentials["API_KEY"]
USER_ID = credentials["USER_ID"]

# Expand "~" BEFORE resolving. resolve() makes the path absolute, after which
# the "~" is no longer the leading component and expanduser() does nothing.
COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).expanduser().resolve())

if not Path(COOKIES_PATH).is_file():
    print('The cookies file does not exist:', COOKIES_PATH)
    quit(1)

# Load the browser-exported (Netscape/Mozilla format) cookies, keeping session
# cookies and expired entries as well.
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
# ==================================================================================================================
# Initialization
print("Welcome to the Canvas Student Data Export Tool") print("Welcome to the Canvas Student Data Export Tool")
print("Creating output directory:", DL_LOCATION)
if not os.path.exists(DL_LOCATION): if not os.path.exists(DL_LOCATION):
print("Creating output directory:", DL_LOCATION)
os.makedirs(DL_LOCATION) os.makedirs(DL_LOCATION)
print("Connecting to Canvas...") if COOKIES_PATH:
canvas = Canvas(API_URL, API_KEY) print("Authenticating with Canvas frontend...")
print('\nDownloading user files...') # Test the cookies.
cookies = MozillaCookieJar(COOKIES_PATH)
cookies.load(ignore_discard=True, ignore_expires=True)
# Requests takes a dict, not the MozillaCookieJar object.
request_cookies = {}
for cookie in cookies:
request_cookies[cookie.name] = cookie.value
r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
if r.status_code != 200:
print('Failed to fetch Canvas profile: got status code', r.status_code)
quit(1)
if not r.url.startswith(API_URL):
print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
print(r.url)
quit(1)
if 'profileContent__Block' not in r.text:
print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
quit(1)
# TODO: log debug status success here
else:
print('No cookies file specified! No HTML pages will be saved.')
print("Authenticating with Canvas API...")
canvas = Canvas(API_URL, API_KEY)
courses = canvas.get_courses(include="term")
try:
    # Building the list is what surfaces an invalid token here.
    # NOTE(review): presumably because get_courses() returns a lazily
    # fetched paginated list - confirm against canvasapi.
    course_count = len(list(courses))
except canvasapi.exceptions.InvalidAccessToken as e:
    try:
        msg = e.message[0]['message']
    except (AttributeError, IndexError, KeyError, TypeError):
        # The error payload did not have the expected shape; fall back to the
        # exception text rather than printing nothing.
        msg = str(e)
    print('Failed to fetch courses from the Canvas API:', msg)
    quit(1)
print('')
skip = set(COURSES_TO_SKIP)
# ==================================================================================================================
# Exporting
print("Downloading courses page...")
download_course_html(API_URL, COOKIES_PATH)
print('Downloading user files...')
download_user_files(canvas, DL_LOCATION / 'User Files') download_user_files(canvas, DL_LOCATION / 'User Files')
print('') print('')
all_courses_views = [] all_courses_views = []
print("Getting list of all courses...")
courses = canvas.get_courses(include="term")
course_count = len(list(courses))
skip = set(COURSES_TO_SKIP)
if COOKIES_PATH:
print("Fetching Courses...")
download_course_html(API_URL, COOKIES_PATH)
print('')
for course in courses: for course in courses:
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"): if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
continue continue
@ -69,12 +125,10 @@ if __name__ == "__main__":
valid, r = course_view.test_course(API_URL, COOKIE_JAR) valid, r = course_view.test_course(API_URL, COOKIE_JAR)
if not valid: if not valid:
print(f'Invalid course: {course_view.course_id} - {r}') print(f'Invalid course: {course_view.course_id} - {r} - {r.text}')
if r.status_code == 401: if r.status_code == 401:
print('Got a bad status code:', r.status_code) # We can't recover from this error.
quit(1) quit(1)
continue continue
course_view.assignments = find_course_assignments(course, USER_ID) course_view.assignments = find_course_assignments(course, USER_ID)
@ -84,13 +138,12 @@ if __name__ == "__main__":
course_view.modules = find_course_modules(course, course_view) course_view.modules = find_course_modules(course, course_view)
all_courses_views.append(course_view) all_courses_views.append(course_view)
download_course_files(course, course_view)
download_submission_attachments(course, course_view)
print('Downloading course home page...') print('Downloading course home page...')
download_course_home_page_html(API_URL, course_view, COOKIES_PATH) download_course_home_page_html(API_URL, course_view, COOKIES_PATH)
print('Downloading grades...')
download_course_grades_page(API_URL, course_view, COOKIES_PATH)
download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR) download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR)
download_course_module_pages(API_URL, course_view, COOKIES_PATH) download_course_module_pages(API_URL, course_view, COOKIES_PATH)
@ -99,13 +152,17 @@ if __name__ == "__main__":
download_course_discussion_pages(API_URL, course_view, COOKIES_PATH) download_course_discussion_pages(API_URL, course_view, COOKIES_PATH)
print("Exporting all course data...") download_course_files(course, course_view)
download_submission_attachments(course, course_view)
print("Exporting course metadata...")
export_all_course_data(course_view) export_all_course_data(course_view)
if course_count > 1: if course_count > 1:
print('') print('')
# Remove elemnts from the course objects that can't be JSON serialized, then format it. # Remove elements from the course objects that can't be JSON serialized, then format it.
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4) json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
all_output_path = os.path.join(DL_LOCATION, "all_output.json") all_output_path = os.path.join(DL_LOCATION, "all_output.json")

View File

@ -2,6 +2,7 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial from functools import partial
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from pathlib import Path
import canvasapi import canvasapi
import requests import requests
@ -220,3 +221,16 @@ def download_course_module_pages(api_url, course_view, cookies_path):
for _ in as_completed(futures): for _ in as_completed(futures):
bar.update() bar.update()
bar.close() bar.close()
def download_course_grades_page(api_url, course_view, cookies_path):
    """Save the grades page of a course as ``grades.html``.

    The page is written to ``DL_LOCATION/<term>/<course name>/grades.html``.
    Nothing is downloaded when no cookies file is configured or when the
    target file already exists.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Object exposing ``term``, ``name`` and ``course_id``.
        cookies_path: Path to the cookies file handed to ``download_page``.
            Any falsy value (``""`` or ``None``) disables the download.
    """
    # Treat None/empty alike: without cookies the page cannot be fetched.
    if not cookies_path:
        return

    dl_dir = Path(DL_LOCATION, course_view.term, course_view.name)
    dl_dir.mkdir(parents=True, exist_ok=True)

    # TODO: command line arg to prohibit overwrite. Default should overwrite
    # NOTE(review): the current behaviour is the opposite of that default -
    # an existing grades.html is never refreshed.
    if not (dl_dir / "grades.html").exists():
        api_target = f'{api_url}/courses/{course_view.course_id}/grades'
        download_page(api_target, cookies_path, dl_dir, "grades.html")

View File

@ -2,7 +2,9 @@ from pathlib import Path
from subprocess import run from subprocess import run
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file" SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
CHROME_PATH = "/usr/bin/chromium-browser"
# TODO: have this be specified by a required arg.
CHROME_PATH = "/usr/bin/google-chrome"
def add_quotes(s): def add_quotes(s):