Download course grades, validate paths and test authentication on startup, minor code cleanup

2024-01-11 21:57:36 -07:00 · 2024-01-11 21:57:36 -07:00 · 96c63e6c65
parent c5724d8b36
commit 96c63e6c65
4 changed files with 120 additions and 37 deletions
--- a/README.md
+++ b/README.md
@@ -24,10 +24,20 @@ The tool exports all of the following data for each course:
 - Pages
 - Files
 - Modules
-- Single file webpage of the Canvas page for assignments, announcements, discussions, and modules
+- Single file webpage of the Canvas page for grades, assignments, announcements, discussions, and modules

 Additionally, all your files stored on Canvas (such as historic submissions and attachments) will be downloaded.

+**TO DO LIST**
+- [x] Export grades.
+- [x] Detect when the cookies and API token are not valid.
+- [ ] Use argparse.
+- [ ] Add an argument to prohibit overwriting existing files.
+- [ ] Have the path to the Chrome binary be specified by a required argument.
+- [ ] Use logging.
+- [ ] Refactor `download_canvas.py`.
+- [ ] Refactor `export.py`.
+
 ## Install

 ```shell
--- a/export.py
+++ b/export.py
@@ -3,24 +3,19 @@ import os
 from http.cookiejar import MozillaCookieJar
 from pathlib import Path

+import canvasapi
 import jsonpickle
+import requests
 import yaml
 from canvasapi import Canvas

 from module.const import COURSES_TO_SKIP, DL_LOCATION
-from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_home_page_html, download_course_html, download_course_module_pages, download_submission_attachments
+from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_html, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html
 from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
 from module.items import CourseView
 from module.user_files import download_user_files

-with open("credentials.yaml", 'r') as f:
-    credentials = yaml.full_load(f)
-API_URL = credentials["API_URL"]
-API_KEY = credentials["API_KEY"]
-USER_ID = credentials["USER_ID"]
-COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
-COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
-COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
+SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))


 def export_all_course_data(c):
@@ -34,32 +29,93 @@ def export_all_course_data(c):


 if __name__ == "__main__":
+    # Startup checks.
+    creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
+    if not creds_file.is_file():
+        print('The credentials.yaml file does not exist:', creds_file)
+        quit(1)
+
+    with open("credentials.yaml", 'r') as f:
+        credentials = yaml.full_load(f)
+
+    API_URL = credentials["API_URL"]
+    API_KEY = credentials["API_KEY"]
+    USER_ID = credentials["USER_ID"]
+    COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
+
+    if not Path(COOKIES_PATH).is_file():
+        print('The cookies file does not exist:', COOKIES_PATH)
+        quit(1)
+
+    COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
+    COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
+
+    # ==================================================================================================================
+    # Initialization
+
    print("Welcome to the Canvas Student Data Export Tool")
-    print("Creating output directory:", DL_LOCATION)
    if not os.path.exists(DL_LOCATION):
+        print("Creating output directory:", DL_LOCATION)
        os.makedirs(DL_LOCATION)

-    print("Connecting to Canvas...")
-    canvas = Canvas(API_URL, API_KEY)
+    if COOKIES_PATH:
+        print("Authenticating with Canvas frontend...")

-    print('\nDownloading user files...')
+        # Test the cookies.
+        cookies = MozillaCookieJar(COOKIES_PATH)
+        cookies.load(ignore_discard=True, ignore_expires=True)
+
+        # Requests takes a dict, not the MozillaCookieJar object.
+        request_cookies = {}
+        for cookie in cookies:
+            request_cookies[cookie.name] = cookie.value
+
+        r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
+        if r.status_code != 200:
+            print('Failed to fetch Canvas profile: got status code', r.status_code)
+            quit(1)
+        if not r.url.startswith(API_URL):
+            print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
+            print(r.url)
+            quit(1)
+        if 'profileContent__Block' not in r.text:
+            print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
+            quit(1)
+
+        # TODO: log debug status success here
+    else:
+        print('No cookies file specified! No HTML pages will be saved.')
+
+    print("Authenticating with Canvas API...")
+    canvas = Canvas(API_URL, API_KEY)
+    courses = canvas.get_courses(include="term")
+    try:
+        course_count = len(list(courses))
+    except canvasapi.exceptions.InvalidAccessToken as e:
+        try:
+            msg = e.message[0]['message']
+        except:
+            # Something went very wrong.
+            msg = ''
+        print('Failed to fetch courses from the Canvas API:', msg)
+        quit(1)
+
+    print('')
+
+    skip = set(COURSES_TO_SKIP)
+
+    # ==================================================================================================================
+    # Exporting
+
+    print("Downloading courses page...")
+    download_course_html(API_URL, COOKIES_PATH)
+
+    print('Downloading user files...')
    download_user_files(canvas, DL_LOCATION / 'User Files')
    print('')

    all_courses_views = []

-    print("Getting list of all courses...")
-    courses = canvas.get_courses(include="term")
-    course_count = len(list(courses))
-
-    skip = set(COURSES_TO_SKIP)
-
-    if COOKIES_PATH:
-        print("Fetching Courses...")
-        download_course_html(API_URL, COOKIES_PATH)
-
-    print('')
-
    for course in courses:
        if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
            continue
@@ -69,12 +125,10 @@ if __name__ == "__main__":

        valid, r = course_view.test_course(API_URL, COOKIE_JAR)
        if not valid:
-            print(f'Invalid course: {course_view.course_id} - {r}')
-
+            print(f'Invalid course: {course_view.course_id} - {r} - {r.text}')
            if r.status_code == 401:
-                print('Got a bad status code:', r.status_code)
+                # We can't recover from this error.
                quit(1)
-
            continue

        course_view.assignments = find_course_assignments(course, USER_ID)
@@ -84,13 +138,12 @@ if __name__ == "__main__":
        course_view.modules = find_course_modules(course, course_view)
        all_courses_views.append(course_view)

-        download_course_files(course, course_view)
-
-        download_submission_attachments(course, course_view)
-
        print('Downloading course home page...')
        download_course_home_page_html(API_URL, course_view, COOKIES_PATH)

+        print('Downloading grades...')
+        download_course_grades_page(API_URL, course_view, COOKIES_PATH)
+
        download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR)

        download_course_module_pages(API_URL, course_view, COOKIES_PATH)
@@ -99,13 +152,17 @@ if __name__ == "__main__":

        download_course_discussion_pages(API_URL, course_view, COOKIES_PATH)

-        print("Exporting all course data...")
+        download_course_files(course, course_view)
+
+        download_submission_attachments(course, course_view)
+
+        print("Exporting course metadata...")
        export_all_course_data(course_view)

        if course_count > 1:
            print('')

-    # Remove elemnts from the course objects that can't be JSON serialized, then format it.
+    # Remove elements from the course objects that can't be JSON serialized, then format it.
    json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)

    all_output_path = os.path.join(DL_LOCATION, "all_output.json")
--- a/module/download_canvas.py
+++ b/module/download_canvas.py
@@ -2,6 +2,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import partial
 from http.cookiejar import MozillaCookieJar
+from pathlib import Path

 import canvasapi
 import requests
@@ -220,3 +221,16 @@ def download_course_module_pages(api_url, course_view, cookies_path):
            for _ in as_completed(futures):
                bar.update()
            bar.close()
+
+
+def download_course_grades_page(api_url, course_view, cookies_path):
+    if cookies_path == "":
+        return
+
+    dl_dir = Path(DL_LOCATION, course_view.term, course_view.name)
+    dl_dir.mkdir(parents=True, exist_ok=True)
+
+    # TODO: command line arg to prohibit overwrite. Default should overwrite
+    if not (dl_dir / "grades.html").exists():
+        api_target = f'{api_url}/courses/{course_view.course_id}/grades'
+        download_page(api_target, cookies_path, dl_dir, "grades.html")
--- a/module/singlefile.py
+++ b/module/singlefile.py
@@ -2,7 +2,9 @@ from pathlib import Path
 from subprocess import run

 SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
-CHROME_PATH = "/usr/bin/chromium-browser"
+
+# TODO: have this be specified by a required arg.
+CHROME_PATH = "/usr/bin/google-chrome"


 def add_quotes(s):