download course grades, validate paths and test authentication on startup, minor code clean up

parent c5724d8b36
commit 96c63e6c65
README.md (12 changed lines)
@@ -24,10 +24,20 @@ The tool exports all of the following data for each course:
 - Pages
 - Files
 - Modules
-- Single file webpage of the Canvas page for assignments, announcements, discussions, and modules
+- Single file webpage of the Canvas page for grades, assignments, announcements, discussions, and modules
 
 Additionally, all your files stored on Canvas (such as historic submissions and attachments) will be downloaded.
 
+**TO DO LIST**
+- [x] Export grades.
+- [x] Detect when the cookies and API token are not valid.
+- [ ] Use argparse.
+- [ ] Add an argument to prohibit overwriting existing files.
+- [ ] Have the path to the Chrome binary be specified by a required argument.
+- [ ] Use logging.
+- [ ] Refactor `download_canvas.py`.
+- [ ] Refactor `export.py`.
+
 ## Install
 
 ```shell
export.py (127 changed lines)
@@ -3,24 +3,19 @@ import os
 from http.cookiejar import MozillaCookieJar
 from pathlib import Path
 
+import canvasapi
 import jsonpickle
+import requests
 import yaml
 from canvasapi import Canvas
 
 from module.const import COURSES_TO_SKIP, DL_LOCATION
-from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_home_page_html, download_course_html, download_course_module_pages, download_submission_attachments
+from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_html, download_course_module_pages, download_submission_attachments, download_course_grades_page, download_course_home_page_html
 from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
 from module.items import CourseView
 from module.user_files import download_user_files
 
-with open("credentials.yaml", 'r') as f:
-    credentials = yaml.full_load(f)
-API_URL = credentials["API_URL"]
-API_KEY = credentials["API_KEY"]
-USER_ID = credentials["USER_ID"]
-COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
-COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
-COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
+SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
 
 
 def export_all_course_data(c):
@@ -34,32 +29,93 @@ def export_all_course_data(c):
 
 
 if __name__ == "__main__":
+    # Startup checks.
+    creds_file = Path(SCRIPT_PATH, 'credentials.yaml')
+    if not creds_file.is_file():
+        print('The credentials.yaml file does not exist:', creds_file)
+        quit(1)
+
+    with open("credentials.yaml", 'r') as f:
+        credentials = yaml.full_load(f)
+
+    API_URL = credentials["API_URL"]
+    API_KEY = credentials["API_KEY"]
+    USER_ID = credentials["USER_ID"]
+    COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute())
+
+    if not Path(COOKIES_PATH).is_file():
+        print('The cookies file does not exist:', COOKIES_PATH)
+        quit(1)
+
+    COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
+    COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
+
+    # ==================================================================================================================
+    # Initialization
+
     print("Welcome to the Canvas Student Data Export Tool")
-    print("Creating output directory:", DL_LOCATION)
     if not os.path.exists(DL_LOCATION):
+        print("Creating output directory:", DL_LOCATION)
         os.makedirs(DL_LOCATION)
 
-    print("Connecting to Canvas...")
-    canvas = Canvas(API_URL, API_KEY)
+    if COOKIES_PATH:
+        print("Authenticating with Canvas frontend...")
 
-    print('\nDownloading user files...')
+        # Test the cookies.
+        cookies = MozillaCookieJar(COOKIES_PATH)
+        cookies.load(ignore_discard=True, ignore_expires=True)
+
+        # Requests takes a dict, not the MozillaCookieJar object.
+        request_cookies = {}
+        for cookie in cookies:
+            request_cookies[cookie.name] = cookie.value
+
+        r = requests.get(f'{API_URL}/profile', headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}, cookies=request_cookies)
+        if r.status_code != 200:
+            print('Failed to fetch Canvas profile: got status code', r.status_code)
+            quit(1)
+        if not r.url.startswith(API_URL):
+            print('Failed to fetch Canvas profile: client was redirected away from Canvas:')
+            print(r.url)
+            quit(1)
+        if 'profileContent__Block' not in r.text:
+            print('Failed to test Canvas profile: could not find an element with the class "profileContent__Block". This could mean that your authentication is incorrect.')
+            quit(1)
+
+        # TODO: log debug status success here
+    else:
+        print('No cookies file specified! No HTML pages will be saved.')
+
+    print("Authenticating with Canvas API...")
+    canvas = Canvas(API_URL, API_KEY)
+    courses = canvas.get_courses(include="term")
+    try:
+        course_count = len(list(courses))
+    except canvasapi.exceptions.InvalidAccessToken as e:
+        try:
+            msg = e.message[0]['message']
+        except:
+            # Something went very wrong.
+            msg = ''
+        print('Failed to fetch courses from the Canvas API:', msg)
+        quit(1)
+
+    print('')
+
+    skip = set(COURSES_TO_SKIP)
+
+    # ==================================================================================================================
+    # Exporting
+
+    print("Downloading courses page...")
+    download_course_html(API_URL, COOKIES_PATH)
+
+    print('Downloading user files...')
     download_user_files(canvas, DL_LOCATION / 'User Files')
     print('')
 
     all_courses_views = []
 
-    print("Getting list of all courses...")
-    courses = canvas.get_courses(include="term")
-    course_count = len(list(courses))
-
-    skip = set(COURSES_TO_SKIP)
-
-    if COOKIES_PATH:
-        print("Fetching Courses...")
-        download_course_html(API_URL, COOKIES_PATH)
-
-    print('')
-
     for course in courses:
         if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
             continue
@@ -69,12 +125,10 @@ if __name__ == "__main__":
 
         valid, r = course_view.test_course(API_URL, COOKIE_JAR)
         if not valid:
-            print(f'Invalid course: {course_view.course_id} - {r}')
+            print(f'Invalid course: {course_view.course_id} - {r} - {r.text}')
 
             if r.status_code == 401:
-                print('Got a bad status code:', r.status_code)
+                # We can't recover from this error.
                 quit(1)
 
             continue
 
         course_view.assignments = find_course_assignments(course, USER_ID)
@@ -84,13 +138,12 @@ if __name__ == "__main__":
         course_view.modules = find_course_modules(course, course_view)
         all_courses_views.append(course_view)
 
-        download_course_files(course, course_view)
-
-        download_submission_attachments(course, course_view)
-
         print('Downloading course home page...')
         download_course_home_page_html(API_URL, course_view, COOKIES_PATH)
 
+        print('Downloading grades...')
+        download_course_grades_page(API_URL, course_view, COOKIES_PATH)
+
         download_assignment_pages(API_URL, course_view, COOKIES_PATH, COOKIE_JAR)
 
         download_course_module_pages(API_URL, course_view, COOKIES_PATH)
@@ -99,13 +152,17 @@ if __name__ == "__main__":
 
         download_course_discussion_pages(API_URL, course_view, COOKIES_PATH)
 
-        print("Exporting all course data...")
+        download_course_files(course, course_view)
+
+        download_submission_attachments(course, course_view)
+
+        print("Exporting course metadata...")
         export_all_course_data(course_view)
 
         if course_count > 1:
             print('')
 
-    # Remove elemnts from the course objects that can't be JSON serialized, then format it.
+    # Remove elements from the course objects that can't be JSON serialized, then format it.
     json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
 
     all_output_path = os.path.join(DL_LOCATION, "all_output.json")
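Note: the startup check added above validates the browser cookies by fetching `{API_URL}/profile` and looking for the `profileContent__Block` marker. A minimal standalone sketch of that check, assuming the same `API_URL` and `COOKIES_PATH` values export.py loads from credentials.yaml (the helper name and boolean return convention are illustrative, not part of this commit):

```python
# Sketch only: restates the cookie test added in export.py as a standalone helper.
from http.cookiejar import MozillaCookieJar

import requests

USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')


def frontend_cookies_are_valid(api_url: str, cookies_path: str) -> bool:
    """Return True if the exported browser cookies still grant access to /profile."""
    jar = MozillaCookieJar(cookies_path)
    jar.load(ignore_discard=True, ignore_expires=True)

    # requests takes a plain dict of name -> value, not the MozillaCookieJar object.
    request_cookies = {c.name: c.value for c in jar}

    r = requests.get(f'{api_url}/profile',
                     headers={'User-Agent': USER_AGENT},
                     cookies=request_cookies)

    # A valid session stays on the Canvas domain, returns 200, and renders the profile block.
    return (r.status_code == 200
            and r.url.startswith(api_url)
            and 'profileContent__Block' in r.text)
```

Checking the final URL guards against the common failure mode where an expired session redirects to the institution's SSO login page, which would otherwise still come back as a 200.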
module/download_canvas.py
@@ -2,6 +2,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from functools import partial
 from http.cookiejar import MozillaCookieJar
+from pathlib import Path
 
 import canvasapi
 import requests
@@ -220,3 +221,16 @@ def download_course_module_pages(api_url, course_view, cookies_path):
         for _ in as_completed(futures):
             bar.update()
     bar.close()
+
+
+def download_course_grades_page(api_url, course_view, cookies_path):
+    if cookies_path == "":
+        return
+
+    dl_dir = Path(DL_LOCATION, course_view.term, course_view.name)
+    dl_dir.mkdir(parents=True, exist_ok=True)
+
+    # TODO: command line arg to prohibit overwrite. Default should overwrite
+    if not (dl_dir / "grades.html").exists():
+        api_target = f'{api_url}/courses/{course_view.course_id}/grades'
+        download_page(api_target, cookies_path, dl_dir, "grades.html")
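For reference, the new `download_course_grades_page` writes each course's grades page alongside the other exported pages. A rough illustration of the URL and on-disk path it produces for one course (the term, course name, and id below are made-up examples, and `DL_LOCATION` stands in for the value from `module.const`):

```python
# Illustration only: mirrors the path/URL construction inside download_course_grades_page.
from pathlib import Path

DL_LOCATION = Path("./output")                  # assumption: stand-in for module.const.DL_LOCATION
api_url = "https://canvas.example.edu"          # assumption: your institution's Canvas URL
course_id = 12345                               # hypothetical course id
term, name = "Fall 2023", "Example Course 101"  # hypothetical CourseView.term / CourseView.name

dl_dir = Path(DL_LOCATION, term, name)
dl_dir.mkdir(parents=True, exist_ok=True)

api_target = f"{api_url}/courses/{course_id}/grades"
print(api_target)              # page fetched and rendered through the single-file wrapper
print(dl_dir / "grades.html")  # where the rendered page ends up; skipped if it already exists
```

Note the current behavior never overwrites an existing grades.html; per the in-code TODO, the default is expected to flip to overwriting once the planned command-line flag exists.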
@@ -2,7 +2,9 @@ from pathlib import Path
 from subprocess import run
 
 SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
-CHROME_PATH = "/usr/bin/chromium-browser"
+# TODO: have this be specified by a required arg.
+CHROME_PATH = "/usr/bin/google-chrome"
 
 
 def add_quotes(s):
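The hunk above (in the single-file wrapper module) swaps the hard-coded Chromium path for Google Chrome and notes that the binary location should eventually become a required argument. A possible shape for that change, assuming the argparse approach from the README TO DO list (the flag name and wiring are guesses, not code from this commit):

```python
# Sketch of the TODO above: make the Chrome binary path a required command-line argument.
import argparse

parser = argparse.ArgumentParser(description="Canvas Student Data Export Tool")
parser.add_argument("--chrome-path", required=True,
                    help="Path to the Chrome/Chromium binary used by the single-file CLI")
args = parser.parse_args()

CHROME_PATH = args.chrome_path  # would replace the hard-coded module-level constant
```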