# canvas-student-data-export/export.py
#
# 1008 lines
# 37 KiB
# Python

# built in
import json
import os
import string
# external
from canvasapi import Canvas
from canvasapi.exceptions import ResourceDoesNotExist, Unauthorized
from singlefile import download_page
import dateutil.parser
import jsonpickle
import requests
import yaml
# Load settings from "credentials.yaml" when it can be opened. Anything the
# file does not provide falls back to the empty defaults below; the
# __main__ section prompts for every value that is still at its default.
try:
    with open("credentials.yaml", 'r') as f:
        # NOTE(review): yaml.full_load accepts more than plain scalars. This
        # file is presumably a trusted local file; consider yaml.safe_load
        # if only simple key/value pairs are ever stored in it.
        credentials = yaml.full_load(f)
except OSError:
    credentials = {}

# An empty YAML document loads as None, and a document that is not a mapping
# cannot be queried by key: treat both as "no credentials supplied".
if not isinstance(credentials, dict):
    credentials = {}

# Canvas API URL
API_URL = credentials.get("API_URL", "")
# Canvas API key
API_KEY = credentials.get("API_KEY", "")
# My Canvas User ID
USER_ID = credentials.get("USER_ID", 0)
# Browser Cookies File
COOKIES_PATH = credentials.get("COOKIES_PATH", "")

# Directory in which to download course information to (will be created if not
# present)
DL_LOCATION = "./output"
# List of Course IDs that should be skipped (need to be integers)
COURSES_TO_SKIP = [288290, 512033]

# strftime format used for the dates written to the exported JSON
DATE_TEMPLATE = "%B %d, %Y %I:%M %p"

# Max PATH length is 260 characters on Windows. 70 is just an estimate for a
# reasonable max folder name to prevent the chance of reaching the limit.
# Applies to modules, assignments, announcements, and discussions.
# If a folder name exceeds this limit, a "-" is added to the end to indicate
# it was shortened ("..." is not valid).
MAX_FOLDER_NAME_SIZE = 70
class moduleItemView:
    """Plain record describing one item inside a module.

    The values below are class-level defaults; the exporter assigns the real
    values on each instance after creating it.
    """

    id: int = 0             # module item ID
    title: str = ""         # item title
    content_type: str = ""  # item type string, e.g. "File"
    url: str = ""           # HTML URL of the item
    external_url: str = ""  # external URL of the item, when it has one
class moduleView:
    """Plain record describing a module and the items it contains."""

    id: int = 0       # module ID
    name: str = ""    # module name
    items: list = []  # class-level default; replaced per instance below

    def __init__(self):
        # Give every instance its own list so items are never shared
        self.items = list()
class pageView:
    """Plain record describing a course page.

    The values below are class-level defaults; the exporter assigns the real
    values on each instance after creating it.
    """

    id: int = 0                   # page ID
    title: str = ""               # page title
    body: str = ""                # page body
    created_date: str = ""        # formatted creation date
    last_updated_date: str = ""   # formatted date of the last update
class topicReplyView:
    """Plain record describing a reply to a discussion topic entry.

    The values below are class-level defaults; the exporter assigns the real
    values on each instance after creating it.
    """

    id: int = 0            # reply ID
    author: str = ""       # name of the user who wrote the reply
    posted_date: str = ""  # formatted posting date
    body: str = ""         # reply text
class topicEntryView:
    """Plain record describing a discussion topic entry and its replies."""

    id: int = 0               # entry ID
    author: str = ""          # name of the user who wrote the entry
    posted_date: str = ""     # formatted posting date
    body: str = ""            # entry text
    topic_replies: list = []  # class-level default; replaced per instance below

    def __init__(self):
        # Give every instance its own list so replies are never shared
        self.topic_replies = list()
class discussionView:
    """Plain record describing a discussion topic (also used for announcements)."""

    id: int = 0               # topic ID
    title: str = ""           # topic title
    author: str = ""          # name of the user who posted the topic
    posted_date: str = ""     # formatted posting date
    body: str = ""            # topic message
    topic_entries: list = []  # class-level default; replaced per instance below
    url: str = ""             # HTML URL of the topic
    amount_pages: int = 0     # number of HTML pages to download for the topic

    def __init__(self):
        # Give every instance its own list so entries are never shared
        self.topic_entries = list()
class submissionView:
    """Plain record describing one submission to an assignment."""

    id: int = 0                      # submission ID
    attachments: list = []           # class-level default; replaced per instance below
    grade: str = ""                  # grade, stored as text
    raw_score: str = ""              # score, stored as text
    submission_comments: str = ""    # submission comments, stored as text
    total_possible_points: str = ""  # points possible for the assignment, as text
    attempt: int = 0                 # attempt number (0 when none is reported)
    user_id: str = "no-id"           # ID of the submitting user, stored as text
    preview_url: str = ""            # URL of the submission preview page
    ext_url: str = ""                # external URL of the submission

    def __init__(self):
        # Give every instance its own list so attachments are never shared
        self.attachments = list()
class attachmentView:
    """Plain record describing a file attached to a submission.

    The values below are class-level defaults; the exporter assigns the real
    values on each instance after creating it.
    """

    id: int = 0         # attachment ID
    filename: str = ""  # file name of the attachment
    url: str = ""       # download URL of the attachment
class assignmentView:
    """Plain record describing an assignment and its submissions."""

    id: int = 0              # assignment ID
    title: str = ""          # assignment name, sanitised for use as a file name
    description: str = ""    # assignment description
    assigned_date: str = ""  # formatted creation date
    due_date: str = ""       # formatted due date
    submissions: list = []   # class-level default; replaced per instance below
    html_url: str = ""       # HTML URL of the assignment page
    ext_url: str = ""        # external URL of the assignment
    updated_url: str = ""    # URL derived from the submissions download URL

    def __init__(self):
        # Give every instance its own list so submissions are never shared
        self.submissions = list()
class courseView:
    """Plain record describing a course and everything exported for it."""

    course_id: int = 0        # course ID
    term: str = ""            # term name, sanitised for use as a folder name
    course_code: str = ""     # course code, sanitised for use as a folder name
    name: str = ""            # course name
    # Class-level defaults; every instance gets its own lists in __init__
    assignments: list = []
    announcements: list = []
    discussions: list = []
    modules: list = []

    def __init__(self):
        # Fresh containers per instance so courses never share their content
        self.assignments = list()
        self.announcements = list()
        self.discussions = list()
        self.modules = list()
def makeValidFilename(input_str):
    """Reduce ``input_str`` to a string that is safe to use as a file name.

    "+" becomes a space, ":" and "/" become "-", and every character other
    than ASCII letters, digits, space and ``-_.()`` is dropped. Leading
    whitespace is removed, and trailing whitespace and periods are removed
    together, so the result never ends in a space or a period.

    Args:
        input_str: The raw name. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The sanitised name. It may be empty when no valid character remains.
    """
    if not input_str:
        return input_str

    # Set membership keeps the per-character filter below cheap
    valid_chars = frozenset("-_.() " + string.ascii_letters + string.digits)

    input_str = input_str.replace("+", " ")  # Canvas default for spaces
    input_str = input_str.replace(":", "-")
    input_str = input_str.replace("/", "-")

    # Remove invalid characters
    input_str = "".join(c for c in input_str if c in valid_chars)

    # Strip trailing spaces and periods in one pass: removing them one after
    # the other leaves a trailing space behind for input such as "Report ."
    return input_str.lstrip().rstrip(" .")
def makeValidFolderPath(input_str):
    """Reduce a "/"-separated folder path to one that is safe to create.

    "+" becomes a space, ":" becomes "-", and every character other than
    ASCII letters, digits, space, "/" and ``-_.()`` is dropped. Each path
    component is then cleaned on its own: surrounding whitespace and trailing
    periods are removed, and components that end up empty are dropped. This
    removes leading/trailing separators and also discards "." and ".."
    components. The remaining components are joined with ``os.sep``.

    Args:
        input_str: The raw folder path using "/" as separator. Falsy values
            (``None``, ``""``) are returned unchanged.

    Returns:
        The sanitised path using the OS path separator.
    """
    if not input_str:
        return input_str

    # Set membership keeps the per-character filter below cheap
    valid_chars = frozenset("-_.()/ " + string.ascii_letters + string.digits)

    input_str = input_str.replace("+", " ")  # Canvas default for spaces
    input_str = input_str.replace(":", "-")

    # Remove invalid characters
    input_str = "".join(c for c in input_str if c in valid_chars)

    # Clean every component, not just the end of the whole path: a folder
    # name in the middle of the path must not end in a space or period either
    components = []
    for component in input_str.split("/"):
        component = component.lstrip().rstrip(" .")
        if component:
            components.append(component)

    # Join with the OS default separator
    return os.sep.join(components)
def shortenFileName(string, shorten_by) -> str:
    """Shorten ``string`` by ``shorten_by`` characters and mark it with "-".

    One extra character is cut to make room for the trailing "-" marker
    (trailing periods are not allowed in file names, so "..." cannot be
    used). Trailing whitespace, periods and dashes left at the cut are
    removed before the marker is appended.

    Args:
        string: The name to shorten. Falsy values are returned unchanged.
        shorten_by: Number of characters to remove. Values <= 0 mean the name
            is already short enough and it is returned unchanged.

    Returns:
        The shortened name ending in "-", or the input when nothing had to be
        shortened.
    """
    if not string or shorten_by <= 0:
        return string

    # Clamp at zero: a negative slice end would count from the end of the
    # string and keep characters that were supposed to be removed
    keep = max(len(string) - (shorten_by + 1), 0)

    shortened = string[:keep].rstrip().rstrip(".").rstrip("-")
    return shortened + "-"
def _downloadModuleFile(course, module_item, module_dir):
    """Download the file behind one "File" module item into ``module_dir``.

    Any error is printed and swallowed so one bad file does not stop the
    rest of the module from being processed.
    """
    try:
        # Create directory for current module if not present
        if not os.path.exists(module_dir):
            os.makedirs(module_dir)

        # Get the file object
        module_file = course.get_file(str(module_item.content_id))

        # Create path for module file download
        module_file_path = os.path.join(module_dir, makeValidFilename(str(module_file.display_name)))

        # Download file if it doesn't already exist
        if not os.path.exists(module_file_path):
            module_file.download(module_file_path)
    except Exception as e:
        print("Skipping module file download that gave the following error:")
        print(e)


def findCourseModules(course, course_view):
    """Collect the modules of ``course`` and download their "File" items.

    Files are stored in ``<DL_LOCATION>/<term>/<course_code>/modules/
    <module name>/files``. Errors are printed rather than raised: a failing
    file is skipped, a failure while reading a module's items ends that
    module's item list, and a failure while iterating the modules ends the
    loop and returns what was collected so far.

    Args:
        course: Course object providing ``get_modules()`` and ``get_file()``.
        course_view: View of the same course; ``term`` and ``course_code``
            determine the output directory.

    Returns:
        List of ``moduleView`` objects.
    """
    modules_dir = os.path.join(DL_LOCATION, course_view.term,
                               course_view.course_code, "modules")

    # Create modules directory if not present
    if not os.path.exists(modules_dir):
        os.makedirs(modules_dir)

    module_views = []

    try:
        for module in course.get_modules():
            module_view = moduleView()
            module_view.id = getattr(module, "id", "")
            module_view.name = str(getattr(module, "name", ""))

            try:
                # The files folder depends only on the module, so it is
                # worked out once instead of once per item.
                # If problems arise due to long pathnames, changing the
                # module name to module.id might help. A change would also
                # have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
                module_name = makeValidFilename(module_view.name)
                module_name = shortenFileName(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
                module_dir = os.path.join(modules_dir, module_name, "files")

                for module_item in module.get_module_items():
                    module_item_view = moduleItemView()
                    module_item_view.id = getattr(module_item, "id", 0)
                    module_item_view.title = str(getattr(module_item, "title", ""))
                    module_item_view.content_type = str(getattr(module_item, "type", ""))
                    module_item_view.url = str(getattr(module_item, "html_url", ""))
                    module_item_view.external_url = str(getattr(module_item, "external_url", ""))

                    if module_item_view.content_type == "File":
                        _downloadModuleFile(course, module_item, module_dir)

                    module_view.items.append(module_item_view)
            except Exception as e:
                print("Skipping module item that gave the following error:")
                print(e)

            module_views.append(module_view)
    except Exception as e:
        print("Skipping entire module that gave the following error:")
        print(e)

    return module_views
def downloadCourseFiles(course, course_view):
    """Download every file of ``course``, mirroring the course's folder layout.

    Files are written below ``<DL_LOCATION>/<term>/<course_code>`` in a
    sub-folder derived from the ``full_name`` of the folder that holds them
    (full names start with "course files"). Files that already exist locally
    are not downloaded again. Errors are printed rather than raised; a
    failing file is skipped and the remaining files are still processed.

    Args:
        course: Course object providing ``get_files()`` and ``get_folder()``.
        course_view: View of the same course; ``term`` and ``course_code``
            determine the output directory.
    """
    dl_dir = os.path.join(DL_LOCATION, course_view.term,
                          course_view.course_code)

    # Create directory if not present
    if not os.path.exists(dl_dir):
        os.makedirs(dl_dir)

    # folder_id -> local directory. Files commonly share a folder, so each
    # folder is looked up through the API only once.
    folder_dirs = {}

    try:
        for course_file in course.get_files():
            # Handle errors per file so one bad file cannot abort the rest
            try:
                folder_id = course_file.folder_id
                if folder_id not in folder_dirs:
                    file_folder = course.get_folder(folder_id)
                    folder_dirs[folder_id] = os.path.join(
                        dl_dir, makeValidFolderPath(file_folder.full_name))
                folder_dl_dir = folder_dirs[folder_id]

                if not os.path.exists(folder_dl_dir):
                    os.makedirs(folder_dl_dir)

                dl_path = os.path.join(folder_dl_dir, makeValidFilename(str(course_file.display_name)))

                # Download file if it doesn't already exist
                if not os.path.exists(dl_path):
                    print('Downloading: {}'.format(dl_path))
                    course_file.download(dl_path)
            except Exception as e:
                print("Skipping file download that gave the following error:")
                print(e)
    except Exception as e:
        # Raised while listing the course files themselves
        print("Skipping file download that gave the following error:")
        print(e)
def download_submission_attachments(course, course_view):
    """Download the attachments of every submission in ``course_view``.

    Attachments are stored in ``<DL_LOCATION>/<term>/<course_code>/
    assignments/<assignment title>``. When an assignment has any number of
    submissions other than one, each submission gets its own sub-folder
    named after the submitting user's ID. Files that already exist locally
    are not downloaded again. A download that fails or returns an HTTP error
    status is reported and skipped; nothing is written for it.

    Args:
        course: Unused; kept so the signature matches the other downloaders.
        course_view: Course view whose ``assignments`` (with their
            ``submissions`` and ``attachments``) are processed.
    """
    course_dir = os.path.join(DL_LOCATION, course_view.term,
                              course_view.course_code)

    # Create directory if not present
    if not os.path.exists(course_dir):
        os.makedirs(course_dir)

    for assignment in course_view.assignments:
        # Both values depend only on the assignment, not on the submission
        assignment_title = makeValidFilename(str(assignment.title))
        assignment_title = shortenFileName(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
        assignment_dir = os.path.join(course_dir, "assignments", assignment_title)
        single_submission = len(assignment.submissions) == 1

        for submission in assignment.submissions:
            attachment_dir = assignment_dir
            if not single_submission:
                attachment_dir = os.path.join(assignment_dir, str(submission.user_id))

            # Only create the folder when there is something to put in it
            if (not os.path.exists(attachment_dir)) and (submission.attachments):
                os.makedirs(attachment_dir)

            for attachment in submission.attachments:
                filepath = os.path.join(attachment_dir, makeValidFilename(str(attachment.id) +
                                        "_" + attachment.filename))

                if os.path.exists(filepath):
                    print('File already exists: {}'.format(filepath))
                    continue

                print('Downloading attachment: {}'.format(filepath))
                try:
                    r = requests.get(attachment.url, allow_redirects=True)
                    # Without this check an error page would be saved as if
                    # it were the attachment
                    r.raise_for_status()
                except requests.RequestException as e:
                    print("Skipping attachment download that gave the following error:")
                    print(e)
                    continue

                with open(filepath, 'wb') as f:
                    f.write(r.content)
def getCoursePageUrls(course):
    """Return the ``url`` of every page of ``course`` as a list of strings.

    Pages without a ``url`` attribute are ignored. Errors are printed rather
    than raised, except that an error whose ``message`` is "Not Found" is
    ignored silently. The URLs collected before an error are still returned.

    Args:
        course: Course object providing ``get_pages()``.

    Returns:
        List of page URL strings (possibly empty).
    """
    page_urls = []

    try:
        # Get all pages
        for page in course.get_pages():
            if hasattr(page, "url"):
                page_urls.append(str(page.url))
    except Exception as e:
        # Not every exception has a ``message`` attribute (built-in Python 3
        # exceptions do not), so read it defensively instead of raising a
        # second error from inside this handler
        if getattr(e, "message", None) != "Not Found":
            print("Skipping page that gave the following error:")
            print(e)

    return page_urls
def findCoursePages(course):
    """Fetch every page of ``course`` and return them as ``pageView`` objects.

    Errors are printed rather than raised. A page that cannot be fetched or
    converted is skipped and the remaining pages are still processed.

    Args:
        course: Course object providing ``get_pages()`` and ``get_page()``.

    Returns:
        List of ``pageView`` objects (possibly empty).
    """
    page_views = []

    try:
        # Get all page URLs
        page_urls = getCoursePageUrls(course)
    except Exception as e:
        print("Skipping page download that gave the following error:")
        print(e)
        return page_views

    for url in page_urls:
        # Handle errors per page so one bad page cannot abort the rest
        try:
            page = course.get_page(url)

            page_view = pageView()
            page_view.id = getattr(page, "id", 0)
            page_view.title = str(getattr(page, "title", ""))
            page_view.body = str(getattr(page, "body", ""))

            # Date created
            page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE) if \
                hasattr(page, "created_at") else ""
            # Date last updated
            page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE) if \
                hasattr(page, "updated_at") else ""

            page_views.append(page_view)
        except Exception as e:
            print("Skipping page download that gave the following error:")
            print(e)

    return page_views
def findCourseAssignments(course):
    """Collect every assignment of ``course`` together with its submissions.

    For each assignment an ``assignmentView`` is filled in from the
    attributes the API object exposes. Submissions are requested for the
    whole class first; if that raises ``Unauthorized``, only the submission
    of ``USER_ID`` is requested instead.

    Errors are printed rather than raised. An error raised while iterating
    the assignments ends the loop, and the views collected so far are
    returned.

    Args:
        course: Course object providing ``get_assignments()``.

    Returns:
        List of ``assignmentView`` objects.
    """
    assignment_views = []

    # Get all assignments
    assignments = course.get_assignments()

    try:
        for assignment in assignments:
            # Create a new assignment view
            assignment_view = assignmentView()

            # ID
            assignment_view.id = assignment.id if \
                hasattr(assignment, "id") else ""

            # Title (sanitised because it is later used as a folder name)
            assignment_view.title = makeValidFilename(str(assignment.name)) if \
                hasattr(assignment, "name") else ""
            # Description
            assignment_view.description = str(assignment.description) if \
                hasattr(assignment, "description") else ""

            # Assigned date
            assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if \
                hasattr(assignment, "created_at_date") else ""
            # Due date
            assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if \
                hasattr(assignment, "due_at_date") else ""

            # HTML Url
            assignment_view.html_url = assignment.html_url if \
                hasattr(assignment, "html_url") else ""
            # External URL
            assignment_view.ext_url = str(assignment.url) if \
                hasattr(assignment, "url") else ""
            # Other URL (more up-to-date): the part of the submissions
            # download URL that precedes "submissions?"
            assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if \
                hasattr(assignment, "submissions_download_url") else ""

            try:
                try:  # Download all submissions for entire class
                    submissions = assignment.get_submissions()
                    submissions[0]  # Trigger Unauthorized if not allowed
                except Unauthorized:
                    print("Not authorized to download entire class submissions for this assignment")
                    # Download submission for this user only
                    submissions = [assignment.get_submission(USER_ID)]
                # Indexing raises when nothing was found even though no
                # error was reported; handled by the except clause below
                submissions[0]
            except (ResourceDoesNotExist, NameError, IndexError):
                print('Got no submissions from either class or user: {}'.format(USER_ID))
            except Exception as e:
                print("Failed to retrieve submissions for this assignment")
                print(e.__class__.__name__)
            else:
                # Only reached when submissions were retrieved without error
                try:
                    for submission in submissions:
                        sub_view = submissionView()

                        # Submission ID
                        sub_view.id = submission.id if \
                            hasattr(submission, "id") else 0

                        # My grade
                        sub_view.grade = str(submission.grade) if \
                            hasattr(submission, "grade") else ""
                        # My raw score
                        sub_view.raw_score = str(submission.score) if \
                            hasattr(submission, "score") else ""
                        # Total possible score
                        sub_view.total_possible_points = str(assignment.points_possible) if \
                            hasattr(assignment, "points_possible") else ""
                        # Submission comments
                        sub_view.submission_comments = str(submission.submission_comments) if \
                            hasattr(submission, "submission_comments") else ""
                        # Attempt (0 when the attribute is missing or None)
                        sub_view.attempt = submission.attempt if \
                            hasattr(submission, "attempt") and submission.attempt is not None else 0
                        # User ID
                        sub_view.user_id = str(submission.user_id) if \
                            hasattr(submission, "user_id") else ""

                        # Submission URL
                        sub_view.preview_url = str(submission.preview_url) if \
                            hasattr(submission, "preview_url") else ""
                        # External URL
                        sub_view.ext_url = str(submission.url) if \
                            hasattr(submission, "url") else ""

                        # Probe for the attribute; submissions without
                        # attachments do not have it
                        try:
                            submission.attachments
                        except AttributeError:
                            print('No attachments')
                        else:
                            # Each attachment is read as a mapping with
                            # "url", "id" and "filename" keys
                            for attachment in submission.attachments:
                                attach_view = attachmentView()
                                attach_view.url = attachment["url"]
                                attach_view.id = attachment["id"]
                                attach_view.filename = attachment["filename"]
                                sub_view.attachments.append(attach_view)
                        assignment_view.submissions.append(sub_view)
                except Exception as e:
                    # Ends the submission loop for this assignment; the
                    # submissions collected so far are kept
                    print("Skipping submission that gave the following error:")
                    print(e)

            assignment_views.append(assignment_view)
    except Exception as e:
        print("Skipping course assignments that gave the following error:")
        print(e)

    return assignment_views
def findCourseAnnouncements(course):
    """Return a ``discussionView`` for every announcement of ``course``.

    Announcements are the discussion topics requested with
    ``only_announcements=True``. An error is printed rather than raised; it
    ends the loop and the views collected so far are returned.

    Args:
        course: Course object providing ``get_discussion_topics()``.

    Returns:
        List of ``discussionView`` objects (possibly empty).
    """
    collected = []

    try:
        for topic in course.get_discussion_topics(only_announcements=True):
            collected.append(getDiscussionView(topic))
    except Exception as err:
        print("Skipping announcement that gave the following error:")
        print(err)

    return collected
def _formatCreatedAtDate(canvas_object):
    """Return ``created_at_date`` formatted with DATE_TEMPLATE, or "" if absent."""
    if not hasattr(canvas_object, "created_at_date"):
        return ""
    return canvas_object.created_at_date.strftime(DATE_TEMPLATE)


def getDiscussionView(discussion_topic):
    """Build a ``discussionView`` (entries and replies included) for a topic.

    Attributes missing on the API objects are replaced by empty defaults.
    Errors raised while enumerating entries or replies are printed rather
    than raised; whatever was collected up to that point is kept.

    Args:
        discussion_topic: Discussion topic object providing
            ``get_topic_entries()``; its entries provide ``get_replies()``.

    Returns:
        The populated ``discussionView``.
    """
    # Create discussion view
    discussion_view = discussionView()

    discussion_view.id = getattr(discussion_topic, "id", 0)
    discussion_view.title = str(getattr(discussion_topic, "title", ""))
    discussion_view.author = str(getattr(discussion_topic, "user_name", ""))
    discussion_view.posted_date = _formatCreatedAtDate(discussion_topic)
    discussion_view.body = str(getattr(discussion_topic, "message", ""))
    discussion_view.url = str(getattr(discussion_topic, "html_url", ""))

    # Keeps track of how many topic_entries there are.
    topic_entries_counter = 0

    # Topic entries. "or 0" guards against a count that is present but None,
    # which cannot be compared with an integer.
    if (getattr(discussion_topic, "discussion_subentry_count", 0) or 0) > 0:
        # Need to get replies to entries recursively?
        discussion_topic_entries = discussion_topic.get_topic_entries()

        try:
            for topic_entry in discussion_topic_entries:
                topic_entries_counter += 1

                # Create new discussion view for the topic_entry
                topic_entry_view = topicEntryView()
                topic_entry_view.id = getattr(topic_entry, "id", 0)
                topic_entry_view.author = str(getattr(topic_entry, "user_name", ""))
                topic_entry_view.posted_date = _formatCreatedAtDate(topic_entry)
                topic_entry_view.body = str(getattr(topic_entry, "message", ""))

                # Get this topic's replies
                topic_entry_replies = topic_entry.get_replies()

                try:
                    for topic_reply in topic_entry_replies:
                        # Create new topic reply view
                        topic_reply_view = topicReplyView()
                        topic_reply_view.id = getattr(topic_reply, "id", 0)
                        topic_reply_view.author = str(getattr(topic_reply, "user_name", ""))
                        topic_reply_view.posted_date = _formatCreatedAtDate(topic_reply)

                        # The reply text belongs in ``body``, the attribute
                        # topicReplyView declares and topicEntryView uses
                        topic_reply_view.body = str(getattr(topic_reply, "message", ""))
                        # Also kept under the attribute name used so far, so
                        # consumers of earlier exports keep working
                        topic_reply_view.message = topic_reply_view.body

                        topic_entry_view.topic_replies.append(topic_reply_view)
                except Exception as e:
                    print("Tried to enumerate discussion topic entry replies but received the following error:")
                    print(e)

                discussion_view.topic_entries.append(topic_entry_view)
        except Exception as e:
            print("Tried to enumerate discussion topic entries but received the following error:")
            print(e)

    # Amount of pages. Typically 50 topic entries are stored on a page before
    # it creates another page.
    # NOTE(review): an exact multiple of 50 entries yields one page more than
    # the entries fill - confirm against Canvas paging before changing this.
    discussion_view.amount_pages = int(topic_entries_counter/50) + 1

    return discussion_view
def findCourseDiscussions(course):
    """Return a ``discussionView`` for every discussion topic of ``course``.

    An error is printed rather than raised; it ends the loop and the views
    collected so far are returned.

    Args:
        course: Course object providing ``get_discussion_topics()``.

    Returns:
        List of ``discussionView`` objects (possibly empty).
    """
    collected = []

    try:
        for topic in course.get_discussion_topics():
            collected.append(getDiscussionView(topic))
    except Exception as err:
        print("Skipping discussion that gave the following error:")
        print(err)

    return collected
def getCourseView(course):
    """Build the ``courseView`` of ``course``.

    Fills in the course's identifying fields, then collects its assignments,
    announcements, discussions and pages, printing a progress line before
    each step. Term and course code are sanitised because they are used as
    folder names.

    Args:
        course: Course object to export.

    Returns:
        The populated ``courseView``.
    """
    view = courseView()

    # Course ID
    view.course_id = getattr(course, "id", 0)

    # Course term
    term_name = ""
    if hasattr(course, "term") and "name" in course.term.keys():
        term_name = course.term["name"]
    view.term = makeValidFilename(term_name)

    # Course code
    view.course_code = makeValidFilename(getattr(course, "course_code", ""))

    # Course name
    view.name = getattr(course, "name", "")

    print("Working on " + view.term + ": " + view.name)

    print(" Getting assignments")
    view.assignments = findCourseAssignments(course)

    print(" Getting announcements")
    view.announcements = findCourseAnnouncements(course)

    print(" Getting discussions")
    view.discussions = findCourseDiscussions(course)

    print(" Getting pages")
    view.pages = findCoursePages(course)

    return view
def exportAllCourseData(course_view):
    """Write ``course_view`` as indented JSON to the course's output folder.

    The file is ``<DL_LOCATION>/<term>/<course_code>/<course_code>.json``;
    the folder is created when it does not exist yet.

    Args:
        course_view: The ``courseView`` to serialise.
    """
    # jsonpickle produces compact JSON; decode and re-encode it with the
    # standard json module to get indentation
    encoded = jsonpickle.encode(course_view, unpicklable = False)
    pretty_json = json.dumps(json.loads(encoded), indent = 4)

    target_dir = os.path.join(DL_LOCATION, course_view.term,
                              course_view.course_code)

    # Create directory if not present
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    target_path = os.path.join(target_dir, course_view.course_code + ".json")

    with open(target_path, "w") as out_file:
        out_file.write(pretty_json)
def downloadCourseHTML(api_url, cookies_path):
    """Save the course list page as ``course_list.html`` in ``DL_LOCATION``.

    Does nothing when ``cookies_path`` is empty or the file already exists.

    Args:
        api_url: Base URL of the Canvas instance.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "":
        return

    output_dir = DL_LOCATION
    list_filename = "course_list.html"

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Nothing to do when the course list was saved by an earlier run
    if os.path.exists(os.path.join(output_dir, list_filename)):
        return

    download_page(api_url + "/courses/", cookies_path, output_dir, list_filename)
def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
    """Save the home page of a course as ``homepage.html`` in its folder.

    The folder is ``<DL_LOCATION>/<term>/<course_code>``. Does nothing when
    ``cookies_path`` is empty or the file already exists.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Course view providing ``term``, ``course_code`` and
            ``course_id``.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "":
        return

    course_dir = os.path.join(DL_LOCATION, course_view.term,
                              course_view.course_code)
    homepage_filename = "homepage.html"

    # Create directory if not present
    if not os.path.exists(course_dir):
        os.makedirs(course_dir)

    # Nothing to do when the home page was saved by an earlier run
    if os.path.exists(os.path.join(course_dir, homepage_filename)):
        return

    course_url = api_url + "/courses/" + str(course_view.course_id)
    download_page(course_url, cookies_path, course_dir, homepage_filename)
def downloadAssignmentPages(api_url, course_view, cookies_path):
    """Save HTML snapshots of the assignment list, assignments and submissions.

    Everything is stored below ``<DL_LOCATION>/<term>/<course_code>/
    assignments``. Each assignment gets a folder holding ``assignment.html``
    and, per submission, ``submission.html`` (in a sub-folder named after
    the user ID unless the assignment has exactly one submission). When a
    submission has more than one attempt, every attempt is saved in an
    ``attempts`` folder. Pages that already exist locally are not downloaded
    again. Does nothing when ``cookies_path`` is empty or the course has no
    assignments.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Course view whose ``assignments`` are processed.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "" or len(course_view.assignments) == 0:
        return

    base_assign_dir = os.path.join(DL_LOCATION, course_view.term,
                                   course_view.course_code, "assignments")

    # Create directory if not present
    if not os.path.exists(base_assign_dir):
        os.makedirs(base_assign_dir)

    assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html")

    # Download assignment list (there's a chance this might be the course
    # homepage if the course has the assignments page disabled)
    if not os.path.exists(assignment_list_path):
        download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")

    for assignment in course_view.assignments:
        assignment_title = makeValidFilename(str(assignment.title))
        assignment_title = shortenFileName(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
        assign_dir = os.path.join(base_assign_dir, assignment_title)

        # Download an html image of each assignment (includes assignment
        # instructions and other stuff). Currently, this will only download
        # the main assignment page and not external pages, because those
        # external pages are given in a json format. Saving these would
        # require a lot more work than normal.
        if assignment.html_url != "":
            if not os.path.exists(assign_dir):
                os.makedirs(assign_dir)

            assignment_page_path = os.path.join(assign_dir, "assignment.html")

            # Download assignment page, this usually has instructions etc.
            if not os.path.exists(assignment_page_path):
                download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")

        # Both checks depend only on the assignment, so evaluate them once.
        # The attempt history is only fetched when updated_url is known and
        # differs from the assignment page itself.
        multiple_submissions = len(assignment.submissions) != 1
        has_history_url = (assignment.updated_url != "" and assignment.html_url != ""
                           and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/"))

        for submission in assignment.submissions:
            submission_dir = assign_dir

            # If there's more than 1 submission, add unique id to download dir
            if multiple_submissions:
                submission_dir = os.path.join(assign_dir, str(submission.user_id))

            if submission.preview_url != "":
                if not os.path.exists(submission_dir):
                    os.makedirs(submission_dir)

                submission_page_dir = os.path.join(submission_dir, "submission.html")

                # Download submission url, this is typically a more focused page
                if not os.path.exists(submission_page_dir):
                    download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")

            # If there's more than 1 attempt, save each attempt in the
            # attempts folder. ("> 1" rather than "!= 1": an attempt count of
            # 0 has nothing to save and must not create an empty folder.)
            if submission.attempt > 1 and has_history_url:
                submission_dir = os.path.join(assign_dir, "attempts")

                if not os.path.exists(submission_dir):
                    os.makedirs(submission_dir)

                # Saves the attempts if multiple were taken. This doesn't
                # account for different user IDs, as the URL for a specific
                # user's attempts is not known.
                for i in range(submission.attempt):
                    filename = "attempt_" + str(i+1) + ".html"
                    submission_page_attempt_dir = os.path.join(submission_dir, filename)

                    if not os.path.exists(submission_page_attempt_dir):
                        download_page(assignment.updated_url + "/history?version=" + str(i+1), cookies_path, submission_dir, filename)
def downloadCourseModulePages(api_url, course_view, cookies_path):
    """Save HTML snapshots of the modules list and of every module item.

    Everything is stored below ``<DL_LOCATION>/<term>/<course_code>/
    modules``; each module gets a folder named after the module, holding one
    ``<item title>.html`` per item that has a URL. Pages that already exist
    locally are not downloaded again. Does nothing when ``cookies_path`` is
    empty or the course has no modules.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Course view whose ``modules`` are processed.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "" or len(course_view.modules) == 0:
        return

    modules_dir = os.path.join(DL_LOCATION, course_view.term,
                               course_view.course_code, "modules")

    # Create modules directory if not present
    if not os.path.exists(modules_dir):
        os.makedirs(modules_dir)

    module_list_dir = os.path.join(modules_dir, "modules_list.html")

    # Downloads the modules page (possible this is disabled by the teacher).
    # Uses the cookies_path argument like every other download here, not the
    # module-level COOKIES_PATH setting.
    if not os.path.exists(module_list_dir):
        download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")

    for module in course_view.modules:
        # The folder depends only on the module, so work it out once.
        # If problems arise due to long pathnames, changing module.name to
        # module.id might help, this can also be done with item.title.
        # A change would also have to be made in findCourseModules(course, course_view)
        module_name = makeValidFilename(str(module.name))
        module_name = shortenFileName(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
        items_dir = os.path.join(modules_dir, module_name)

        for item in module.items:
            if item.url == "":
                continue

            # Create the module's directory if not present
            if not os.path.exists(items_dir):
                os.makedirs(items_dir)

            filename = makeValidFilename(str(item.title)) + ".html"
            module_item_dir = os.path.join(items_dir, filename)

            # Download the module page.
            if not os.path.exists(module_item_dir):
                download_page(item.url, cookies_path, items_dir, filename)
def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
    """Save HTML snapshots of the announcement list and every announcement.

    Everything is stored below ``<DL_LOCATION>/<term>/<course_code>/
    announcements``; each announcement with a URL gets a folder named after
    its title, holding ``announcement_<n>.html`` for each of its
    ``amount_pages`` pages. Pages that already exist locally are not
    downloaded again. Does nothing when ``cookies_path`` is empty or the
    course has no announcements.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Course view whose ``announcements`` are processed.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "" or len(course_view.announcements) == 0:
        return

    root_dir = os.path.join(DL_LOCATION, course_view.term,
                            course_view.course_code, "announcements")

    # Create directory if not present
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    # Overview page listing all announcements
    list_filename = "announcement_list.html"
    if not os.path.exists(os.path.join(root_dir, list_filename)):
        list_url = api_url + "/courses/" + str(course_view.course_id) + "/announcements/"
        download_page(list_url, cookies_path, root_dir, list_filename)

    for announcement in course_view.announcements:
        # Announcements without a URL cannot be downloaded
        if announcement.url == "":
            continue

        folder_name = makeValidFilename(str(announcement.title))
        folder_name = shortenFileName(folder_name, len(folder_name) - MAX_FOLDER_NAME_SIZE)
        target_dir = os.path.join(root_dir, folder_name)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Download each page the announcement spans (pages are numbered from 1)
        for page_number in range(1, announcement.amount_pages + 1):
            page_filename = "announcement_" + str(page_number) + ".html"

            if not os.path.exists(os.path.join(target_dir, page_filename)):
                download_page(announcement.url + "/page-" + str(page_number), cookies_path, target_dir, page_filename)
def downloadCourseDiscussionPages(api_url, course_view, cookies_path):
    """Save HTML snapshots of the discussion list and every discussion.

    Everything is stored below ``<DL_LOCATION>/<term>/<course_code>/
    discussions``; each discussion with a URL gets a folder named after its
    title, holding ``discussion_<n>.html`` for each of its ``amount_pages``
    pages. Pages that already exist locally are not downloaded again. Does
    nothing when ``cookies_path`` is empty or the course has no discussions.

    Args:
        api_url: Base URL of the Canvas instance.
        course_view: Course view whose ``discussions`` are processed.
        cookies_path: Path of the browser cookies file; "" disables the
            download.
    """
    if cookies_path == "" or len(course_view.discussions) == 0:
        return

    root_dir = os.path.join(DL_LOCATION, course_view.term,
                            course_view.course_code, "discussions")

    # Create directory if not present
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    # Overview page listing all discussion topics
    list_filename = "discussion_list.html"
    if not os.path.exists(os.path.join(root_dir, list_filename)):
        list_url = api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/"
        download_page(list_url, cookies_path, root_dir, list_filename)

    for discussion in course_view.discussions:
        # Discussions without a URL cannot be downloaded
        if discussion.url == "":
            continue

        folder_name = makeValidFilename(str(discussion.title))
        folder_name = shortenFileName(folder_name, len(folder_name) - MAX_FOLDER_NAME_SIZE)
        target_dir = os.path.join(root_dir, folder_name)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Download each page the discussion spans (pages are numbered from 1)
        for page_number in range(1, discussion.amount_pages + 1):
            page_filename = "discussion_" + str(page_number) + ".html"

            if not os.path.exists(os.path.join(target_dir, page_filename)):
                download_page(discussion.url + "/page-" + str(page_number), cookies_path, target_dir, page_filename)
if __name__ == "__main__":
    # Entry point: ask for any setting that is still at its empty default,
    # export every course that is not skipped, then write one combined JSON
    # file for all of them.
    print("Welcome to the Canvas Student Data Export Tool\n")

    # Canvas API URL
    if API_URL == "":
        print("We will need your organization's Canvas Base URL. This is "
              "probably something like https://{schoolName}.instructure.com)")
        API_URL = input("Enter your organization's Canvas Base URL: ")

    # Canvas API key
    if API_KEY == "":
        print("\nWe will need a valid API key for your user. You can generate "
              "one in Canvas once you are logged in.")
        API_KEY = input("Enter a valid API key for your user: ")

    # My Canvas User ID
    if USER_ID == 0000000:
        print("\nWe will need your Canvas User ID. You can find this by "
              "logging in to canvas and then going to this URL in the same "
              "browser {yourCanvasBaseUrl}/api/v1/users/self")
        USER_ID = input("Enter your Canvas User ID: ")

    # Cookies path
    if COOKIES_PATH == "":
        print("\nWe will need your browsers cookies file. This needs to be "
              "exported using another tool. This needs to be a path to a file "
              "formatted in the NetScape format. This can be left blank if an html "
              "images aren't wanted. ")
        COOKIES_PATH = input("Enter your cookies path: ")

    print("\nConnecting to canvas\n")

    # Initialize a new Canvas object
    canvas = Canvas(API_URL, API_KEY)

    print("Creating output directory: " + DL_LOCATION + "\n")
    # Create directory if not present
    if not os.path.exists(DL_LOCATION):
        os.makedirs(DL_LOCATION)

    all_courses_views = []

    print("Getting list of all courses\n")
    courses = canvas.get_courses(include="term")

    # Set for fast membership tests in the course loop
    skip = set(COURSES_TO_SKIP)

    # HTML snapshots are only taken when a cookies file was provided
    if (COOKIES_PATH):
        print(" Downloading course list page")
        downloadCourseHTML(API_URL, COOKIES_PATH)

    for course in courses:
        # Ignore skipped courses and courses lacking a name or a term
        is_skipped = course.id in skip
        is_incomplete = not (hasattr(course, "name") and hasattr(course, "term"))
        if is_skipped or is_incomplete:
            continue

        course_view = getCourseView(course)
        all_courses_views.append(course_view)

        print(" Downloading all files")
        downloadCourseFiles(course, course_view)

        print(" Downloading submission attachments")
        download_submission_attachments(course, course_view)

        print(" Getting modules and downloading module files")
        course_view.modules = findCourseModules(course, course_view)

        if (COOKIES_PATH):
            print(" Downloading course home page")
            downloadCourseHomePageHTML(API_URL, course_view, COOKIES_PATH)

            print(" Downloading assignment pages")
            downloadAssignmentPages(API_URL, course_view, COOKIES_PATH)

            print(" Downloading course module pages")
            downloadCourseModulePages(API_URL, course_view, COOKIES_PATH)

            print(" Downloading course announcements pages")
            downloadCourseAnnouncementPages(API_URL, course_view, COOKIES_PATH)

            print(" Downloading course discussion pages")
            downloadCourseDiscussionPages(API_URL, course_view, COOKIES_PATH)

        print(" Exporting all course data")
        exportAllCourseData(course_view)

    print("Exporting data from all courses combined as one file: "
          "all_output.json")

    # jsonpickle produces compact JSON; decode it with the stdlib json module
    # and re-encode it with indentation to make it readable
    encoded_courses = jsonpickle.encode(all_courses_views, unpicklable=False)
    json_str = json.dumps(json.loads(encoded_courses), indent=4)

    all_output_path = os.path.join(DL_LOCATION, "all_output.json")

    with open(all_output_path, "w") as out_file:
        out_file.write(json_str)

    print("\nProcess complete. All canvas data exported!")