diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..600d2d3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 100644
index 343cd12..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# canvas-student-data-export
-Python script to download all of your student data from Canvas LMS
diff --git a/LICENSE b/docs/LICENSE
similarity index 100%
rename from LICENSE
rename to docs/LICENSE
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..d0a88d8
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,56 @@
+# Introduction
+The Canvas Student Data Export Tool can export nearly all of a student's data from the Instructure Canvas Learning Management System (Canvas LMS).
+This is useful when you are graduating or leaving your college or university and would like to keep a backup of all the data you have in Canvas.
+
+The tool exports all of the following data:
+- Course Assignments
+- Course Announcements
+- Course Discussions
+- Course Pages
+- Course Files
+- Course Modules
+
+The tool will export your data in JSON format, and will organize it nicely into folders named for every term of every year.
+Example:
+- Fall 2013
+ - Econ 101
+ - files
+ - modules
+ - Econ 101.json
+ - English 101
+ - files
+ - modules
+ - English 101.json
+- Fall 2014
+- Fall 2015
+- Fall 2016
+- Spring 2014
+- Spring 2015
+- Spring 2016
+- Spring 2017
+- Winter 2014
+- Winter 2015
+- Winter 2016
+- Winter 2017
+- all_output.json
+
+# Getting Started
+## Dependencies
+To run the program, you will need to install the following dependencies:
+`pip install requests`
+`pip install jsonpickle`
+`pip install canvasapi`
+`pip install python-dateutil`
+
+Then run from the command line:
+`python export.py`
+
+## Configuration
+These are the configuration parameters for the program:
+- Canvas API URL
+- Canvas API key
+- Canvas User ID
+- Directory into which course information is downloaded (will be created if not present)
+- List of Course IDs that should be skipped
+
+# Contribute
+I would love to see this script's functionality expanded and improved! I welcome all pull requests :) Thank you!
\ No newline at end of file
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..76f68da
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1,2 @@
+theme: jekyll-theme-cayman
+title: Canvas Student Data Export Tool
\ No newline at end of file
diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html
new file mode 100644
index 0000000..711817e
--- /dev/null
+++ b/docs/_layouts/default.html
@@ -0,0 +1,53 @@
+
+
+
+
+ {% if site.google_analytics %}
+
+
+ {% endif %}
+
+
+ {% seo %}
+
+
+
+
+
+
+
+
+
+
+
+ {{ content }}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/export.py b/export.py
new file mode 100644
index 0000000..fa2c9d8
--- /dev/null
+++ b/export.py
@@ -0,0 +1,487 @@
+from canvasapi import Canvas
+import requests
+import traceback
+import jsonpickle
+import json
+import dateutil.parser
+import os
+import string
+
+# Runtime configuration. API_URL, API_KEY and USER_ID are placeholders here;
+# main() overwrites them with the values entered at its prompts.
+
+# Base URL of the Canvas instance
+API_URL = ""
+# API key used to authenticate against Canvas
+API_KEY = ""
+# Canvas ID of the user whose submissions are fetched
+USER_ID = 0
+# Root directory that receives the export (created if it does not exist)
+DL_LOCATION = "./output"
+# IDs of the courses that main() leaves out of the export
+COURSES_TO_SKIP = []
+
+class moduleItemView:
+    """One item of a course module, as written to the export."""
+
+    # Title of the item
+    title = ""
+    # Item type; findCourseModules() downloads the item when this is "File"
+    content_type = ""
+    # External URL of the item, empty when the item has none
+    external_url = ""
+
+class moduleView:
+    """A course module together with the items it contains."""
+
+    # Class-level fallbacks; __init__ gives each instance its own item list
+    name = ""
+    items = []
+
+    def __init__(self):
+        # A fresh list per instance so that modules never share their items
+        self.items = list()
+
+class pageView:
+    """A course page, as written to the export."""
+
+    # Title of the page
+    title = ""
+    # Body of the page
+    body = ""
+    # Creation date, already formatted as text
+    created_date = ""
+    # Date of the last update, already formatted as text
+    last_updated_date = ""
+
+class topicReplyView:
+    """A reply to a discussion topic entry."""
+
+    # Name of the user who wrote the reply
+    author = ""
+    # Posting date, already formatted as text
+    posted_date = ""
+    # Text of the reply
+    body = ""
+
+class topicEntryView:
+    """An entry posted to a discussion topic, together with its replies."""
+
+    # Name of the user who wrote the entry
+    author = ""
+    # Posting date, already formatted as text
+    posted_date = ""
+    # Text of the entry
+    body = ""
+    # Class-level fallback; __init__ gives each instance its own list
+    topic_replies = []
+
+    def __init__(self):
+        # A fresh list per instance so that entries never share their replies
+        self.topic_replies = list()
+
+class discussionView:
+    """A discussion topic or announcement, together with its entries."""
+
+    # Title of the topic
+    title = ""
+    # Name of the user who posted the topic
+    author = ""
+    # Posting date, already formatted as text
+    posted_date = ""
+    # Message of the topic
+    body = ""
+    # Class-level fallback; __init__ gives each instance its own list
+    topic_entries = []
+
+    def __init__(self):
+        # A fresh list per instance so that topics never share their entries
+        self.topic_entries = list()
+
+class submissionView:
+    """The user's submission for one assignment."""
+
+    # Grade of the submission
+    grade = ""
+    # Score of the submission
+    raw_score = ""
+    # Points the assignment is worth
+    total_possible_points = ""
+    # Comments attached to the submission
+    submission_comments = ""
+
+class assignmentView:
+    """An assignment together with the user's submission for it."""
+
+    # Name of the assignment
+    title = ""
+    # Description of the assignment
+    description = ""
+    # Creation date, already formatted as text
+    assigned_date = ""
+    # Due date, already formatted as text
+    due_date = ""
+    # Class-level fallback; __init__ gives each instance its own submission
+    submission = None
+
+    def __init__(self):
+        # Every assignment starts out with an empty submission of its own
+        self.submission = submissionView()
+
+class courseView:
+    """Everything exported for a single course.
+
+    getCourseView() and main() fill an instance in; exportAllCourseData()
+    and main() then serialize it with jsonpickle.
+    """
+
+    # Name of the term the course belongs to
+    term = ""
+    # Course code; also used as folder and file name of the export
+    course_code = ""
+    # Name of the course
+    name = ""
+    # Class-level fallbacks; __init__ gives each instance its own lists
+    assignments = []
+    announcements = []
+    discussions = []
+    pages = []
+    modules = []
+
+    def __init__(self):
+        # Fresh lists per instance so that courses never share their content.
+        # pages and modules are declared here as well, so that every course
+        # view has them even before getCourseView() and main() assign them.
+        self.assignments = []
+        self.announcements = []
+        self.discussions = []
+        self.pages = []
+        self.modules = []
+
+def makeValidFilename(input_str):
+    """Return input_str reduced to characters that are safe in a file name.
+
+    Only ASCII letters, digits, spaces and the characters "-_.()" are kept.
+    Every other character is dropped, then leading and trailing whitespace
+    is trimmed.
+    """
+    allowed = frozenset("-_.() " + string.ascii_letters + string.digits)
+    kept = [ch for ch in input_str if ch in allowed]
+    return "".join(kept).strip()
+
+def _downloadModuleFile(course, module, module_item, modules_dir):
+    """Download the file behind a "File" module item into its module folder.
+
+    Errors raised while creating the folder, fetching the file object or
+    downloading it are printed and otherwise ignored.
+    """
+    module_dir = modules_dir + "/" + makeValidFilename(str(module.name))
+
+    try:
+        # One folder per module, created on first use
+        if not os.path.exists(module_dir):
+            os.makedirs(module_dir)
+
+        module_file = course.get_file(str(module_item.content_id))
+        target_path = module_dir + "/" + makeValidFilename(str(module_file.display_name))
+
+        # Files that are already on disk are not downloaded again
+        if not os.path.exists(target_path):
+            module_file.download(target_path)
+    except Exception as e:
+        print("Skipping module file download that gave the following error:")
+        print(e)
+
+
+def findCourseModules(course, course_view):
+    """Collect the modules of a course and download the files they contain.
+
+    The files are stored below
+    DL_LOCATION/<term>/<course code>/modules/<module name>.
+
+    Returns a list of moduleView objects. An error raised while reading the
+    items of a module is printed and ends that module's item list; an error
+    raised while reading the modules themselves is printed and ends the
+    collection. Whatever was gathered up to that point is returned.
+    """
+    modules_dir = "/".join([DL_LOCATION, course_view.term, course_view.course_code, "modules"])
+
+    if not os.path.exists(modules_dir):
+        os.makedirs(modules_dir)
+
+    collected = []
+
+    try:
+        for module in course.get_modules():
+            view = moduleView()
+
+            if hasattr(module, "name"):
+                view.name = str(module.name)
+            else:
+                view.name = ""
+
+            try:
+                for module_item in module.get_module_items():
+                    item_view = moduleItemView()
+
+                    item_view.title = str(module_item.title) if hasattr(module_item, "title") else ""
+                    item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
+                    item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
+
+                    if item_view.content_type == "File":
+                        _downloadModuleFile(course, module, module_item, modules_dir)
+
+                    view.items.append(item_view)
+            except Exception as e:
+                print("Skipping module item that gave the following error:")
+                print(e)
+
+            collected.append(view)
+
+    except Exception as e:
+        print("Skipping entire module that gave the following error:")
+        print(e)
+
+    return collected
+
+def downloadCourseFiles(course, course_view):
+    """Download every file of the course to DL_LOCATION/<term>/<course code>/files.
+
+    Files that already exist on disk are left alone. An error raised while
+    listing or downloading files is printed and ends the download for this
+    course.
+    """
+    files_dir = "/".join((DL_LOCATION, course_view.term, course_view.course_code, "files"))
+
+    if not os.path.exists(files_dir):
+        os.makedirs(files_dir)
+
+    try:
+        for course_file in course.get_files():
+            target_path = files_dir + "/" + makeValidFilename(str(course_file.display_name))
+
+            # Files that are already on disk are not downloaded again
+            if os.path.exists(target_path):
+                continue
+
+            course_file.download(target_path)
+    except Exception as e:
+        print("Skipping file download that gave the following error:")
+        print(e)
+
+def getCoursePageUrls(course):
+    """Return the URL of every page in the course as a list of strings.
+
+    Pages without a "url" attribute are left out. An error raised while
+    listing the pages ends the listing; it is printed unless its message is
+    "Not Found". The URLs gathered up to that point are still returned.
+    """
+    page_urls = []
+
+    try:
+        for page in course.get_pages():
+            if hasattr(page, "url"):
+                page_urls.append(str(page.url))
+    except Exception as e:
+        # Not every exception has a .message attribute; reading it directly
+        # would raise AttributeError from inside this handler. Fall back to
+        # the text of the exception instead.
+        if getattr(e, "message", str(e)) != "Not Found":
+            print("Skipping page that gave the following error:")
+            print(e)
+
+    return page_urls
+
+def findCoursePages(course):
+    """Fetch every page of the course and return them as pageView objects.
+
+    An error raised while fetching or reading a page is printed and ends the
+    collection; the pages gathered up to that point are still returned.
+    """
+    date_format = "%B %d, %Y %I:%M %p"
+    collected = []
+
+    try:
+        for url in getCoursePageUrls(course):
+            page = course.get_page(url)
+            view = pageView()
+
+            view.title = str(page.title) if hasattr(page, "title") else ""
+            view.body = str(page.body) if hasattr(page, "body") else ""
+
+            # Timestamps are parsed and re-formatted for the export
+            if hasattr(page, "created_at"):
+                view.created_date = dateutil.parser.parse(page.created_at).strftime(date_format)
+            else:
+                view.created_date = ""
+
+            if hasattr(page, "updated_at"):
+                view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(date_format)
+            else:
+                view.last_updated_date = ""
+
+            collected.append(view)
+    except Exception as e:
+        print("Skipping page download that gave the following error:")
+        print(e)
+
+    return collected
+
+def findCourseAssignments(course):
+    """Return an assignmentView, including the user's submission, per assignment.
+
+    The submission is the one of the user identified by USER_ID. An error
+    raised while listing or reading assignments is printed and ends the
+    collection; the assignments gathered up to that point are still returned.
+    """
+    date_format = "%B %d, %Y %I:%M %p"
+    collected = []
+
+    try:
+        for assignment in course.get_assignments():
+            view = assignmentView()
+
+            view.title = str(assignment.name) if hasattr(assignment, "name") else ""
+            view.description = str(assignment.description) if hasattr(assignment, "description") else ""
+
+            if hasattr(assignment, "created_at_date"):
+                view.assigned_date = assignment.created_at_date.strftime(date_format)
+            else:
+                view.assigned_date = ""
+
+            if hasattr(assignment, "due_at_date"):
+                view.due_date = assignment.due_at_date.strftime(date_format)
+            else:
+                view.due_date = ""
+
+            # Submission of the configured user for this assignment
+            submission = assignment.get_submission(USER_ID)
+
+            submission_view = submissionView()
+            submission_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
+            submission_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
+            # The points available come from the assignment, not the submission
+            submission_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
+            submission_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
+            view.submission = submission_view
+
+            collected.append(view)
+    except Exception as e:
+        print("Skipping assignment that gave the following error:")
+        print(e)
+
+    return collected
+
+def findCourseAnnouncements(course):
+    """Return a discussionView for every announcement of the course.
+
+    Announcements are the discussion topics requested with
+    only_announcements=True. An error raised while listing or reading them is
+    printed and ends the collection; the announcements gathered up to that
+    point are still returned.
+    """
+    collected = []
+
+    try:
+        for announcement in course.get_discussion_topics(only_announcements=True):
+            collected.append(getDiscussionView(announcement))
+    except Exception as e:
+        print("Skipping announcement that gave the following error:")
+        print(e)
+
+    return collected
+
+def getDiscussionView(discussion_topic):
+    """Build a discussionView from a discussion topic or announcement.
+
+    The topic's entries and the direct replies to each entry are included
+    when the topic reports a discussion_subentry_count greater than zero.
+    Replies to replies are not followed.
+
+    An error raised while reading the replies of an entry is printed and ends
+    that entry's reply list. An error raised while reading the entries is
+    printed and ends the entry list. In both cases whatever was gathered up
+    to that point is kept.
+    """
+    date_format = "%B %d, %Y %I:%M %p"
+
+    discussion_view = discussionView()
+
+    # Title
+    discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else ""
+    # Author
+    discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
+    # Posted date
+    discussion_view.posted_date = discussion_topic.created_at_date.strftime(date_format) if hasattr(discussion_topic, "created_at_date") else ""
+    # Body
+    discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
+
+    # Topics without any entries need no further requests
+    if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
+        discussion_topic_entries = discussion_topic.get_topic_entries()
+
+        try:
+            for topic_entry in discussion_topic_entries:
+                topic_entry_view = topicEntryView()
+
+                # Author
+                topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
+                # Posted date
+                topic_entry_view.posted_date = topic_entry.created_at_date.strftime(date_format) if hasattr(topic_entry, "created_at_date") else ""
+                # Body
+                topic_entry_view.body = str(topic_entry.message) if hasattr(topic_entry, "message") else ""
+
+                # Direct replies to this entry
+                topic_entry_replies = topic_entry.get_replies()
+
+                try:
+                    for topic_reply in topic_entry_replies:
+                        topic_reply_view = topicReplyView()
+
+                        # Author
+                        topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
+                        # Posted date
+                        topic_reply_view.posted_date = topic_reply.created_at_date.strftime(date_format) if hasattr(topic_reply, "created_at_date") else ""
+                        # Body: stored on "body", the attribute topicReplyView
+                        # declares and the other views use for message text
+                        topic_reply_view.body = str(topic_reply.message) if hasattr(topic_reply, "message") else ""
+
+                        topic_entry_view.topic_replies.append(topic_reply_view)
+                except Exception as e:
+                    print("Tried to enumerate discussion topic entry replies but received the following error:")
+                    print(e)
+
+                discussion_view.topic_entries.append(topic_entry_view)
+        except Exception as e:
+            print("Tried to enumerate discussion topic entries but received the following error:")
+            print(e)
+
+    return discussion_view
+
+def findCourseDiscussions(course):
+    """Return a discussionView for every discussion topic of the course.
+
+    An error raised while listing or reading the topics is printed and ends
+    the collection; the discussions gathered up to that point are still
+    returned.
+    """
+    collected = []
+
+    try:
+        for topic in course.get_discussion_topics():
+            collected.append(getDiscussionView(topic))
+    except Exception as e:
+        print("Skipping discussion that gave the following error:")
+        print(e)
+
+    return collected
+
+def getCourseView(course):
+    """Build the courseView of a course and report progress on the console.
+
+    Fills in term, course code and name, then the assignments, announcements,
+    discussions and pages, in that order. Files and modules are not handled
+    here.
+    """
+    view = courseView()
+
+    # The term is only usable when it is present and carries a name
+    has_term_name = hasattr(course, "term") and "name" in course.term.keys()
+    view.term = course.term["name"] if has_term_name else ""
+
+    if hasattr(course, "course_code"):
+        view.course_code = course.course_code
+    else:
+        view.course_code = ""
+
+    if hasattr(course, "name"):
+        view.name = course.name
+    else:
+        view.name = ""
+
+    print("Working on " + view.term + ": " + view.name)
+
+    # Each step: progress message, attribute to fill, function that fills it
+    steps = (
+        (" Getting assignments", "assignments", findCourseAssignments),
+        (" Getting announcements", "announcements", findCourseAnnouncements),
+        (" Getting discussions", "discussions", findCourseDiscussions),
+        (" Getting pages", "pages", findCoursePages),
+    )
+    for message, attribute, collect in steps:
+        print(message)
+        setattr(view, attribute, collect(course))
+
+    return view
+
+def exportAllCourseData(course_view):
+    """Write a course view to DL_LOCATION/<term>/<course code>/<course code>.json.
+
+    The target directory is created when it does not exist yet.
+    """
+    # jsonpickle output is decoded and re-encoded with the json module only
+    # to get indented output
+    encoded = jsonpickle.encode(course_view, unpicklable = False)
+    json_str = json.dumps(json.loads(encoded), indent = 4)
+
+    output_dir = "/".join((DL_LOCATION, course_view.term, course_view.course_code))
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    output_path = output_dir + "/" + course_view.course_code + ".json"
+
+    with open(output_path, "w") as out_file:
+        out_file.write(json_str)
+def main():
+    """Ask for the Canvas connection details and export every course.
+
+    Prompts for the Canvas base URL, an API key and the user ID and stores
+    them in the module-level API_URL, API_KEY and USER_ID. Each course whose
+    ID is not in COURSES_TO_SKIP is then exported into DL_LOCATION, and all
+    course views are finally written together to DL_LOCATION/all_output.json.
+
+    A course that raises an error is reported and skipped; the remaining
+    courses are still exported.
+    """
+    print("Welcome to the Canvas Student Data Export Tool\n")
+
+    # Canvas API URL
+    print("We will need your organization's Canvas Base URL. This is probably something like https://{schoolName}.instructure.com)")
+    global API_URL
+    API_URL = input("Enter your organization's Canvas Base URL: ")
+
+    # Canvas API key
+    print("\nWe will need a valid API key for your user. You can generate one in Canvas once you are logged in.")
+    global API_KEY
+    API_KEY = input("Enter a valid API key for your user: ")
+
+    # My Canvas User ID
+    print("\nWe will need your Canvas User ID. You can find this by logging in to canvas and then going to this URL in the same browser {yourCanvasBaseUrl}/api/v1/users/self")
+    global USER_ID
+    USER_ID = input("Enter your Canvas User ID: ")
+
+    print("\nConnecting to canvas\n")
+
+    # Initialize a new Canvas object
+    canvas = Canvas(API_URL, API_KEY)
+
+    print("Creating output directory: " + DL_LOCATION + "\n")
+    # Create directory if not present
+    if not os.path.exists(DL_LOCATION):
+        os.makedirs(DL_LOCATION)
+
+    all_courses_views = []
+
+    try:
+        print("Getting list of all courses\n")
+        courses = canvas.get_courses(include="term")
+
+        # Courses listed in COURSES_TO_SKIP are left out of the export
+        skip = set(COURSES_TO_SKIP)
+
+        for course in courses:
+            if course.id in skip:
+                continue
+
+            # Handle errors per course, so that one failing course does not
+            # abort the export of the courses that follow it
+            try:
+                course_view = getCourseView(course)
+
+                all_courses_views.append(course_view)
+
+                print(" Downloading all files")
+                downloadCourseFiles(course, course_view)
+
+                print(" Getting modules and downloading module files")
+                course_view.modules = findCourseModules(course, course_view)
+
+                print(" Exporting all course data")
+                exportAllCourseData(course_view)
+            except Exception as e:
+                print("Skipping entire course that gave the following error:")
+                print(e)
+    except Exception as e:
+        # Raised while requesting or iterating the course list itself
+        print("Could not retrieve the list of courses because of the following error:")
+        print(e)
+
+    print("Exporting data from all courses combined as one file: all_output.json")
+    # Awful hack to make the JSON pretty. Decode it with Python stdlib json module then re-encode with indentation
+    json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable = False)), indent = 4)
+
+    all_output_path = DL_LOCATION + "/all_output.json"
+
+    with open(all_output_path, "w") as out_file:
+        out_file.write(json_str)
+
+    print("\nProcess complete. All canvas data exported!")
+
+if __name__ == "__main__":
+    # Report anything that main() did not handle itself, with its traceback
+    try:
+        main()
+    except Exception as error:
+        print("Exiting due to uncaught exception:", error, traceback.format_exc(), sep="\n")
\ No newline at end of file