From b96f3dcbc79809a4ed5835f4306707cf2405b554 Mon Sep 17 00:00:00 2001
From: David Katsandres <dkatsandres@hotmail.com>
Date: Thu, 15 Aug 2019 22:38:16 -0700
Subject: [PATCH] Add project files.

---
 .gitignore                 |   1 +
 README.md                  |   2 -
 LICENSE => docs/LICENSE    |   0
 docs/README.md             |  56 +++++
 docs/_config.yml           |   2 +
 docs/_layouts/default.html |  53 ++++
 export.py                  | 487 +++++++++++++++++++++++++++++++++++++
 7 files changed, 599 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 README.md
 rename LICENSE => docs/LICENSE (100%)
 create mode 100644 docs/README.md
 create mode 100644 docs/_config.yml
 create mode 100644 docs/_layouts/default.html
 create mode 100644 export.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..600d2d3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 100644
index 343cd12..0000000
--- a/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# canvas-student-data-export
-Python script to download all of your student data from Canvas LMS
diff --git a/LICENSE b/docs/LICENSE
similarity index 100%
rename from LICENSE
rename to docs/LICENSE
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..d0a88d8
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,56 @@
+# Introduction
+The Canvas Student Data Export Tool can export nearly all of a student's data from Instructure Canvas Learning Management System (Canvas LMS).  
+This is useful when you are graduating or leaving your college or university and would like to have a backup of all the data you had in Canvas.  
+
+The tool exports all of the following data:  
+- Course Assignments
+- Course Announcements
+- Course Discussions
+- Course Pages
+- Course Files
+- Course Modules
+
+The tool will export your data in JSON format, and will organize it nicely into folders named for every term of every year.  
+Example:  
+- Fall 2013
+  - Econ 101
+    - files
+    - modules
+    - Econ 101.json
+  - English 101
+    - files
+    - modules
+    - English 101.json
+- Fall 2014
+- Fall 2015
+- Fall 2016
+- Spring 2014
+- Spring 2015
+- Spring 2016
+- Spring 2017
+- Winter 2014
+- Winter 2015
+- Winter 2016
+- Winter 2017
+- all_output.json
+
+# Getting Started
+## Dependencies
+To run the program, you will need the following dependencies:  
+`pip install requests`  
+`pip install jsonpickle`  
+`pip install canvasapi`  
+
+Then run from the command line:  
+`python export.py`
+
+## Configuration
+These are the configuration parameters for the program:
+- Canvas API URL
+- Canvas API key
+- Canvas User ID
+- Directory to download course information into (will be created if not present)
+- List of Course IDs that should be skipped
+
+# Contribute
+I would love to see this script's functionality expanded and improved! I welcome all pull requests :) Thank you!  
\ No newline at end of file
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..76f68da
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1,2 @@
+theme: jekyll-theme-cayman
+title: Canvas Student Data Export Tool
\ No newline at end of file
diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html
new file mode 100644
index 0000000..711817e
--- /dev/null
+++ b/docs/_layouts/default.html
@@ -0,0 +1,53 @@
+<!DOCTYPE html>
+<html lang="{{ site.lang | default: "en-US" }}">
+
+<head>
+    {% if site.google_analytics %}
+    <script async src="https://www.googletagmanager.com/gtag/js?id={{ site.google_analytics }}"></script>
+    <script>
+        window.dataLayer = window.dataLayer || [];
+        function gtag() { dataLayer.push(arguments); }
+        gtag('js', new Date());
+        gtag('config', '{{ site.google_analytics }}');
+    </script>
+    {% endif %}
+    <meta charset="UTF-8">
+
+    {% seo %}
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="theme-color" content="#157878">
+    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
+    <meta name="google-site-verification" content="R4dGdXPrQzegVGKrzBh1IKMPP5iTCKGEVMWSQT-Z-48" />
+    <link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
+<script nonce="" src="https://assets.evulid.cc/js/evulid-matomo.js"></script><noscript><p><img src="https://mato.evulid.cc/matomo.php?idsite=8&amp;rec=1" style="border:0" alt="" /></p></noscript></head>
+
+<body>
+    <header class="page-header" role="banner">
+        <h1 class="project-name">{{ site.title | default: site.github.repository_name }}</h1>
+        <h2 class="project-tagline">
+            {{ page.description | default: site.description | default: site.github.project_tagline }}</h2>
+        {% if site.github.is_project_page %}
+        <a href="{{ site.github.repository_url }}" class="btn">View on GitHub</a>
+        {% endif %}
+        {% if site.show_downloads %}
+        <a href="{{ site.github.zip_url }}" class="btn">Download .zip</a>
+        <a href="{{ site.github.tar_url }}" class="btn">Download .tar.gz</a>
+        {% endif %}
+    </header>
+
+    <main id="content" class="main-content" role="main">
+        {{ content }}
+
+        <footer class="site-footer">
+            {% if site.github.is_project_page %}
+            <span class="site-footer-owner"><a
+                    href="{{ site.github.repository_url }}">{{ site.github.repository_name }}</a> is maintained by <a
+                    href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a>.</span>
+            {% endif %}
+            <span class="site-footer-credits">This page was generated by <a href="https://pages.github.com">GitHub
+                    Pages</a>.</span>
+        </footer>
+    </main>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/export.py b/export.py
new file mode 100644
index 0000000..fa2c9d8
--- /dev/null
+++ b/export.py
@@ -0,0 +1,487 @@
+from canvasapi import Canvas
+import requests
+import traceback
+import jsonpickle
+import json
+import dateutil.parser
+import os
+import string
+
+# Canvas base URL; main() replaces this placeholder with the value entered at its prompt
+API_URL = ""
+# Canvas API key; main() replaces this placeholder with the value entered at its prompt
+API_KEY = ""
+# Canvas user ID passed to assignment.get_submission(); main() replaces it with the prompted value
+USER_ID = 0000000
+# Directory in which to download course information (will be created if not present)
+DL_LOCATION = "./output"
+# List of Course IDs that should be skipped (checked against course.id in main())
+COURSES_TO_SKIP = []
+
+class moduleItemView:
+    """A single module item (title, item type and external URL) for the export."""
+    # Class-level defaults; findCourseModules() assigns the real values per instance.
+    title = content_type = external_url = ""
+
+class moduleView:
+    """A Canvas module: its name and the moduleItemView objects collected for it."""
+    name, items = "", []
+
+    def __init__(self):
+        self.items = list()  # fresh list per instance, so modules never share items
+
+class pageView:
+    """A course page as exported: title, body and formatted date strings."""
+    # Empty-string defaults; findCoursePages() fills these in per instance.
+    title = body = ""
+    created_date = last_updated_date = ""
+
+class topicReplyView:
+    """A reply to a discussion topic entry (author, posted date and body)."""
+    # Empty-string defaults shared by all instances until overwritten.
+    author = posted_date = body = ""
+
+class topicEntryView:
+    """A top-level entry in a discussion, together with the replies made to it."""
+    # Class-level defaults; only topic_replies is re-created for each instance.
+    author = posted_date = body = ""
+    topic_replies = []
+
+    def __init__(self):
+        self.topic_replies = list()
+
+class discussionView:
+    """A discussion topic or announcement and the entries posted under it."""
+    # Class-level defaults; getDiscussionView() assigns the real values.
+    title = author = ""
+    posted_date = body = ""
+    topic_entries = []
+
+    def __init__(self):
+        self.topic_entries = list()
+
+class submissionView:
+    """The user's result for one assignment: grade, score, points and comments."""
+    # Every field defaults to an empty string until findCourseAssignments() sets it.
+    grade = raw_score = ""
+    total_possible_points = submission_comments = ""
+
+class assignmentView:
+    """An assignment together with a submissionView holding the user's result."""
+    # Class-level defaults; submission is replaced per instance in __init__.
+    title = description = ""
+    assigned_date = due_date = ""
+    submission = None
+
+    def __init__(self):
+        self.submission = submissionView()
+
+class courseView:
+    """Everything exported for one course; written out as JSON by exportAllCourseData()."""
+    term = course_code = name = ""
+    assignments, announcements, discussions = [], [], []
+    # Also declared here (not only attached by callers) so every view has the same fields
+    pages, modules = [], []
+
+    def __init__(self):
+        self.assignments = []
+        self.announcements = []
+        self.discussions = []
+        self.pages, self.modules = [], []
+
+def makeValidFilename(input_str):
+    """Return input_str reduced to filename-safe characters, without outer whitespace.
+
+    Only ASCII letters, digits, spaces and the characters -_.() are kept.
+    """
+    allowed = frozenset("-_.() " + string.ascii_letters + string.digits)
+    kept = [ch for ch in input_str if ch in allowed]
+
+    return "".join(kept).strip()
+
+def findCourseModules(course, course_view):
+    """Return a moduleView for each course module, downloading module files.
+
+    Items of type "File" are saved under the course's "modules" directory in
+    a folder named after their module, unless the file already exists.
+    Exceptions are printed, not raised: a failed download skips that file,
+    while a failure during the item or module loop ends that loop early.
+    """
+    modules_root = "/".join([DL_LOCATION, course_view.term, course_view.course_code, "modules"])
+
+    # Create modules directory if not present
+    if not os.path.exists(modules_root):
+        os.makedirs(modules_root)
+
+    collected = []
+
+    try:
+        for module in course.get_modules():
+            view = moduleView()
+
+            # Missing attributes are exported as empty strings
+            view.name = str(getattr(module, "name", ""))
+
+            try:
+                for item in module.get_module_items():
+                    item_view = moduleItemView()
+
+                    # Title, type and external URL
+                    item_view.title = str(getattr(item, "title", ""))
+                    item_view.content_type = str(getattr(item, "type", ""))
+                    item_view.external_url = str(getattr(item, "external_url", ""))
+
+                    # Only items of type "File" have something to download
+                    if item_view.content_type == "File":
+                        target_dir = modules_root + "/" + makeValidFilename(str(module.name))
+
+                        try:
+                            # Create directory for current module if not present
+                            if not os.path.exists(target_dir):
+                                os.makedirs(target_dir)
+
+                            # Get the file object
+                            remote_file = course.get_file(str(item.content_id))
+
+                            # Create path for module file download
+                            safe_name = makeValidFilename(str(remote_file.display_name))
+                            target_path = target_dir + "/" + safe_name
+
+                            # Download file if it doesn't already exist
+                            if not os.path.exists(target_path):
+                                remote_file.download(target_path)
+                        except Exception as e:
+                            print("Skipping module file download that gave the following error:")
+                            print(e)
+
+                    view.items.append(item_view)
+            except Exception as e:
+                print("Skipping module item that gave the following error:")
+                print(e)
+
+            collected.append(view)
+
+    except Exception as e:
+        print("Skipping entire module that gave the following error:")
+        print(e)
+
+    return collected
+
+def downloadCourseFiles(course, course_view):
+    """Download the course's files to its "files" folder; existing or failing files are skipped."""
+    dl_dir = DL_LOCATION + "/" + course_view.term + "/" + course_view.course_code + "/files"
+    # Create directory if not present
+    if not os.path.exists(dl_dir):
+        os.makedirs(dl_dir)
+    try:
+        for file in course.get_files():
+            try:
+                dl_path = dl_dir + "/" + makeValidFilename(str(file.display_name))
+                if not os.path.exists(dl_path):
+                    file.download(dl_path)
+            except Exception as e:
+                # Report and move on: one bad file must not end the whole download
+                print("Skipping file download that gave the following error:")
+                print(e)
+    except Exception as e:  # raised while listing the files rather than downloading one
+        print("Skipping file download that gave the following error:")
+        print(e)
+
+def getCoursePageUrls(course):
+    """Return the URL (as str) of every course page that has a "url" attribute.
+
+    A listing error whose message is "Not Found" is ignored silently; any other
+    error is printed. The URLs gathered before the error are still returned.
+    """
+    page_urls = []
+    try:
+        for page in course.get_pages():
+            if hasattr(page, "url"):
+                page_urls.append(str(page.url))
+    except Exception as e:
+        if getattr(e, "message", None) != "Not Found":  # not every exception has .message
+            print("Skipping page that gave the following error:")
+            print(e)
+    return page_urls
+
+def findCoursePages(course):
+    """Return a pageView for every page of the course that can be fetched.
+
+    A page that fails is reported and skipped; the remaining pages are still read.
+    """
+    page_views = []
+    date_format = "%B %d, %Y %I:%M %p"
+
+    try:
+        for url in getCoursePageUrls(course):
+            try:
+                page = course.get_page(url)
+                page_view = pageView()
+                page_view.title = str(page.title) if hasattr(page, "title") else ""
+                page_view.body = str(page.body) if hasattr(page, "body") else ""
+                # Creation and last-update timestamps are parsed and re-formatted
+                page_view.created_date = dateutil.parser.parse(page.created_at).strftime(date_format) if hasattr(page, "created_at") else ""
+                page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(date_format) if hasattr(page, "updated_at") else ""
+                page_views.append(page_view)
+            except Exception as e:
+                print("Skipping page download that gave the following error:")
+                print(e)
+    except Exception as e:
+        print("Skipping page download that gave the following error:")
+        print(e)
+
+    return page_views
+
+def findCourseAssignments(course):
+    """Return an assignmentView for every assignment in the course.
+
+    Each view carries the assignment details plus the grade, score and
+    comments of USER_ID's submission. An assignment that raises while being
+    read is reported and skipped so the remaining ones are still exported.
+    """
+    assignment_views = []
+    date_format = "%B %d, %Y %I:%M %p"
+
+    try:
+        for assignment in course.get_assignments():
+            try:
+                assignment_view = assignmentView()
+                # Title, description, assigned date and due date
+                assignment_view.title = str(assignment.name) if hasattr(assignment, "name") else ""
+                assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else ""
+                assignment_view.assigned_date = assignment.created_at_date.strftime(date_format) if hasattr(assignment, "created_at_date") else ""
+                assignment_view.due_date = assignment.due_at_date.strftime(date_format) if hasattr(assignment, "due_at_date") else ""
+
+                # Get my user's submission object
+                submission = assignment.get_submission(USER_ID)
+                submission_view = submissionView()
+
+                # My grade, my raw score, total possible score, submission comments
+                submission_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
+                submission_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
+                submission_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
+                submission_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
+                assignment_view.submission = submission_view
+
+                assignment_views.append(assignment_view)
+            except Exception as e:
+                print("Skipping assignment that gave the following error:")
+                print(e)
+    except Exception as e:
+        # Raised while listing the assignments rather than while reading one
+        print("Skipping assignment that gave the following error:")
+        print(e)
+
+    return assignment_views
+
+def findCourseAnnouncements(course):
+    """Return a discussionView for each announcement in the course.
+
+    If an error occurs it is printed and the views built so far are returned.
+    """
+    views = []
+
+    try:
+        for topic in course.get_discussion_topics(only_announcements=True):
+            views.append(getDiscussionView(topic))
+    except Exception as e:
+        print("Skipping announcement that gave the following error:")
+        print(e)
+
+    return views
+
+def getDiscussionView(discussion_topic):
+    """Build a discussionView from a discussion topic or announcement.
+
+    Copies the title, author, posted date and message, then the topic's
+    entries and one level of replies to each entry. Errors raised while
+    enumerating entries or replies are printed; what was collected is kept.
+    """
+    discussion_view = discussionView()
+
+    # Title
+    discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else ""
+    # Author
+    discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
+    # Posted date
+    discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else ""
+    # Body
+    discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
+    # Topic entries
+    if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
+        # Need to get replies to entries recursively?
+        discussion_topic_entries = discussion_topic.get_topic_entries()
+
+        try:
+            for topic_entry in discussion_topic_entries:
+                topic_entry_view = topicEntryView()
+                # Author
+                topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
+                # Posted date
+                topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else ""
+                # Body
+                topic_entry_view.body = str(topic_entry.message) if hasattr(topic_entry, "message") else ""
+
+                # Get this topic's replies
+                topic_entry_replies = topic_entry.get_replies()
+
+                try:
+                    for topic_reply in topic_entry_replies:
+                        topic_reply_view = topicReplyView()
+                        # Author
+                        topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
+                        # Posted Date
+                        topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else ""
+                        # Body: stored as "body", the field topicReplyView declares
+                        topic_reply_view.body = str(topic_reply.message) if hasattr(topic_reply, "message") else ""
+
+                        topic_entry_view.topic_replies.append(topic_reply_view)
+                except Exception as e:
+                    print("Tried to enumerate discussion topic entry replies but received the following error:")
+                    print(e)
+
+                discussion_view.topic_entries.append(topic_entry_view)
+        except Exception as e:
+            print("Tried to enumerate discussion topic entries but received the following error:")
+            print(e)
+
+    return discussion_view
+
+def findCourseDiscussions(course):
+    """Return a discussionView for each discussion topic in the course.
+
+    If an error occurs it is printed and the views built so far are returned.
+    """
+    views = []
+
+    try:
+        # Convert each topic as it is listed
+        for topic in course.get_discussion_topics():
+            views.append(getDiscussionView(topic))
+    except Exception as e:
+        print("Skipping discussion that gave the following error:")
+        print(e)
+
+    return views
+
+def getCourseView(course):
+    """Build the courseView for a course: identifying fields plus its content.
+
+    Term name, course code and course name default to "" when unavailable.
+    Assignments, announcements, discussions and pages are gathered in that
+    order, with a progress line printed before each step.
+    """
+    view = courseView()
+
+    # Course term
+    if hasattr(course, "term") and "name" in course.term.keys():
+        view.term = course.term["name"]
+    else:
+        view.term = ""
+
+    # Course code and course name
+    view.course_code = getattr(course, "course_code", "")
+    view.name = getattr(course, "name", "")
+
+    print("Working on " + view.term + ": " + view.name)
+
+    # Each step announces itself, then stores what it collected on the view
+    print("  Getting assignments")
+    view.assignments = findCourseAssignments(course)
+    print("  Getting announcements")
+    view.announcements = findCourseAnnouncements(course)
+    print("  Getting discussions")
+    view.discussions = findCourseDiscussions(course)
+    print("  Getting pages")
+    view.pages = findCoursePages(course)
+    return view
+
+def exportAllCourseData(course_view):
+    """Write course_view as indented JSON to <DL_LOCATION>/<term>/<course_code>/<course_code>.json."""
+    # jsonpickle flattens the view objects; the json round-trip adds indentation
+    flattened = jsonpickle.encode(course_view, unpicklable = False)
+    json_str = json.dumps(json.loads(flattened), indent = 4)
+
+    course_output_dir = "/".join([DL_LOCATION, course_view.term, course_view.course_code])
+    # Create directory if not present
+    if not os.path.exists(course_output_dir):
+        os.makedirs(course_output_dir)
+
+    with open(course_output_dir + "/" + course_view.course_code + ".json", "w") as out_file:
+        out_file.write(json_str)
+
+def main():
+    """Prompt for the Canvas connection details, then export each course not in COURSES_TO_SKIP.
+
+    Each course is exported to its own folder under DL_LOCATION and all courses
+    are also written together to all_output.json. A course that raises is
+    reported and skipped so the remaining courses are still exported.
+    """
+    global API_URL, API_KEY, USER_ID
+    print("Welcome to the Canvas Student Data Export Tool\n")
+
+    # Canvas API URL (whitespace around each typed value is stripped)
+    print("We will need your organization's Canvas Base URL. This is probably something like https://{schoolName}.instructure.com")
+    API_URL = input("Enter your organization's Canvas Base URL: ").strip()
+
+    # Canvas API key
+    print("\nWe will need a valid API key for your user. You can generate one in Canvas once you are logged in.")
+    API_KEY = input("Enter a valid API key for your user: ").strip()
+
+    # My Canvas User ID
+    print("\nWe will need your Canvas User ID. You can find this by logging in to canvas and then going to this URL in the same browser {yourCanvasBaseUrl}/api/v1/users/self")
+    USER_ID = input("Enter your Canvas User ID: ").strip()
+
+    print("\nConnecting to canvas\n")
+    # Initialize a new Canvas object
+    canvas = Canvas(API_URL, API_KEY)
+
+    print("Creating output directory: " + DL_LOCATION + "\n")
+    # Create directory if not present
+    if not os.path.exists(DL_LOCATION):
+        os.makedirs(DL_LOCATION)
+
+    all_courses_views = []
+
+    try:
+        print("Getting list of all courses\n")
+        skip = set(COURSES_TO_SKIP)
+        for course in canvas.get_courses(include="term"):
+            try:
+                if course.id in skip:
+                    continue
+                course_view = getCourseView(course)
+                all_courses_views.append(course_view)
+
+                print("  Downloading all files")
+                downloadCourseFiles(course, course_view)
+
+                print("  Getting modules and downloading module files")
+                course_view.modules = findCourseModules(course, course_view)
+
+                print("  Exporting all course data")
+                exportAllCourseData(course_view)
+            except Exception as e:
+                print("Skipping entire course that gave the following error:")
+                print(e)
+    except Exception as e:
+        # Raised while listing the courses rather than while exporting one
+        print("Skipping entire course that gave the following error:")
+        print(e)
+
+    print("Exporting data from all courses combined as one file: all_output.json")
+    # Awful hack to make the JSON pretty. Decode it with Python stdlib json module then re-encode with indentation
+    json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable = False)), indent = 4)
+
+    with open(DL_LOCATION + "/all_output.json", "w") as out_file:
+        out_file.write(json_str)
+
+    print("\nProcess complete. All canvas data exported!")
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as error:
+        # Last-resort handler: show the message and the full stack trace, one per
+        # line, instead of letting the interpreter report the exception itself.
+        print("Exiting due to uncaught exception:", error, traceback.format_exc(), sep="\n")
\ No newline at end of file