Script seems to be working and downloading all submissions.

This commit is contained in:
Jason K. Moore 2020-07-08 13:51:04 -07:00
parent 2c54cbd18b
commit fa7e2f5046
1 changed files with 212 additions and 106 deletions

232
export.py
View File

@ -2,26 +2,38 @@
import json import json
import os import os
import string import string
import traceback
# external # external
from canvasapi import Canvas from canvasapi import Canvas
from canvasapi.exceptions import ResourceDoesNotExist
import dateutil.parser import dateutil.parser
import jsonpickle import jsonpickle
import requests import requests
import yaml
# Load the Canvas connection settings from an optional credentials.yaml file
# in the current working directory.
try:
    with open("credentials.yaml", 'r') as f:
        # safe_load only constructs plain Python objects. A bare yaml.load(f)
        # needs an explicit Loader on current PyYAML releases and can build
        # arbitrary objects on older ones.
        credentials = yaml.safe_load(f)
except OSError:
    # The credentials file could not be opened: fall back to the empty
    # placeholder values below.
    # Canvas API URL
    API_URL = ""
    # Canvas API key
    API_KEY = ""
    # My Canvas User ID
    USER_ID = 0000000
else:
    # All three keys are required when the file is present.
    API_URL = credentials["API_URL"]
    API_KEY = credentials["API_KEY"]
    USER_ID = credentials["USER_ID"]
# Canvas API URL
API_URL = ""
# Canvas API key
API_KEY = ""
# My Canvas User ID
USER_ID = 0000000
# Directory in which to download course information to (will be created if not # Directory in which to download course information to (will be created if not
# present) # present)
DL_LOCATION = "./output" DL_LOCATION = "./output"
# List of Course IDs that should be skipped (need to be integers) # List of Course IDs that should be skipped (need to be integers)
COURSES_TO_SKIP = [288290, 512033] COURSES_TO_SKIP = [288290, 512033]
DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
class moduleItemView(): class moduleItemView():
title = "" title = ""
@ -72,29 +84,42 @@ class discussionView():
class submissionView(): class submissionView():
attachments = []
grade = "" grade = ""
raw_score = "" raw_score = ""
total_possible_points = ""
submission_comments = "" submission_comments = ""
user_id = None # integer total_possible_points = ""
attachments = [] user_id = "no-id"
def __init__(self):
self.attachments = []
self.grade = ""
self.raw_score = ""
self.submission_comments = ""
self.total_possible_points = ""
self.user_id = None # integer
class attachmentView(): class attachmentView():
filename = "" filename = ""
url = "" url = ""
def __init__(self):
self.filename = ""
self.url = ""
class assignmentView(): class assignmentView():
title = "" title = ""
description = "" description = ""
assigned_date = "" assigned_date = ""
due_date = "" due_date = ""
submissions = {}
submission = None submission = None
submissions = []
def __init__(self): def __init__(self):
self.submission = submissionView() self.submission = submissionView()
self.submissions = []
class courseView(): class courseView():
@ -123,7 +148,8 @@ def makeValidFilename(input_str):
def findCourseModules(course, course_view): def findCourseModules(course, course_view):
modules_dir = DL_LOCATION + "/" + course_view.term + "/" + course_view.course_code + "/modules" modules_dir = os.path.join(DL_LOCATION, course_view.term,
course_view.course_code, "modules")
# Create modules directory if not present # Create modules directory if not present
if not os.path.exists(modules_dir): if not os.path.exists(modules_dir):
@ -190,8 +216,10 @@ def findCourseModules(course, course_view):
return module_views return module_views
def downloadCourseFiles(course, course_view): def downloadCourseFiles(course, course_view):
dl_dir = DL_LOCATION + "/" + course_view.term + "/" + course_view.course_code + "/files" dl_dir = os.path.join(DL_LOCATION, course_view.term,
course_view.course_code, "files")
# Create directory if not present # Create directory if not present
if not os.path.exists(dl_dir): if not os.path.exists(dl_dir):
@ -201,7 +229,8 @@ def downloadCourseFiles(course, course_view):
files = course.get_files() files = course.get_files()
for file in files: for file in files:
dl_path = dl_dir + "/" + makeValidFilename(str(file.display_name)) dl_path = os.path.join(dl_dir,
makeValidFilename(str(file.display_name)))
# Download file if it doesn't already exist # Download file if it doesn't already exist
if not os.path.exists(dl_path): if not os.path.exists(dl_path):
@ -211,6 +240,30 @@ def downloadCourseFiles(course, course_view):
print("Skipping file download that gave the following error:") print("Skipping file download that gave the following error:")
print(e) print(e)
def download_submission_attachments(course, course_view):
    """Download the attachments of every submission listed in course_view.

    Files are written to
    DL_LOCATION/<term>/<course_code>/<assignment title>/<user id>/<filename>.
    Attachments that already exist on disk are not downloaded again, and a
    download that fails is reported and skipped so the remaining attachments
    are still fetched.

    course      -- not used by this function; kept so the signature matches
                   the other per-course download helpers.
    course_view -- object providing ``term``, ``course_code`` and
                   ``assignments``; each assignment provides ``title`` and
                   ``submissions``, each submission ``user_id`` and
                   ``attachments``, each attachment ``filename`` and ``url``.
    """
    course_dir = os.path.join(DL_LOCATION, course_view.term,
                              course_view.course_code)

    # Create directory if not present
    os.makedirs(course_dir, exist_ok=True)

    for assignment in course_view.assignments:
        # Titles are free text and may contain path separators, so pass them
        # through the same sanitizer downloadCourseFiles uses for file names.
        assignment_dir = os.path.join(
            course_dir, makeValidFilename(str(assignment.title)))

        for submission in assignment.submissions:
            # Do not litter the output with empty per-user directories.
            if not submission.attachments:
                continue

            attachment_dir = os.path.join(assignment_dir,
                                          str(submission.user_id))
            os.makedirs(attachment_dir, exist_ok=True)

            for attachment in submission.attachments:
                filepath = os.path.join(
                    attachment_dir,
                    makeValidFilename(str(attachment.filename)))

                # Download file if it doesn't already exist
                if os.path.exists(filepath):
                    continue

                print('Downloading attachment: {}'.format(attachment.filename))
                try:
                    r = requests.get(attachment.url, allow_redirects=True,
                                     timeout=60)
                    # Without this an HTTP error page would be saved as the
                    # attachment and then skipped as "already downloaded".
                    r.raise_for_status()
                    with open(filepath, 'wb') as f:
                        f.write(r.content)
                except (requests.RequestException, OSError) as e:
                    print("Skipping attachment download that gave the "
                          "following error:")
                    print(e)
def getCoursePageUrls(course): def getCoursePageUrls(course):
page_urls = [] page_urls = []
@ -228,6 +281,7 @@ def getCoursePageUrls(course):
return page_urls return page_urls
def findCoursePages(course): def findCoursePages(course):
page_views = [] page_views = []
@ -245,9 +299,17 @@ def findCoursePages(course):
# Body # Body
page_view.body = str(page.body) if hasattr(page, "body") else "" page_view.body = str(page.body) if hasattr(page, "body") else ""
# Date created # Date created
page_view.created_date = dateutil.parser.parse(page.created_at).strftime("%B %d, %Y %I:%M %p") if hasattr(page, "created_at") else "" if hasattr(page, "created_at"):
page_view.created_date = dateutil.parser.parse(
page.created_at).strftime(DATE_TEMPLATE)
else:
page_view.created_date = ""
# Date last updated # Date last updated
page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime("%B %d, %Y %I:%M %p") if hasattr(page, "updated_at") else "" if hasattr(page, "updated_at"):
page_view.last_updated_date = dateutil.parser.parse(
page.updated_at).strftime(DATE_TEMPLATE)
else:
page_view.last_updated_date = ""
page_views.append(page_view) page_views.append(page_view)
except Exception as e: except Exception as e:
@ -256,37 +318,75 @@ def findCoursePages(course):
return page_views return page_views
def findCourseAssignments(course): def findCourseAssignments(course):
assignment_views = [] assignment_views = []
try:
# Get all assignments # Get all assignments
assignments = course.get_assignments() assignments = course.get_assignments()
for assignment in assignments: for assignment in assignments:
print(assignment)
# Create a new assignment view # Create a new assignment view
assignment_view = assignmentView() assignment_view = assignmentView()
# Title # Title
assignment_view.title = str(assignment.name) if hasattr(assignment, "name") else "" if hasattr(assignment, "name"):
assignment_view.title = str(assignment.name)
else:
assignment_view.title = ""
# Description # Description
assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else "" if hasattr(assignment, "description"):
assignment_view.description = str(assignment.description)
else:
assignment_view.description = ""
# Assigned date # Assigned date
assignment_view.assigned_date = assignment.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(assignment, "created_at_date") else "" if hasattr(assignment, "created_at_date"):
assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE)
else:
assignment_view.assigned_date = ""
# Due date # Due date
assignment_view.due_date = assignment.due_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(assignment, "due_at_date") else "" if hasattr(assignment, "due_at_date"):
assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE)
else:
assignment_view.due_date = ""
# Download all submissions # Download all submissions
try: try:
submissions = assignment.get_submissions() submissions = assignment.get_submissions()
# TODO : Figure out the exact error raised
except: except:
print("Got no submissions for this assignment") print("Got no submissions for this assignment")
else: else:
print(submissions)
for submission in submissions: for submission in submissions:
print(submission)
sub_view = submissionView() sub_view = submissionView()
# My grade
if hasattr(submission, "grade"):
sub_view.grade = str(submission.grade)
else:
sub_view.grade = ""
# My raw score
if hasattr(submission, "score"):
sub_view.raw_score = str(submission.score)
else:
sub_view.raw_score = ""
# Total possible score
if hasattr(assignment, "points_possible"):
sub_view.total_possible_points = str(assignment.points_possible)
else:
sub_view.total_possible_points = ""
# Submission comments
if hasattr(submission, "submission_comments"):
sub_view.submission_comments = str(submission.submission_comments)
else:
sub_view.submission_comments = ""
if hasattr(submission, "user_id"):
sub_view.user_id = str(submission.user_id)
else:
sub_view.user_id = "no-id"
try: try:
submission.attachments submission.attachments
except AttributeError: except AttributeError:
@ -294,14 +394,18 @@ def findCourseAssignments(course):
else: else:
for attachment in submission.attachments: for attachment in submission.attachments:
attach_view = attachmentView() attach_view = attachmentView()
attach_view.url = attachment.url attach_view.url = attachment["url"]
attach_view.filename = attachment.filename attach_view.filename = attachment["filename"]
sub_view.attachments.append sub_view.attachments.append(attach_view)
print(attachment["url"]) assignment_view.submissions.append(sub_view)
# The following is only useful if you are a student in the class.
# Get my user"s submission object # Get my user"s submission object
try:
submission = assignment.get_submission(USER_ID) submission = assignment.get_submission(USER_ID)
except ResourceDoesNotExist:
print('No submission for user: {}'.format(USER_ID))
else:
# Create a new submission view # Create a new submission view
assignment_view.submission = submissionView() assignment_view.submission = submissionView()
@ -317,13 +421,9 @@ def findCourseAssignments(course):
assignment_views.append(assignment_view) assignment_views.append(assignment_view)
except Exception as e:
print("Skipping assignment that gave the following error:")
print(e)
return assignment_views return assignment_views
def findCourseAnnouncements(course): def findCourseAnnouncements(course):
announcement_views = [] announcement_views = []
@ -340,6 +440,7 @@ def findCourseAnnouncements(course):
return announcement_views return announcement_views
def getDiscussionView(discussion_topic): def getDiscussionView(discussion_topic):
# Create discussion view # Create discussion view
discussion_view = discussionView() discussion_view = discussionView()
@ -397,6 +498,7 @@ def getDiscussionView(discussion_topic):
return discussion_view return discussion_view
def findCourseDiscussions(course): def findCourseDiscussions(course):
discussion_views = [] discussion_views = []
@ -414,6 +516,7 @@ def findCourseDiscussions(course):
return discussion_views return discussion_views
def getCourseView(course): def getCourseView(course):
course_view = courseView() course_view = courseView()
@ -446,37 +549,46 @@ def getCourseView(course):
return course_view return course_view
def exportAllCourseData(course_view): def exportAllCourseData(course_view):
json_str = json.dumps(json.loads(jsonpickle.encode(course_view, unpicklable = False)), indent = 4) json_str = json.dumps(json.loads(jsonpickle.encode(course_view, unpicklable = False)), indent = 4)
course_output_dir = DL_LOCATION + "/" + course_view.term + "/" + course_view.course_code course_output_dir = os.path.join(DL_LOCATION, course_view.term,
course_view.course_code)
# Create directory if not present # Create directory if not present
if not os.path.exists(course_output_dir): if not os.path.exists(course_output_dir):
os.makedirs(course_output_dir) os.makedirs(course_output_dir)
course_output_path = course_output_dir + "/" + course_view.course_code + ".json" course_output_path = os.path.join(course_output_dir,
course_view.course_code + ".json")
with open(course_output_path, "w") as out_file: with open(course_output_path, "w") as out_file:
out_file.write(json_str) out_file.write(json_str)
def main():
if __name__ == "__main__":
print("Welcome to the Canvas Student Data Export Tool\n") print("Welcome to the Canvas Student Data Export Tool\n")
if API_URL == "":
# Canvas API URL # Canvas API URL
print("We will need your organization's Canvas Base URL. This is probably something like https://{schoolName}.instructure.com)") print("We will need your organization's Canvas Base URL. This is "
global API_URL "probably something like https://{schoolName}.instructure.com)")
#API_URL = input("Enter your organization's Canvas Base URL: ") API_URL = input("Enter your organization's Canvas Base URL: ")
if API_KEY == "":
# Canvas API key # Canvas API key
print("\nWe will need a valid API key for your user. You can generate one in Canvas once you are logged in.") print("\nWe will need a valid API key for your user. You can generate "
global API_KEY "one in Canvas once you are logged in.")
#API_KEY = input("Enter a valid API key for your user: ") API_KEY = input("Enter a valid API key for your user: ")
if USER_ID == 0000000:
# My Canvas User ID # My Canvas User ID
print("\nWe will need your Canvas User ID. You can find this by logging in to canvas and then going to this URL in the same browser {yourCanvasBaseUrl}/api/v1/users/self") print("\nWe will need your Canvas User ID. You can find this by "
global USER_ID "logging in to canvas and then going to this URL in the same "
#USER_ID = input("Enter your Canvas User ID: ") "browser {yourCanvasBaseUrl}/api/v1/users/self")
USER_ID = input("Enter your Canvas User ID: ")
print("\nConnecting to canvas\n") print("\nConnecting to canvas\n")
@ -490,11 +602,9 @@ def main():
all_courses_views = [] all_courses_views = []
try:
print("Getting list of all courses\n") print("Getting list of all courses\n")
courses = canvas.get_courses(include="term") courses = canvas.get_courses(include="term")
# I am not authorized to access course 1083083
skip = set(COURSES_TO_SKIP) skip = set(COURSES_TO_SKIP)
for course in courses: for course in courses:
@ -508,30 +618,26 @@ def main():
print(" Downloading all files") print(" Downloading all files")
downloadCourseFiles(course, course_view) downloadCourseFiles(course, course_view)
print(" Downloading submission attachments")
download_submission_attachments(course, course_view)
print(" Getting modules and downloading module files") print(" Getting modules and downloading module files")
course_view.modules = findCourseModules(course, course_view) course_view.modules = findCourseModules(course, course_view)
print(" Exporting all course data") print(" Exporting all course data")
exportAllCourseData(course_view) exportAllCourseData(course_view)
except Exception as e:
print("Skipping entire course that gave the following error:")
print(e)
print("Exporting data from all courses combined as one file: all_output.json") print("Exporting data from all courses combined as one file: "
# Awful hack to make the JSON pretty. Decode it with Python stdlib json module then re-encode with indentation "all_output.json")
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable = False)), indent = 4) # Awful hack to make the JSON pretty. Decode it with Python stdlib json
# module then re-encode with indentation
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views,
unpicklable=False)),
indent=4)
all_output_path = DL_LOCATION + "/all_output.json" all_output_path = os.path.join(DL_LOCATION, "all_output.json")
with open(all_output_path, "w") as out_file: with open(all_output_path, "w") as out_file:
out_file.write(json_str) out_file.write(json_str)
print("\nProcess complete. All canvas data exported!") print("\nProcess complete. All canvas data exported!")
if __name__ == "__main__":
try:
main()
except Exception as e:
print("Exiting due to uncaught exception:")
print(e)
print(traceback.format_exc())