# canvas-student-data-export/stuff/get_canvas.py

# 288 lines
# 13 KiB
# Python

import os
from http.cookiejar import MozillaCookieJar
import dateutil.parser
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from stuff.const import DATE_TEMPLATE, DL_LOCATION, MAX_FOLDER_NAME_SIZE
from stuff.helpers import make_valid_filename, shorten_file_name
from stuff.items import AssignmentView, AttachmentView, DiscussionView, ModuleItemView, ModuleView, PageView, SubmissionView, TopicEntryView, TopicReplyView
def _download_module_file(course, module, module_item, modules_dir):
    """Download the file behind a "File" module item into its module's folder.

    The file is stored under ``<modules_dir>/<module name>/files/`` and is
    skipped when a file with the same name is already present. Download
    failures are reported through ``tqdm.write`` and swallowed so that one
    bad file does not abort the remaining items.

    Args:
        course: Object exposing ``get_file(file_id)``.
        module: Module the item belongs to; its ``name`` picks the folder.
        module_item: Module item whose ``content_id`` identifies the file.
        modules_dir: Root "modules" directory of the course export.
    """
    # If problems arise due to long pathnames, changing module.name to module.id might help
    # A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
    module_name = make_valid_filename(str(module.name))
    module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
    module_dir = os.path.join(modules_dir, module_name, "files")

    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(module_dir, exist_ok=True)

        module_file = course.get_file(str(module_item.content_id))
        module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))

        # Download file if it doesn't already exist
        if not os.path.exists(module_file_path):
            module_file.download(module_file_path)
    except Exception as e:
        tqdm.write(f"Skipping module file download that gave the following error: {e}")


def find_course_modules(course, course_view):
    """Collect a ModuleView for every module of a course, downloading its files.

    Args:
        course: Object exposing ``get_modules()`` and ``get_file()``.
        course_view: Object whose ``term`` and ``name`` select the export
            directory below ``DL_LOCATION``.

    Returns:
        list: One ``ModuleView`` per module that could be processed. Errors
        are printed and skipped rather than raised, so the list may be
        partial or empty.
    """
    modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
    # Create modules directory if not present
    os.makedirs(modules_dir, exist_ok=True)

    module_views = []
    try:
        modules = list(course.get_modules())
        for module in tqdm(modules, desc='Fetching Modules and Downloading Files'):
            module_view = ModuleView()
            module_view.id = module.id if hasattr(module, "id") else ""
            module_view.name = str(module.name) if hasattr(module, "name") else ""

            try:
                for module_item in module.get_module_items():
                    module_item_view = ModuleItemView()
                    module_item_view.id = module_item.id if hasattr(module_item, "id") else 0
                    # NOTE(review): as written this replace() is a no-op; presumably one
                    # argument lost whitespace (double or non-breaking space) - confirm.
                    module_item_view.title = str(module_item.title).replace(' ', ' ') if hasattr(module_item, "title") else ""
                    module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
                    module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else ""
                    module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""

                    if module_item_view.content_type == "File":
                        _download_module_file(course, module, module_item, modules_dir)

                    module_view.items.append(module_item_view)
            except Exception as e:
                # Reached when listing or reading the module's items fails; the
                # module is still recorded with whatever items were collected.
                tqdm.write(f"Skipping module file download that gave the following error: {e}")

            module_views.append(module_view)
    except Exception as e:
        print("Skipping entire module that gave the following error:")
        print(e)

    return module_views
def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
    """Resolve the files linked from an HTML fragment.

    Every ``<a>`` tag marked with ``data-api-returntype="File"`` is followed
    through its ``data-api-endpoint`` URL, using the cookies from
    ``cookie_jar`` for authentication.

    Args:
        html: HTML markup to scan for file links.
        cookie_jar: Cookies copied (name and value only) into the session.

    Returns:
        list[tuple]: ``(display_name, url)`` pairs taken from each endpoint's
        JSON response. Endpoints answering 404 are skipped.

    Raises:
        requests.HTTPError: For any other unsuccessful HTTP status.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Anchors without an endpoint attribute cannot be resolved; skip them
    # instead of failing the whole lookup with a KeyError.
    urls = [
        a.get('data-api-endpoint')
        for a in soup.find_all('a', {'data-api-returntype': 'File'})
        if a.get('data-api-endpoint')
    ]

    extra_files = []
    # The context manager closes the session's connections when done.
    with requests.Session() as s:
        for cookie in cookie_jar:
            s.cookies.set(cookie.name, cookie.value)

        for item in urls:
            r = s.get(item)
            if r.status_code == 404:
                continue
            r.raise_for_status()
            j = r.json()
            extra_files.append((j['display_name'], j['url']))

    return extra_files
def get_course_page_urls(course):
    """Return the ``url`` of every page of a course, as strings.

    Args:
        course: Object exposing ``get_pages()``.

    Returns:
        list[str]: URLs of the pages that have a ``url`` attribute. If listing
        the pages fails the error is printed (unless its ``message`` is
        "Not Found") and the URLs gathered so far are returned.
    """
    page_urls = []
    try:
        pages = list(course.get_pages())
        for page in pages:
            if hasattr(page, "url"):
                page_urls.append(str(page.url))
    except Exception as e:
        # Only some exception types carry a ``message`` attribute; reading it
        # unguarded would raise AttributeError from inside this handler.
        if getattr(e, "message", None) != "Not Found":
            print(f"Skipping page: {e}")
    return page_urls
def find_course_pages(course):
    """Fetch every page of a course and convert it to a PageView.

    Args:
        course: Object exposing ``get_pages()`` and ``get_page(url)``.

    Returns:
        list: ``PageView`` objects for the pages fetched. Always a list: it is
        empty when the course has no pages, and partial when an error
        interrupts the fetch (the error is printed, not raised).
    """
    page_views = []
    try:
        page_urls = get_course_page_urls(course)
        if not page_urls:
            # Return the empty list rather than None so callers can always iterate.
            return page_views

        for url in tqdm(page_urls, desc='Fetching Pages'):
            page = course.get_page(url)

            page_view = PageView()
            page_view.id = page.id if hasattr(page, "id") else 0
            # NOTE(review): as written this replace() is a no-op; presumably one
            # argument lost whitespace (double or non-breaking space) - confirm.
            page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else ""
            page_view.body = str(page.body) if hasattr(page, "body") else ""

            if hasattr(page, "created_at"):
                page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE)
            else:
                page_view.created_date = ''

            if hasattr(page, "updated_at"):
                page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE)
            else:
                page_view.last_updated_date = ''

            page_views.append(page_view)
    except Exception as e:
        print("Skipping page download that gave the following error:")
        print(e)

    return page_views
def _build_submission_view(submission, assignment):
    """Copy the fields of one submission (and its attachments) into a SubmissionView."""
    sub_view = SubmissionView()
    sub_view.id = submission.id if hasattr(submission, "id") else 0
    sub_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
    sub_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
    sub_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
    sub_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
    # A missing or None attempt is recorded as 0.
    sub_view.attempt = submission.attempt if hasattr(submission, "attempt") and submission.attempt is not None else 0
    sub_view.user_id = str(submission.user_id) if hasattr(submission, "user_id") else ""
    sub_view.preview_url = str(submission.preview_url) if hasattr(submission, "preview_url") else ""
    sub_view.ext_url = str(submission.url) if hasattr(submission, "url") else ""

    if not hasattr(submission, "attachments"):
        print('No attachments')
    else:
        for attachment in submission.attachments:
            attach_view = AttachmentView()
            attach_view.url = attachment.url
            attach_view.id = attachment.id
            attach_view.filename = attachment.filename
            sub_view.attachments.append(attach_view)

    return sub_view


def find_course_assignments(course, user_id):
    """Collect an AssignmentView, with the user's submission, per assignment.

    Args:
        course: Object exposing ``get_assignments()``.
        user_id: Identifier passed to ``assignment.get_submission`` so that
            only this user's submission is retrieved.

    Returns:
        list: One ``AssignmentView`` per assignment, each holding a single
        ``SubmissionView``.

    Raises:
        Exception: Errors from fetching assignments or submissions are not
            handled here and propagate to the caller.
    """
    assignment_views = []

    # Get all assignments
    assignments = list(course.get_assignments())
    for assignment in tqdm(assignments, desc='Fetching Assignments'):
        assignment_view = AssignmentView()
        assignment_view.id = assignment.id if hasattr(assignment, "id") else ""
        # NOTE(review): as written this replace() is a no-op; presumably one
        # argument lost whitespace (double or non-breaking space) - confirm.
        assignment_view.title = make_valid_filename(str(assignment.name).replace(' ', ' ')) if hasattr(assignment, "name") else ""
        assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else ""
        assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "created_at_date") else ""
        assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "due_at_date") else ""
        assignment_view.html_url = assignment.html_url if hasattr(assignment, "html_url") else ""
        assignment_view.ext_url = str(assignment.url) if hasattr(assignment, "url") else ""
        assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if hasattr(assignment, "submissions_download_url") else ""

        # Download submission for this user only
        submission = assignment.get_submission(user_id)
        assignment_view.submissions.append(_build_submission_view(submission, assignment))

        assignment_views.append(assignment_view)

    return assignment_views
def find_course_announcements(course):
    """Return a discussion view for each announcement of the course.

    Announcements are requested through ``course.get_discussion_topics`` with
    ``only_announcements=True`` and converted by ``get_discussion_view``.
    Errors are not handled here and propagate to the caller.
    """
    topics = list(course.get_discussion_topics(only_announcements=True))
    return [
        get_discussion_view(topic)
        for topic in tqdm(topics, desc='Fetching Announcements')
    ]
def _format_posted_date(item):
    """Return ``item.created_at_date`` formatted for display, or "" if absent."""
    if hasattr(item, "created_at_date"):
        return item.created_at_date.strftime("%B %d, %Y %I:%M %p")
    return ""


def get_discussion_view(discussion_topic):
    """Build a DiscussionView, including entries and replies, from a topic.

    Args:
        discussion_topic: Object describing the topic. When it reports a
            positive ``discussion_subentry_count``, its ``get_topic_entries()``
            and each entry's ``get_replies()`` are walked as well.

    Returns:
        DiscussionView: The populated view. Errors raised while enumerating
        entries or replies are printed and leave the view partially filled.
        ``amount_pages`` is the number of 50-entry pages needed for the
        entries that were seen, and is never less than 1.
    """
    # Create discussion view
    discussion_view = DiscussionView()
    discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
    # NOTE(review): as written this replace() is a no-op; presumably one
    # argument lost whitespace (double or non-breaking space) - confirm.
    discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else ""
    discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
    discussion_view.posted_date = _format_posted_date(discussion_topic)
    discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
    discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else ""

    # Keeps track of how many topic_entries there are.
    topic_entries_counter = 0

    # Topic entries. A missing or None count is treated as "no entries".
    if (getattr(discussion_topic, "discussion_subentry_count", 0) or 0) > 0:
        # Need to get replies to entries recursively?
        discussion_topic_entries = discussion_topic.get_topic_entries()
        try:
            for topic_entry in discussion_topic_entries:
                topic_entries_counter += 1

                # Create new discussion view for the topic_entry
                topic_entry_view = TopicEntryView()
                topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
                topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
                topic_entry_view.posted_date = _format_posted_date(topic_entry)
                topic_entry_view.body = str(topic_entry.message) if hasattr(topic_entry, "message") else ""

                # Get this topic's replies
                topic_entry_replies = topic_entry.get_replies()
                try:
                    for topic_reply in topic_entry_replies:
                        # Create new topic reply view
                        topic_reply_view = TopicReplyView()
                        topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
                        topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
                        topic_reply_view.posted_date = _format_posted_date(topic_reply)
                        topic_reply_view.message = str(topic_reply.message) if hasattr(topic_reply, "message") else ""

                        topic_entry_view.topic_replies.append(topic_reply_view)
                except Exception as e:
                    print("Tried to enumerate discussion topic entry replies but received the following error:")
                    print(e)

                discussion_view.topic_entries.append(topic_entry_view)
        except Exception as e:
            print("Tried to enumerate discussion topic entries but received the following error:")
            print(e)

    # Amount of pages.
    # Typically 50 topic entries are stored on a page before it creates another page.
    # Ceiling division, so an exact multiple of 50 does not count an extra,
    # empty page; a topic without entries still has one page.
    entries_per_page = 50
    discussion_view.amount_pages = max(1, -(-topic_entries_counter // entries_per_page))

    return discussion_view
def find_course_discussions(course):
    """Return a discussion view for each discussion topic of the course.

    Topics come from ``course.get_discussion_topics()`` and are converted by
    ``get_discussion_view``. Errors are not handled here and propagate to the
    caller.
    """
    topics = list(course.get_discussion_topics())
    return [
        get_discussion_view(topic)
        for topic in tqdm(topics, desc='Fetching Discussions')
    ]