Added support to download canvas webpages (#7)
* fixed bug with loading cred yaml and added to gitignore * added single file dependency * Begun adding support for singlefile, added to classes and created new input. Also cleaned up code a bit. * Created singlefile.py to download webpages. * added function to download course home page. * Added course home page downloading. * Added downloading of assignment pages. * Added downloading for the rest of the information * Update README.md * Update README.md * Added checks to prevent redownloading of a page. * Fixed bug with trailing periods, Fixed folder naming bug, Fixed bug where pages were being downloaded to cookies folder. * Fixed some small issues, fixed comment and changed module folder name back to title * Update README.md * Fixed makeValidFilename back to makeValidFolderPath resolves https://github.com/davekats/canvas-student-data-export/pull/7#pullrequestreview-852487782
This commit is contained in:
parent
b82cccc54a
commit
e4a9bc6597
|
@ -1,2 +1,7 @@
|
||||||
.vscode
|
.vscode
|
||||||
output/
|
__pycache__/
|
||||||
|
node_modules/
|
||||||
|
output/
|
||||||
|
|
||||||
|
credentials.yaml
|
||||||
|
cookies.txt
|
||||||
|
|
|
@ -9,6 +9,8 @@ The tool exports all of the following data:
|
||||||
- Course Pages
|
- Course Pages
|
||||||
- Course Files
|
- Course Files
|
||||||
- Course Modules
|
- Course Modules
|
||||||
|
- SingleFile HTML of Assignments, Announcements, Discussions, Modules
|
||||||
|
|
||||||
|
|
||||||
The tool will export your data in JSON format, and will organize it nicely into folders named for every term of every year.
|
The tool will export your data in JSON format, and will organize it nicely into folders named for every term of every year.
|
||||||
Example:
|
Example:
|
||||||
|
@ -44,8 +46,12 @@ To run the program, you will need the following dependencies:
|
||||||
`pip install python-dateutil`
|
`pip install python-dateutil`
|
||||||
`pip install PyYAML`
|
`pip install PyYAML`
|
||||||
|
|
||||||
|
`npm i github:gildas-lormeau/SingleFile`
|
||||||
|
|
||||||
You can install these dependencies using
|
You can install these dependencies using
|
||||||
`pip install -r requirements.txt`
|
`pip install -r requirements.txt`
|
||||||
|
AND
|
||||||
|
`npm i`
|
||||||
|
|
||||||
Then run from the command line:
|
Then run from the command line:
|
||||||
`python export.py`
|
`python export.py`
|
||||||
|
@ -55,9 +61,12 @@ These are the configuration parameters for the program:
|
||||||
- Canvas API URL - this is the URL of your institution, for example `https://example.instructure.com`
|
- Canvas API URL - this is the URL of your institution, for example `https://example.instructure.com`
|
||||||
- Canvas API key - this can be created by going to Canvas and navigating to `Account` > `Settings` > `Approved Integrations` > `New Access Token`
|
- Canvas API key - this can be created by going to Canvas and navigating to `Account` > `Settings` > `Approved Integrations` > `New Access Token`
|
||||||
- Canvas User ID - this can be found at `https://example.instructure.com/api/v1/users/self` in the `id` field
|
- Canvas User ID - this can be found at `https://example.instructure.com/api/v1/users/self` in the `id` field
|
||||||
|
- Path to Cookies File - the file needs to be in Netscape format; you can get your cookies via a tool like "Get Cookies.txt" by Rahul Shaw. This can also be left blank if HTML images of the pages are unwanted.
|
||||||
- Directory in which to download course information to (will be created if not present)
|
- Directory in which to download course information to (will be created if not present)
|
||||||
- List of Course IDs that should be skipped
|
- List of Course IDs that should be skipped
|
||||||
|
|
||||||
|
If SingleFile fails to find your browser, you can set a path to it in singlefile.py. Additional SingleFile arguments can also be set there.
|
||||||
|
|
||||||
### Loading credentials from a file
|
### Loading credentials from a file
|
||||||
To avoid manually entering credentials every time you run the program, you can create a `credentials.yaml` file in the same directory as the script that has the following fields:
|
To avoid manually entering credentials every time you run the program, you can create a `credentials.yaml` file in the same directory as the script that has the following fields:
|
||||||
|
|
||||||
|
@ -65,6 +74,7 @@ To avoid manually entering credentials every time you run the program, you can c
|
||||||
API_URL: < URL of your institution >
|
API_URL: < URL of your institution >
|
||||||
API_KEY: < API Key from Canvas >
|
API_KEY: < API Key from Canvas >
|
||||||
USER_ID: < User ID from Canvas >
|
USER_ID: < User ID from Canvas >
|
||||||
|
COOKIES_PATH: < Path to cookies file >
|
||||||
```
|
```
|
||||||
|
|
||||||
You can then run the script as normal:
|
You can then run the script as normal:
|
||||||
|
|
461
export.py
461
export.py
|
@ -5,8 +5,10 @@ import string
|
||||||
|
|
||||||
# external
|
# external
|
||||||
from canvasapi import Canvas
|
from canvasapi import Canvas
|
||||||
from canvasapi.exceptions import ResourceDoesNotExist
|
from canvasapi.exceptions import ResourceDoesNotExist, Unauthorized
|
||||||
from canvasapi.exceptions import Unauthorized
|
|
||||||
|
from singlefile import download_page
|
||||||
|
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
import jsonpickle
|
import jsonpickle
|
||||||
import requests
|
import requests
|
||||||
|
@ -14,7 +16,7 @@ import yaml
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open("credentials.yaml", 'r') as f:
|
with open("credentials.yaml", 'r') as f:
|
||||||
credentials = yaml.load(f)
|
credentials = yaml.full_load(f)
|
||||||
except OSError:
|
except OSError:
|
||||||
# Canvas API URL
|
# Canvas API URL
|
||||||
API_URL = ""
|
API_URL = ""
|
||||||
|
@ -22,10 +24,13 @@ except OSError:
|
||||||
API_KEY = ""
|
API_KEY = ""
|
||||||
# My Canvas User ID
|
# My Canvas User ID
|
||||||
USER_ID = 0000000
|
USER_ID = 0000000
|
||||||
|
# Browser Cookies File
|
||||||
|
COOKIES_PATH = ""
|
||||||
else:
|
else:
|
||||||
API_URL = credentials["API_URL"]
|
API_URL = credentials["API_URL"]
|
||||||
API_KEY = credentials["API_KEY"]
|
API_KEY = credentials["API_KEY"]
|
||||||
USER_ID = credentials["USER_ID"]
|
USER_ID = credentials["USER_ID"]
|
||||||
|
COOKIES_PATH = credentials["COOKIES_PATH"]
|
||||||
|
|
||||||
# Directory in which to download course information to (will be created if not
|
# Directory in which to download course information to (will be created if not
|
||||||
# present)
|
# present)
|
||||||
|
@ -37,12 +42,18 @@ DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
|
||||||
|
|
||||||
|
|
||||||
class moduleItemView():
|
class moduleItemView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
title = ""
|
title = ""
|
||||||
content_type = ""
|
content_type = ""
|
||||||
|
|
||||||
|
url = ""
|
||||||
external_url = ""
|
external_url = ""
|
||||||
|
|
||||||
|
|
||||||
class moduleView():
|
class moduleView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
name = ""
|
name = ""
|
||||||
items = []
|
items = []
|
||||||
|
|
||||||
|
@ -51,6 +62,8 @@ class moduleView():
|
||||||
|
|
||||||
|
|
||||||
class pageView():
|
class pageView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
title = ""
|
title = ""
|
||||||
body = ""
|
body = ""
|
||||||
created_date = ""
|
created_date = ""
|
||||||
|
@ -58,12 +71,16 @@ class pageView():
|
||||||
|
|
||||||
|
|
||||||
class topicReplyView():
|
class topicReplyView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
author = ""
|
author = ""
|
||||||
posted_date = ""
|
posted_date = ""
|
||||||
body = ""
|
body = ""
|
||||||
|
|
||||||
|
|
||||||
class topicEntryView():
|
class topicEntryView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
author = ""
|
author = ""
|
||||||
posted_date = ""
|
posted_date = ""
|
||||||
body = ""
|
body = ""
|
||||||
|
@ -74,70 +91,82 @@ class topicEntryView():
|
||||||
|
|
||||||
|
|
||||||
class discussionView():
|
class discussionView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
title = ""
|
title = ""
|
||||||
author = ""
|
author = ""
|
||||||
posted_date = ""
|
posted_date = ""
|
||||||
body = ""
|
body = ""
|
||||||
topic_entries = []
|
topic_entries = []
|
||||||
|
|
||||||
|
url = ""
|
||||||
|
amount_pages = 0
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.topic_entries = []
|
self.topic_entries = []
|
||||||
|
|
||||||
|
|
||||||
class submissionView():
|
class submissionView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
attachments = []
|
attachments = []
|
||||||
grade = ""
|
grade = ""
|
||||||
raw_score = ""
|
raw_score = ""
|
||||||
submission_comments = ""
|
submission_comments = ""
|
||||||
total_possible_points = ""
|
total_possible_points = ""
|
||||||
|
attempt = 0
|
||||||
user_id = "no-id"
|
user_id = "no-id"
|
||||||
|
|
||||||
|
preview_url = ""
|
||||||
|
ext_url = ""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.attachments = []
|
self.attachments = []
|
||||||
self.grade = ""
|
|
||||||
self.raw_score = ""
|
|
||||||
self.submission_comments = ""
|
|
||||||
self.total_possible_points = ""
|
|
||||||
self.user_id = None # integer
|
|
||||||
|
|
||||||
|
|
||||||
class attachmentView():
|
class attachmentView():
|
||||||
filename = ""
|
|
||||||
id = 0
|
id = 0
|
||||||
|
|
||||||
|
filename = ""
|
||||||
url = ""
|
url = ""
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.filename = ""
|
|
||||||
self.id = 0
|
|
||||||
self.url = ""
|
|
||||||
|
|
||||||
|
|
||||||
class assignmentView():
|
class assignmentView():
|
||||||
|
id = 0
|
||||||
|
|
||||||
title = ""
|
title = ""
|
||||||
description = ""
|
description = ""
|
||||||
assigned_date = ""
|
assigned_date = ""
|
||||||
due_date = ""
|
due_date = ""
|
||||||
submissions = []
|
submissions = []
|
||||||
|
|
||||||
|
html_url = ""
|
||||||
|
ext_url = ""
|
||||||
|
updated_url = ""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.submissions = []
|
self.submissions = []
|
||||||
|
|
||||||
|
|
||||||
class courseView():
|
class courseView():
|
||||||
|
course_id = 0
|
||||||
|
|
||||||
term = ""
|
term = ""
|
||||||
course_code = ""
|
course_code = ""
|
||||||
name = ""
|
name = ""
|
||||||
assignments = []
|
assignments = []
|
||||||
announcements = []
|
announcements = []
|
||||||
discussions = []
|
discussions = []
|
||||||
|
modules = []
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.assignments = []
|
self.assignments = []
|
||||||
self.announcements = []
|
self.announcements = []
|
||||||
self.discussions = []
|
self.discussions = []
|
||||||
|
self.modules = []
|
||||||
|
|
||||||
def makeValidFilename(input_str):
|
def makeValidFilename(input_str):
|
||||||
|
if(not input_str):
|
||||||
|
return input_str
|
||||||
|
|
||||||
# Remove invalid characters
|
# Remove invalid characters
|
||||||
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
||||||
input_str = input_str.replace("+"," ") # Canvas default for spaces
|
input_str = input_str.replace("+"," ") # Canvas default for spaces
|
||||||
|
@ -148,6 +177,12 @@ def makeValidFilename(input_str):
|
||||||
# Remove leading and trailing whitespace
|
# Remove leading and trailing whitespace
|
||||||
input_str = input_str.lstrip().rstrip()
|
input_str = input_str.lstrip().rstrip()
|
||||||
|
|
||||||
|
# Remove trailing periods
|
||||||
|
input_str = input_str.rstrip(".")
|
||||||
|
|
||||||
|
##Splits strings to prevent extremely long names
|
||||||
|
#input_str=input_str[:40]
|
||||||
|
|
||||||
return input_str
|
return input_str
|
||||||
|
|
||||||
def makeValidFolderPath(input_str):
|
def makeValidFolderPath(input_str):
|
||||||
|
@ -160,11 +195,18 @@ def makeValidFolderPath(input_str):
|
||||||
# Remove leading and trailing whitespace, separators
|
# Remove leading and trailing whitespace, separators
|
||||||
input_str = input_str.lstrip().rstrip().strip("/").strip("\\")
|
input_str = input_str.lstrip().rstrip().strip("/").strip("\\")
|
||||||
|
|
||||||
|
# Remove trailing periods
|
||||||
|
input_str = input_str.rstrip(".")
|
||||||
|
|
||||||
# Replace path separators with OS default
|
# Replace path separators with OS default
|
||||||
input_str=input_str.replace("/",os.sep)
|
input_str=input_str.replace("/",os.sep)
|
||||||
|
|
||||||
|
##Splits strings to prevent extremely long names
|
||||||
|
#input_str=input_str[:40]
|
||||||
|
|
||||||
return input_str
|
return input_str
|
||||||
|
|
||||||
|
|
||||||
def findCourseModules(course, course_view):
|
def findCourseModules(course, course_view):
|
||||||
modules_dir = os.path.join(DL_LOCATION, course_view.term,
|
modules_dir = os.path.join(DL_LOCATION, course_view.term,
|
||||||
course_view.course_code, "modules")
|
course_view.course_code, "modules")
|
||||||
|
@ -181,6 +223,9 @@ def findCourseModules(course, course_view):
|
||||||
for module in modules:
|
for module in modules:
|
||||||
module_view = moduleView()
|
module_view = moduleView()
|
||||||
|
|
||||||
|
# ID
|
||||||
|
module_view.id = module.id if hasattr(module, "id") else ""
|
||||||
|
|
||||||
# Name
|
# Name
|
||||||
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
||||||
|
|
||||||
|
@ -191,17 +236,23 @@ def findCourseModules(course, course_view):
|
||||||
for module_item in module_items:
|
for module_item in module_items:
|
||||||
module_item_view = moduleItemView()
|
module_item_view = moduleItemView()
|
||||||
|
|
||||||
|
# ID
|
||||||
|
module_item_view.id = module_item.id if hasattr(module_item, "id") else 0
|
||||||
|
|
||||||
# Title
|
# Title
|
||||||
module_item_view.title = str(module_item.title) if hasattr(module_item, "title") else ""
|
module_item_view.title = str(module_item.title) if hasattr(module_item, "title") else ""
|
||||||
|
|
||||||
# Type
|
# Type
|
||||||
module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
|
module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
|
||||||
|
|
||||||
|
# URL
|
||||||
|
module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else ""
|
||||||
# External URL
|
# External URL
|
||||||
module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
|
module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
|
||||||
|
|
||||||
if module_item_view.content_type == "File":
|
if module_item_view.content_type == "File":
|
||||||
module_dir = modules_dir + "/" + makeValidFilename(str(module.name))
|
# If problems arise due to long pathnames, changing module.name to module.id might help
|
||||||
|
# A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
|
||||||
|
module_dir = os.path.join(modules_dir, makeValidFilename(str(module.name)), "files")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Create directory for current module if not present
|
# Create directory for current module if not present
|
||||||
|
@ -212,7 +263,7 @@ def findCourseModules(course, course_view):
|
||||||
module_file = course.get_file(str(module_item.content_id))
|
module_file = course.get_file(str(module_item.content_id))
|
||||||
|
|
||||||
# Create path for module file download
|
# Create path for module file download
|
||||||
module_file_path = module_dir + "/" + makeValidFilename(str(module_file.display_name))
|
module_file_path = os.path.join(module_dir, makeValidFilename(str(module_file.display_name)))
|
||||||
|
|
||||||
# Download file if it doesn't already exist
|
# Download file if it doesn't already exist
|
||||||
if not os.path.exists(module_file_path):
|
if not os.path.exists(module_file_path):
|
||||||
|
@ -250,13 +301,12 @@ def downloadCourseFiles(course, course_view):
|
||||||
for file in files:
|
for file in files:
|
||||||
file_folder=course.get_folder(file.folder_id)
|
file_folder=course.get_folder(file.folder_id)
|
||||||
|
|
||||||
folder_dl_dir=os.path.join(dl_dir,makeValidFolderPath(file_folder.full_name))
|
folder_dl_dir=os.path.join(dl_dir, makeValidFolderPath(file_folder.full_name))
|
||||||
|
|
||||||
if not os.path.exists(folder_dl_dir):
|
if not os.path.exists(folder_dl_dir):
|
||||||
os.makedirs(folder_dl_dir)
|
os.makedirs(folder_dl_dir)
|
||||||
|
|
||||||
dl_path = os.path.join(folder_dl_dir,
|
dl_path = os.path.join(folder_dl_dir, makeValidFilename(str(file.display_name)))
|
||||||
makeValidFilename(str(file.display_name)))
|
|
||||||
|
|
||||||
# Download file if it doesn't already exist
|
# Download file if it doesn't already exist
|
||||||
if not os.path.exists(dl_path):
|
if not os.path.exists(dl_path):
|
||||||
|
@ -324,22 +374,19 @@ def findCoursePages(course):
|
||||||
|
|
||||||
page_view = pageView()
|
page_view = pageView()
|
||||||
|
|
||||||
|
# ID
|
||||||
|
page_view.id = page.id if hasattr(page, "id") else 0
|
||||||
|
|
||||||
# Title
|
# Title
|
||||||
page_view.title = str(page.title) if hasattr(page, "title") else ""
|
page_view.title = str(page.title) if hasattr(page, "title") else ""
|
||||||
# Body
|
# Body
|
||||||
page_view.body = str(page.body) if hasattr(page, "body") else ""
|
page_view.body = str(page.body) if hasattr(page, "body") else ""
|
||||||
# Date created
|
# Date created
|
||||||
if hasattr(page, "created_at"):
|
page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE) if \
|
||||||
page_view.created_date = dateutil.parser.parse(
|
hasattr(page, "created_at") else ""
|
||||||
page.created_at).strftime(DATE_TEMPLATE)
|
|
||||||
else:
|
|
||||||
page_view.created_date = ""
|
|
||||||
# Date last updated
|
# Date last updated
|
||||||
if hasattr(page, "updated_at"):
|
page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE) if \
|
||||||
page_view.last_updated_date = dateutil.parser.parse(
|
hasattr(page, "updated_at") else ""
|
||||||
page.updated_at).strftime(DATE_TEMPLATE)
|
|
||||||
else:
|
|
||||||
page_view.last_updated_date = ""
|
|
||||||
|
|
||||||
page_views.append(page_view)
|
page_views.append(page_view)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -360,26 +407,33 @@ def findCourseAssignments(course):
|
||||||
# Create a new assignment view
|
# Create a new assignment view
|
||||||
assignment_view = assignmentView()
|
assignment_view = assignmentView()
|
||||||
|
|
||||||
|
#ID
|
||||||
|
assignment_view.id = assignment.id if \
|
||||||
|
hasattr(assignment, "id") else ""
|
||||||
|
|
||||||
# Title
|
# Title
|
||||||
if hasattr(assignment, "name"):
|
assignment_view.title = makeValidFilename(str(assignment.name)) if \
|
||||||
assignment_view.title = makeValidFilename(str(assignment.name))
|
hasattr(assignment, "name") else ""
|
||||||
else:
|
|
||||||
assignment_view.title = ""
|
|
||||||
# Description
|
# Description
|
||||||
if hasattr(assignment, "description"):
|
assignment_view.description = str(assignment.description) if \
|
||||||
assignment_view.description = str(assignment.description)
|
hasattr(assignment, "description") else ""
|
||||||
else:
|
|
||||||
assignment_view.description = ""
|
|
||||||
# Assigned date
|
# Assigned date
|
||||||
if hasattr(assignment, "created_at_date"):
|
assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if \
|
||||||
assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE)
|
hasattr(assignment, "created_at_date") else ""
|
||||||
else:
|
|
||||||
assignment_view.assigned_date = ""
|
|
||||||
# Due date
|
# Due date
|
||||||
if hasattr(assignment, "due_at_date"):
|
assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if \
|
||||||
assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE)
|
hasattr(assignment, "due_at_date") else ""
|
||||||
else:
|
|
||||||
assignment_view.due_date = ""
|
# HTML Url
|
||||||
|
assignment_view.html_url = assignment.html_url if \
|
||||||
|
hasattr(assignment, "html_url") else ""
|
||||||
|
# External URL
|
||||||
|
assignment_view.ext_url = str(assignment.url) if \
|
||||||
|
hasattr(assignment, "url") else ""
|
||||||
|
# Other URL (more up-to-date)
|
||||||
|
assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if \
|
||||||
|
hasattr(assignment, "submissions_download_url") else ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
try: # Download all submissions for entire class
|
try: # Download all submissions for entire class
|
||||||
|
@ -401,31 +455,35 @@ def findCourseAssignments(course):
|
||||||
|
|
||||||
sub_view = submissionView()
|
sub_view = submissionView()
|
||||||
|
|
||||||
|
# Submission ID
|
||||||
|
sub_view.id = submission.id if \
|
||||||
|
hasattr(submission, "id") else 0
|
||||||
|
|
||||||
# My grade
|
# My grade
|
||||||
if hasattr(submission, "grade"):
|
sub_view.grade = str(submission.grade) if \
|
||||||
sub_view.grade = str(submission.grade)
|
hasattr(submission, "grade") else ""
|
||||||
else:
|
|
||||||
sub_view.grade = ""
|
|
||||||
# My raw score
|
# My raw score
|
||||||
if hasattr(submission, "score"):
|
sub_view.raw_score = str(submission.score) if \
|
||||||
sub_view.raw_score = str(submission.score)
|
hasattr(submission, "score") else ""
|
||||||
else:
|
|
||||||
sub_view.raw_score = ""
|
|
||||||
# Total possible score
|
# Total possible score
|
||||||
if hasattr(assignment, "points_possible"):
|
sub_view.total_possible_points = str(assignment.points_possible) if \
|
||||||
sub_view.total_possible_points = str(assignment.points_possible)
|
hasattr(assignment, "points_possible") else ""
|
||||||
else:
|
|
||||||
sub_view.total_possible_points = ""
|
|
||||||
# Submission comments
|
# Submission comments
|
||||||
if hasattr(submission, "submission_comments"):
|
sub_view.submission_comments = str(submission.submission_comments) if \
|
||||||
sub_view.submission_comments = str(submission.submission_comments)
|
hasattr(submission, "submission_comments") else ""
|
||||||
else:
|
# Attempt
|
||||||
sub_view.submission_comments = ""
|
sub_view.attempt = submission.attempt if \
|
||||||
|
hasattr(submission, "attempt") else 0
|
||||||
if hasattr(submission, "user_id"):
|
# User ID
|
||||||
sub_view.user_id = str(submission.user_id)
|
sub_view.user_id = str(submission.user_id) if \
|
||||||
else:
|
hasattr(submission, "user_id") else ""
|
||||||
sub_view.user_id = "no-id"
|
|
||||||
|
# Submission URL
|
||||||
|
sub_view.preview_url = str(submission.preview_url) if \
|
||||||
|
hasattr(submission, "preview_url") else ""
|
||||||
|
# External URL
|
||||||
|
sub_view.ext_url = str(submission.url) if \
|
||||||
|
hasattr(submission, "url") else ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
submission.attachments
|
submission.attachments
|
||||||
|
@ -472,6 +530,9 @@ def getDiscussionView(discussion_topic):
|
||||||
# Create discussion view
|
# Create discussion view
|
||||||
discussion_view = discussionView()
|
discussion_view = discussionView()
|
||||||
|
|
||||||
|
#ID
|
||||||
|
discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
|
||||||
|
|
||||||
# Title
|
# Title
|
||||||
discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else ""
|
discussion_view.title = str(discussion_topic.title) if hasattr(discussion_topic, "title") else ""
|
||||||
# Author
|
# Author
|
||||||
|
@ -480,6 +541,13 @@ def getDiscussionView(discussion_topic):
|
||||||
discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else ""
|
discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else ""
|
||||||
# Body
|
# Body
|
||||||
discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
|
discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
|
||||||
|
|
||||||
|
# URL
|
||||||
|
discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else ""
|
||||||
|
|
||||||
|
# Keeps track of how many topic_entries there are.
|
||||||
|
topic_entries_counter = 0
|
||||||
|
|
||||||
# Topic entries
|
# Topic entries
|
||||||
if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
|
if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
|
||||||
# Need to get replies to entries recursively?
|
# Need to get replies to entries recursively?
|
||||||
|
@ -488,9 +556,13 @@ def getDiscussionView(discussion_topic):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for topic_entry in discussion_topic_entries:
|
for topic_entry in discussion_topic_entries:
|
||||||
|
topic_entries_counter += 1
|
||||||
|
|
||||||
# Create new discussion view for the topic_entry
|
# Create new discussion view for the topic_entry
|
||||||
topic_entry_view = topicEntryView()
|
topic_entry_view = topicEntryView()
|
||||||
|
|
||||||
|
# ID
|
||||||
|
topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
|
||||||
# Author
|
# Author
|
||||||
topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
|
topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
|
||||||
# Posted date
|
# Posted date
|
||||||
|
@ -505,6 +577,9 @@ def getDiscussionView(discussion_topic):
|
||||||
for topic_reply in topic_entry_replies:
|
for topic_reply in topic_entry_replies:
|
||||||
# Create new topic reply view
|
# Create new topic reply view
|
||||||
topic_reply_view = topicReplyView()
|
topic_reply_view = topicReplyView()
|
||||||
|
|
||||||
|
# ID
|
||||||
|
topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
|
||||||
|
|
||||||
# Author
|
# Author
|
||||||
topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
|
topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
|
||||||
|
@ -522,7 +597,10 @@ def getDiscussionView(discussion_topic):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Tried to enumerate discussion topic entries but received the following error:")
|
print("Tried to enumerate discussion topic entries but received the following error:")
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
|
# Amount of pages
|
||||||
|
discussion_view.amount_pages = int(topic_entries_counter/50) + 1 # Typically 50 topic entries are stored on a page before it creates another page.
|
||||||
|
|
||||||
return discussion_view
|
return discussion_view
|
||||||
|
|
||||||
|
|
||||||
|
@ -547,6 +625,9 @@ def findCourseDiscussions(course):
|
||||||
def getCourseView(course):
|
def getCourseView(course):
|
||||||
course_view = courseView()
|
course_view = courseView()
|
||||||
|
|
||||||
|
# Course ID
|
||||||
|
course_view.course_id = course.id if hasattr(course, "id") else 0
|
||||||
|
|
||||||
# Course term
|
# Course term
|
||||||
course_view.term = makeValidFilename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
course_view.term = makeValidFilename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
|
||||||
|
|
||||||
|
@ -593,6 +674,211 @@ def exportAllCourseData(course_view):
|
||||||
with open(course_output_path, "w") as out_file:
|
with open(course_output_path, "w") as out_file:
|
||||||
out_file.write(json_str)
|
out_file.write(json_str)
|
||||||
|
|
||||||
|
def downloadCourseHTML(api_url, cookies_path):
    """Save the Canvas course list page as course_list.html in DL_LOCATION.

    Does nothing when cookies_path is an empty string. DL_LOCATION is created
    if it does not exist, and the page is only fetched when course_list.html
    is not already present there.
    """
    # No cookies file configured: skip the download entirely.
    if cookies_path == "":
        return

    target_dir = DL_LOCATION
    page_name = "course_list.html"

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Avoid re-downloading a page saved by a previous run.
    if os.path.exists(os.path.join(target_dir, page_name)):
        return

    download_page(api_url + "/courses/", cookies_path, target_dir, page_name)
|
||||||
|
|
||||||
|
def downloadCourseHomePageHTML(api_url, course_view, cookies_path):
    """Save a course's home page as homepage.html in the course's folder.

    Does nothing when cookies_path is an empty string. The folder is
    DL_LOCATION/<course_view.term>/<course_view.course_code>; it is created if
    missing, and the page is only fetched when homepage.html is not already
    present there.
    """
    # No cookies file configured: skip the download entirely.
    if cookies_path == "":
        return

    page_name = "homepage.html"
    course_dir = os.path.join(DL_LOCATION, course_view.term,
                              course_view.course_code)

    if not os.path.exists(course_dir):
        os.makedirs(course_dir)

    # Avoid re-downloading a page saved by a previous run.
    if os.path.exists(os.path.join(course_dir, page_name)):
        return

    course_url = api_url + "/courses/" + str(course_view.course_id)
    download_page(course_url, cookies_path, course_dir, page_name)
|
||||||
|
|
||||||
|
def downloadAssignmentPages(api_url, course_view, cookies_path):
    """Save HTML copies of a course's assignment and submission pages.

    Does nothing when cookies_path is an empty string or the course has no
    assignments. Everything is written below
    DL_LOCATION/<term>/<course_code>/assignments:

    - assignment_list.html: the course's assignment list page.
    - <assignment title>/assignment.html: each assignment's html_url.
    - <assignment title>/submission.html: each submission's preview_url
      (placed in a <user_id> sub-folder when the assignment does not have
      exactly one submission).
    - <assignment title>/attempts/attempt_<n>.html: one page per attempt when
      a submission records more than one attempt.

    A page is only downloaded when its target file does not exist yet.
    """
    if cookies_path == "" or len(course_view.assignments) == 0:
        return

    base_assign_dir = os.path.join(DL_LOCATION, course_view.term,
                                   course_view.course_code, "assignments")

    # Create directory if not present
    if not os.path.exists(base_assign_dir):
        os.makedirs(base_assign_dir)

    assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html")

    # Download assignment list (there's a chance this might be the course
    # homepage if the course has the assignments page disabled)
    if not os.path.exists(assignment_list_path):
        download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/",
                      cookies_path, base_assign_dir, "assignment_list.html")

    for assignment in course_view.assignments:
        assign_dir = os.path.join(base_assign_dir, makeValidFilename(assignment.title))

        # Download an html image of each assignment (includes assignment
        # instructions and other stuff). Currently, this will only download
        # the main assignment page and not external pages, because these
        # external pages are given in a json format. Saving these would
        # require a lot more work than normal.
        if assignment.html_url != "":
            if not os.path.exists(assign_dir):
                os.makedirs(assign_dir)

            assignment_page_path = os.path.join(assign_dir, "assignment.html")

            # Download assignment page, this usually has instructions etc.
            if not os.path.exists(assignment_page_path):
                download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")

        for submission in assignment.submissions:
            submission_dir = assign_dir

            # If there's more than 1 submission, add unique id to download dir
            if len(assignment.submissions) != 1:
                submission_dir = os.path.join(assign_dir, str(submission.user_id))

            if submission.preview_url != "":
                if not os.path.exists(submission_dir):
                    os.makedirs(submission_dir)

                submission_page_path = os.path.join(submission_dir, "submission.html")

                # Download submission url, this is typically a more focused page
                if not os.path.exists(submission_page_path):
                    download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")

            # A missing attempt count (None) is treated as zero attempts so
            # that range() below never receives None.
            attempt_count = submission.attempt or 0

            # Only when there's more than 1 attempt, save each attempt in the
            # attempts folder. Requiring > 1 (rather than != 1) also avoids
            # creating an empty attempts folder for zero attempts.
            if (attempt_count > 1 and assignment.updated_url != "" and assignment.html_url != ""
                    and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
                attempts_dir = os.path.join(assign_dir, "attempts")

                if not os.path.exists(attempts_dir):
                    os.makedirs(attempts_dir)

                # Saves the attempts if multiple were taken; doesn't account
                # for different IDs however, as the url for a specific id's
                # attempts is not known.
                for version in range(1, attempt_count + 1):
                    filename = "attempt_" + str(version) + ".html"
                    attempt_page_path = os.path.join(attempts_dir, filename)

                    if not os.path.exists(attempt_page_path):
                        download_page(assignment.updated_url + "/history?version=" + str(version),
                                      cookies_path, attempts_dir, filename)
|
||||||
|
|
||||||
|
def downloadCourseModulePages(api_url, course_view, cookies_path):
    """Save the course's modules page and every module item page as HTML.

    Files are written under ``DL_LOCATION/<term>/<course_code>/modules``.
    Each module gets a folder named after the module and each item a file
    named after the item title. A page whose target file already exists is
    not downloaded again.

    Args:
        api_url: Base URL the ``/courses/<id>/modules/`` path is appended to.
        course_view: Course data; ``term``, ``course_code``, ``course_id``
            and ``modules`` are read. Each module provides ``name`` and
            ``items``; each item provides ``url`` and ``title``.
        cookies_path: Cookies file handed to ``download_page``. An empty
            string turns this function into a no-op.
    """
    if cookies_path == "" or len(course_view.modules) == 0:
        return

    modules_dir = os.path.join(DL_LOCATION, course_view.term,
                               course_view.course_code, "modules")

    # Create modules directory if not present
    if not os.path.exists(modules_dir):
        os.makedirs(modules_dir)

    module_list_path = os.path.join(modules_dir, "modules_list.html")

    # Downloads the modules page (possible this is disabled by the teacher).
    # The cookies_path argument is used here, like in every other call of
    # this function, instead of reaching for the COOKIES_PATH global.
    if not os.path.exists(module_list_path):
        download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/",
                      cookies_path, modules_dir, "modules_list.html")

    for module in course_view.modules:
        for item in module.items:
            # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
            # A change would also have to be made in findCourseModules(course, course_view)
            items_dir = os.path.join(modules_dir, makeValidFilename(str(module.name)))

            # Items without a URL have no page to save, so no folder is
            # created for them either.
            if item.url == "":
                continue

            # Create the module's own directory if not present
            if not os.path.exists(items_dir):
                os.makedirs(items_dir)

            filename = makeValidFilename(str(item.title)) + ".html"
            module_item_path = os.path.join(items_dir, filename)

            # Download the module item page unless it is already on disk.
            if not os.path.exists(module_item_path):
                download_page(item.url, cookies_path, items_dir, filename)
|
||||||
|
|
||||||
|
def downloadCourseAnnouncementPages(api_url, course_view, cookies_path):
    """Save the announcement list and every announcement page as HTML.

    Output goes to ``DL_LOCATION/<term>/<course_code>/announcements``: one
    ``announcement_list.html`` plus one folder per announcement holding
    ``announcement_<n>.html`` for each of its pages. Files that already
    exist are left untouched.

    Args:
        api_url: Base URL the ``/courses/<id>/announcements/`` path is
            appended to.
        course_view: Course data; ``term``, ``course_code``, ``course_id``
            and ``announcements`` are read. Each announcement provides
            ``title``, ``url`` and ``amount_pages``.
        cookies_path: Cookies file handed to ``download_page``. An empty
            string turns this function into a no-op.
    """
    # Nothing to do without cookies or without announcements.
    if cookies_path == "":
        return
    if len(course_view.announcements) == 0:
        return

    root_dir = os.path.join(DL_LOCATION, course_view.term,
                            course_view.course_code, "announcements")
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    # Overview page listing all announcements of the course.
    list_url = api_url + "/courses/" + str(course_view.course_id) + "/announcements/"
    if not os.path.exists(os.path.join(root_dir, "announcement_list.html")):
        download_page(list_url, cookies_path, root_dir, "announcement_list.html")

    for entry in course_view.announcements:
        entry_dir = os.path.join(root_dir, makeValidFilename(entry.title))

        # Only announcements with a URL get a folder and downloads.
        if entry.url != "":
            if not os.path.exists(entry_dir):
                os.makedirs(entry_dir)

            # An announcement can span several pages; pages are numbered
            # starting at 1 both in the URL and in the file name.
            for page_no in range(1, entry.amount_pages + 1):
                page_file = "announcement_" + str(page_no) + ".html"
                if os.path.exists(os.path.join(entry_dir, page_file)):
                    continue
                download_page(entry.url + "/page-" + str(page_no),
                              cookies_path, entry_dir, page_file)
|
||||||
|
|
||||||
|
def downloadCourseDicussionPages(api_url, course_view, cookies_path):
    """Save the discussion list and every discussion page as HTML.

    Output goes to ``DL_LOCATION/<term>/<course_code>/discussions``: one
    ``discussion_list.html`` plus one folder per discussion holding a file
    for each of its pages. Files that already exist are left untouched.

    Args:
        api_url: Base URL the ``/courses/<id>/discussion_topics/`` path is
            appended to.
        course_view: Course data; ``term``, ``course_code``, ``course_id``
            and ``discussions`` are read. Each discussion provides
            ``title``, ``url`` and ``amount_pages``.
        cookies_path: Cookies file handed to ``download_page``. An empty
            string turns this function into a no-op.
    """
    # Nothing to do without cookies or without discussions.
    if cookies_path == "":
        return
    if len(course_view.discussions) == 0:
        return

    root_dir = os.path.join(DL_LOCATION, course_view.term,
                            course_view.course_code, "discussions")
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    # Overview page listing all discussion topics of the course.
    list_url = api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/"
    if not os.path.exists(os.path.join(root_dir, "discussion_list.html")):
        download_page(list_url, cookies_path, root_dir, "discussion_list.html")

    for topic in course_view.discussions:
        topic_dir = os.path.join(root_dir, makeValidFilename(topic.title))

        # Only discussions with a URL get a folder and downloads.
        if topic.url != "":
            if not os.path.exists(topic_dir):
                os.makedirs(topic_dir)

            # A discussion can span several pages; pages are numbered
            # starting at 1 both in the URL and in the file name.
            for page_no in range(1, topic.amount_pages + 1):
                # The "dicussion_" spelling is kept on purpose: the
                # exists-check below relies on this exact file name.
                page_file = "dicussion_" + str(page_no) + ".html"
                if os.path.exists(os.path.join(topic_dir, page_file)):
                    continue
                download_page(topic.url + "/page-" + str(page_no),
                              cookies_path, topic_dir, page_file)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
@ -616,6 +902,14 @@ if __name__ == "__main__":
|
||||||
"logging in to canvas and then going to this URL in the same "
|
"logging in to canvas and then going to this URL in the same "
|
||||||
"browser {yourCanvasBaseUrl}/api/v1/users/self")
|
"browser {yourCanvasBaseUrl}/api/v1/users/self")
|
||||||
USER_ID = input("Enter your Canvas User ID: ")
|
USER_ID = input("Enter your Canvas User ID: ")
|
||||||
|
|
||||||
|
if COOKIES_PATH == "":
|
||||||
|
# Cookies path
|
||||||
|
print("\nWe will need your browsers cookies file. This needs to be "
|
||||||
|
"exported using another tool. This needs to be a path to a file "
|
||||||
|
"formatted in the NetScape format. This can be left blank if an html "
|
||||||
|
"images aren't wanted. ")
|
||||||
|
COOKIES_PATH = input("Enter your cookies path: ")
|
||||||
|
|
||||||
print("\nConnecting to canvas\n")
|
print("\nConnecting to canvas\n")
|
||||||
|
|
||||||
|
@ -634,8 +928,13 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
skip = set(COURSES_TO_SKIP)
|
skip = set(COURSES_TO_SKIP)
|
||||||
|
|
||||||
|
|
||||||
|
if (COOKIES_PATH):
|
||||||
|
print(" Downloading course list page")
|
||||||
|
downloadCourseHTML(API_URL, COOKIES_PATH)
|
||||||
|
|
||||||
for course in courses:
|
for course in courses:
|
||||||
if course.id in skip:
|
if course.id in skip or not hasattr(course, "name") or not hasattr(course, "term"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
course_view = getCourseView(course)
|
course_view = getCourseView(course)
|
||||||
|
@ -651,6 +950,22 @@ if __name__ == "__main__":
|
||||||
print(" Getting modules and downloading module files")
|
print(" Getting modules and downloading module files")
|
||||||
course_view.modules = findCourseModules(course, course_view)
|
course_view.modules = findCourseModules(course, course_view)
|
||||||
|
|
||||||
|
if(COOKIES_PATH):
|
||||||
|
print(" Downloading course home page")
|
||||||
|
downloadCourseHomePageHTML(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
print(" Downloading assignment pages")
|
||||||
|
downloadAssignmentPages(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
print(" Downloading course module pages")
|
||||||
|
downloadCourseModulePages(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
print(" Downloading course announcements pages")
|
||||||
|
downloadCourseAnnouncementPages(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
|
print(" Downloading course dicussion pages")
|
||||||
|
downloadCourseDicussionPages(API_URL, course_view, COOKIES_PATH)
|
||||||
|
|
||||||
print(" Exporting all course data")
|
print(" Exporting all course data")
|
||||||
exportAllCourseData(course_view)
|
exportAllCourseData(course_view)
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"dependencies": {
|
||||||
|
"single-file": "github:gildas-lormeau/SingleFile"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
from subprocess import run
|
||||||
|
|
||||||
|
SINGLEFILE_BINARY_PATH = ".\\node_modules\\single-file\\cli\\single-file"
|
||||||
|
#CHROME_PATH = "C:\\Program Files\\Google\Chrome\\Application\\chrome.exe" #Uncomment this and set your browser exe if it can't find yours.
|
||||||
|
|
||||||
|
def addQuotes(str):
    """Return the text wrapped in a single pair of double quotes.

    Double quotes already present at either end are stripped first, so a
    value that is already quoted is not quoted twice. Quotes inside the text
    are left alone.
    """
    # NOTE: the parameter shadows the builtin ``str``; inside this function
    # ``str`` is the argument, so ``str.strip`` is a method call on it.
    unquoted = str.strip('"')
    return "".join(('"', unquoted, '"'))
|
||||||
|
|
||||||
|
def download_page(url, cookies_path, output_path, output_name_template = ""):
    """Save a web page as one HTML file by running the SingleFile CLI with node.

    Failures to launch the tool are reported on stdout and otherwise ignored,
    so one page that cannot be saved does not stop the caller.

    Args:
        url: Address of the page to save.
        cookies_path: Cookies file passed as ``--browser-cookies-file``.
        output_path: Directory passed as ``--output-directory``.
        output_name_template: Value for ``--filename-template``; the option
            is left out when this is an empty string.
    """
    # The command is built as an argument list rather than one concatenated
    # string: a plain string without shell=True is taken as a single program
    # name on POSIX and never starts, and with a list subprocess does the
    # quoting itself, so paths containing spaces need no manual quotes.
    # Surrounding quotes a user may have typed around a path are stripped,
    # as addQuotes() did before.
    command = [
        "node",
        SINGLEFILE_BINARY_PATH.strip("\""),
        "--browser-cookies-file=" + cookies_path.strip("\""),
        "--output-directory=" + output_path.strip("\""),
        url,
    ]
    # If the browser can't be found, an executable-path option can be added
    # to the list above (presumably "--browser-executable-path=<path>";
    # check the SingleFile CLI help for the exact spelling).

    if output_name_template != "":
        command.append("--filename-template=" + output_name_template.strip("\""))

    try:
        run(command)
    except Exception as e:
        # str(e) works for every exception type; e.strerror exists only on
        # OSError and can be None, which made the report itself fail.
        print("Was not able to save the URL " + url + " using singlefile. The reported error was " +
              str(e))
|
||||||
|
|
||||||
|
#if __name__ == "__main__":
|
||||||
|
#download_page("https://www.google.com/", "", ".\\output\\test", "test.html")
|
Loading…
Reference in New Issue