update my stuff

2023-10-27 16:24:52 -06:00 · 2023-10-27 16:24:52 -06:00 · 9a67feefab
parent 941f675eae
commit 9a67feefab
17 changed files with 2393 additions and 1824 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,7 +1,295 @@
-.vscode
-__pycache__/
-node_modules/
-output/
-
 credentials.yaml
-cookies.txt
+cookies*.txt
+
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Uncomment the "public" line below if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
--- a/docs/LICENSE
+++ b/docs/LICENSE
--- a/docs/README.md
+++ b/docs/README.md
--- a/export.py
+++ b/export.py
--- a/package-lock.json
+++ b/package-lock.json
--- a/requirements.txt
+++ b/requirements.txt
@ -3,3 +3,5 @@ jsonpickle
 canvasapi
 python-dateutil
 PyYAML
+tqdm
+bs4
--- a/singlefile.py
+++ b/singlefile.py
@ -1,27 +0,0 @@
-from subprocess import run
-
-SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
-CHROME_PATH = "C:/Program Files/Google\ Chrome/Application/chrome.exe" #Uncomment this and set your browser exe if it can't find yours.
-
-def addQuotes(str):
-    return "\"" + str.strip("\"") + "\""
-
-def download_page(url, cookies_path, output_path, output_name_template = ""):
-    args = [
-        addQuotes(SINGLEFILE_BINARY_PATH),
-        #"--browser-executable-path=" + addQuotes(CHROME_PATH.strip("\"")), #Uncomment this and set your browser exe if it can't find yours.
-        "--browser-cookies-file=" + addQuotes(cookies_path),
-        "--output-directory=" + addQuotes(output_path),
-        addQuotes(url)
-        ]
-
-    if(output_name_template != ""):
-        args.append("--filename-template=" + addQuotes(output_name_template))
-
-    try:
-        run("node " + " ".join(args), shell=True)
-    except Exception as e:
-        print("Was not able to save the URL " + url + " using singlefile. The reported error was " + e.strerror)
-
-#if __name__ == "__main__":
-    #download_page("https://www.google.com/", "", "./output/test", "test.html")
--- a/stuff/init.py
+++ b/stuff/init.py
--- a/stuff/const.py
+++ b/stuff/const.py
@ -0,0 +1,12 @@
+# Root directory that course content is downloaded into (created if it does not exist)
+DL_LOCATION = "./output"
+# Course IDs that should be skipped (must be integers)
+COURSES_TO_SKIP = [288290, 512033]
+
+# strftime format used for the human-readable dates stored on the view objects
+DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
+
+# Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to reduce the chance of reaching the limit
+# Applies to modules, assignments, announcements, and discussions
+# If a folder name exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." is not valid)
+MAX_FOLDER_NAME_SIZE = 70
--- a/stuff/download.py
+++ b/stuff/download.py
@ -0,0 +1,21 @@
+from http.cookiejar import MozillaCookieJar
+
+import requests
+
+
+def download_file(url, output, cookie_jar: MozillaCookieJar):
+    """
+    Stream the resource at ``url`` into the local file ``output``.
+
+    The name/value pair of every cookie in ``cookie_jar`` is copied into a
+    fresh ``requests`` session that is used for the request.
+
+    :param url: URL to fetch.
+    :param output: Destination path; opened in binary write mode.
+    :param cookie_jar: Cookies to send along with the request.
+    :return: ``output``, unchanged.
+    :raises requests.HTTPError: if the response status is 4xx/5xx.
+    """
+    # The context managers close the session (and its pooled connections) and
+    # the response even when the download fails part-way through.
+    with requests.Session() as session:
+        for cookie in cookie_jar:
+            session.cookies.set(cookie.name, cookie.value)
+
+        # stream=True keeps the body out of memory; it is written chunk by chunk.
+        with session.get(url, stream=True) as response:
+            response.raise_for_status()
+            with open(output, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+    return output
--- a/stuff/download_canvas.py
+++ b/stuff/download_canvas.py
@ -0,0 +1,300 @@
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import partial
+from http.cookiejar import MozillaCookieJar
+
+import requests
+from tqdm import tqdm
+
+from stuff.singlefile import download_page
+from stuff.const import DL_LOCATION, MAX_FOLDER_NAME_SIZE
+from stuff.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
+from stuff.threading import download_assignment, download_module_item
+
+
+def download_course_files(course, course_view):
+    """
+    Download every file of ``course`` below ``DL_LOCATION/<term>/<course name>``.
+
+    Files are placed in a sub-directory derived from the full name of their
+    Canvas folder. Files that already exist locally are not downloaded again.
+    A failure on one file is reported and the remaining files are still
+    processed.
+
+    :param course: Object providing ``get_files()`` and ``get_folder(id)``.
+    :param course_view: Object providing ``term`` and ``name``.
+    :return: None
+    """
+    # file full_name starts with "course files"
+    dl_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+
+    # Create directory if not present
+    os.makedirs(dl_dir, exist_ok=True)
+
+    try:
+        files = list(course.get_files())
+    except Exception as e:
+        tqdm.write(f"Skipping file download that gave the following error: {e}")
+        return
+
+    # Many files share a folder; remember each folder's local path so the
+    # folder is only looked up once.
+    folder_dirs = {}
+
+    for file in tqdm(files, desc='Downloading Files'):
+        # Handle errors per file so one bad file does not abort the rest.
+        try:
+            if file.folder_id not in folder_dirs:
+                file_folder = course.get_folder(file.folder_id)
+                folder_dirs[file.folder_id] = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name))
+            folder_dl_dir = folder_dirs[file.folder_id]
+
+            os.makedirs(folder_dl_dir, exist_ok=True)
+
+            dl_path = os.path.join(folder_dl_dir, make_valid_filename(str(file.display_name)))
+
+            # Download file if it doesn't already exist
+            if not os.path.exists(dl_path):
+                # tqdm.write keeps the message from breaking the progress bar
+                tqdm.write('Downloading: {}'.format(dl_path))
+                file.download(dl_path)
+        except Exception as e:
+            tqdm.write(f"Skipping file download that gave the following error: {e}")
+
+
+def download_course_discussion_pages(api_url, course_view, cookies_path):
+    """
+    Save the discussion index and every page of each discussion as HTML.
+
+    Nothing is done when ``cookies_path`` is empty or the course view has no
+    discussions. Pages that already exist on disk are not downloaded again.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param course_view: Object providing ``term``, ``name``, ``course_id`` and ``discussions``.
+    :param cookies_path: Path of the cookies file handed to ``download_page``.
+    :return: None
+    """
+    if cookies_path == "":
+        return
+    if len(course_view.discussions) == 0:
+        return
+
+    root_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "discussions")
+    if not os.path.exists(root_dir):
+        os.makedirs(root_dir)
+
+    # Discussion index page for the whole course
+    index_name = "discussion_list.html"
+    if not os.path.exists(os.path.join(root_dir, index_name)):
+        index_url = api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/"
+        download_page(index_url, cookies_path, root_dir, index_name)
+
+    for topic in tqdm(list(course_view.discussions), desc='Downloading Discussion Pages'):
+        folder_name = make_valid_filename(str(topic.title))
+        folder_name = shorten_file_name(folder_name, len(folder_name) - MAX_FOLDER_NAME_SIZE)
+        topic_dir = os.path.join(root_dir, folder_name)
+
+        # Topics without a URL cannot be fetched
+        if topic.url == "":
+            continue
+
+        if not os.path.exists(topic_dir):
+            os.makedirs(topic_dir)
+
+        # One HTML file per page of the discussion; pages are numbered from 1
+        for page_number in range(1, topic.amount_pages + 1):
+            page_file = "discussion_" + str(page_number) + ".html"
+            if os.path.exists(os.path.join(topic_dir, page_file)):
+                continue
+            download_page(topic.url + "/page-" + str(page_number), cookies_path, topic_dir, page_file)
+
+
+def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: MozillaCookieJar):
+    """
+    Save the assignment index page, then process every assignment on a thread pool.
+
+    Nothing is done when ``cookies_path`` is empty or the course view has no
+    assignments. The per-assignment work is delegated to
+    ``download_assignment``.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param course_view: Object providing ``term``, ``name``, ``course_id`` and ``assignments``.
+    :param cookies_path: Path of the cookies file handed to ``download_page``.
+    :param cookie_jar: Cookie jar forwarded to ``download_assignment``.
+    :return: None
+    """
+    if cookies_path == "":
+        return
+    if len(course_view.assignments) == 0:
+        return
+
+    assignments_root = os.path.join(DL_LOCATION, course_view.term, course_view.name, "assignments")
+    if not os.path.exists(assignments_root):
+        os.makedirs(assignments_root)
+
+    # Assignment index (there's a chance this might be the course homepage if
+    # the course has the assignments page disabled)
+    index_name = "assignment_list.html"
+    if not os.path.exists(os.path.join(assignments_root, index_name)):
+        index_url = api_url + "/courses/" + str(course_view.course_id) + "/assignments/"
+        download_page(index_url, cookies_path, assignments_root, index_name)
+
+    # Bind the arguments shared by every call; the pool supplies the assignment.
+    worker = partial(download_assignment, cookies_path, cookie_jar, assignments_root)
+
+    with ThreadPoolExecutor(max_workers=3) as pool:
+        results = pool.map(worker, course_view.assignments)
+        # list() drains the lazy map so the progress bar advances as results arrive
+        list(tqdm(results, total=len(course_view.assignments), desc='Downloading Assignments'))
+
+
+def download_course_announcement_pages(api_url, course_view, cookies_path):
+    """
+    Save the announcement index and every page of each announcement as HTML.
+
+    Nothing is done when ``cookies_path`` is empty or the course view has no
+    announcements. Pages that already exist on disk are not downloaded again.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param course_view: Object providing ``term``, ``name``, ``course_id`` and ``announcements``.
+    :param cookies_path: Path of the cookies file handed to ``download_page``.
+    :return: None
+    """
+    if cookies_path == "":
+        return
+    if len(course_view.announcements) == 0:
+        return
+
+    root_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "announcements")
+    if not os.path.exists(root_dir):
+        os.makedirs(root_dir)
+
+    # Announcement index page for the whole course
+    index_name = "announcement_list.html"
+    if not os.path.exists(os.path.join(root_dir, index_name)):
+        index_url = api_url + "/courses/" + str(course_view.course_id) + "/announcements/"
+        download_page(index_url, cookies_path, root_dir, index_name)
+
+    for announcement in tqdm(list(course_view.announcements), desc='Downloading Announcements'):
+        folder_name = make_valid_filename(str(announcement.title))
+        folder_name = shorten_file_name(folder_name, len(folder_name) - MAX_FOLDER_NAME_SIZE)
+        announcement_dir = os.path.join(root_dir, folder_name)
+
+        # Announcements without a URL cannot be fetched
+        if announcement.url == "":
+            continue
+
+        if not os.path.exists(announcement_dir):
+            os.makedirs(announcement_dir)
+
+        # One HTML file per page of the announcement; pages are numbered from 1
+        for page_number in range(1, announcement.amount_pages + 1):
+            page_file = "announcement_" + str(page_number) + ".html"
+            if os.path.exists(os.path.join(announcement_dir, page_file)):
+                continue
+            download_page(announcement.url + "/page-" + str(page_number), cookies_path, announcement_dir, page_file)
+
+
+def download_submission_attachments(course, course_view):
+    """
+    Download the attachments of every submission listed in ``course_view``.
+
+    Attachments are stored below ``<course dir>/assignments/<assignment title>``.
+    When an assignment has more or fewer than exactly one submission, each
+    submission gets a sub-directory named after its user id. Existing files
+    are not downloaded again. A failed request is reported and skipped, so no
+    error page is written to disk in place of the attachment.
+
+    :param course: Not used by this function; kept for the existing call signature.
+    :param course_view: Object providing ``term``, ``name`` and ``assignments``.
+    :return: None
+    """
+    course_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+
+    # Create directory if not present
+    os.makedirs(course_dir, exist_ok=True)
+
+    for assignment in tqdm(list(course_view.assignments), desc='Downloading Submissions'):
+        # The folder name only depends on the assignment, so build it once
+        # instead of once per submission.
+        assignment_title = make_valid_filename(str(assignment.title))
+        assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
+        assignment_dir = os.path.join(course_dir, "assignments", assignment_title)
+        per_user_dirs = len(assignment.submissions) != 1
+
+        for submission in assignment.submissions:
+            if not submission.attachments:
+                continue
+
+            attachment_dir = assignment_dir
+            if per_user_dirs:
+                attachment_dir = os.path.join(assignment_dir, str(submission.user_id))
+            os.makedirs(attachment_dir, exist_ok=True)
+
+            for attachment in submission.attachments:
+                filepath = os.path.join(attachment_dir, make_valid_filename(str(attachment.id) + "_" + attachment.filename))
+                if os.path.exists(filepath):
+                    continue
+
+                try:
+                    r = requests.get(attachment.url, allow_redirects=True)
+                    # Without this check the body of a 4xx/5xx answer would be
+                    # saved as the attachment and never retried.
+                    r.raise_for_status()
+                except requests.RequestException as e:
+                    tqdm.write(f"Skipping attachment download that gave the following error: {e}")
+                    continue
+
+                with open(filepath, 'wb') as f:
+                    f.write(r.content)
+
+
+def download_course_html(api_url, cookies_path):
+    """
+    Save the course list page as ``course_list.html`` inside ``DL_LOCATION``.
+
+    Nothing is done when ``cookies_path`` is empty or the file already exists.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param cookies_path: Path of the cookies file handed to ``download_page``.
+    :return: None
+    """
+    if cookies_path == "":
+        return
+
+    output_dir = DL_LOCATION
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    listing_name = "course_list.html"
+
+    # Skip the download when an earlier run already saved the course list
+    if os.path.exists(os.path.join(output_dir, listing_name)):
+        return
+
+    download_page(api_url + "/courses/", cookies_path, output_dir, listing_name)
+
+
+def download_course_home_page_html(api_url, course_view, cookies_path):
+    """
+    Save the home page of a course as ``homepage.html`` in the course directory.
+
+    Nothing is done when ``cookies_path`` is empty or the file already exists.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param course_view: Object providing ``term``, ``name`` and ``course_id``.
+    :param cookies_path: Path of the cookies file handed to ``download_page``.
+    :return: None
+    """
+    if cookies_path == "":
+        return
+
+    course_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+    if not os.path.exists(course_dir):
+        os.makedirs(course_dir)
+
+    page_name = "homepage.html"
+
+    # Skip the download when an earlier run already saved the home page
+    if os.path.exists(os.path.join(course_dir, page_name)):
+        return
+
+    home_url = api_url + "/courses/" + str(course_view.course_id)
+    download_page(home_url, cookies_path, course_dir, page_name)
+
+
+def download_course_module_pages(api_url, course_view, cookies_path):
+    """
+    Save the modules index page, then process every module item on a thread pool.
+
+    Nothing is done when ``cookies_path`` is empty or the course view has no
+    modules. The per-item work is delegated to ``download_module_item``; an
+    item whose worker raised is reported and the remaining items continue.
+
+    :param api_url: Base URL of the Canvas instance.
+    :param course_view: Object providing ``term``, ``name``, ``course_id`` and ``modules``.
+    :param cookies_path: Path of the cookies file handed to the downloaders.
+    :return: None
+    """
+    if cookies_path == "" or len(course_view.modules) == 0:
+        return
+
+    modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
+    os.makedirs(modules_dir, exist_ok=True)
+
+    module_list_path = os.path.join(modules_dir, "modules_list.html")
+
+    # Downloads the modules page (possible this is disabled by the teacher)
+    if not os.path.exists(module_list_path):
+        download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
+
+    with ThreadPoolExecutor(max_workers=3) as executor:
+        for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
+            futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items]
+
+            # The bar is only advanced manually, so it just needs the item
+            # count; the context manager closes it even if something fails.
+            with tqdm(total=len(futures), leave=False, desc=module.name) as bar:
+                for future in as_completed(futures):
+                    # A future keeps its exception until someone asks for it;
+                    # report it here instead of dropping it silently.
+                    error = future.exception()
+                    if error is not None:
+                        tqdm.write(f"Skipping module item download that gave the following error: {error}")
+                    bar.update()
--- a/stuff/get_canvas.py
+++ b/stuff/get_canvas.py
@ -0,0 +1,287 @@
+import os
+from http.cookiejar import MozillaCookieJar
+
+import dateutil.parser
+import requests
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+from stuff.const import DATE_TEMPLATE, DL_LOCATION, MAX_FOLDER_NAME_SIZE
+from stuff.helpers import make_valid_filename, shorten_file_name
+from stuff.items import AssignmentView, AttachmentView, DiscussionView, ModuleItemView, ModuleView, PageView, SubmissionView, TopicEntryView, TopicReplyView
+
+
+def find_course_modules(course, course_view):
+    """
+    Build a ``ModuleView`` for every module of ``course`` and download its file items.
+
+    Items whose type is ``"File"`` are downloaded into
+    ``<modules dir>/<module name>/files`` unless the file already exists.
+    Errors are reported and skipped at three levels: a single file download,
+    the items of one module, and the module listing as a whole.
+
+    :param course: Object providing ``get_modules()`` and ``get_file(id)``.
+    :param course_view: Object providing ``term`` and ``name``.
+    :return: List of the ``ModuleView`` objects that were built.
+    """
+    modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
+
+    # Make sure the modules directory exists
+    if not os.path.exists(modules_dir):
+        os.makedirs(modules_dir)
+
+    collected = []
+
+    try:
+        for module in tqdm(list(course.get_modules()), desc='Fetching Modules and Downloading Files'):
+            view = ModuleView()
+            view.id = getattr(module, "id", "")
+            view.name = str(getattr(module, "name", ""))
+
+            try:
+                for entry in module.get_module_items():
+                    entry_view = ModuleItemView()
+                    entry_view.id = getattr(entry, "id", 0)
+                    entry_view.title = str(getattr(entry, "title", "")).replace('  ', ' ')
+                    entry_view.content_type = str(getattr(entry, "type", ""))
+                    entry_view.url = str(getattr(entry, "html_url", ""))
+                    entry_view.external_url = str(getattr(entry, "external_url", ""))
+
+                    if entry_view.content_type == "File":
+                        # If problems arise due to long pathnames, using module.id instead of module.name might help;
+                        # the module page download would need the same change.
+                        folder_name = make_valid_filename(str(module.name))
+                        folder_name = shorten_file_name(folder_name, len(folder_name) - MAX_FOLDER_NAME_SIZE)
+                        files_dir = os.path.join(modules_dir, folder_name, "files")
+
+                        try:
+                            if not os.path.exists(files_dir):
+                                os.makedirs(files_dir)
+
+                            remote_file = course.get_file(str(entry.content_id))
+                            target_path = os.path.join(files_dir, make_valid_filename(str(remote_file.display_name)))
+
+                            # Only download files that are not on disk yet
+                            if not os.path.exists(target_path):
+                                remote_file.download(target_path)
+                        except Exception as e:
+                            tqdm.write(f"Skipping module file download that gave the following error: {e}")
+
+                    # The item is recorded whether or not a file was downloaded
+                    view.items.append(entry_view)
+            except Exception as e:
+                tqdm.write(f"Skipping module file download that gave the following error: {e}")
+
+            collected.append(view)
+
+    except Exception as e:
+        print("Skipping entire module that gave the following error:")
+        print(e)
+
+    return collected
+
+
+def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
+    """
+    Resolve the file links embedded in an HTML fragment.
+
+    Every ``<a>`` tag with ``data-api-returntype="File"`` and a
+    ``data-api-endpoint`` attribute is looked up through that endpoint, and
+    the ``display_name`` and ``url`` fields of the JSON answer are collected.
+    Endpoints that answer 404 are skipped.
+
+    :param html: HTML text to scan for file links.
+    :param cookie_jar: Cookies whose name/value pairs are sent with each request.
+    :return: List of ``(display_name, url)`` tuples.
+    :raises requests.HTTPError: if an endpoint answers with an error status other than 404.
+    """
+    soup = BeautifulSoup(html, 'html.parser')
+    # Requiring the endpoint attribute in the search avoids a KeyError on
+    # file links that do not carry it.
+    anchors = soup.find_all('a', {'data-api-returntype': 'File', 'data-api-endpoint': True})
+    urls = [a['data-api-endpoint'] for a in anchors]
+    if not urls:
+        return []
+
+    extra_files = []
+    # The context manager closes the session and its pooled connections.
+    with requests.Session() as s:
+        for cookie in cookie_jar:
+            s.cookies.set(cookie.name, cookie.value)
+
+        for item in urls:
+            r = s.get(item)
+            if r.status_code == 404:
+                continue
+            r.raise_for_status()
+            j = r.json()
+            extra_files.append((j['display_name'], j['url']))
+
+    return extra_files
+
+
+def get_course_page_urls(course):
+    """
+    Collect the ``url`` attribute of every page of ``course`` as a string.
+
+    Pages without a ``url`` attribute are ignored. If listing the pages
+    fails, an empty list is returned; the error is printed unless its message
+    is exactly ``"Not Found"``.
+
+    :param course: Object providing ``get_pages()``.
+    :return: List of page URL strings (possibly empty).
+    """
+    page_urls = []
+    try:
+        pages = list(course.get_pages())
+        for page in pages:
+            if hasattr(page, "url"):
+                page_urls.append(str(page.url))
+    except Exception as e:
+        # Not every exception has a .message attribute (plain Exception does
+        # not), so fall back to str(e) instead of failing inside the handler.
+        if getattr(e, "message", str(e)) != "Not Found":
+            print(f"Skipping page: {e}")
+    return page_urls
+
+
+def find_course_pages(course):
+    """
+    Build a ``PageView`` for every page of ``course``.
+
+    A list is always returned: it is empty when the course has no pages, and
+    it holds the pages fetched so far when an error interrupts the loop (the
+    error is printed).
+
+    :param course: Object providing ``get_pages()`` and ``get_page(url)``.
+    :return: List of ``PageView`` objects (possibly empty).
+    """
+    page_views = []
+    try:
+        page_urls = get_course_page_urls(course)
+        if not page_urls:
+            # Return the empty list rather than None so callers can always
+            # iterate over or take the length of the result.
+            return page_views
+
+        for url in tqdm(page_urls, desc='Fetching Pages'):
+            page = course.get_page(url)
+            page_view = PageView()
+            page_view.id = page.id if hasattr(page, "id") else 0
+            page_view.title = str(page.title).replace('  ', ' ') if hasattr(page, "title") else ""
+            page_view.body = str(page.body) if hasattr(page, "body") else ""
+
+            if hasattr(page, "created_at"):
+                page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE)
+            else:
+                page_view.created_date = ''
+
+            if hasattr(page, "updated_at"):
+                page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE)
+            else:
+                page_view.last_updated_date = ''
+
+            page_views.append(page_view)
+    except Exception as e:
+        print("Skipping page download that gave the following error:")
+        print(e)
+    return page_views
+
+
+def find_course_assignments(course, user_id):
+    """
+    Build an ``AssignmentView`` for every assignment of ``course``.
+
+    Each view holds one ``SubmissionView`` for the submission of ``user_id``,
+    including an ``AttachmentView`` per attachment. Errors raised while
+    listing assignments or fetching a submission propagate to the caller.
+
+    :param course: Object providing ``get_assignments()``.
+    :param user_id: User whose submission is fetched for each assignment.
+    :return: List of ``AssignmentView`` objects.
+    """
+    assignment_views = []
+
+    # Get all assignments
+    assignments = list(course.get_assignments())
+
+    for assignment in tqdm(assignments, desc='Fetching Assignments'):
+        assignment_view = AssignmentView()
+        assignment_view.id = assignment.id if hasattr(assignment, "id") else ""
+        assignment_view.title = make_valid_filename(str(assignment.name).replace('  ', ' ')) if hasattr(assignment, "name") else ""
+        assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else ""
+        assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "created_at_date") else ""
+        assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "due_at_date") else ""
+        assignment_view.html_url = assignment.html_url if hasattr(assignment, "html_url") else ""
+        assignment_view.ext_url = str(assignment.url) if hasattr(assignment, "url") else ""
+        assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if hasattr(assignment, "submissions_download_url") else ""
+
+        # Download submission for this user only. Exactly one submission is
+        # requested, so there is no "no submissions" case to guard against.
+        submission = assignment.get_submission(user_id)
+
+        sub_view = SubmissionView()
+        sub_view.id = submission.id if hasattr(submission, "id") else 0
+        sub_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
+        sub_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
+        sub_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
+        sub_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
+        sub_view.attempt = submission.attempt if hasattr(submission, "attempt") and submission.attempt is not None else 0
+        sub_view.user_id = str(submission.user_id) if hasattr(submission, "user_id") else ""
+        sub_view.preview_url = str(submission.preview_url) if hasattr(submission, "preview_url") else ""
+        sub_view.ext_url = str(submission.url) if hasattr(submission, "url") else ""
+
+        attachments = getattr(submission, "attachments", None)
+        if attachments is None:
+            # tqdm.write keeps the message from breaking the progress bar
+            tqdm.write('No attachments')
+        else:
+            for attachment in attachments:
+                attach_view = AttachmentView()
+                attach_view.url = attachment.url
+                attach_view.id = attachment.id
+                attach_view.filename = attachment.filename
+                sub_view.attachments.append(attach_view)
+        assignment_view.submissions.append(sub_view)
+
+        assignment_views.append(assignment_view)
+
+    return assignment_views
+
+
+def find_course_announcements(course):
+    """Build one discussion view per announcement of ``course``.
+
+    The announcements are requested with
+    ``course.get_discussion_topics(only_announcements=True)`` and collected into
+    a list before the progress-bar loop starts. Each one is converted with
+    ``get_discussion_view``. Errors raised while fetching or converting are not
+    caught here and propagate to the caller.
+
+    Returns the list of converted views, in the order the topics were returned.
+    """
+    topics = list(course.get_discussion_topics(only_announcements=True))
+    progress = tqdm(topics, desc='Fetching Announcements')
+    return [get_discussion_view(topic) for topic in progress]
+
+
+def get_discussion_view(discussion_topic):
+    """Convert a discussion topic (or announcement) into a ``DiscussionView``.
+
+    Every field is copied only when the source object has the corresponding
+    attribute; otherwise the view keeps an empty default. When the topic reports
+    a positive ``discussion_subentry_count`` its entries are read through
+    ``get_topic_entries()`` and each entry's replies through ``get_replies()``.
+
+    Errors raised while enumerating entries or replies are printed and the
+    enumeration stops at that point; whatever was collected so far is kept.
+
+    Returns the populated ``DiscussionView``.
+    """
+    # Single display format shared by the topic, its entries and their replies.
+    date_format = "%B %d, %Y %I:%M %p"
+
+    # Create discussion view
+    discussion_view = DiscussionView()
+    discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
+    discussion_view.title = str(discussion_topic.title).replace('  ', ' ') if hasattr(discussion_topic, "title") else ""
+    discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
+    discussion_view.posted_date = discussion_topic.created_at_date.strftime(date_format) if hasattr(discussion_topic, "created_at_date") else ""
+    discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
+    discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else ""
+
+    # Keeps track of how many topic_entries there are.
+    topic_entries_counter = 0
+
+    # Topic entries
+    if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
+        # Need to get replies to entries recursively?
+        discussion_topic_entries = discussion_topic.get_topic_entries()
+        try:
+            for topic_entry in discussion_topic_entries:
+                topic_entries_counter += 1
+
+                # Create new discussion view for the topic_entry
+                topic_entry_view = TopicEntryView()
+                topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
+                topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
+                topic_entry_view.posted_date = topic_entry.created_at_date.strftime(date_format) if hasattr(topic_entry, "created_at_date") else ""
+                topic_entry_view.body = str(topic_entry.message) if hasattr(topic_entry, "message") else ""
+
+                # Get this topic's replies
+                topic_entry_replies = topic_entry.get_replies()
+
+                try:
+                    for topic_reply in topic_entry_replies:
+                        # Create new topic reply view
+                        topic_reply_view = TopicReplyView()
+                        topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
+                        topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
+                        topic_reply_view.posted_date = topic_reply.created_at_date.strftime(date_format) if hasattr(topic_reply, "created_at_date") else ""
+
+                        reply_text = str(topic_reply.message) if hasattr(topic_reply, "message") else ""
+                        # TopicReplyView declares ``body`` (like the entry and topic views);
+                        # fill it so the reply text is not left at its empty default.
+                        topic_reply_view.body = reply_text
+                        # Still set ``message`` so existing readers of that attribute keep working.
+                        topic_reply_view.message = reply_text
+
+                        topic_entry_view.topic_replies.append(topic_reply_view)
+                except Exception as e:
+                    # Best effort: keep the replies gathered so far for this entry.
+                    print("Tried to enumerate discussion topic entry replies but received the following error:")
+                    print(e)
+
+                discussion_view.topic_entries.append(topic_entry_view)
+        except Exception as e:
+            # Best effort: keep the entries gathered so far for this topic.
+            print("Tried to enumerate discussion topic entries but received the following error:")
+            print(e)
+
+    # Amount of pages.
+    # Typically 50 topic entries are stored on a page before it creates another page.
+    # NOTE(review): an exact multiple of 50 entries yields one extra page here
+    # (50 -> 2); left unchanged because the real paging rule is not visible - confirm.
+    discussion_view.amount_pages = int(topic_entries_counter / 50) + 1
+
+    return discussion_view
+
+
+def find_course_discussions(course):
+    """Build one discussion view per discussion topic of ``course``.
+
+    The topics come from ``course.get_discussion_topics()`` and are collected
+    into a list before the progress-bar loop starts. Each one is converted with
+    ``get_discussion_view``. Errors raised while fetching or converting are not
+    caught here and propagate to the caller.
+
+    Returns the list of converted views, in the order the topics were returned.
+    """
+    topics = list(course.get_discussion_topics())
+    progress = tqdm(topics, desc='Fetching Discussions')
+    return [get_discussion_view(topic) for topic in progress]
--- a/stuff/helpers.py
+++ b/stuff/helpers.py
@ -0,0 +1,54 @@
+import os
+import string
+
+
+def make_valid_filename(input_str):
+    """Return ``input_str`` reduced to characters that are safe in a file name.
+
+    ``+`` becomes a space, ``:`` and ``/`` become ``-``, and every character
+    other than ASCII letters, digits, space and ``-_.()`` is dropped. Leading
+    spaces and any trailing mix of spaces and periods are then removed.
+
+    A falsy input (``None`` or ``""``) is returned unchanged.
+    """
+    if not input_str:
+        return input_str
+
+    valid_chars = frozenset("-_.() " + string.ascii_letters + string.digits)
+
+    # "+" is the Canvas default for spaces; ":" and "/" are mapped to "-" so
+    # they leave a visible separator instead of silently disappearing.
+    input_str = input_str.replace("+", " ").replace(":", "-").replace("/", "-")
+    input_str = "".join(c for c in input_str if c in valid_chars)
+
+    # Strip trailing spaces and periods together: removing them one kind at a
+    # time can expose the other again ("foo ." would otherwise end as "foo ").
+    return input_str.lstrip().rstrip(". ")
+
+
+def make_valid_folder_path(input_str):
+    """Return ``input_str`` reduced to a safe relative folder path.
+
+    ``+`` becomes a space, ``:`` becomes ``-``, and every character other than
+    ASCII letters, digits, space, ``/`` and ``-_.()`` is dropped. Leading
+    spaces and separators, and any trailing mix of spaces, separators and
+    periods, are removed. Remaining ``/`` separators are converted to ``os.sep``.
+
+    A falsy input (``None`` or ``""``) is returned unchanged, matching
+    ``make_valid_filename``.
+    """
+    if not input_str:
+        return input_str
+
+    valid_chars = frozenset("-_.()/ " + string.ascii_letters + string.digits)
+
+    # "+" is the Canvas default for spaces.
+    input_str = input_str.replace("+", " ").replace(":", "-")
+    input_str = "".join(c for c in input_str if c in valid_chars)
+
+    # Trim both ends in one pass per side: stripping whitespace, separators and
+    # periods one after another can re-expose an earlier kind ("a/b/." -> "a/b/").
+    input_str = input_str.lstrip("/ ").rstrip("/ .")
+
+    # Replace path separators with OS default
+    return input_str.replace("/", os.sep)
+
+
+def shorten_file_name(string, shorten_by) -> str:
+    """Shorten ``string`` by ``shorten_by`` characters and mark it with ``-``.
+
+    One additional character is removed to make room for the trailing ``-``,
+    which signals an incomplete name (a trailing period is not allowed). After
+    cutting, trailing whitespace, periods and dashes are removed before the
+    marker is appended.
+
+    A falsy ``string`` or a ``shorten_by`` of zero or less returns ``string``
+    unchanged. Asking to remove more characters than the name has yields ``"-"``.
+    """
+    if not string or shorten_by <= 0:
+        return string
+
+    # Clamp at zero: a negative slice bound would count from the end of the
+    # string and keep part of the name instead of removing all of it.
+    keep = max(0, len(string) - (shorten_by + 1))
+    string = string[:keep]
+
+    string = string.rstrip().rstrip(".").rstrip("-")
+    return string + "-"
--- a/stuff/items.py
+++ b/stuff/items.py
@ -0,0 +1,106 @@
+from stuff.helpers import make_valid_filename
+
+
+class ModuleItemView:
+    """Attribute holder for one module item; every field starts at an empty default."""
+
+    def __init__(self):
+        self.id = 0
+        # Text fields, created in this order: title, content_type, url, external_url.
+        self.title = self.content_type = self.url = self.external_url = ""
+        self.external_url = ""
+
+
+class ModuleView:
+    """Attribute holder for one module: an id, a name and a list of items."""
+
+    def __init__(self):
+        self.id, self.name = 0, ""
+        # Fresh list per instance so views never share their items.
+        self.items = []
+
+
+class PageView:
+    """Attribute holder for one page: id, title, body and two date strings."""
+
+    def __init__(self):
+        self.id = 0
+        self.title = self.body = ""
+        # Dates are kept as text and start out empty.
+        self.created_date = self.last_updated_date = ""
+
+
+class TopicReplyView:
+    """Attribute holder for one reply to a discussion topic entry."""
+
+    def __init__(self):
+        self.id = 0
+        # Text fields, created in this order: author, posted_date, body.
+        self.author = self.posted_date = self.body = ""
+
+
+class TopicEntryView:
+    """Attribute holder for one discussion topic entry and its replies."""
+
+    def __init__(self):
+        self.id = 0
+        # Text fields, created in this order: author, posted_date, body.
+        self.author = self.posted_date = self.body = ""
+        # Fresh list per instance; reply views are appended to it.
+        self.topic_replies = []
+
+
+class DiscussionView:
+    """Attribute holder for one discussion topic or announcement."""
+
+    def __init__(self):
+        self.id = 0
+        # Text fields, created in this order: title, author, posted_date, body.
+        self.title = self.author = self.posted_date = self.body = ""
+        # Fresh list per instance; topic entry views are appended to it.
+        self.topic_entries = []
+        self.url = ""
+        self.amount_pages = 0
+
+
+class SubmissionView:
+    """Attribute holder for one submission, including its attachment views."""
+
+    def __init__(self):
+        self.id = 0
+        # Fresh list per instance; attachment views are appended to it.
+        self.attachments = []
+        # Grade-related fields are kept as text and start out empty.
+        self.grade = self.raw_score = ""
+        self.submission_comments = self.total_possible_points = ""
+        self.attempt = 0
+        # Placeholder used until a real user id is assigned.
+        self.user_id = "no-id"
+        self.preview_url = self.ext_url = ""
+
+
+class AttachmentView:
+    """Attribute holder for one submission attachment: id, filename and url."""
+
+    def __init__(self):
+        self.id = 0
+        self.filename = self.url = ""
+
+
+class AssignmentView:
+    """Attribute holder for one assignment and the submission views under it."""
+
+    def __init__(self):
+        self.id = 0
+        self.title = self.description = ""
+        # Dates are kept as text and start out empty.
+        self.assigned_date = self.due_date = ""
+        # Fresh list per instance; submission views are appended to it.
+        self.submissions = []
+        # URL fields, created in this order: html_url, ext_url, updated_url.
+        self.html_url = self.ext_url = self.updated_url = ""
+
+
+class CourseView:
+    """Summary of one course plus the lists its exported content is stored in.
+
+    ``course`` is read attribute by attribute; anything it lacks falls back to
+    an empty default. The term name and course code go through
+    ``make_valid_filename``; doubled spaces in the course code and the name are
+    replaced by a single space (one pass).
+    """
+
+    def __init__(self, course):
+        self.course_id = getattr(course, "id", 0)
+
+        # The term is only used when it is present and carries a "name" key.
+        if hasattr(course, "term") and "name" in course.term.keys():
+            term_name = course.term["name"]
+        else:
+            term_name = ""
+        self.term = make_valid_filename(term_name)
+
+        safe_code = make_valid_filename(getattr(course, "course_code", ""))
+        self.course_code = safe_code.replace('  ', ' ')
+        self.name = getattr(course, "name", "").replace('  ', ' ')
+
+        # Filled in later, one list per kind of course content.
+        self.assignments = []
+        self.announcements = []
+        self.discussions = []
+        self.modules = []
--- a/stuff/singlefile.py
+++ b/stuff/singlefile.py
@ -0,0 +1,26 @@
+from subprocess import run
+
+# Script that download_page() hands to `node`. The path is relative, so it is
+# resolved against the working directory of the running process.
+SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
+# Browser executable passed to single-file via --browser-executable-path.
+CHROME_PATH = "/usr/bin/chromium-browser"
+
+
+def add_quotes(s):
+    """Return ``s`` wrapped in one pair of double quotes.
+
+    Double quotes already present at either end of ``s`` are stripped first, so
+    an already-quoted value is not quoted twice. Quotes inside ``s`` are kept.
+    """
+    bare = s.strip('"')
+    return f'"{bare}"'
+
+
+def download_page(url, cookies_path, output_path, output_name_template=""):
+    """Save ``url`` as a single HTML file by running the single-file CLI with node.
+
+    Parameters:
+        url: Page to save.
+        cookies_path: Cookies file passed as ``--browser-cookies-file``.
+        output_path: Directory passed as ``--output-directory``.
+        output_name_template: Optional value for ``--filename-template``;
+            omitted from the command when empty.
+
+    Failures are reported with ``print`` and never raised to the caller: both
+    the case where the process cannot be started and the case where it exits
+    with a non-zero status.
+    """
+    # The command is passed as an argument list and run without a shell, so
+    # spaces, quotes, "$", backticks or ";" inside the URL or the paths are
+    # delivered literally instead of being interpreted as shell syntax.
+    # Surrounding double quotes are still stripped, as add_quotes() used to do.
+    args = [
+        "node",
+        SINGLEFILE_BINARY_PATH.strip("\""),
+        "--browser-executable-path=" + CHROME_PATH.strip("\""),
+        "--browser-cookies-file=" + cookies_path.strip("\""),
+        "--output-directory=" + output_path.strip("\""),
+        url.strip("\""),
+    ]
+
+    if output_name_template != "":
+        args.append("--filename-template=" + output_name_template.strip("\""))
+
+    try:
+        result = run(args)
+    except (OSError, ValueError) as e:
+        # str(e) works for every exception type; ``strerror`` exists only on OSError.
+        print("Was not able to save the URL " + url + " using singlefile. The reported error was " + str(e))
+        return
+
+    if result.returncode != 0:
+        print("Was not able to save the URL " + url + " using singlefile. The process exited with status " + str(result.returncode))
--- a/stuff/threading.py
+++ b/stuff/threading.py
@ -0,0 +1,79 @@
+import os
+from pathlib import Path
+
+from stuff.singlefile import download_page
+from stuff.const import MAX_FOLDER_NAME_SIZE
+from stuff.download import download_file
+from stuff.get_canvas import get_extra_assignment_files
+from stuff.helpers import make_valid_filename, shorten_file_name
+
+
+def download_module_item(module, item, modules_dir, cookies_path):
+    """Save the page of one module item into that module's folder.
+
+    The folder is ``modules_dir`` joined with the module name, sanitised and
+    shortened to ``MAX_FOLDER_NAME_SIZE``. The page is stored as
+    ``<sanitised item title>.html`` and is skipped when that file already
+    exists. Nothing is created or downloaded when ``item.url`` is empty.
+    """
+    # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
+    # A change would also have to be made in findCourseModules(course, course_view)
+    module_name = make_valid_filename(str(module.name))
+    module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
+    items_dir = os.path.join(modules_dir, module_name)
+
+    if item.url == "":
+        return
+
+    # exist_ok avoids the check-then-create race: all items of a module share
+    # this folder, so concurrent calls could both see it missing and one would
+    # then fail with FileExistsError.
+    os.makedirs(items_dir, exist_ok=True)
+
+    filename = make_valid_filename(str(item.title)) + ".html"
+    module_item_path = os.path.join(items_dir, filename)
+
+    # Download the module page.
+    if not os.path.exists(module_item_path):
+        download_page(item.url, cookies_path, items_dir, filename)
+
+
+def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment):
+    """Download one assignment's page, its extra files and all its submissions.
+
+    The assignment folder is ``base_assign_dir`` joined with the assignment
+    title, sanitised and shortened to ``MAX_FOLDER_NAME_SIZE``. When the
+    assignment has an ``html_url`` the folder is created if missing, the page
+    is saved as ``assignment.html`` unless that file already exists, and every
+    file returned by ``get_extra_assignment_files`` is downloaded into the
+    folder. Each submission is then passed to ``download_submission``,
+    whether or not there was an ``html_url``.
+    """
+    safe_title = make_valid_filename(str(assignment.title))
+    overflow = len(safe_title) - MAX_FOLDER_NAME_SIZE
+    assign_dir = os.path.join(base_assign_dir, shorten_file_name(safe_title, overflow))
+
+    has_page = assignment.html_url != ""
+    if has_page:
+        if not os.path.exists(assign_dir):
+            os.makedirs(assign_dir)
+
+        page_name = "assignment.html"
+        if not os.path.exists(os.path.join(assign_dir, page_name)):
+            download_page(assignment.html_url, cookies_path, assign_dir, page_name)
+
+        # Files referenced from the assignment description.
+        for name, url in get_extra_assignment_files(assignment.description, cookie_jar):
+            download_file(url, Path(assign_dir, name), cookie_jar)
+
+    for submission in assignment.submissions:
+        download_submission(assignment, submission, assign_dir, cookies_path)
+
+
+def download_submission(assignment, submission, assign_dir, cookies_path):
+    """Download one submission's preview page and, if applicable, its attempts.
+
+    The preview is saved as ``submission.html`` directly in ``assign_dir`` when
+    the assignment has exactly one submission, otherwise in a sub-folder named
+    after the submission's user id. It is skipped when the submission has no
+    ``preview_url`` or the file already exists.
+
+    Attempt pages are saved as ``attempts/attempt_<n>.html`` only when the
+    attempt number is not 1, both ``updated_url`` and ``html_url`` are set, and
+    the two differ once trailing slashes are ignored.
+    """
+    if len(assignment.submissions) == 1:
+        target_dir = assign_dir
+    else:
+        target_dir = os.path.join(assign_dir, str(submission.user_id))
+
+    if submission.preview_url != "":
+        if not os.path.exists(target_dir):
+            os.makedirs(target_dir)
+
+        page_name = "submission.html"
+        if not os.path.exists(os.path.join(target_dir, page_name)):
+            download_page(submission.preview_url, cookies_path, target_dir, page_name)
+
+    # Guard clauses: leave unless every condition for attempt history holds.
+    if submission.attempt == 1 or assignment.updated_url == "" or assignment.html_url == "":
+        return
+    if assignment.html_url.rstrip("/") == assignment.updated_url.rstrip("/"):
+        return
+
+    attempts_dir = os.path.join(assign_dir, "attempts")
+    if not os.path.exists(attempts_dir):
+        os.makedirs(attempts_dir)
+
+    # Versions are numbered from 1 up to and including the attempt count.
+    for version in range(1, submission.attempt + 1):
+        filename = f"attempt_{version}.html"
+        if os.path.exists(os.path.join(attempts_dir, filename)):
+            continue
+        history_url = assignment.updated_url + "/history?version=" + str(version)
+        download_page(history_url, cookies_path, attempts_dir, filename)
--- a/test.py
+++ b/test.py
@ -0,0 +1,11 @@
+from http.cookiejar import MozillaCookieJar
+
+import requests
+
+# Manual check: request one Canvas file endpoint using cookies saved on disk
+# and print the raw response body.
+s = requests.Session()
+# Cookie file in Mozilla cookies.txt format, looked up relative to the working directory.
+cookies = MozillaCookieJar('cookies-canvas-uccs-edu.txt')
+# Also load cookies marked as session-only (discard) and cookies that have expired.
+cookies.load(ignore_discard=True, ignore_expires=True)
+# Only name and value are copied into the session; domain and path are not carried over.
+for cookie in cookies:
+    s.cookies.set(cookie.name, cookie.value)
+# Hard-coded course and file ids; performs a live network request.
+r = s.get('https://canvas.uccs.edu/api/v1/courses/146797/files/8232290')
+print(r.text)