update my stuff

parent 941f675eae
commit 9a67feefab
@@ -1,7 +1,295 @@
 .vscode
 __pycache__/
 node_modules/
 output/
 
 credentials.yaml
-cookies.txt
+cookies*.txt
+
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
File diff suppressed because it is too large
@@ -3,3 +3,5 @@ jsonpickle
 canvasapi
 python-dateutil
 PyYAML
+tqdm
+bs4
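The two dependencies added above are used by the new modules in this commit: tqdm wraps the download loops in progress bars, and bs4 (BeautifulSoup) scrapes file links out of assignment descriptions. A minimal sketch of both, separate from the commit itself (the HTML string is a placeholder):

from bs4 import BeautifulSoup
from tqdm import tqdm

html = '<a data-api-endpoint="https://example.com/api/v1/files/1" data-api-returntype="File">doc</a>'
soup = BeautifulSoup(html, 'html.parser')
# The same query the new get_canvas module uses to find files linked from an assignment
urls = [a['data-api-endpoint'] for a in soup.find_all('a', {'data-api-returntype': 'File'})]

# tqdm turns any iterable into a progress bar, as in the download loops below
for url in tqdm(urls, desc='Checking links'):
    pass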
@@ -1,27 +0,0 @@
-from subprocess import run
-
-
-SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
-CHROME_PATH = "C:/Program Files/Google\ Chrome/Application/chrome.exe" #Uncomment this and set your browser exe if it can't find yours.
-
-
-def addQuotes(str):
-    return "\"" + str.strip("\"") + "\""
-
-
-def download_page(url, cookies_path, output_path, output_name_template = ""):
-    args = [
-        addQuotes(SINGLEFILE_BINARY_PATH),
-        #"--browser-executable-path=" + addQuotes(CHROME_PATH.strip("\"")), #Uncomment this and set your browser exe if it can't find yours.
-        "--browser-cookies-file=" + addQuotes(cookies_path),
-        "--output-directory=" + addQuotes(output_path),
-        addQuotes(url)
-    ]
-
-    if(output_name_template != ""):
-        args.append("--filename-template=" + addQuotes(output_name_template))
-
-    try:
-        run("node " + " ".join(args), shell=True)
-    except Exception as e:
-        print("Was not able to save the URL " + url + " using singlefile. The reported error was " + e.strerror)
@@ -0,0 +1,12 @@
+# Directory in which to download course information to (will be created if not
+# present)
+DL_LOCATION = "./output"
+# List of Course IDs that should be skipped (need to be integers)
+COURSES_TO_SKIP = [288290, 512033]
+
+DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
+
+# Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to prevent the chance of reaching the limit
+# Applies to modules, assignments, announcements, and discussions
+# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
+MAX_FOLDER_NAME_SIZE = 70
@@ -0,0 +1,21 @@
+from http.cookiejar import MozillaCookieJar
+
+import requests
+
+
+def download_file(url, output, cookie_jar: MozillaCookieJar):
+    s = requests.Session()
+    for cookie in cookie_jar:
+        s.cookies.set(cookie.name, cookie.value)
+
+    local_filename = output
+    # NOTE the stream=True parameter below
+    with s.get(url, stream=True) as r:
+        r.raise_for_status()
+        with open(local_filename, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                # If you have chunk encoded response uncomment if
+                # and set chunk_size parameter to None.
+                # if chunk:
+                f.write(chunk)
+    return local_filename
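A usage sketch for download_file, separate from the commit: it expects a cookie jar already loaded from a Netscape-format cookies file, and the URL and paths here are placeholders.

from http.cookiejar import MozillaCookieJar

from stuff.download import download_file  # assumes the stuff package from this commit is importable

jar = MozillaCookieJar("cookies.txt")  # hypothetical cookies file exported from a logged-in browser session
jar.load(ignore_discard=True, ignore_expires=True)
download_file("https://example.com/files/notes.pdf", "./output/notes.pdf", jar)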
@@ -0,0 +1,300 @@
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import partial
+from http.cookiejar import MozillaCookieJar
+
+import requests
+from tqdm import tqdm
+
+from stuff.singlefile import download_page
+from stuff.const import DL_LOCATION, MAX_FOLDER_NAME_SIZE
+from stuff.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
+from stuff.threading import download_assignment, download_module_item
+
+
+def download_course_files(course, course_view):
+    # file full_name starts with "course files"
+    dl_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+
+    # Create directory if not present
+    if not os.path.exists(dl_dir):
+        os.makedirs(dl_dir)
+
+    try:
+        files = list(course.get_files())
+
+        for file in tqdm(files, desc='Downloading Files'):
+            file_folder = course.get_folder(file.folder_id)
+
+            folder_dl_dir = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name))
+
+            if not os.path.exists(folder_dl_dir):
+                os.makedirs(folder_dl_dir)
+
+            dl_path = os.path.join(folder_dl_dir, make_valid_filename(str(file.display_name)))
+
+            # Download file if it doesn't already exist
+            if not os.path.exists(dl_path):
+                print('Downloading: {}'.format(dl_path))
+                file.download(dl_path)
+    except Exception as e:
+        tqdm.write(f"Skipping file download that gave the following error: {e}")
+
+
+def download_course_discussion_pages(api_url, course_view, cookies_path):
+    if cookies_path == "" or len(course_view.discussions) == 0:
+        return
+
+    base_discussion_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "discussions")
+    if not os.path.exists(base_discussion_dir):
+        os.makedirs(base_discussion_dir)
+
+    discussion_list_dir = os.path.join(base_discussion_dir, "discussion_list.html")
+
+    # Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
+    if not os.path.exists(discussion_list_dir):
+        download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
+
+    for discussion in tqdm(list(course_view.discussions), desc='Downloading Discussion Pages'):
+        discussion_title = make_valid_filename(str(discussion.title))
+        discussion_title = shorten_file_name(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE)
+        discussion_dir = os.path.join(base_discussion_dir, discussion_title)
+
+        if discussion.url == "":
+            continue
+
+        if not os.path.exists(discussion_dir):
+            os.makedirs(discussion_dir)
+
+        # Downloads each page that a discussion takes.
+        for i in range(discussion.amount_pages):
+            filename = "discussion_" + str(i + 1) + ".html"
+            discussion_page_dir = os.path.join(discussion_dir, filename)
+
+            # Download assignment page, this usually has instructions and etc.
+            if not os.path.exists(discussion_page_dir):
+                download_page(discussion.url + "/page-" + str(i + 1), cookies_path, discussion_dir, filename)
+
+
+def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: MozillaCookieJar):
+    if cookies_path == "" or len(course_view.assignments) == 0:
+        return
+
+    base_assign_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "assignments")
+    if not os.path.exists(base_assign_dir):
+        os.makedirs(base_assign_dir)
+
+    assignment_list_path = os.path.join(base_assign_dir, "assignment_list.html")
+
+    # Download assignment list (theres a chance this might be the course homepage if the course has the assignments page disabled)
+    if not os.path.exists(assignment_list_path):
+        download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
+
+    # for assignment in tqdm(course_view.assignments, desc='Downloading Assignments'):
+    #     assignment_title = make_valid_filename(str(assignment.title))
+    #     assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
+    #     assign_dir = os.path.join(base_assign_dir, assignment_title)
+    #
+    #     # Download an html image of each assignment (includes assignment instructions and other stuff).
+    #     # Currently, this will only download the main assignment page and not external pages, this is
+    #     # because these external pages are given in a json format. Saving these would require a lot
+    #     # more work then normal.
+    #     if assignment.html_url != "":
+    #         if not os.path.exists(assign_dir):
+    #             os.makedirs(assign_dir)
+    #
+    #         assignment_page_path = os.path.join(assign_dir, "assignment.html")
+    #
+    #         # Download assignment page, this usually has instructions and etc.
+    #         if not os.path.exists(assignment_page_path):
+    #             download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
+    #
+    #     extra_files = get_extra_assignment_files(assignment.description, cookie_jar)
+    #     if extra_files:  # in an if statement so that we only show the bar when there's things to do.
+    #         for name, url in tqdm(extra_files, desc='Downloading Additional Files', leave=False):
+    #             download_file(url, Path(assign_dir, name), cookie_jar)
+    #
+    #     for submission in assignment.submissions:
+    #         submission_dir = assign_dir
+    #
+    #         # If theres more then 1 submission, add unique id to download dir
+    #         if len(assignment.submissions) != 1:
+    #             submission_dir = os.path.join(assign_dir, str(submission.user_id))
+    #
+    #         if submission.preview_url != "":
+    #             if not os.path.exists(submission_dir):
+    #                 os.makedirs(submission_dir)
+    #
+    #             submission_page_dir = os.path.join(submission_dir, "submission.html")
+    #
+    #             # Download submission url, this is typically a more focused page
+    #             if not os.path.exists(submission_page_dir):
+    #                 download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
+    #
+    #         # If theres more then 1 attempt, save each attempt in attempts folder
+    #         if (submission.attempt != 1 and assignment.updated_url != "" and assignment.html_url != ""
+    #                 and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
+    #             submission_dir = os.path.join(assign_dir, "attempts")
+    #
+    #             if not os.path.exists(submission_dir):
+    #                 os.makedirs(submission_dir)
+    #
+    #             # Saves the attempts if multiple were taken, doesn't account for
+    #             # different ID's however, as I wasnt able to find out what the url
+    #             # for the specific id's attempts would be.
+    #             for i in range(submission.attempt):
+    #                 filename = "attempt_" + str(i + 1) + ".html"
+    #                 submission_page_attempt_dir = os.path.join(submission_dir, filename)
+    #
+    #                 if not os.path.exists(submission_page_attempt_dir):
+    #                     download_page(assignment.updated_url + "/history?version=" + str(i + 1), cookies_path, submission_dir, filename)
+
+    with ThreadPoolExecutor(max_workers=3) as executor:
+        download_func = partial(download_assignment, cookies_path, cookie_jar, base_assign_dir)
+        list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
+
+
+def download_course_announcement_pages(api_url, course_view, cookies_path):
+    """
+    Download assignment list.
+
+    There's a chance this might be the course homepage if the course has the assignments page disabled.
+
+    :param api_url:
+    :param course_view:
+    :param cookies_path:
+    :return:
+    """
+
+    if cookies_path == "" or len(course_view.announcements) == 0:
+        return
+
+    base_announce_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "announcements")
+    if not os.path.exists(base_announce_dir):
+        os.makedirs(base_announce_dir)
+    announcement_list_dir = os.path.join(base_announce_dir, "announcement_list.html")
+    if not os.path.exists(announcement_list_dir):
+        download_page(api_url + "/courses/" + str(course_view.course_id) + "/announcements/", cookies_path, base_announce_dir, "announcement_list.html")
+
+    for announcements in tqdm(list(course_view.announcements), desc='Downloading Announcements'):
+        announcements_title = make_valid_filename(str(announcements.title))
+        announcements_title = shorten_file_name(announcements_title, len(announcements_title) - MAX_FOLDER_NAME_SIZE)
+        announce_dir = os.path.join(base_announce_dir, announcements_title)
+
+        if announcements.url == "":
+            continue
+
+        if not os.path.exists(announce_dir):
+            os.makedirs(announce_dir)
+
+        # Downloads each page that a discussion takes.
+        for i in range(announcements.amount_pages):
+            filename = "announcement_" + str(i + 1) + ".html"
+            announcement_page_dir = os.path.join(announce_dir, filename)
+
+            # Download assignment page, this usually has instructions and etc.
+            if not os.path.exists(announcement_page_dir):
+                download_page(announcements.url + "/page-" + str(i + 1), cookies_path, announce_dir, filename)
+
+
+def download_submission_attachments(course, course_view):
+    course_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+
+    # Create directory if not present
+    if not os.path.exists(course_dir):
+        os.makedirs(course_dir)
+
+    for assignment in tqdm(list(course_view.assignments), desc='Downloading Submissions'):
+        for submission in assignment.submissions:
+            assignment_title = make_valid_filename(str(assignment.title))
+            assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
+            attachment_dir = os.path.join(course_dir, "assignments", assignment_title)
+            if len(assignment.submissions) != 1:
+                attachment_dir = os.path.join(attachment_dir, str(submission.user_id))
+            if not os.path.exists(attachment_dir) and submission.attachments:
+                os.makedirs(attachment_dir)
+            for attachment in submission.attachments:
+                filepath = os.path.join(attachment_dir, make_valid_filename(str(attachment.id) + "_" + attachment.filename))
+                if not os.path.exists(filepath):
+                    # print('Downloading attachment: {}'.format(filepath))
+                    r = requests.get(attachment.url, allow_redirects=True)
+                    with open(filepath, 'wb') as f:
+                        f.write(r.content)
+                # else:
+                #     print('File already exists: {}'.format(filepath))
+
+
+def download_course_html(api_url, cookies_path):
+    if cookies_path == "":
+        return
+
+    course_dir = DL_LOCATION
+
+    if not os.path.exists(course_dir):
+        os.makedirs(course_dir)
+
+    course_list_path = os.path.join(course_dir, "course_list.html")
+
+    # Downloads the course list.
+    if not os.path.exists(course_list_path):
+        download_page(api_url + "/courses/", cookies_path, course_dir, "course_list.html")
+
+
+def download_course_home_page_html(api_url, course_view, cookies_path):
+    if cookies_path == "":
+        return
+
+    dl_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name)
+    if not os.path.exists(dl_dir):
+        os.makedirs(dl_dir)
+
+    homepage_path = os.path.join(dl_dir, "homepage.html")
+
+    # Downloads the course home page.
+    if not os.path.exists(homepage_path):
+        download_page(api_url + "/courses/" + str(course_view.course_id), cookies_path, dl_dir, "homepage.html")
+
+
+def download_course_module_pages(api_url, course_view, cookies_path):
+    if cookies_path == "" or len(course_view.modules) == 0:
+        return
+
+    modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
+    if not os.path.exists(modules_dir):
+        os.makedirs(modules_dir)
+
+    module_list_dir = os.path.join(modules_dir, "modules_list.html")
+
+    # Downloads the modules page (possible this is disabled by the teacher)
+    if not os.path.exists(module_list_dir):
+        download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
+
+    # for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
+    #     bar = tqdm(list(module.items), leave=False, desc=module.name)
+    #     for item in module.items:
+    #         # bar.set_postfix({'title': item.title})
+    #
+    #         # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
+    #         # A change would also have to be made in findCourseModules(course, course_view)
+    #         module_name = make_valid_filename(str(module.name))
+    #         module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
+    #         items_dir = os.path.join(modules_dir, module_name)
+    #
+    #         if item.url != "":
+    #             if not os.path.exists(items_dir):
+    #                 os.makedirs(items_dir)
+    #
+    #             filename = make_valid_filename(str(item.title)) + ".html"
+    #             module_item_dir = os.path.join(items_dir, filename)
+    #
+    #             # Download the module page.
+    #             if not os.path.exists(module_item_dir):
+    #                 download_page(item.url, cookies_path, items_dir, filename)
+    #         bar.update()
+    #     bar.close()
+
+    with ThreadPoolExecutor(max_workers=3) as executor:
+        for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
+            bar = tqdm(list(module.items), leave=False, desc=module.name)
+            futures = [executor.submit(download_module_item, module, item, modules_dir, cookies_path) for item in module.items]
+            for future in as_completed(futures):
+                bar.update()
+            bar.close()
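The thread-pool pattern used twice above (functools.partial freezes the shared arguments, executor.map supplies the per-item argument, and list() drains the lazy iterator so tqdm can count completions) reduces to this self-contained sketch; the names here are placeholders, not part of the commit:

from concurrent.futures import ThreadPoolExecutor
from functools import partial

from tqdm import tqdm

def work(shared, item):  # hypothetical stand-in for download_assignment
    return shared + ":" + item

items = ["a", "b", "c"]
with ThreadPoolExecutor(max_workers=3) as executor:
    func = partial(work, "ctx")  # like freezing cookies_path, cookie_jar, base_assign_dir
    results = list(tqdm(executor.map(func, items), total=len(items), desc='Working'))
print(results)  # ['ctx:a', 'ctx:b', 'ctx:c']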
@@ -0,0 +1,287 @@
+import os
+from http.cookiejar import MozillaCookieJar
+
+import dateutil.parser
+import requests
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+from stuff.const import DATE_TEMPLATE, DL_LOCATION, MAX_FOLDER_NAME_SIZE
+from stuff.helpers import make_valid_filename, shorten_file_name
+from stuff.items import AssignmentView, AttachmentView, DiscussionView, ModuleItemView, ModuleView, PageView, SubmissionView, TopicEntryView, TopicReplyView
+
+
+def find_course_modules(course, course_view):
+    modules_dir = os.path.join(DL_LOCATION, course_view.term, course_view.name, "modules")
+
+    # Create modules directory if not present
+    if not os.path.exists(modules_dir):
+        os.makedirs(modules_dir)
+
+    module_views = []
+
+    try:
+        modules = list(course.get_modules())
+
+        for module in tqdm(modules, desc='Fetching Modules and Downloading Files'):
+            module_view = ModuleView()
+            module_view.id = module.id if hasattr(module, "id") else ""
+            module_view.name = str(module.name) if hasattr(module, "name") else ""
+
+            try:
+                # Get module items
+                module_items = module.get_module_items()
+
+                for module_item in module_items:
+                    module_item_view = ModuleItemView()
+                    module_item_view.id = module_item.id if hasattr(module_item, "id") else 0
+                    module_item_view.title = str(module_item.title).replace(' ', ' ') if hasattr(module_item, "title") else ""
+                    module_item_view.content_type = str(module_item.type) if hasattr(module_item, "type") else ""
+                    module_item_view.url = str(module_item.html_url) if hasattr(module_item, "html_url") else ""
+                    module_item_view.external_url = str(module_item.external_url) if hasattr(module_item, "external_url") else ""
+
+                    if module_item_view.content_type == "File":
+                        # If problems arise due to long pathnames, changing module.name to module.id might help
+                        # A change would also have to be made in downloadCourseModulePages(api_url, course_view, cookies_path)
+                        module_name = make_valid_filename(str(module.name))
+                        module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
+                        module_dir = os.path.join(modules_dir, module_name, "files")
+
+                        try:
+                            # Create directory for current module if not present
+                            if not os.path.exists(module_dir):
+                                os.makedirs(module_dir)
+
+                            # Get the file object
+                            module_file = course.get_file(str(module_item.content_id))
+
+                            # Create path for module file download
+                            module_file_path = os.path.join(module_dir, make_valid_filename(str(module_file.display_name)))
+
+                            # Download file if it doesn't already exist
+                            if not os.path.exists(module_file_path):
+                                module_file.download(module_file_path)
+                        except Exception as e:
+                            tqdm.write(f"Skipping module file download that gave the following error: {e}")
+
+                    module_view.items.append(module_item_view)
+            except Exception as e:
+                tqdm.write(f"Skipping module file download that gave the following error: {e}")
+
+            module_views.append(module_view)
+
+    except Exception as e:
+        print("Skipping entire module that gave the following error:")
+        print(e)
+
+    return module_views
+
+
+def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
+    soup = BeautifulSoup(html, 'html.parser')
+    urls = [a['data-api-endpoint'] for a in soup.find_all('a', {'data-api-returntype': 'File'})]
+
+    s = requests.Session()
+    for cookie in cookie_jar:
+        s.cookies.set(cookie.name, cookie.value)
+
+    extra_files = []
+    for item in urls:
+        r = s.get(item)
+        if r.status_code == 404:
+            continue
+        r.raise_for_status()
+        j = r.json()
+        extra_files.append((j['display_name'], j['url']))
+
+    return extra_files
+
+
+def get_course_page_urls(course):
+    page_urls = []
+    try:
+        pages = list(course.get_pages())
+        for page in pages:
+            if hasattr(page, "url"):
+                page_urls.append(str(page.url))
+    except Exception as e:
+        if e.message != "Not Found":
+            print(f"Skipping page: {e}")
+    return page_urls
+
+
+def find_course_pages(course):
+    page_views = []
+    try:
+        page_urls = get_course_page_urls(course)
+        if not len(page_urls):
+            return
+
+        for url in tqdm(page_urls, desc='Fetching Pages'):
+            page = course.get_page(url)
+            page_view = PageView()
+            page_view.id = page.id if hasattr(page, "id") else 0
+            page_view.title = str(page.title).replace(' ', ' ') if hasattr(page, "title") else ""
+            page_view.body = str(page.body) if hasattr(page, "body") else ""
+
+            if hasattr(page, "created_at"):
+                page_view.created_date = dateutil.parser.parse(page.created_at).strftime(DATE_TEMPLATE)
+            else:
+                page_view.created_date = ''
+
+            if hasattr(page, "updated_at"):
+                page_view.last_updated_date = dateutil.parser.parse(page.updated_at).strftime(DATE_TEMPLATE)
+            else:
+                page_view.last_updated_date = ''
+
+            page_views.append(page_view)
+    except Exception as e:
+        print("Skipping page download that gave the following error:")
+        print(e)
+    return page_views
+
+
+def find_course_assignments(course, user_id):
+    assignment_views = []
+
+    # Get all assignments
+    assignments = list(course.get_assignments())
+
+    for assignment in tqdm(assignments, desc='Fetching Assignments'):
+        assignment_view = AssignmentView()
+        assignment_view.id = assignment.id if hasattr(assignment, "id") else ""
+        assignment_view.title = make_valid_filename(str(assignment.name).replace(' ', ' ')) if hasattr(assignment, "name") else ""
+        assignment_view.description = str(assignment.description) if hasattr(assignment, "description") else ""
+        assignment_view.assigned_date = assignment.created_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "created_at_date") else ""
+        assignment_view.due_date = assignment.due_at_date.strftime(DATE_TEMPLATE) if hasattr(assignment, "due_at_date") else ""
+        assignment_view.html_url = assignment.html_url if hasattr(assignment, "html_url") else ""
+        assignment_view.ext_url = str(assignment.url) if hasattr(assignment, "url") else ""
+        assignment_view.updated_url = str(assignment.submissions_download_url).split("submissions?")[0] if hasattr(assignment, "submissions_download_url") else ""
+
+        # Download submission for this user only
+        submissions = [assignment.get_submission(user_id)]
+        if not len(submissions):
+            raise IndexError(f'No submissions found for assignment: {vars(assignment)}')
+
+        try:
+            for submission in submissions:
+                sub_view = SubmissionView()
+                sub_view.id = submission.id if hasattr(submission, "id") else 0
+                sub_view.grade = str(submission.grade) if hasattr(submission, "grade") else ""
+                sub_view.raw_score = str(submission.score) if hasattr(submission, "score") else ""
+                sub_view.total_possible_points = str(assignment.points_possible) if hasattr(assignment, "points_possible") else ""
+                sub_view.submission_comments = str(submission.submission_comments) if hasattr(submission, "submission_comments") else ""
+                sub_view.attempt = submission.attempt if hasattr(submission, "attempt") and submission.attempt is not None else 0
+                sub_view.user_id = str(submission.user_id) if hasattr(submission, "user_id") else ""
+                sub_view.preview_url = str(submission.preview_url) if hasattr(submission, "preview_url") else ""
+                sub_view.ext_url = str(submission.url) if hasattr(submission, "url") else ""
+
+                try:
+                    submission.attachments
+                except AttributeError:
+                    print('No attachments')
+                else:
+                    for attachment in submission.attachments:
+                        attach_view = AttachmentView()
+                        attach_view.url = attachment.url
+                        attach_view.id = attachment.id
+                        attach_view.filename = attachment.filename
+                        sub_view.attachments.append(attach_view)
+                assignment_view.submissions.append(sub_view)
+        except Exception as e:
+            raise
+            # print("Skipping submission that gave the following error:")
+            # print(e)
+
+        assignment_views.append(assignment_view)
+
+    return assignment_views
+
+
+def find_course_announcements(course):
+    announcement_views = []
+
+    # try:
+    announcements = list(course.get_discussion_topics(only_announcements=True))
+
+    for announcement in tqdm(announcements, desc='Fetching Announcements'):
+        discussion_view = get_discussion_view(announcement)
+
+        announcement_views.append(discussion_view)
+    # except Exception as e:
+    #     print("Skipping announcement that gave the following error:")
+    #     print(e)
+
+    return announcement_views
+
+
+def get_discussion_view(discussion_topic):
+    # Create discussion view
+    discussion_view = DiscussionView()
+    discussion_view.id = discussion_topic.id if hasattr(discussion_topic, "id") else 0
+    discussion_view.title = str(discussion_topic.title).replace(' ', ' ') if hasattr(discussion_topic, "title") else ""
+    discussion_view.author = str(discussion_topic.user_name) if hasattr(discussion_topic, "user_name") else ""
+    discussion_view.posted_date = discussion_topic.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(discussion_topic, "created_at_date") else ""
+    discussion_view.body = str(discussion_topic.message) if hasattr(discussion_topic, "message") else ""
+    discussion_view.url = str(discussion_topic.html_url) if hasattr(discussion_topic, "html_url") else ""
+
+    # Keeps track of how many topic_entries there are.
+    topic_entries_counter = 0
+
+    # Topic entries
+    if hasattr(discussion_topic, "discussion_subentry_count") and discussion_topic.discussion_subentry_count > 0:
+        # Need to get replies to entries recursively?
+        discussion_topic_entries = discussion_topic.get_topic_entries()
+        try:
+            for topic_entry in discussion_topic_entries:
+                topic_entries_counter += 1
+
+                # Create new discussion view for the topic_entry
+                topic_entry_view = TopicEntryView()
+                topic_entry_view.id = topic_entry.id if hasattr(topic_entry, "id") else 0
+                topic_entry_view.author = str(topic_entry.user_name) if hasattr(topic_entry, "user_name") else ""
+                topic_entry_view.posted_date = topic_entry.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_entry, "created_at_date") else ""
+                topic_entry_view.body = str(topic_entry.message) if hasattr(topic_entry, "message") else ""
+
+                # Get this topic's replies
+                topic_entry_replies = topic_entry.get_replies()
+
+                try:
+                    for topic_reply in topic_entry_replies:
+                        # Create new topic reply view
+                        topic_reply_view = TopicReplyView()
+                        topic_reply_view.id = topic_reply.id if hasattr(topic_reply, "id") else 0
+                        topic_reply_view.author = str(topic_reply.user_name) if hasattr(topic_reply, "user_name") else ""
+                        topic_reply_view.posted_date = topic_reply.created_at_date.strftime("%B %d, %Y %I:%M %p") if hasattr(topic_reply, "created_at_date") else ""
+                        topic_reply_view.message = str(topic_reply.message) if hasattr(topic_reply, "message") else ""
+                        topic_entry_view.topic_replies.append(topic_reply_view)
+                except Exception as e:
+                    print("Tried to enumerate discussion topic entry replies but received the following error:")
+                    print(e)
+
+                discussion_view.topic_entries.append(topic_entry_view)
+        except Exception as e:
+            print("Tried to enumerate discussion topic entries but received the following error:")
+            print(e)
+
+    # Amount of pages.
+    # Typically 50 topic entries are stored on a page before it creates another page.
+    discussion_view.amount_pages = int(topic_entries_counter / 50) + 1
+
+    return discussion_view
+
+
+def find_course_discussions(course):
+    discussion_views = []
+
+    # try:
+    discussion_topics = list(course.get_discussion_topics())
+
+    for discussion_topic in tqdm(discussion_topics, desc='Fetching Discussions'):
+        discussion_view = get_discussion_view(discussion_topic)
+        discussion_views.append(discussion_view)
+    # except Exception as e:
+    #     print("Skipping discussion that gave the following error:")
+    #     print(e)
+
+    return discussion_views
@@ -0,0 +1,54 @@
+import os
+import string
+
+
+def make_valid_filename(input_str):
+    if not input_str:
+        return input_str
+
+    # Remove invalid characters
+    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
+    input_str = input_str.replace("+", " ")  # Canvas default for spaces
+    input_str = input_str.replace(":", "-")
+    input_str = input_str.replace("/", "-")
+    input_str = "".join(c for c in input_str if c in valid_chars)
+
+    # Remove leading and trailing whitespace
+    input_str = input_str.lstrip().rstrip()
+
+    # Remove trailing periods
+    input_str = input_str.rstrip(".")
+
+    return input_str
+
+
+def make_valid_folder_path(input_str):
+    # Remove invalid characters
+    valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits)
+    input_str = input_str.replace("+", " ")  # Canvas default for spaces
+    input_str = input_str.replace(":", "-")
+    input_str = "".join(c for c in input_str if c in valid_chars)
+
+    # Remove leading and trailing whitespace, separators
+    input_str = input_str.lstrip().rstrip().strip("/").strip("\\")
+
+    # Remove trailing periods
+    input_str = input_str.rstrip(".")
+
+    # Replace path separators with OS default
+    input_str = input_str.replace("/", os.sep)
+
+    return input_str
+
+
+def shorten_file_name(string, shorten_by) -> str:
+    if not string or shorten_by <= 0:
+        return string
+
+    # Shorten string by specified value + 1 for "-" to indicate incomplete file name (trailing periods not allowed)
+    string = string[:len(string) - (shorten_by + 1)]
+
+    string = string.rstrip().rstrip(".").rstrip("-")
+    string += "-"
+
+    return string
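Worked examples for the helpers above (illustrative only; the inputs are invented, and the expected outputs follow from the code):

from stuff.helpers import make_valid_filename, shorten_file_name  # assumes the stuff package is on sys.path

# "+" becomes a space, ":" and "/" become "-", and the trailing period is dropped
print(make_valid_filename("Week 1: HW/Quiz+Answers."))  # Week 1- HW-Quiz Answers

# Callers pass len(name) - MAX_FOLDER_NAME_SIZE, so a name at or under the cap
# arrives with shorten_by <= 0 and is returned unchanged
print(shorten_file_name("short-name", -60))  # short-name

# Over the cap: shorten_by + 1 characters are cut, then "-" marks the truncation
print(shorten_file_name("introduction-to-canvas-exporting", 10))  # introduction-to-canva-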
@@ -0,0 +1,106 @@
+from stuff.helpers import make_valid_filename
+
+
+class ModuleItemView:
+    def __init__(self):
+        self.id = 0
+        self.title = ""
+        self.content_type = ""
+        self.url = ""
+        self.external_url = ""
+
+
+class ModuleView:
+    def __init__(self):
+        self.id = 0
+        self.name = ""
+        self.items = []
+
+
+class PageView:
+    def __init__(self):
+        self.id = 0
+        self.title = ""
+        self.body = ""
+        self.created_date = ""
+        self.last_updated_date = ""
+
+
+class TopicReplyView:
+    def __init__(self):
+        self.id = 0
+        self.author = ""
+        self.posted_date = ""
+        self.body = ""
+
+
+class TopicEntryView:
+    def __init__(self):
+        self.id = 0
+        self.author = ""
+        self.posted_date = ""
+        self.body = ""
+        self.topic_replies = []
+
+
+class DiscussionView:
+    def __init__(self):
+        self.id = 0
+        self.title = ""
+        self.author = ""
+        self.posted_date = ""
+        self.body = ""
+        self.topic_entries = []
+        self.url = ""
+        self.amount_pages = 0
+
+
+class SubmissionView:
+    def __init__(self):
+        self.id = 0
+        self.attachments = []
+        self.grade = ""
+        self.raw_score = ""
+        self.submission_comments = ""
+        self.total_possible_points = ""
+        self.attempt = 0
+        self.user_id = "no-id"
+        self.preview_url = ""
+        self.ext_url = ""
+
+
+class AttachmentView:
+    def __init__(self):
+        self.id = 0
+        self.filename = ""
+        self.url = ""
+
+
+class AssignmentView:
+
+    def __init__(self):
+        self.id = 0
+        self.title = ""
+        self.description = ""
+        self.assigned_date = ""
+        self.due_date = ""
+        self.submissions = []
+        self.html_url = ""
+        self.ext_url = ""
+        self.updated_url = ""
+
+
+class CourseView:
+    def __init__(self, course):
+        self.course_id = course.id if hasattr(course, "id") else 0
+        self.term = make_valid_filename(course.term["name"] if hasattr(course, "term") and "name" in course.term.keys() else "")
+        self.course_code = make_valid_filename(course.course_code if hasattr(course, "course_code") else "")
+        self.name = course.name if hasattr(course, "name") else ""
+
+        self.course_code = self.course_code.replace(' ', ' ')
+        self.name = self.name.replace(' ', ' ')
+
+        self.assignments = []
+        self.announcements = []
+        self.discussions = []
+        self.modules = []
@@ -0,0 +1,26 @@
+from subprocess import run
+
+SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
+CHROME_PATH = "/usr/bin/chromium-browser"
+
+
+def add_quotes(s):
+    return "\"" + s.strip("\"") + "\""
+
+
+def download_page(url, cookies_path, output_path, output_name_template=""):
+    args = [
+        add_quotes(SINGLEFILE_BINARY_PATH),
+        "--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
+        "--browser-cookies-file=" + add_quotes(cookies_path),
+        "--output-directory=" + add_quotes(output_path),
+        add_quotes(url)
+    ]
+
+    if output_name_template != "":
+        args.append("--filename-template=" + add_quotes(output_name_template))
+
+    try:
+        run("node " + " ".join(args), shell=True)
+    except Exception as e:
+        print("Was not able to save the URL " + url + " using singlefile. The reported error was " + e.strerror)
@@ -0,0 +1,79 @@
+import os
+from pathlib import Path
+
+from stuff.singlefile import download_page
+from stuff.const import MAX_FOLDER_NAME_SIZE
+from stuff.download import download_file
+from stuff.get_canvas import get_extra_assignment_files
+from stuff.helpers import make_valid_filename, shorten_file_name
+
+
+def download_module_item(module, item, modules_dir, cookies_path):
+    # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
+    # A change would also have to be made in findCourseModules(course, course_view)
+    module_name = make_valid_filename(str(module.name))
+    module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
+    items_dir = os.path.join(modules_dir, module_name)
+
+    if item.url != "":
+        if not os.path.exists(items_dir):
+            os.makedirs(items_dir)
+
+        filename = make_valid_filename(str(item.title)) + ".html"
+        module_item_dir = os.path.join(items_dir, filename)
+
+        # Download the module page.
+        if not os.path.exists(module_item_dir):
+            download_page(item.url, cookies_path, items_dir, filename)
+
+
+def download_assignment(cookies_path, cookie_jar, base_assign_dir, assignment):
+    assignment_title = make_valid_filename(str(assignment.title))
+    assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
+    assign_dir = os.path.join(base_assign_dir, assignment_title)
+
+    if assignment.html_url != "":
+        if not os.path.exists(assign_dir):
+            os.makedirs(assign_dir)
+
+        assignment_page_path = os.path.join(assign_dir, "assignment.html")
+
+        if not os.path.exists(assignment_page_path):
+            download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
+
+    extra_files = get_extra_assignment_files(assignment.description, cookie_jar)
+    for name, url in extra_files:
+        download_file(url, Path(assign_dir, name), cookie_jar)
+
+    for submission in assignment.submissions:
+        download_submission(assignment, submission, assign_dir, cookies_path)
+
+
+def download_submission(assignment, submission, assign_dir, cookies_path):
+    submission_dir = assign_dir
+
+    if len(assignment.submissions) != 1:
+        submission_dir = os.path.join(assign_dir, str(submission.user_id))
+
+    if submission.preview_url != "":
+        if not os.path.exists(submission_dir):
+            os.makedirs(submission_dir)
+
+        submission_page_dir = os.path.join(submission_dir, "submission.html")
+
+        if not os.path.exists(submission_page_dir):
+            download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
+
+    if (submission.attempt != 1 and assignment.updated_url != "" and assignment.html_url != ""
+            and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
+        submission_dir = os.path.join(assign_dir, "attempts")
+
+        if not os.path.exists(submission_dir):
+            os.makedirs(submission_dir)
+
+        for i in range(submission.attempt):
+            filename = "attempt_" + str(i + 1) + ".html"
+            submission_page_attempt_dir = os.path.join(submission_dir, filename)
+
+            if not os.path.exists(submission_page_attempt_dir):
+                download_page(assignment.updated_url + "/history?version=" + str(i + 1), cookies_path, submission_dir, filename)
@@ -0,0 +1,11 @@
+from http.cookiejar import MozillaCookieJar
+
+import requests
+
+s = requests.Session()
+cookies = MozillaCookieJar('cookies-canvas-uccs-edu.txt')
+cookies.load(ignore_discard=True, ignore_expires=True)
+for cookie in cookies:
+    s.cookies.set(cookie.name, cookie.value)
+r = s.get('https://canvas.uccs.edu/api/v1/courses/146797/files/8232290')
+print(r.text)