fix path issues, clean up, try to exclude already downloaded pages
This commit is contained in:
parent
fe96f19eca
commit
c1988263d6
|
@ -1,6 +1,7 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import jsonpickle
|
import jsonpickle
|
||||||
import yaml
|
import yaml
|
||||||
|
@ -17,7 +18,7 @@ with open("credentials.yaml", 'r') as f:
|
||||||
API_URL = credentials["API_URL"]
|
API_URL = credentials["API_URL"]
|
||||||
API_KEY = credentials["API_KEY"]
|
API_KEY = credentials["API_KEY"]
|
||||||
USER_ID = credentials["USER_ID"]
|
USER_ID = credentials["USER_ID"]
|
||||||
COOKIES_PATH = credentials["COOKIES_PATH"]
|
COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).expanduser().resolve())
|
||||||
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
|
COOKIE_JAR = MozillaCookieJar(COOKIES_PATH)
|
||||||
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
|
COOKIE_JAR.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,7 @@ def download_course_files(course, course_view):
|
||||||
try:
|
try:
|
||||||
files = list(course.get_files())
|
files = list(course.get_files())
|
||||||
except canvasapi.exceptions.Forbidden:
|
except canvasapi.exceptions.Forbidden:
|
||||||
print('Files view disabled for this course.')
|
print('Files view is disabled for this course.')
|
||||||
return
|
return
|
||||||
|
|
||||||
for file in tqdm(files, desc='Downloading Files'):
|
for file in tqdm(files, desc='Downloading Files'):
|
||||||
|
|
|
@ -6,6 +6,9 @@ def make_valid_filename(input_str):
|
||||||
if not input_str:
|
if not input_str:
|
||||||
return input_str
|
return input_str
|
||||||
|
|
||||||
|
# Make sure we have a string and not PosixPath
|
||||||
|
input_str = str(input_str)
|
||||||
|
|
||||||
# Remove invalid characters
|
# Remove invalid characters
|
||||||
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
||||||
input_str = input_str.replace("+", " ") # Canvas default for spaces
|
input_str = input_str.replace("+", " ") # Canvas default for spaces
|
||||||
|
@ -23,6 +26,7 @@ def make_valid_filename(input_str):
|
||||||
|
|
||||||
|
|
||||||
def make_valid_folder_path(input_str):
|
def make_valid_folder_path(input_str):
|
||||||
|
input_str = str(input_str)
|
||||||
# Remove invalid characters
|
# Remove invalid characters
|
||||||
valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits)
|
valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits)
|
||||||
input_str = input_str.replace("+", " ") # Canvas default for spaces
|
input_str = input_str.replace("+", " ") # Canvas default for spaces
|
||||||
|
@ -41,14 +45,15 @@ def make_valid_folder_path(input_str):
|
||||||
return input_str
|
return input_str
|
||||||
|
|
||||||
|
|
||||||
def shorten_file_name(string, shorten_by) -> str:
|
def shorten_file_name(input_string, shorten_by) -> str:
|
||||||
if not string or shorten_by <= 0:
|
if not input_string or shorten_by <= 0:
|
||||||
return string
|
return input_string
|
||||||
|
input_string = str(input_string)
|
||||||
|
|
||||||
# Shorten string by specified value + 1 for "-" to indicate incomplete file name (trailing periods not allowed)
|
# Shorten string by specified value + 1 for "-" to indicate incomplete file name (trailing periods not allowed)
|
||||||
string = string[:len(string) - (shorten_by + 1)]
|
input_string = input_string[:len(input_string) - (shorten_by + 1)]
|
||||||
|
|
||||||
string = string.rstrip().rstrip(".").rstrip("-")
|
input_string = input_string.rstrip().rstrip(".").rstrip("-")
|
||||||
string += "-"
|
input_string += "-"
|
||||||
|
|
||||||
return string
|
return input_string
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
from pathlib import Path
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
|
|
||||||
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
|
SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file"
|
||||||
|
@ -5,12 +6,16 @@ CHROME_PATH = "/usr/bin/chromium-browser"
|
||||||
|
|
||||||
|
|
||||||
def add_quotes(s):
|
def add_quotes(s):
|
||||||
return "\"" + s.strip("\"") + "\""
|
return "\"" + str(s).strip("\"") + "\""
|
||||||
|
|
||||||
|
|
||||||
def download_page(url, cookies_path, output_path, output_name_template=""):
|
def download_page(url, cookies_path, output_path, output_name_template=""):
|
||||||
# TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?'
|
# TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?'
|
||||||
|
|
||||||
|
if output_name_template and Path(output_path, output_name_template).exists():
|
||||||
|
print('exists')
|
||||||
|
return
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
add_quotes(SINGLEFILE_BINARY_PATH),
|
add_quotes(SINGLEFILE_BINARY_PATH),
|
||||||
"--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
|
"--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")),
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
requests
|
PyYAML==6.0.1
|
||||||
jsonpickle
|
beautifulsoup4==4.12.2
|
||||||
canvasapi
|
canvasapi==3.2.0
|
||||||
python-dateutil
|
jsonpickle==3.0.2
|
||||||
PyYAML
|
requests==2.31.0
|
||||||
tqdm
|
tqdm==4.66.1
|
||||||
bs4
|
python-dateutil==2.8.2
|
Loading…
Reference in New Issue