From c1988263d65042f23148cfc1bc972e3f82c72aeb Mon Sep 17 00:00:00 2001 From: Cyberes Date: Fri, 27 Oct 2023 18:59:58 -0600 Subject: [PATCH] fix path issues, clean up, try to exclude already downloaded pages --- export.py | 3 ++- module/download_canvas.py | 2 +- module/helpers.py | 19 ++++++++++++------- module/singlefile.py | 7 ++++++- requirements.txt | 14 +++++++------- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/export.py b/export.py index b96919a..f765760 100644 --- a/export.py +++ b/export.py @@ -1,6 +1,7 @@ import json import os from http.cookiejar import MozillaCookieJar +from pathlib import Path import jsonpickle import yaml @@ -17,7 +18,7 @@ with open("credentials.yaml", 'r') as f: API_URL = credentials["API_URL"] API_KEY = credentials["API_KEY"] USER_ID = credentials["USER_ID"] -COOKIES_PATH = credentials["COOKIES_PATH"] +COOKIES_PATH = str(Path(credentials["COOKIES_PATH"]).resolve().expanduser().absolute()) COOKIE_JAR = MozillaCookieJar(COOKIES_PATH) COOKIE_JAR.load(ignore_discard=True, ignore_expires=True) diff --git a/module/download_canvas.py b/module/download_canvas.py index 13b53ab..5a00c8b 100644 --- a/module/download_canvas.py +++ b/module/download_canvas.py @@ -24,7 +24,7 @@ def download_course_files(course, course_view): try: files = list(course.get_files()) except canvasapi.exceptions.Forbidden: - print('Files view disabled for this course.') + print('Files view is disabled for this course.') return for file in tqdm(files, desc='Downloading Files'): diff --git a/module/helpers.py b/module/helpers.py index 1508b7d..27844ae 100644 --- a/module/helpers.py +++ b/module/helpers.py @@ -6,6 +6,9 @@ def make_valid_filename(input_str): if not input_str: return input_str + # Make sure we have a string and not PosixPath + input_str = str(input_str) + # Remove invalid characters valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) input_str = input_str.replace("+", " ") # Canvas default for spaces @@ -23,6 +26,7 @@ def 
make_valid_filename(input_str): def make_valid_folder_path(input_str): + input_str = str(input_str) # Remove invalid characters valid_chars = "-_.()/ %s%s" % (string.ascii_letters, string.digits) input_str = input_str.replace("+", " ") # Canvas default for spaces @@ -41,14 +45,15 @@ def make_valid_folder_path(input_str): return input_str -def shorten_file_name(string, shorten_by) -> str: - if not string or shorten_by <= 0: - return string +def shorten_file_name(input_string, shorten_by) -> str: + if not input_string or shorten_by <= 0: + return input_string + input_string = str(input_string) # Shorten string by specified value + 1 for "-" to indicate incomplete file name (trailing periods not allowed) - string = string[:len(string) - (shorten_by + 1)] + input_string = input_string[:len(input_string) - (shorten_by + 1)] - string = string.rstrip().rstrip(".").rstrip("-") - string += "-" + input_string = input_string.rstrip().rstrip(".").rstrip("-") + input_string += "-" - return string + return input_string diff --git a/module/singlefile.py b/module/singlefile.py index e4e6084..a1a2f42 100644 --- a/module/singlefile.py +++ b/module/singlefile.py @@ -1,3 +1,4 @@ +from pathlib import Path from subprocess import run SINGLEFILE_BINARY_PATH = "./node_modules/single-file/cli/single-file" @@ -5,12 +6,16 @@ CHROME_PATH = "/usr/bin/chromium-browser" def add_quotes(s): - return "\"" + s.strip("\"") + "\"" + return "\"" + str(s).strip("\"") + "\"" def download_page(url, cookies_path, output_path, output_name_template=""): # TODO: we can probably safely exclude pages that match the regex r'/external_tools/retrieve\?' 
+ if output_name_template and Path(output_path, output_name_template).exists(): + print('exists') + return + args = [ add_quotes(SINGLEFILE_BINARY_PATH), "--browser-executable-path=" + add_quotes(CHROME_PATH.strip("\"")), diff --git a/requirements.txt b/requirements.txt index e282d92..36751f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -requests -jsonpickle -canvasapi -python-dateutil -PyYAML -tqdm -bs4 \ No newline at end of file +PyYAML==6.0.1 +beautifulsoup4==4.12.2 +canvasapi==3.2.0 +jsonpickle==3.0.2 +requests==2.31.0 +tqdm==4.66.1 +python-dateutil==2.8.2 \ No newline at end of file