download user files, clean up code
This commit is contained in:
parent
76b2b6604e
commit
a27aa18f40
15
README.md
15
README.md
|
@ -2,12 +2,19 @@
|
||||||
|
|
||||||
Forked from https://github.com/davekats/canvas-student-data-export
|
Forked from https://github.com/davekats/canvas-student-data-export
|
||||||
|
|
||||||
Major rewrite and improvement.
|
Major changes:
|
||||||
|
|
||||||
|
- Reorganized the project structure.
|
||||||
|
- Refactored the code to make it more Pythonic.
|
||||||
|
- Added progress bars.
|
||||||
|
- Use threading where possible.
|
||||||
|
- Save assignment attachments.
|
||||||
|
- Download all user files (as seen in the file manager at `/files` on your Canvas platform.)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
The Canvas Student Data Export Tool can export nearly all of a student's data from Instructure Canvas Learning Management System (Canvas LMS).
|
The Canvas Student Data Export Tool can export nearly all of a student's data from Instructure Canvas Learning Management System (Canvas LMS).
|
||||||
This is useful when you are graduating or leaving your college or university, and would like to have a backup of all the data you had in canvas.
|
This is useful when you are graduating or leaving your college or university, and would like to have a backup of all the data you had in canvas. Also, some instructors disable the built-in export tool.
|
||||||
|
|
||||||
The tool exports all of the following data for each course:
|
The tool exports all of the following data for each course:
|
||||||
|
|
||||||
|
@ -28,6 +35,8 @@ pip install -r requirements.txt
|
||||||
npm install
|
npm install
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Make sure you have Chromium or Chrome installed. Currently, the executable path is hardcoded to `/usr/bin/chromium-browser` in `module/singlefile.py`. If you are not on Linux or do not use Chromium, you will need to change the path.
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
|
|
||||||
1. Get your Canvas API key by going to Canvas and navigating to `Account` > `Settings` > `Approved Integrations` > `New Access Token`
|
1. Get your Canvas API key by going to Canvas and navigating to `Account` > `Settings` > `Approved Integrations` > `New Access Token`
|
||||||
|
@ -52,3 +61,5 @@ Now, run the program:
|
||||||
```shell
|
```shell
|
||||||
python export.py
|
python export.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The folder `./output` will be created and your data downloaded to this path.
|
||||||
|
|
23
export.py
23
export.py
|
@ -10,6 +10,7 @@ from module.const import COURSES_TO_SKIP, DL_LOCATION
|
||||||
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_home_page_html, download_course_html, download_course_module_pages, download_submission_attachments
|
from module.download_canvas import download_assignment_pages, download_course_announcement_pages, download_course_discussion_pages, download_course_files, download_course_home_page_html, download_course_html, download_course_module_pages, download_submission_attachments
|
||||||
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
|
from module.get_canvas import find_course_announcements, find_course_assignments, find_course_discussions, find_course_modules, find_course_pages
|
||||||
from module.items import CourseView
|
from module.items import CourseView
|
||||||
|
from module.user_files import download_user_files
|
||||||
|
|
||||||
with open("credentials.yaml", 'r') as f:
|
with open("credentials.yaml", 'r') as f:
|
||||||
credentials = yaml.full_load(f)
|
credentials = yaml.full_load(f)
|
||||||
|
@ -37,13 +38,18 @@ if __name__ == "__main__":
|
||||||
if not os.path.exists(DL_LOCATION):
|
if not os.path.exists(DL_LOCATION):
|
||||||
os.makedirs(DL_LOCATION)
|
os.makedirs(DL_LOCATION)
|
||||||
|
|
||||||
print("\nConnecting to Canvas...")
|
print("Connecting to Canvas...")
|
||||||
canvas = Canvas(API_URL, API_KEY)
|
canvas = Canvas(API_URL, API_KEY)
|
||||||
|
|
||||||
|
print('\nDownloading user files...')
|
||||||
|
download_user_files(canvas, DL_LOCATION / 'User Files')
|
||||||
|
print('')
|
||||||
|
|
||||||
all_courses_views = []
|
all_courses_views = []
|
||||||
|
|
||||||
print("Getting list of all courses...")
|
print("Getting list of all courses...")
|
||||||
courses = canvas.get_courses(include="term")
|
courses = canvas.get_courses(include="term")
|
||||||
|
course_count = len(list(courses))
|
||||||
|
|
||||||
skip = set(COURSES_TO_SKIP)
|
skip = set(COURSES_TO_SKIP)
|
||||||
|
|
||||||
|
@ -59,12 +65,17 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
course_view = CourseView(course)
|
course_view = CourseView(course)
|
||||||
print(f"=== {course_view.term}: {course_view.name} ===")
|
print(f"=== {course_view.term}: {course_view.name} ===")
|
||||||
|
|
||||||
|
valid, r = course_view.test_course(API_URL, COOKIE_JAR)
|
||||||
|
if not valid:
|
||||||
|
print(f'Invalid course: {course_view.course_id} - {r}')
|
||||||
|
continue
|
||||||
|
|
||||||
course_view.assignments = find_course_assignments(course, USER_ID)
|
course_view.assignments = find_course_assignments(course, USER_ID)
|
||||||
course_view.announcements = find_course_announcements(course)
|
course_view.announcements = find_course_announcements(course)
|
||||||
course_view.discussions = find_course_discussions(course)
|
course_view.discussions = find_course_discussions(course)
|
||||||
course_view.pages = find_course_pages(course)
|
course_view.pages = find_course_pages(course)
|
||||||
course_view.modules = find_course_modules(course, course_view)
|
course_view.modules = find_course_modules(course, course_view)
|
||||||
|
|
||||||
all_courses_views.append(course_view)
|
all_courses_views.append(course_view)
|
||||||
|
|
||||||
download_course_files(course, course_view)
|
download_course_files(course, course_view)
|
||||||
|
@ -85,17 +96,13 @@ if __name__ == "__main__":
|
||||||
print("Exporting all course data...")
|
print("Exporting all course data...")
|
||||||
export_all_course_data(course_view)
|
export_all_course_data(course_view)
|
||||||
|
|
||||||
if len(courses) > 1:
|
if course_count > 1:
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
print("Exporting data from all courses combined as all_output.json")
|
# Remove elements from the course objects that can't be JSON serialized, then format it.
|
||||||
|
|
||||||
# Awful hack to make the JSON pretty. Decode it with Python stdlib json
|
|
||||||
# module then re-encode with indentation
|
|
||||||
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
|
json_str = json.dumps(json.loads(jsonpickle.encode(all_courses_views, unpicklable=False)), indent=4)
|
||||||
|
|
||||||
all_output_path = os.path.join(DL_LOCATION, "all_output.json")
|
all_output_path = os.path.join(DL_LOCATION, "all_output.json")
|
||||||
|
|
||||||
with open(all_output_path, "w") as out_file:
|
with open(all_output_path, "w") as out_file:
|
||||||
out_file.write(json_str)
|
out_file.write(json_str)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# Directory in which to download course information to (will be created if not
|
from pathlib import Path
|
||||||
# present)
|
|
||||||
DL_LOCATION = "./output"
|
# Directory in which to download course information to (will be created if not present)
|
||||||
|
DL_LOCATION = Path("./output").resolve().expanduser().absolute()
|
||||||
|
|
||||||
# List of Course IDs that should be skipped (need to be integers)
|
# List of Course IDs that should be skipped (need to be integers)
|
||||||
COURSES_TO_SKIP = [288290, 512033]
|
COURSES_TO_SKIP = [288290, 512033]
|
||||||
|
|
||||||
|
@ -9,4 +11,4 @@ DATE_TEMPLATE = "%B %d, %Y %I:%M %p"
|
||||||
# Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to prevent the chance of reaching the limit
|
# Max PATH length is 260 characters on Windows. 70 is just an estimate for a reasonable max folder name to prevent the chance of reaching the limit
|
||||||
# Applies to modules, assignments, announcements, and discussions
|
# Applies to modules, assignments, announcements, and discussions
|
||||||
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
|
# If a folder exceeds this limit, a "-" will be added to the end to indicate it was shortened ("..." not valid)
|
||||||
MAX_FOLDER_NAME_SIZE = 70
|
MAX_FOLDER_NAME_SIZE = 70
|
||||||
|
|
|
@ -3,12 +3,13 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
|
||||||
|
import canvasapi
|
||||||
import requests
|
import requests
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from module.singlefile import download_page
|
|
||||||
from module.const import DL_LOCATION, MAX_FOLDER_NAME_SIZE
|
from module.const import DL_LOCATION, MAX_FOLDER_NAME_SIZE
|
||||||
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
from module.helpers import make_valid_filename, make_valid_folder_path, shorten_file_name
|
||||||
|
from module.singlefile import download_page
|
||||||
from module.threading import download_assignment, download_module_item
|
from module.threading import download_assignment, download_module_item
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,8 +23,12 @@ def download_course_files(course, course_view):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
files = list(course.get_files())
|
files = list(course.get_files())
|
||||||
|
except canvasapi.exceptions.Forbidden:
|
||||||
|
print('Files view disabled for this course.')
|
||||||
|
return
|
||||||
|
|
||||||
for file in tqdm(files, desc='Downloading Files'):
|
for file in tqdm(files, desc='Downloading Files'):
|
||||||
|
try:
|
||||||
file_folder = course.get_folder(file.folder_id)
|
file_folder = course.get_folder(file.folder_id)
|
||||||
|
|
||||||
folder_dl_dir = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name))
|
folder_dl_dir = os.path.join(dl_dir, make_valid_folder_path(file_folder.full_name))
|
||||||
|
@ -35,10 +40,10 @@ def download_course_files(course, course_view):
|
||||||
|
|
||||||
# Download file if it doesn't already exist
|
# Download file if it doesn't already exist
|
||||||
if not os.path.exists(dl_path):
|
if not os.path.exists(dl_path):
|
||||||
print('Downloading: {}'.format(dl_path))
|
# print('Downloading: {}'.format(dl_path))
|
||||||
file.download(dl_path)
|
file.download(dl_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping file download that gave the following error: {e}")
|
tqdm.write(f"Skipping {file.display_name} - {e}")
|
||||||
|
|
||||||
|
|
||||||
def download_course_discussion_pages(api_url, course_view, cookies_path):
|
def download_course_discussion_pages(api_url, course_view, cookies_path):
|
||||||
|
@ -55,7 +60,7 @@ def download_course_discussion_pages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(discussion_list_dir):
|
if not os.path.exists(discussion_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/discussion_topics/", cookies_path, base_discussion_dir, "discussion_list.html")
|
||||||
|
|
||||||
for discussion in tqdm(list(course_view.discussions), desc='Downloading Discussion Pages'):
|
for discussion in tqdm(list(course_view.discussions), desc='Downloading Discussions'):
|
||||||
discussion_title = make_valid_filename(str(discussion.title))
|
discussion_title = make_valid_filename(str(discussion.title))
|
||||||
discussion_title = shorten_file_name(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE)
|
discussion_title = shorten_file_name(discussion_title, len(discussion_title) - MAX_FOLDER_NAME_SIZE)
|
||||||
discussion_dir = os.path.join(base_discussion_dir, discussion_title)
|
discussion_dir = os.path.join(base_discussion_dir, discussion_title)
|
||||||
|
@ -90,65 +95,6 @@ def download_assignment_pages(api_url, course_view, cookies_path, cookie_jar: Mo
|
||||||
if not os.path.exists(assignment_list_path):
|
if not os.path.exists(assignment_list_path):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/assignments/", cookies_path, base_assign_dir, "assignment_list.html")
|
||||||
|
|
||||||
# for assignment in tqdm(course_view.assignments, desc='Downloading Assignments'):
|
|
||||||
# assignment_title = make_valid_filename(str(assignment.title))
|
|
||||||
# assignment_title = shorten_file_name(assignment_title, len(assignment_title) - MAX_FOLDER_NAME_SIZE)
|
|
||||||
# assign_dir = os.path.join(base_assign_dir, assignment_title)
|
|
||||||
#
|
|
||||||
# # Download an html image of each assignment (includes assignment instructions and other stuff).
|
|
||||||
# # Currently, this will only download the main assignment page and not external pages, this is
|
|
||||||
# # because these external pages are given in a json format. Saving these would require a lot
|
|
||||||
# # more work then normal.
|
|
||||||
# if assignment.html_url != "":
|
|
||||||
# if not os.path.exists(assign_dir):
|
|
||||||
# os.makedirs(assign_dir)
|
|
||||||
#
|
|
||||||
# assignment_page_path = os.path.join(assign_dir, "assignment.html")
|
|
||||||
#
|
|
||||||
# # Download assignment page, this usually has instructions and etc.
|
|
||||||
# if not os.path.exists(assignment_page_path):
|
|
||||||
# download_page(assignment.html_url, cookies_path, assign_dir, "assignment.html")
|
|
||||||
#
|
|
||||||
# extra_files = get_extra_assignment_files(assignment.description, cookie_jar)
|
|
||||||
# if extra_files: # in an if statement so that we only show the bar when there's things to do.
|
|
||||||
# for name, url in tqdm(extra_files, desc='Downloading Additional Files', leave=False):
|
|
||||||
# download_file(url, Path(assign_dir, name), cookie_jar)
|
|
||||||
#
|
|
||||||
# for submission in assignment.submissions:
|
|
||||||
# submission_dir = assign_dir
|
|
||||||
#
|
|
||||||
# # If theres more then 1 submission, add unique id to download dir
|
|
||||||
# if len(assignment.submissions) != 1:
|
|
||||||
# submission_dir = os.path.join(assign_dir, str(submission.user_id))
|
|
||||||
#
|
|
||||||
# if submission.preview_url != "":
|
|
||||||
# if not os.path.exists(submission_dir):
|
|
||||||
# os.makedirs(submission_dir)
|
|
||||||
#
|
|
||||||
# submission_page_dir = os.path.join(submission_dir, "submission.html")
|
|
||||||
#
|
|
||||||
# # Download submission url, this is typically a more focused page
|
|
||||||
# if not os.path.exists(submission_page_dir):
|
|
||||||
# download_page(submission.preview_url, cookies_path, submission_dir, "submission.html")
|
|
||||||
#
|
|
||||||
# # If theres more then 1 attempt, save each attempt in attempts folder
|
|
||||||
# if (submission.attempt != 1 and assignment.updated_url != "" and assignment.html_url != ""
|
|
||||||
# and assignment.html_url.rstrip("/") != assignment.updated_url.rstrip("/")):
|
|
||||||
# submission_dir = os.path.join(assign_dir, "attempts")
|
|
||||||
#
|
|
||||||
# if not os.path.exists(submission_dir):
|
|
||||||
# os.makedirs(submission_dir)
|
|
||||||
#
|
|
||||||
# # Saves the attempts if multiple were taken, doesn't account for
|
|
||||||
# # different ID's however, as I wasnt able to find out what the url
|
|
||||||
# # for the specific id's attempts would be.
|
|
||||||
# for i in range(submission.attempt):
|
|
||||||
# filename = "attempt_" + str(i + 1) + ".html"
|
|
||||||
# submission_page_attempt_dir = os.path.join(submission_dir, filename)
|
|
||||||
#
|
|
||||||
# if not os.path.exists(submission_page_attempt_dir):
|
|
||||||
# download_page(assignment.updated_url + "/history?version=" + str(i + 1), cookies_path, submission_dir, filename)
|
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
download_func = partial(download_assignment, cookies_path, cookie_jar, base_assign_dir)
|
download_func = partial(download_assignment, cookies_path, cookie_jar, base_assign_dir)
|
||||||
list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
|
list(tqdm(executor.map(download_func, course_view.assignments), total=len(course_view.assignments), desc='Downloading Assignments'))
|
||||||
|
@ -267,30 +213,6 @@ def download_course_module_pages(api_url, course_view, cookies_path):
|
||||||
if not os.path.exists(module_list_dir):
|
if not os.path.exists(module_list_dir):
|
||||||
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
|
download_page(api_url + "/courses/" + str(course_view.course_id) + "/modules/", cookies_path, modules_dir, "modules_list.html")
|
||||||
|
|
||||||
# for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
|
|
||||||
# bar = tqdm(list(module.items), leave=False, desc=module.name)
|
|
||||||
# for item in module.items:
|
|
||||||
# # bar.set_postfix({'title': item.title})
|
|
||||||
#
|
|
||||||
# # If problems arise due to long pathnames, changing module.name to module.id might help, this can also be done with item.title
|
|
||||||
# # A change would also have to be made in findCourseModules(course, course_view)
|
|
||||||
# module_name = make_valid_filename(str(module.name))
|
|
||||||
# module_name = shorten_file_name(module_name, len(module_name) - MAX_FOLDER_NAME_SIZE)
|
|
||||||
# items_dir = os.path.join(modules_dir, module_name)
|
|
||||||
#
|
|
||||||
# if item.url != "":
|
|
||||||
# if not os.path.exists(items_dir):
|
|
||||||
# os.makedirs(items_dir)
|
|
||||||
#
|
|
||||||
# filename = make_valid_filename(str(item.title)) + ".html"
|
|
||||||
# module_item_dir = os.path.join(items_dir, filename)
|
|
||||||
#
|
|
||||||
# # Download the module page.
|
|
||||||
# if not os.path.exists(module_item_dir):
|
|
||||||
# download_page(item.url, cookies_path, items_dir, filename)
|
|
||||||
# bar.update()
|
|
||||||
# bar.close()
|
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||||
for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
|
for module in tqdm(list(course_view.modules), desc='Downloading Module Pages'):
|
||||||
bar = tqdm(list(module.items), leave=False, desc=module.name)
|
bar = tqdm(list(module.items), leave=False, desc=module.name)
|
||||||
|
|
|
@ -23,7 +23,7 @@ def find_course_modules(course, course_view):
|
||||||
try:
|
try:
|
||||||
modules = list(course.get_modules())
|
modules = list(course.get_modules())
|
||||||
|
|
||||||
for module in tqdm(modules, desc='Fetching Modules and Downloading Files'):
|
for module in tqdm(modules, desc='Downloading Module Files'):
|
||||||
module_view = ModuleView()
|
module_view = ModuleView()
|
||||||
module_view.id = module.id if hasattr(module, "id") else ""
|
module_view.id = module.id if hasattr(module, "id") else ""
|
||||||
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
module_view.name = str(module.name) if hasattr(module, "name") else ""
|
||||||
|
@ -62,7 +62,7 @@ def find_course_modules(course, course_view):
|
||||||
if not os.path.exists(module_file_path):
|
if not os.path.exists(module_file_path):
|
||||||
module_file.download(module_file_path)
|
module_file.download(module_file_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tqdm.write(f"Skipping module file download that gave the following error: {e}")
|
tqdm.write(f"Skipping module file download that gave the following error: {e} - {module_item}")
|
||||||
|
|
||||||
module_view.items.append(module_item_view)
|
module_view.items.append(module_item_view)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -88,9 +88,8 @@ def get_extra_assignment_files(html, cookie_jar: MozillaCookieJar):
|
||||||
extra_files = []
|
extra_files = []
|
||||||
for item in urls:
|
for item in urls:
|
||||||
r = s.get(item)
|
r = s.get(item)
|
||||||
if r.status_code == 404:
|
if r.status_code != 200:
|
||||||
continue
|
continue
|
||||||
r.raise_for_status()
|
|
||||||
j = r.json()
|
j = r.json()
|
||||||
extra_files.append((j['display_name'], j['url']))
|
extra_files.append((j['display_name'], j['url']))
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from module.helpers import make_valid_filename
|
from module.helpers import make_valid_filename
|
||||||
|
|
||||||
|
|
||||||
|
@ -104,3 +108,15 @@ class CourseView:
|
||||||
self.announcements = []
|
self.announcements = []
|
||||||
self.discussions = []
|
self.discussions = []
|
||||||
self.modules = []
|
self.modules = []
|
||||||
|
|
||||||
|
def test_course(self, base_url: str, cookie_jar: MozillaCookieJar):
|
||||||
|
s = requests.Session()
|
||||||
|
for cookie in cookie_jar:
|
||||||
|
s.cookies.set(cookie.name, cookie.value)
|
||||||
|
try:
|
||||||
|
r = s.get(f'{base_url}/api/v1/courses/{self.course_id}')
|
||||||
|
if not r.status_code == 200:
|
||||||
|
return False, r
|
||||||
|
return True, r
|
||||||
|
except Exception as e:
|
||||||
|
return False, e
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import canvasapi
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from module.helpers import make_valid_folder_path
|
||||||
|
|
||||||
|
|
||||||
|
def do_download(task):
    """Run a single download task.

    ``task`` is a ``(canvas_file, destination_path)`` pair. The destination's
    parent directory is created if it does not exist, then the Canvas file
    object is saved to that path.
    """
    canvas_file, destination = task
    destination.parent.mkdir(parents=True, exist_ok=True)
    canvas_file.download(destination)
|
||||||
|
|
||||||
|
|
||||||
|
def download_user_files(canvas: canvasapi.Canvas, base_path: str):
    """Download all of the current user's personal files (the ``/files``
    area of Canvas) into ``base_path``, preserving the folder structure.

    :param canvas: An authenticated ``canvasapi.Canvas`` client.
    :param base_path: Directory to download into (created as needed).
    """
    base_path = Path(base_path)
    user = canvas.get_current_user()

    # Canvas roots the personal file tree at "my files". Drop that prefix to
    # get each folder's path relative to base_path. NOTE: the original code
    # used str.lstrip('my files/'), which strips *characters* from that set,
    # not the prefix — it mangled names like "my files/syllabus" -> "abus".
    prefix = 'my files/'
    folders = []
    for folder in user.get_folders():
        full_name = folder.full_name
        if full_name == 'my files':
            # The root folder itself maps to base_path; nothing to create.
            continue
        if full_name.startswith(prefix):
            full_name = full_name[len(prefix):]
        if full_name:
            folders.append((folder, make_valid_folder_path(full_name)))

    # Build the full list of (file, destination) tasks up front so the
    # progress bar has an accurate total. Files that already exist locally
    # are skipped, matching the behavior of the course-file downloads.
    files = []
    for folder, folder_name in tqdm(folders, desc='Fetching User Files'):
        for file in folder.get_files():
            out_path = base_path / folder_name / file.display_name
            if not out_path.exists():
                files.append((file, out_path))

    # Fan the downloads out across worker threads; the GIL is released
    # during the blocking HTTP I/O, so this overlaps the waits.
    with ThreadPoolExecutor(max_workers=10) as executor:
        with tqdm(total=len(files), desc='Downloading User Files') as bar:
            futures = [executor.submit(do_download, task) for task in files]
            for _ in as_completed(futures):
                bar.update()
|
Loading…
Reference in New Issue