diff --git a/downloader.py b/downloader.py
index ff12e52..528652b 100755
--- a/downloader.py
+++ b/downloader.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 import argparse
-import automated_youtube_dl.yt_dlp as ydl
 import logging.config
 import math
 import os
@@ -8,11 +7,14 @@ import re
 import subprocess
 import sys
 import time
-from automated_youtube_dl.files import create_directories, resolve_path
 from multiprocessing import Manager, Pool, cpu_count
+
+from tqdm.auto import tqdm
+
+import automated_youtube_dl.yt_dlp as ydl
+from automated_youtube_dl.files import create_directories, resolve_path
 from process.funcs import restart_program, setup_file_logger
 from process.threads import download_video
-from tqdm.auto import tqdm
 
 urlRegex = re.compile(
     r'^(?:http|ftp)s?://'  # http:// or https://
@@ -181,29 +183,47 @@ for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0,
     playlist_bar.total = len(playlist['entries'])
     playlist_bar.set_description(playlist['title'])
 
+    # Remove already downloaded files from the to-do list.
+    download_queue = []
+    for video in playlist['entries']:
+        if video['id'] not in download_archive:
+            download_queue.append(video)
+        else:
+            logger.info(f"{video['id']} already downloaded.")
+    playlist_bar.update(len(playlist['entries']) - len(download_queue))
+
     if args.backwards:
-        playlist['entries'].reverse()
+        download_queue.reverse()
 
-    with Pool(processes=args.threads) as pool:
-        status_bar.set_description_str('=' * os.get_terminal_size()[0])
-        for result in pool.imap_unordered(download_video,
-                                          ((video, {
-                                              'bars': video_bars,
-                                              'download_archive': download_archive,
-                                              'ydl_opts': thread_opts,
-                                          }) for video in playlist['entries'])):
-            if result['downloaded_video_id']:
-                download_archive_logger.info(result['downloaded_video_id'])
-            if len(result['video_error_logger_msg']):
-                for line in result['video_error_logger_msg']:
-                    video_error_logger.info(line)
-            if len(result['status_msg']):
-                for line in result['status_msg']:
-                    playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
-            if len(result['logger_msg']):
-                for line in result['logger_msg']:
-                    logger.info(line)
-            playlist_bar.update(1)
+    if len(download_queue):  # Don't mess with multiprocessing if the list is empty
+        with Pool(processes=args.threads) as pool:
+            status_bar.set_description_str('=' * os.get_terminal_size()[0])
+            for result in pool.imap_unordered(download_video,
+                                              ((video, {
+                                                  'bars': video_bars,
+                                                  'download_archive': download_archive,
+                                                  'ydl_opts': thread_opts,
+                                              }) for video in download_queue)):
+                if result['downloaded_video_id']:
+                    download_archive_logger.info(result['downloaded_video_id'])
+                if len(result['video_error_logger_msg']):
+                    for line in result['video_error_logger_msg']:
+                        video_error_logger.info(line)
+                if len(result['status_msg']):
+                    for line in result['status_msg']:
+                        playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
+                if len(result['logger_msg']):
+                    for line in result['logger_msg']:
+                        logger.info(line)
+                playlist_bar.update()
+    else:
+        playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'")
+        # playlist_bar.update(playlist_bar.total - playlist_bar.n)
 
     logger.info(f"Finished item: '{playlist['title']}' {target_url}")
-logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
+
+# Clean up the remaining bars. Have to close them in order.
+status_bar.set_description_str('\x1b[2KDone!')  # erase the status bar
+status_bar.refresh()
+playlist_bar.close()
+status_bar.close()
diff --git a/process/threads.py b/process/threads.py
index 87e4b27..2d4c6dd 100644
--- a/process/threads.py
+++ b/process/threads.py
@@ -21,6 +21,9 @@ def is_manager_lock_locked(lock) -> bool:
 
 
 def download_video(args) -> dict:
+    # Sleep for a little bit to space out the rush of workers flooding the bar locks.
+    # time.sleep(random.randint(1, 20) / 1000)
+
     def progress_hook(d):
         # downloaded_bytes and total_bytes can be None if the download hasn't started yet.
         if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'):
@@ -45,7 +48,6 @@ def download_video(args) -> dict:
 
     locked = False
     # We're going to wait until a bar is available for us to use.
-    # TODO: this isn't perfect. Sometimes only 3/4 downloaders start and I think the final one had its lock stolen by someone else.
    while not locked:
         for item in bars:
             if not is_manager_lock_locked(item[1]):
@@ -61,22 +63,22 @@
     output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}
     start_time = time.time()
 
-    if video['id'] in download_archive:
-        output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
-    else:
-        try:
-            error_code = yt_dlp(video['url'])  # Do the download
-            if not error_code:
-                download_archive.append(video['id'])
-                elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
-                output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
-                output_dict['downloaded_video_id'] = video['id']
-            else:
-                m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
-                output_dict['status_msg'].append(m)
-                output_dict['video_error_logger_msg'].append(m)
-        except Exception as e:
-            output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
-        bar.close()
-        bar_lock.release()
+    # if video['id'] in download_archive:
+    #     output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
+    # else:
+    try:
+        error_code = yt_dlp(video['url'])  # Do the download
+        if not error_code:
+            download_archive.append(video['id'])
+            elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
+            output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
+            output_dict['downloaded_video_id'] = video['id']
+        else:
+            m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
+            output_dict['status_msg'].append(m)
+            output_dict['video_error_logger_msg'].append(m)
+    except Exception as e:
+        output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
+    bar.close()
+    bar_lock.release()
     return output_dict
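Note: the downloader.py hunk above boils down to "filter the playlist against the download archive, then fan the remaining entries out to a process pool". Below is a minimal, self-contained sketch of that pattern, assuming a hypothetical download_stub() worker and fake playlist entries in place of process.threads.download_video, the tqdm bars, and the yt-dlp options that the real code threads through the args tuple.

# sketch_pool_queue.py -- hypothetical stand-alone example, not part of the repo.
from multiprocessing import Pool


def download_stub(args) -> dict:
    # Stand-in for process.threads.download_video(); pretend every download succeeds.
    video, _ydl_opts = args
    return {'downloaded_video_id': video['id'], 'logger_msg': [f"{video['id']} downloaded."]}


if __name__ == '__main__':
    download_archive = {'abc123'}  # IDs that are already on disk
    entries = [{'id': 'abc123'}, {'id': 'def456'}, {'id': 'ghi789'}]

    # Same idea as the new download_queue logic: drop archived IDs up front
    # so the pool never spends a worker on a video that is already done.
    download_queue = [v for v in entries if v['id'] not in download_archive]

    if download_queue:  # don't spin up a pool for an empty queue
        with Pool(processes=2) as pool:
            for result in pool.imap_unordered(download_stub,
                                              ((video, {}) for video in download_queue)):
                print(result['downloaded_video_id'])  # real code logs and bumps the bar here

Using imap_unordered means each result is handled as soon as any worker finishes, which is why the progress bar is advanced per result rather than per submitted job.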