diff --git a/downloader.py b/downloader.py index 66a83af..90c807e 100755 --- a/downloader.py +++ b/downloader.py @@ -5,7 +5,6 @@ import math import os import re import shutil -import signal import subprocess import sys import tempfile @@ -19,7 +18,7 @@ from appdirs import user_data_dir from tqdm.auto import tqdm from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger -from process.threads import bar_eraser, download_video +from process.threads import download_video, bar_eraser from ydl.files import create_directories, resolve_path from ydl.yt_dlp import YDL, update_ytdlp @@ -30,8 +29,8 @@ def signal_handler(sig, frame): sys.exit(0) -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) +# signal.signal(signal.SIGTERM, signal_handler) +# signal.signal(signal.SIGINT, signal_handler) url_regex = re.compile(r'^(?:http|ftp)s?://' # http:// or https:// r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... @@ -47,17 +46,23 @@ parser.add_argument('--output', required=False, help='Output directory. Ignored parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.') parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.') parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.') -parser.add_argument('--threads', type=int, default=(cpu_count() - 1), help=f'How many download processes to use. Default: number of CPU cores (for your machine, {cpu_count()}) - 1 = {cpu_count() - 1}') -parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.") +parser.add_argument('--threads', type=int, default=(cpu_count() - 1), + help=f'How many download processes to use. Default: number of CPU cores (for your machine, {cpu_count()}) - 1 = {cpu_count() - 1}') +parser.add_argument('--daemon', '-d', action='store_true', + help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.") parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.') -parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), help='The path to the directory to track downloaded videos. Defaults to your appdata path.') -parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.") -parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.') +parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), + help='The path to the directory to track downloaded videos. Defaults to your appdata path.') +parser.add_argument('--silence-errors', '-s', action='store_true', + help="Don't print any error messages to the console.") +parser.add_argument('--ignore-downloaded', '-i', action='store_true', + help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.') parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.') parser.add_argument('--ratelimit-sleep', type=int, default=5, help='How many seconds to sleep between items to prevent rate-limiting. Does not affect time between videos as you should be fine since it takes a few seconds to merge everything and clean up.') -parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto', help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line.' - 'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.') +parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto', + help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line.' + 'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.') parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.') parser.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--verify', '-z', action='store_true', help='Run ffprobe on the downloaded files.') @@ -275,7 +280,8 @@ ydl_opts = { 'merge_output_format': 'mkv', 'logtostderr': True, 'embedchapters': True, - 'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative. + 'writethumbnail': True, + # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative. 'embedthumbnail': True, 'embeddescription': True, 'writesubtitles': True, @@ -316,7 +322,8 @@ if not args.daemon: encountered_errors = 0 errored_videos = 0 -# The video progress bars have an issue where when a bar is closed it will shift its position back 1 then return to the correct position. +# The video progress bars have an issue where when a bar is closed it +# will shift its position back 1 then return to the correct position. # This thread will clear empty spots. if not args.daemon: eraser_exit = manager.Value(bool, False) @@ -326,7 +333,8 @@ already_erased_downloaded_tracker = False while True: # do_update() # this doesn't work very well. freezes - progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') + progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon, + bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') for output_path, urls in url_list.items(): for target_url in urls: logger.info('Fetching playlist...') @@ -350,7 +358,8 @@ while True: logger.info(msg) else: progress_bar.write(msg) - download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s') + download_archive_logger = setup_file_logger('download_archive', download_archive_file, + format_str='%(message)s') playlist['entries'] = remove_duplicates_from_playlist(playlist['entries']) @@ -366,7 +375,8 @@ while True: elif args.ignore_downloaded: download_queue.append(video) - playlist_bar = tqdm(total=len(playlist['entries']), position=1, desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False) + playlist_bar = tqdm(total=len(playlist['entries']), position=1, + desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False) if not args.ignore_downloaded: playlist_bar.update(len(downloaded_videos)) @@ -381,7 +391,13 @@ while True: status_bar.set_description_str('=' * os.get_terminal_size()[0]) logger.info('Starting downloads...') for result in pool.imap_unordered(download_video, - ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded, 'verify': args.verify}) for video in download_queue)): + ((video, { + 'bars': video_bars, + 'ydl_opts': playlist_ydl_opts, + 'output_dir': Path(output_path), + 'ignore_downloaded': args.ignore_downloaded, + 'verify': args.verify + }) for video in download_queue)): # Save the video ID to the file if result['downloaded_video_id']: download_archive_logger.info(result['downloaded_video_id']) diff --git a/process/threads.py b/process/threads.py index f2d8284..6b905b6 100644 --- a/process/threads.py +++ b/process/threads.py @@ -1,6 +1,7 @@ import math import multiprocessing import os +import random import subprocess import sys import time @@ -106,19 +107,41 @@ def download_video(args) -> dict: # Clean of forign languages video['title'] = unidecode(video['title']) - # Get a bar - locked = False + if len(kwargs['bars']): - while not locked: # We're going to wait until a bar is available for us to use. + bar_enabled = True + got_lock = False + while not got_lock: # Get a bar for item in kwargs['bars']: - if not is_manager_lock_locked(item[1]): - locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it. - offset = item[0] + if item[1].acquire(timeout=0.01): + got_lock = True + bar_offset = item[0] bar_lock = item[1] break + else: + time.sleep(random.uniform(0.1, 0.5)) kwargs['ydl_opts']['progress_hooks'] = [progress_hook] desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4))) - bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False) + bar = tqdm(total=100, position=bar_offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False) + else: + bar_enabled = False + + # got_lock = False + # # if len(kwargs['bars']): + # while not got_lock: # We're going to wait until a bar is available for us to use. + # for item in kwargs['bars']: + # # if not is_manager_lock_locked(item[1]): + # got_lock = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it. + # + # if got_lock: + # print('GOT LOCK:', video['id']) + # # Now that we've gotten the lock, set some variables related to the bar + # offset = item[0] + # bar_lock = item[1] + # break + # else: + # print('WAITING FOR LOCK:', video['id']) + # time.sleep(uniform(0.1, 0.9)) start_time = time.time() @@ -126,9 +149,14 @@ def download_video(args) -> dict: kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger yt_dlp = ydl.YDL(kwargs['ydl_opts']) video_n = yt_dlp.get_info(video['url']) + if not video_n: output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?') + if bar_enabled: + bar.close() + bar_lock.release() return output_dict + video_n['url'] = video['url'] video = video_n del video_n @@ -137,7 +165,10 @@ def download_video(args) -> dict: video['title'] = unidecode(video['title']) video['uploader'] = unidecode(video['uploader']) # now this info is present since we fetched it - video_filename = remove_special_chars_linux(ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False), special_chars=['/']) + # TODO: do we also need to remove the @ char? + video_filename = remove_special_chars_linux( + ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False), special_chars=['/'] + ) # Make sure the video title isn't too long while len(video_filename) >= name_max - 3: # -3 so that I can add ... @@ -166,6 +197,7 @@ def download_video(args) -> dict: error_code = y.download(video['url']) # yt_dlp = ydl.YDL(kwargs['ydl_opts']) # recreate the object with the correct logging path # error_code = yt_dlp(video['url']) # Do the download + if not error_code: elapsed = round(math.ceil(time.time() - start_time) / 60, 2) output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.") @@ -174,9 +206,10 @@ def download_video(args) -> dict: output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors except Exception: output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}") - if locked: + if bar_enabled: bar.update(100 - bar.n) - if locked: + + if bar_enabled: bar.close() bar_lock.release() return output_dict