diff --git a/README.md b/README.md index 9af7747..3045bc9 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Videos will be saved using this name format: | `--max-size` | | Max allowed size of a video in MB. Default: 1100. | | `--rm-cache` | `-r` | Delete the yt-dlp cache on start. | | `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. | -| `--daemon` | `-d` | Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep. | +| `--daemon` | `-d` | Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in `--sleep`. | | `--sleep` | | How many minutes to sleep when in daemon mode. | -| `--silent` | `-s` | Don't print any error messages to the console. | -| `--ignore-downloaded` | `-i` | Ignore videos that have been already downloaded and let youtube-dl handle everything. Videos will not be re-downloaded, but metadata will be updated. | \ No newline at end of file +| `--silent` | `-s` | Don't print any error messages to the console. Errors will still be logged in the log files. | +| `--ignore-downloaded` | `-i` | Ignore videos that have been already downloaded and let youtube-dl handle everything. Videos will not be re-downloaded, but metadata will be updated. 
| diff --git a/downloader.py b/downloader.py index 4d5c533..15c63f7 100755 --- a/downloader.py +++ b/downloader.py @@ -20,6 +20,7 @@ import ydl.yt_dlp as ydl from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger from process.threads import bar_eraser, download_video from ydl.files import create_directories, resolve_path +import re def signal_handler(sig, frame): @@ -31,12 +32,13 @@ def signal_handler(sig, frame): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) -urlRegex = re.compile(r'^(?:http|ftp)s?://' # http:// or https:// - r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... - r'localhost|' # localhost... - r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip - r'(?::\d+)?' # optional port - r'(?:/?|[/?]\S+)$', re.IGNORECASE) +url_regex = re.compile(r'^(?:http|ftp)s?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... + r'localhost|' # localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) +ansi_escape_regex = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') parser = argparse.ArgumentParser() parser.add_argument('file', help='URL to download or path of a file containing the URLs of the videos to download.') @@ -82,7 +84,7 @@ log_time = time.time() # Get the URLs of the videos to download. Is the input a URL or file? 
url_list = {} -if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yaml'): +if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'): args.file = resolve_path(args.file) if not args.file.exists(): print('Input file does not exist:', args.file) @@ -95,7 +97,7 @@ if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yam except yaml.YAMLError as e: print('Failed to load config file, error:', e) sys.exit(1) - elif args.input_datatype == 'txt' or (re.match(urlRegex, input_file[0]) and args.input_datatype == 'auto'): + elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'): if not args.output: print('You must specify an output path with --output when the input datatype is a text file.') sys.exit(1) @@ -108,7 +110,7 @@ if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yam # Verify each line in the file is a valid URL. for directory, urls in url_list.items(): for item in urls: - if not re.match(urlRegex, str(item)): + if not re.match(url_regex, str(item)): print(f'Not a url:', item) sys.exit(1) else: @@ -135,6 +137,8 @@ def do_update(): if args.rm_cache: subprocess.run('yt-dlp --rm-cache-dir', shell=True) +# TODO: compress old log files + if args.daemon: print('Running in daemon mode.') @@ -143,13 +147,13 @@ create_directories(args.log_dir) # TODO: log file rotation https://www.blog.pythonlibrary.org/2014/02/11/python-how-to-create-rotating-logs/ # TODO: log to one file instead of one for each run file_logger = setup_file_logger('youtube_dl', args.log_dir / f'{str(int(log_time))}.log', level=logging.INFO) -video_error_logger = setup_file_logger('youtube_dl_video_errors', args.log_dir / f'{int(log_time)}-errors.log', level=logging.INFO) +video_error_logger = setup_file_logger('video_errors', args.log_dir / f'{int(log_time)}-errors.log', level=logging.INFO) logger = get_silent_logger('yt-dl', silent=not args.daemon) def 
log_info_twice(msg): logger.info(msg) - file_logger.info(msg) + file_logger.info(ansi_escape_regex.sub('', msg)) log_info_twice('Starting process.') @@ -171,14 +175,24 @@ def load_existing_videos(): status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon, leave=False) -def log_bar(msg, level): - status_bar.write(f'[{level}] {msg}') +def log_bar(log_msg, level): + status_bar.write(f'[{level}] {log_msg}') if level == 'warning': - logger.warning(msg) + logger.warning(log_msg) elif level == 'error': - logger.error(msg) + logger.error(log_msg) else: - logger.info(msg) + logger.info(log_msg) + + +# def log_with_video_id(log_msg, video_id, level, logger_obj): +# log_msg = f'{video_id} - {log_msg}' +# if level == 'warning': +# logger_obj.warning(log_msg) +# elif level == 'error': +# logger_obj.error(log_msg) +# else: +# logger_obj.info(log_msg) def print_without_paths(msg): @@ -198,28 +212,35 @@ def print_without_paths(msg): class ytdl_logger(object): def debug(self, msg): - file_logger.debug(msg) + file_logger.debug(self.__clean_msg(msg)) # if msg.startswith('[debug] '): # pass if '[download]' not in msg: print_without_paths(msg) def info(self, msg): - file_logger.info(msg) + file_logger.info(self.__clean_msg(msg)) print_without_paths(msg) def warning(self, msg): - file_logger.warning(msg) + file_logger.warning(self.__clean_msg(msg)) log_bar(msg, 'warning') def error(self, msg): - file_logger.error(msg) + file_logger.error(self.__clean_msg(msg)) log_bar(msg, 'error') + def __clean_msg(self, msg): + return ansi_escape_regex.sub('', msg) + + +# TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Scripts-Type.md#archivist-scripts # https://github.com/yt-dlp/yt-dlp#embedding-examples base_outtempl = '[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s].%(ext)s' # leading dash can cause issues due to bash args so we surround the variables in brackets ydl_opts = { + # TODO: 
https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Details.md +    # https://old.reddit.com/r/DataHoarder/comments/c6fh4x/after_hoarding_over_50k_youtube_videos_here_is/ 'format': f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best', 'merge_output_format': 'mkv', 'logtostderr': True, @@ -333,19 +354,21 @@ while True: # Print stuff for line in result['video_error_logger_msg']: -            video_error_logger.info(line) -            file_logger.error(line) +            file_msg = f"{result['video_id']} - {ansi_escape_regex.sub('', line)}" +            term_msg = f"{result['video_id']} - {line}" +            video_error_logger.error(file_msg) +            file_logger.error(file_msg) encountered_errors += 1 if not args.silence_errors: if args.daemon: -                    logger.error(line) +                    logger.error(term_msg) else: -                    status_bar.write(line) +                    status_bar.write(term_msg) if len(result['video_error_logger_msg']): errored_videos += 1 if args.silence_errors and args.daemon: -                logger.error(f"{result['video_id']} failed due to error.") +                logger.error(f"{result['video_id']} - Failed due 
to error.") # for line in result['status_msg']: # playlist_bar.write(line) @@ -379,6 +402,23 @@ while True: # downloaded_videos = load_existing_videos() # reload the videos that have already been downloaded # Clean up the remaining bars. Have to close them in order. -eraser_exit.value = True -playlist_bar.close() -status_bar.close() +# These variables may be undefined so we will just ignore any errors. +# Not in one try/catch because we don't want to skip anything. +try: + eraser_exit.value = True +except NameError: + pass +except AttributeError: + pass +try: + playlist_bar.close() +except NameError: + pass +except AttributeError: + pass +try: + status_bar.close() +except NameError: + pass +except AttributeError: + pass diff --git a/process/threads.py b/process/threads.py index d97094f..9cceee8 100644 --- a/process/threads.py +++ b/process/threads.py @@ -108,7 +108,8 @@ def download_video(args) -> dict: output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors except Exception as e: output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}") - bar.update(100 - bar.n) + if locked: + bar.update(100 - bar.n) if locked: bar.close() bar_lock.release()