diff --git a/downloader.py b/downloader.py index 23d4c06..a72fee7 100755 --- a/downloader.py +++ b/downloader.py @@ -16,10 +16,10 @@ import yaml from appdirs import user_data_dir from tqdm.auto import tqdm -import ydl.yt_dlp as ydl from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger from process.threads import bar_eraser, download_video from ydl.files import create_directories, resolve_path +from ydl.yt_dlp import YDL def signal_handler(sig, frame): @@ -259,6 +259,7 @@ ydl_opts = { 'continuedl': False, 'addmetadata': True, 'writeinfojson': True, + 'verbose': args.verbose, 'postprocessors': [ {'key': 'FFmpegEmbedSubtitle'}, {'key': 'FFmpegMetadata', 'add_metadata': True}, @@ -270,7 +271,7 @@ ydl_opts = { # 'external_downloader_args': ['-j 32', '-s 32', '-x 16', '--file-allocation=none', '--optimize-concurrent-downloads=true', '--http-accept-gzip=true', '--continue=true'], } -yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': ytdl_logger()})) +yt_dlp = YDL(dict(ydl_opts, **{'logger': ytdl_logger()})) url_count = 0 for k, v in url_list.items(): @@ -317,7 +318,7 @@ while True: if args.daemon: print(msg) else: - status_bar.write(msg) + progress_bar.write(msg) download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s') playlist['entries'] = remove_duplicates_from_playlist(playlist['entries']) diff --git a/process/threads.py b/process/threads.py index 9cceee8..12eb903 100644 --- a/process/threads.py +++ b/process/threads.py @@ -3,6 +3,7 @@ import multiprocessing import os import sys import time +import traceback from multiprocessing import Manager from threading import Thread @@ -91,8 +92,9 @@ def download_video(args) -> dict: try: kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger yt_dlp = ydl.YDL(kwargs['ydl_opts']) + video = yt_dlp.sanitize_info(yt_dlp.extract_info(video['url'], download=False)) try: - base_path = os.path.splitext(yt_dlp.prepare_filename(yt_dlp.extract_info(video['url'], download=False)))[0] + base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0] except AttributeError: # Sometimes we won't be able to pull the video info so just use the video's ID. base_path = kwargs['output_dir'] / video['id'] @@ -107,7 +109,7 @@ def download_video(args) -> dict: else: output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors except Exception as e: - output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}") + output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}") if locked: bar.update(100 - bar.n) if locked: diff --git a/requirements.txt b/requirements.txt index 3775d92..4548bb7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ mergedeep numpy pyyaml appdirs -phantomjs \ No newline at end of file +phantomjs +scrapetube \ No newline at end of file diff --git a/ydl/yt_dlp.py b/ydl/yt_dlp.py index 68d54c6..0353c54 100644 --- a/ydl/yt_dlp.py +++ b/ydl/yt_dlp.py @@ -7,8 +7,11 @@ from mergedeep import merge class YDL: - def __init__(self, ydl_opts): - self.ydl_opts = ydl_opts + def __init__(self, ydl_opts: dict = None, extra_ydlp_opts: dict = None): + self.ydl_opts = ydl_opts if ydl_opts else {} + extra_ydlp_opts = extra_ydlp_opts if extra_ydlp_opts else {} + self.ydl_opts = merge(ydl_opts, extra_ydlp_opts) + self.ydl_opts['logger'] = self.ydl_opts.get('logger') self.yt_dlp = yt_dlp.YoutubeDL(ydl_opts) def get_formats(self, url: Union[str, Path]) -> tuple: @@ -30,10 +33,12 @@ class YDL: return tuple(sizes) def playlist_contents(self, url: str) -> Union[dict, bool]: - ydl_opts = merge({ + ydl_opts = { 'extract_flat': True, - 'skip_download': True - }, self.ydl_opts) + 'skip_download': True, + 'ignoreerrors': True, + 'logger': self.ydl_opts['logger'], + } with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.sanitize_info(ydl.extract_info(url, download=False)) if not info: @@ -41,7 +46,16 @@ class YDL: entries = [] if info['_type'] == 'playlist': if 'entries' in info.keys(): - entries = [x for x in info['entries']] + # When downloading a channel youtube-dl returns a playlist for videos and another for shorts. + # We need to combine all the videos into one list + for item in info['entries']: + if item['_type'] == 'video': + entries.append(item) + elif item['_type'] == 'playlist': + for video in ydl.sanitize_info(ydl.extract_info(item['webpage_url'], download=False))['entries']: + entries.append(video) + else: + raise ValueError(f"Unknown sub-media type: {item['_type']}") elif info['_type'] == 'video': # `info` doesn't seem to contain the `url` key so we'll add it manually. # If any issues arise in the future make sure to double check there isn't any weirdness going on here. @@ -69,6 +83,9 @@ class YDL: def process_info(self, *args, **kwargs): return self.yt_dlp.process_info(*args, **kwargs) + def sanitize_info(self, *args, **kwargs): + return self.yt_dlp.sanitize_info(*args, **kwargs) + def __call__(self, *args, **kwargs): return self.yt_dlp.download(*args, **kwargs)