diff --git a/downloader.py b/downloader.py index 0c3a2e8..179aefa 100755 --- a/downloader.py +++ b/downloader.py @@ -21,7 +21,7 @@ from tqdm.auto import tqdm from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger from process.threads import bar_eraser, download_video from ydl.files import create_directories, resolve_path -from ydl.yt_dlp import YDL, get_output_templ, update_ytdlp +from ydl.yt_dlp import YDL, update_ytdlp def signal_handler(sig, frame): @@ -60,6 +60,7 @@ parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default 'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.') parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.') parser.add_argument('--verbose', '-v', action='store_true') +parser.add_argument('--verify', '-z', action='store_true', help='Run ffprobe on the downloaded files.') args = parser.parse_args() if args.threads <= 0: @@ -369,7 +370,8 @@ while True: # OSError: [Errno 25] Inappropriate ioctl for device status_bar.set_description_str('=' * os.get_terminal_size()[0]) logger.info('Starting downloads...') - for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded}) for video in download_queue)): + for result in pool.imap_unordered(download_video, + ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded, 'verify': args.verify}) for video in download_queue)): # Save the video ID to the file if result['downloaded_video_id']: download_archive_logger.info(result['downloaded_video_id']) @@ -410,9 +412,25 @@ while True: for line in result['logger_msg']: log_info_twice(f"{result['video_id']} - {line}") + + # TODO: if no error launch a verify 
multiprocess + # if kwargs['verify']: + # try: + # info = yt_dlp.extract_info(video['url']) + # except Exception as e: + # output_dict['video_critical_err_msg'].append(f'Failed to verify video, extract_info failed: {e}') + # file_path = base_path + info['ext'] + # result = ffprobe(file_path) + # if not result[0]: + # output_dict['video_critical_err_msg'].append(f'Failed to verify video: {result[4]}') + playlist_bar.update() else: - status_bar.write(f"All videos already downloaded for '{playlist['title']}'.") + msg = f"All videos already downloaded for '{playlist['title']}'." + if args.daemon: + logger.info(msg) + else: + status_bar.write(msg) log_info_twice(f"Finished item: '{playlist['title']}' {target_url}") # Sleep a bit to prevent rate-limiting diff --git a/process/funcs.py b/process/funcs.py index 6144f63..9d92170 100644 --- a/process/funcs.py +++ b/process/funcs.py @@ -2,6 +2,7 @@ import logging import os import sys +import ffmpeg import psutil @@ -40,6 +41,21 @@ def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(a return logger +def ffprobe(filename): + try: + # stream = stream.output('pipe:', format="null") + # stream.run(capture_stdout=True, capture_stderr=True) + test = ffmpeg.probe(filename) + except Exception as e: + err = [] + for x in e.stderr.decode().split('\n'): + if x.strip(' ') != '': + err.append(x) + err_msg = err[-1].split(': ')[-1] + return False, filename, str(e), None, err_msg + return True, filename, None, test, None + + def get_silent_logger(name, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', silent: bool = True): logger = logging.getLogger(name) console = logging.StreamHandler() diff --git a/process/threads.py b/process/threads.py index 1edfef4..f54c6d4 100644 --- a/process/threads.py +++ b/process/threads.py @@ -5,6 +5,7 @@ import sys import time import traceback from multiprocessing import Manager +from pathlib import Path from threading import Thread import 
numpy as np @@ -108,14 +109,20 @@ def download_video(args) -> dict: video = video_n del video_n - try: - base_path = os.path.splitext(unidecode(yt_dlp.prepare_filename(video)))[0] - video['outtmpl'] = f"{kwargs['output_dir']}/{ydl.get_output_templ(title=unidecode(video['title']), uploader=unidecode(video['uploader']))}" # clean the filename - except AttributeError: - # Sometimes we won't be able to pull the video info so just use the video's ID. - base_path = kwargs['output_dir'] / video['id'] - video['outtmpl'] = f"{kwargs['output_dir']}/{ydl.get_output_templ()}" - ylogger = ytdl_logger(setup_file_logger(video['id'], str(base_path) + '.log')) + # Clean the strings of foreign languages + video['title'] = unidecode(video['title']) + video['uploader'] = unidecode(video['uploader']) + video_filename = ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False) + base_path = str(Path(kwargs['output_dir'], video_filename)) + kwargs['ydl_opts']['outtmpl'] = f"{base_path}.%(ext)s" + + # try: + # base_path = os.path.splitext(Path(kwargs['output_dir'], yt_dlp.prepare_filename(video)))[0] + # except AttributeError: + # # Sometimes we won't be able to pull the video info so just use the video's ID. 
+ # base_path = kwargs['output_dir'] / video['id'] + + ylogger = ytdl_logger(setup_file_logger(video['id'], base_path + '.log')) kwargs['ydl_opts']['logger'] = ylogger yt_dlp = ydl.YDL(kwargs['ydl_opts']) # recreate the object with the correct logging path error_code = yt_dlp(video['url']) # Do the download diff --git a/requirements.txt b/requirements.txt index 27449ed..caecef3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ numpy pyyaml appdirs phantomjs -unidecode \ No newline at end of file +unidecode +ffmpeg-python \ No newline at end of file diff --git a/ydl/yt_dlp.py b/ydl/yt_dlp.py index 30b7c67..58f528e 100644 --- a/ydl/yt_dlp.py +++ b/ydl/yt_dlp.py @@ -118,5 +118,5 @@ class ytdl_no_logger(object): return -def get_output_templ(title: str = None, uploader: str = None): - return f'[%(id)s] [{title if title else "%(title)s"}] [{uploader if uploader else "%(uploader)s"}] [%(uploader_id)s].%(ext)s' # leading dash can cause issues due to bash args so we surround the variables in brackets +def get_output_templ(video_id: str = None, title: str = None, uploader: str = None, uploader_id: str = None, include_ext: bool = True): + return f'[{video_id if video_id else "%(id)s"}] [{title if title else "%(title)s"}] [{uploader if uploader else "%(uploader)s"}] [{uploader_id if uploader_id else "%(uploader_id)s"}]{".%(ext)s" if include_ext else ""}' # leading dash can cause issues due to bash args so we surround the variables in brackets