better output handling

This commit is contained in:
Cyberes 2023-02-02 20:49:24 -07:00
parent 6d1a092f83
commit ad2ebe230f
1 changed file with 27 additions and 10 deletions

View File

@ -32,7 +32,7 @@ urlRegex = re.compile(r'^(?:http|ftp)s?://' # http:// or https://
parser = argparse.ArgumentParser()
parser.add_argument('file', help='URL to download or path of a file containing the URLs of the videos to download.')
parser.add_argument('output', help='Output directory. Ignored paths specified in a YAML file.')
# Optional: the text-file and single-URL input modes insist on it later, while a YAML input carries its own paths.
parser.add_argument('--output', required=False, help='Output directory. Ignored if paths are specified in a YAML file.')
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
@ -46,13 +46,23 @@ parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', hel
parser.add_argument('--ratelimit-sleep', type=int, default=5, help='How many seconds to sleep to prevent rate-limiting.')
# Adjacent string literals are concatenated verbatim, so the first piece must
# end with a space or the two sentences run together in the --help output.
parser.add_argument(
    '--input-datatype',
    choices=['auto', 'txt', 'yaml'],
    default='auto',
    help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line. '
         'If it is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.',
)
# The rest of the script reads this value as ``args.log_dir``. Left alone,
# argparse derives the attribute name from the flag (``logs_dir``), which would
# raise AttributeError on first access, so the destination is pinned explicitly.
parser.add_argument('--logs-dir', dest='log_dir', default=None, help='Where to store the logs. Must be set when --output is not.')
args = parser.parse_args()
if args.threads <= 0:
print("Can't have 0 threads!")
sys.exit(1)
args.output = resolve_path(args.output)
# Resolve the output directory only when one was supplied.
if args.output:
args.output = resolve_path(args.output)
# Choose the log directory: an explicit one wins, otherwise fall back to <output>/logs.
# NOTE(review): this reads args.log_dir, so the --logs-dir option has to be stored under that attribute name — confirm.
if args.log_dir:
args.log_dir = resolve_path(args.log_dir)
elif not args.output and not args.log_dir:
# Neither directory was given, so there is nowhere to put the logs.
print('Must set --logs-dir when --output is not.')
sys.exit(1)
else:
# Reaching this branch means args.output is set, so the logs go in a 'logs' folder beneath it.
args.log_dir = args.output / 'logs'
args.download_cache_file_directory = resolve_path(args.download_cache_file_directory)
log_time = time.time()
@ -72,6 +82,9 @@ if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yam
print('Failed to load config file, error:', e)
sys.exit(1)
elif args.input_datatype == 'txt' or (re.match(urlRegex, input_file[0]) and args.input_datatype == 'auto'):
if not args.output:
print('You must specify an output path with --output when the input datatype is a text file.')
sys.exit(1)
url_list[str(args.output)] = input_file
else:
print('Unknown file type:', args.input_datatype)
@ -85,6 +98,9 @@ if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yam
print(f'Not a url:', item)
sys.exit(1)
else:
if not args.output:
print('You must specify an output path with --output when the input is a URL.')
sys.exit(1)
url_list[str(args.output)] = [args.file]
# Create directories AFTER loading the file
@ -106,13 +122,12 @@ if args.rm_cache:
if args.daemon:
print('Running in daemon mode.')
log_dir = args.output / 'logs'
create_directories(args.output, log_dir)
create_directories(args.log_dir)
# TODO: log file rotation https://www.blog.pythonlibrary.org/2014/02/11/python-how-to-create-rotating-logs/
# TODO: log to one file instead of one for each run
file_logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
video_error_logger = setup_file_logger('youtube_dl_video_errors', log_dir / f'youtube_dl-errors-{int(log_time)}.log', level=logging.INFO)
file_logger = setup_file_logger('youtube_dl', args.log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
video_error_logger = setup_file_logger('youtube_dl_video_errors', args.log_dir / f'youtube_dl-errors-{int(log_time)}.log', level=logging.INFO)
logger = get_silent_logger('yt-dl', silent=not args.daemon)
@ -187,9 +202,9 @@ class ytdl_logger(object):
# https://github.com/yt-dlp/yt-dlp#embedding-examples
base_outtempl = '[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s].%(ext)s' # leading dash can cause issues due to bash args so we surround the variables in brackets
ydl_opts = {
'format': f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
'outtmpl': f'{args.output}/[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s].%(ext)s', # leading dash can cause issues due to bash args so we surround the variables in brackets
'merge_output_format': 'mkv',
'logtostderr': True,
'embedchapters': True,
@ -214,8 +229,7 @@ ydl_opts = {
],
}
main_opts = dict(ydl_opts, **{'logger': ytdl_logger()})
yt_dlp = ydl.YDL(main_opts)
yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': ytdl_logger()}))
url_count = 0
for k, v in url_list.items():
@ -279,6 +293,9 @@ while True:
if not args.ignore_downloaded:
playlist_bar.update(len(downloaded_videos))
# Work on a shallow copy so the shared ydl_opts dict is not modified when the
# output template is pointed at this playlist's directory.
playlist_ydl_opts = ydl_opts.copy()
# No trailing comma here: it would turn the value into a one-element tuple
# instead of the string template that the 'outtmpl' option expects.
playlist_ydl_opts['outtmpl'] = f'{output_path}/{base_outtempl}'
if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded
with Pool(processes=args.threads) as pool:
if sys.stdout.isatty():
@ -286,7 +303,7 @@ while True:
# OSError: [Errno 25] Inappropriate ioctl for device
status_bar.set_description_str('=' * os.get_terminal_size()[0])
logger.info('Starting downloads...')
for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': ydl_opts, 'output_dir': Path(output_path), }) for video in download_queue)):
for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), }) for video in download_queue)):
# Save the video ID to the file
if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id'])