fix stuck progress

This commit is contained in:
Cyberes 2023-05-12 15:33:04 -06:00
parent 623f3343b6
commit 71fc3001df
2 changed files with 76 additions and 27 deletions

View File

@ -5,7 +5,6 @@ import math
import os import os
import re import re
import shutil import shutil
import signal
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
@ -19,7 +18,7 @@ from appdirs import user_data_dir
from tqdm.auto import tqdm from tqdm.auto import tqdm
from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger
from process.threads import bar_eraser, download_video from process.threads import download_video, bar_eraser
from ydl.files import create_directories, resolve_path from ydl.files import create_directories, resolve_path
from ydl.yt_dlp import YDL, update_ytdlp from ydl.yt_dlp import YDL, update_ytdlp
@ -30,8 +29,8 @@ def signal_handler(sig, frame):
sys.exit(0) sys.exit(0)
signal.signal(signal.SIGTERM, signal_handler) # signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler) # signal.signal(signal.SIGINT, signal_handler)
url_regex = re.compile(r'^(?:http|ftp)s?://' # http:// or https:// url_regex = re.compile(r'^(?:http|ftp)s?://' # http:// or https://
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
@ -47,17 +46,23 @@ parser.add_argument('--output', required=False, help='Output directory. Ignored
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.') parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.') parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.') parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
parser.add_argument('--threads', type=int, default=(cpu_count() - 1), help=f'How many download processes to use. Default: number of CPU cores (for your machine, {cpu_count()}) - 1 = {cpu_count() - 1}') parser.add_argument('--threads', type=int, default=(cpu_count() - 1),
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.") help=f'How many download processes to use. Default: number of CPU cores (for your machine, {cpu_count()}) - 1 = {cpu_count() - 1}')
parser.add_argument('--daemon', '-d', action='store_true',
help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.")
parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.') parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), help='The path to the directory to track downloaded videos. Defaults to your appdata path.') parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'),
parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.") help='The path to the directory to track downloaded videos. Defaults to your appdata path.')
parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.') parser.add_argument('--silence-errors', '-s', action='store_true',
help="Don't print any error messages to the console.")
parser.add_argument('--ignore-downloaded', '-i', action='store_true',
help='Ignore videos that have been already downloaded and disable checks. Let youtube-dl handle everything.')
parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.') parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.')
parser.add_argument('--ratelimit-sleep', type=int, default=5, parser.add_argument('--ratelimit-sleep', type=int, default=5,
help='How many seconds to sleep between items to prevent rate-limiting. Does not affect time between videos as you should be fine since it takes a few seconds to merge everything and clean up.') help='How many seconds to sleep between items to prevent rate-limiting. Does not affect time between videos as you should be fine since it takes a few seconds to merge everything and clean up.')
parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto', help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line.' parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto',
'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.') help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line.'
'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.')
parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.') parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.')
parser.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--verbose', '-v', action='store_true')
parser.add_argument('--verify', '-z', action='store_true', help='Run ffprobe on the downloaded files.') parser.add_argument('--verify', '-z', action='store_true', help='Run ffprobe on the downloaded files.')
@ -275,7 +280,8 @@ ydl_opts = {
'merge_output_format': 'mkv', 'merge_output_format': 'mkv',
'logtostderr': True, 'logtostderr': True,
'embedchapters': True, 'embedchapters': True,
'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative. 'writethumbnail': True,
# Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
'embedthumbnail': True, 'embedthumbnail': True,
'embeddescription': True, 'embeddescription': True,
'writesubtitles': True, 'writesubtitles': True,
@ -316,7 +322,8 @@ if not args.daemon:
encountered_errors = 0 encountered_errors = 0
errored_videos = 0 errored_videos = 0
# The video progress bars have an issue where when a bar is closed it will shift its position back 1 then return to the correct position. # The video progress bars have an issue where when a bar is closed it
# will shift its position back 1 then return to the correct position.
# This thread will clear empty spots. # This thread will clear empty spots.
if not args.daemon: if not args.daemon:
eraser_exit = manager.Value(bool, False) eraser_exit = manager.Value(bool, False)
@ -326,7 +333,8 @@ already_erased_downloaded_tracker = False
while True: while True:
# do_update() # this doesn't work very well. freezes # do_update() # this doesn't work very well. freezes
progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon,
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
for output_path, urls in url_list.items(): for output_path, urls in url_list.items():
for target_url in urls: for target_url in urls:
logger.info('Fetching playlist...') logger.info('Fetching playlist...')
@ -350,7 +358,8 @@ while True:
logger.info(msg) logger.info(msg)
else: else:
progress_bar.write(msg) progress_bar.write(msg)
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s') download_archive_logger = setup_file_logger('download_archive', download_archive_file,
format_str='%(message)s')
playlist['entries'] = remove_duplicates_from_playlist(playlist['entries']) playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
@ -366,7 +375,8 @@ while True:
elif args.ignore_downloaded: elif args.ignore_downloaded:
download_queue.append(video) download_queue.append(video)
playlist_bar = tqdm(total=len(playlist['entries']), position=1, desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False) playlist_bar = tqdm(total=len(playlist['entries']), position=1,
desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False)
if not args.ignore_downloaded: if not args.ignore_downloaded:
playlist_bar.update(len(downloaded_videos)) playlist_bar.update(len(downloaded_videos))
@ -381,7 +391,13 @@ while True:
status_bar.set_description_str('=' * os.get_terminal_size()[0]) status_bar.set_description_str('=' * os.get_terminal_size()[0])
logger.info('Starting downloads...') logger.info('Starting downloads...')
for result in pool.imap_unordered(download_video, for result in pool.imap_unordered(download_video,
((video, {'bars': video_bars, 'ydl_opts': playlist_ydl_opts, 'output_dir': Path(output_path), 'ignore_downloaded': args.ignore_downloaded, 'verify': args.verify}) for video in download_queue)): ((video, {
'bars': video_bars,
'ydl_opts': playlist_ydl_opts,
'output_dir': Path(output_path),
'ignore_downloaded': args.ignore_downloaded,
'verify': args.verify
}) for video in download_queue)):
# Save the video ID to the file # Save the video ID to the file
if result['downloaded_video_id']: if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id']) download_archive_logger.info(result['downloaded_video_id'])

View File

@ -1,6 +1,7 @@
import math import math
import multiprocessing import multiprocessing
import os import os
import random
import subprocess import subprocess
import sys import sys
import time import time
@ -106,19 +107,41 @@ def download_video(args) -> dict:
# Clean of foreign languages # Clean of foreign languages
video['title'] = unidecode(video['title']) video['title'] = unidecode(video['title'])
# Get a bar
locked = False
if len(kwargs['bars']): if len(kwargs['bars']):
while not locked: # We're going to wait until a bar is available for us to use. bar_enabled = True
got_lock = False
while not got_lock: # Get a bar
for item in kwargs['bars']: for item in kwargs['bars']:
if not is_manager_lock_locked(item[1]): if item[1].acquire(timeout=0.01):
locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it. got_lock = True
offset = item[0] bar_offset = item[0]
bar_lock = item[1] bar_lock = item[1]
break break
else:
time.sleep(random.uniform(0.1, 0.5))
kwargs['ydl_opts']['progress_hooks'] = [progress_hook] kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4))) desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False) bar = tqdm(total=100, position=bar_offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
else:
bar_enabled = False
# got_lock = False
# # if len(kwargs['bars']):
# while not got_lock: # We're going to wait until a bar is available for us to use.
# for item in kwargs['bars']:
# # if not is_manager_lock_locked(item[1]):
# got_lock = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it.
#
# if got_lock:
# print('GOT LOCK:', video['id'])
# # Now that we've gotten the lock, set some variables related to the bar
# offset = item[0]
# bar_lock = item[1]
# break
# else:
# print('WAITING FOR LOCK:', video['id'])
# time.sleep(uniform(0.1, 0.9))
start_time = time.time() start_time = time.time()
@ -126,9 +149,14 @@ def download_video(args) -> dict:
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
yt_dlp = ydl.YDL(kwargs['ydl_opts']) yt_dlp = ydl.YDL(kwargs['ydl_opts'])
video_n = yt_dlp.get_info(video['url']) video_n = yt_dlp.get_info(video['url'])
if not video_n: if not video_n:
output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?') output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
if bar_enabled:
bar.close()
bar_lock.release()
return output_dict return output_dict
video_n['url'] = video['url'] video_n['url'] = video['url']
video = video_n video = video_n
del video_n del video_n
@ -137,7 +165,10 @@ def download_video(args) -> dict:
video['title'] = unidecode(video['title']) video['title'] = unidecode(video['title'])
video['uploader'] = unidecode(video['uploader']) # now this info is present since we fetched it video['uploader'] = unidecode(video['uploader']) # now this info is present since we fetched it
video_filename = remove_special_chars_linux(ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False), special_chars=['/']) # TODO: do we also need to remove the @ char?
video_filename = remove_special_chars_linux(
ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False), special_chars=['/']
)
# Make sure the video title isn't too long # Make sure the video title isn't too long
while len(video_filename) >= name_max - 3: # -3 so that I can add ... while len(video_filename) >= name_max - 3: # -3 so that I can add ...
@ -166,6 +197,7 @@ def download_video(args) -> dict:
error_code = y.download(video['url']) error_code = y.download(video['url'])
# yt_dlp = ydl.YDL(kwargs['ydl_opts']) # recreate the object with the correct logging path # yt_dlp = ydl.YDL(kwargs['ydl_opts']) # recreate the object with the correct logging path
# error_code = yt_dlp(video['url']) # Do the download # error_code = yt_dlp(video['url']) # Do the download
if not error_code: if not error_code:
elapsed = round(math.ceil(time.time() - start_time) / 60, 2) elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.") output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
@ -174,9 +206,10 @@ def download_video(args) -> dict:
output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
except Exception: except Exception:
output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}") output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
if locked: if bar_enabled:
bar.update(100 - bar.n) bar.update(100 - bar.n)
if locked:
if bar_enabled:
bar.close() bar.close()
bar_lock.release() bar_lock.release()
return output_dict return output_dict