Improve: downloaded video handling, terminal cleanup, threading

This commit is contained in:
Cyberes 2023-01-20 22:47:18 -07:00
parent 251d9f7b33
commit 8e9d2dc8e8
2 changed files with 66 additions and 44 deletions

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
import automated_youtube_dl.yt_dlp as ydl
import logging.config import logging.config
import math import math
import os import os
@ -8,11 +7,14 @@ import re
import subprocess import subprocess
import sys import sys
import time import time
from automated_youtube_dl.files import create_directories, resolve_path
from multiprocessing import Manager, Pool, cpu_count from multiprocessing import Manager, Pool, cpu_count
from tqdm.auto import tqdm
import automated_youtube_dl.yt_dlp as ydl
from automated_youtube_dl.files import create_directories, resolve_path
from process.funcs import restart_program, setup_file_logger from process.funcs import restart_program, setup_file_logger
from process.threads import download_video from process.threads import download_video
from tqdm.auto import tqdm
urlRegex = re.compile( urlRegex = re.compile(
r'^(?:http|ftp)s?://' # http:// or https:// r'^(?:http|ftp)s?://' # http:// or https://
@ -181,29 +183,47 @@ for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0,
playlist_bar.total = len(playlist['entries']) playlist_bar.total = len(playlist['entries'])
playlist_bar.set_description(playlist['title']) playlist_bar.set_description(playlist['title'])
# Remove already downloaded files from the to-do list.
download_queue = []
for video in playlist['entries']:
if video['id'] not in download_archive:
download_queue.append(video)
else:
logger.info(f"{video['id']} already downloaded.")
playlist_bar.update(len(playlist['entries']) - len(download_queue))
if args.backwards: if args.backwards:
playlist['entries'].reverse() download_queue.reverse()
with Pool(processes=args.threads) as pool: if len(download_queue): # Don't mess with multiprocessing if the list is empty
status_bar.set_description_str('=' * os.get_terminal_size()[0]) with Pool(processes=args.threads) as pool:
for result in pool.imap_unordered(download_video, status_bar.set_description_str('=' * os.get_terminal_size()[0])
((video, { for result in pool.imap_unordered(download_video,
'bars': video_bars, ((video, {
'download_archive': download_archive, 'bars': video_bars,
'ydl_opts': thread_opts, 'download_archive': download_archive,
}) for video in playlist['entries'])): 'ydl_opts': thread_opts,
if result['downloaded_video_id']: }) for video in download_queue)):
download_archive_logger.info(result['downloaded_video_id']) if result['downloaded_video_id']:
if len(result['video_error_logger_msg']): download_archive_logger.info(result['downloaded_video_id'])
for line in result['video_error_logger_msg']: if len(result['video_error_logger_msg']):
video_error_logger.info(line) for line in result['video_error_logger_msg']:
if len(result['status_msg']): video_error_logger.info(line)
for line in result['status_msg']: if len(result['status_msg']):
playlist_bar.write(f"{result['downloaded_video_id']}: {line}") for line in result['status_msg']:
if len(result['logger_msg']): playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
for line in result['logger_msg']: if len(result['logger_msg']):
logger.info(line) for line in result['logger_msg']:
playlist_bar.update(1) logger.info(line)
playlist_bar.update()
else:
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'")
# playlist_bar.update(playlist_bar.total - playlist_bar.n)
logger.info(f"Finished item: '{playlist['title']}' {target_url}") logger.info(f"Finished item: '{playlist['title']}' {target_url}")
logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.") logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
# Clean up the remaining bars. Have to close them in order.
status_bar.set_description_str('\x1b[2KDone!') # erase the status bar
status_bar.refresh()
playlist_bar.close()
status_bar.close()

View File

@ -21,6 +21,9 @@ def is_manager_lock_locked(lock) -> bool:
def download_video(args) -> dict: def download_video(args) -> dict:
# Sleep for a little bit to space out the rush of workers flooding the bar locks.
# time.sleep(random.randint(1, 20) / 1000)
def progress_hook(d): def progress_hook(d):
# downloaded_bytes and total_bytes can be None if the download hasn't started yet. # downloaded_bytes and total_bytes can be None if the download hasn't started yet.
if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'): if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'):
@ -45,7 +48,6 @@ def download_video(args) -> dict:
locked = False locked = False
# We're going to wait until a bar is available for us to use. # We're going to wait until a bar is available for us to use.
# TODO: this isn't perfect. Sometimes only 3/4 downloaders start and I think the final one had its lock stolen by someone else.
while not locked: while not locked:
for item in bars: for item in bars:
if not is_manager_lock_locked(item[1]): if not is_manager_lock_locked(item[1]):
@ -61,22 +63,22 @@ def download_video(args) -> dict:
output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}
start_time = time.time() start_time = time.time()
if video['id'] in download_archive: # if video['id'] in download_archive:
output_dict['logger_msg'].append(f"{video['id']} already downloaded.") # output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
else: # else:
try: try:
error_code = yt_dlp(video['url']) # Do the download error_code = yt_dlp(video['url']) # Do the download
if not error_code: if not error_code:
download_archive.append(video['id']) download_archive.append(video['id'])
elapsed = round(math.ceil(time.time() - start_time) / 60, 2) elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.") output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
output_dict['downloaded_video_id'] = video['id'] output_dict['downloaded_video_id'] = video['id']
else: else:
m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}' m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
output_dict['status_msg'].append(m) output_dict['status_msg'].append(m)
output_dict['video_error_logger_msg'].append(m) output_dict['video_error_logger_msg'].append(m)
except Exception as e: except Exception as e:
output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}") output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
bar.close() bar.close()
bar_lock.release() bar_lock.release()
return output_dict return output_dict