Improve: downloaded video handling, terminal cleanup, threading

This commit is contained in:
Cyberes 2023-01-20 22:47:18 -07:00
parent 251d9f7b33
commit 8e9d2dc8e8
2 changed files with 66 additions and 44 deletions

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3
import argparse
import automated_youtube_dl.yt_dlp as ydl
import logging.config
import math
import os
@ -8,11 +7,14 @@ import re
import subprocess
import sys
import time
from automated_youtube_dl.files import create_directories, resolve_path
from multiprocessing import Manager, Pool, cpu_count
from tqdm.auto import tqdm
import automated_youtube_dl.yt_dlp as ydl
from automated_youtube_dl.files import create_directories, resolve_path
from process.funcs import restart_program, setup_file_logger
from process.threads import download_video
from tqdm.auto import tqdm
urlRegex = re.compile(
r'^(?:http|ftp)s?://' # http:// or https://
@ -181,29 +183,47 @@ for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0,
playlist_bar.total = len(playlist['entries'])
playlist_bar.set_description(playlist['title'])
# Remove already downloaded files from the to-do list.
download_queue = []
for video in playlist['entries']:
if video['id'] not in download_archive:
download_queue.append(video)
else:
logger.info(f"{video['id']} already downloaded.")
playlist_bar.update(len(playlist['entries']) - len(download_queue))
if args.backwards:
playlist['entries'].reverse()
download_queue.reverse()
with Pool(processes=args.threads) as pool:
status_bar.set_description_str('=' * os.get_terminal_size()[0])
for result in pool.imap_unordered(download_video,
((video, {
'bars': video_bars,
'download_archive': download_archive,
'ydl_opts': thread_opts,
}) for video in playlist['entries'])):
if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id'])
if len(result['video_error_logger_msg']):
for line in result['video_error_logger_msg']:
video_error_logger.info(line)
if len(result['status_msg']):
for line in result['status_msg']:
playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
if len(result['logger_msg']):
for line in result['logger_msg']:
logger.info(line)
playlist_bar.update(1)
if len(download_queue): # Don't mess with multiprocessing if the list is empty
with Pool(processes=args.threads) as pool:
status_bar.set_description_str('=' * os.get_terminal_size()[0])
for result in pool.imap_unordered(download_video,
((video, {
'bars': video_bars,
'download_archive': download_archive,
'ydl_opts': thread_opts,
}) for video in download_queue)):
if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id'])
if len(result['video_error_logger_msg']):
for line in result['video_error_logger_msg']:
video_error_logger.info(line)
if len(result['status_msg']):
for line in result['status_msg']:
playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
if len(result['logger_msg']):
for line in result['logger_msg']:
logger.info(line)
playlist_bar.update()
else:
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'")
# playlist_bar.update(playlist_bar.total - playlist_bar.n)
logger.info(f"Finished item: '{playlist['title']}' {target_url}")
logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
# Clean up the remaining bars. Have to close them in order.
status_bar.set_description_str('\x1b[2KDone!') # erase the status bar
status_bar.refresh()
playlist_bar.close()
status_bar.close()

View File

@ -21,6 +21,9 @@ def is_manager_lock_locked(lock) -> bool:
def download_video(args) -> dict:
# Currently disabled: sleep for a few milliseconds to stagger the workers so they don't all rush the bar locks at once.
# time.sleep(random.randint(1, 20) / 1000)
def progress_hook(d):
# downloaded_bytes and total_bytes can be None if the download hasn't started yet.
if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'):
@ -45,7 +48,6 @@ def download_video(args) -> dict:
locked = False
# We're going to wait until a bar is available for us to use.
# TODO: this isn't perfect. Sometimes only 3 out of 4 downloaders start; I think the last one had its bar lock taken by another worker.
while not locked:
for item in bars:
if not is_manager_lock_locked(item[1]):
@ -61,22 +63,22 @@ def download_video(args) -> dict:
output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}
start_time = time.time()
if video['id'] in download_archive:
output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
else:
try:
error_code = yt_dlp(video['url']) # Do the download
if not error_code:
download_archive.append(video['id'])
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
output_dict['downloaded_video_id'] = video['id']
else:
m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
output_dict['status_msg'].append(m)
output_dict['video_error_logger_msg'].append(m)
except Exception as e:
output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
bar.close()
bar_lock.release()
# if video['id'] in download_archive:
# output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
# else:
try:
error_code = yt_dlp(video['url']) # Do the download
if not error_code:
download_archive.append(video['id'])
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
output_dict['downloaded_video_id'] = video['id']
else:
m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
output_dict['status_msg'].append(m)
output_dict['video_error_logger_msg'].append(m)
except Exception as e:
output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
bar.close()
bar_lock.release()
return output_dict