#!/usr/bin/env python3
import argparse
import logging.config
import math
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
from multiprocessing import Manager, Pool, cpu_count
from pathlib import Path
from threading import Thread

import yaml
from appdirs import user_data_dir
from tqdm.auto import tqdm

from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger
from process.threads import bar_eraser, download_video
from ydl.files import create_directories, resolve_path
from ydl.yt_dlp import YDL, update_ytdlp
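
# Example usage (script name and paths illustrative):
#   ./downloader.py urls.txt --output /mnt/videos
#   ./downloader.py https://www.youtube.com/playlist?list=... --daemon --sleep 120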

def signal_handler(sig, frame):
    # TODO: https://www.g-loaded.eu/2016/11/24/how-to-terminate-running-python-threads-using-signals/
    # raise ServiceExit
    sys.exit(0)


# signal.signal(signal.SIGTERM, signal_handler)
# signal.signal(signal.SIGINT, signal_handler)

url_regex = re.compile(r'^(?:http|ftp)s?://'  # http:// or https://
                       r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
                       r'localhost|'  # localhost...
                       r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
                       r'(?::\d+)?'  # optional port
                       r'(?:/?|[/?]\S+)$', re.IGNORECASE)
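
# Matches ANSI escape sequences (terminal colors and cursor codes) so they can be stripped from log output.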
ansi_escape_regex = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

parser = argparse.ArgumentParser()
parser.add_argument('file', help='URL to download or path of a file containing the URLs of the videos to download.')
parser.add_argument('--output', required=False, help='Output directory. Ignored if paths are specified in a YAML file.')
parser.add_argument('--no-update', '-n', action='store_true', help="Don't update yt-dlp at launch.")
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
parser.add_argument('--threads', type=int, default=(cpu_count() - 1),
                    help=f'How many download processes to use. Default: number of CPU cores (for your machine: {cpu_count()}) - 1 = {cpu_count() - 1}')
parser.add_argument('--daemon', '-d', action='store_true',
                    help='Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in --sleep.')
parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'),
                    help='The path to the directory used to track downloaded videos. Defaults to your appdata path.')
parser.add_argument('--silence-errors', '-s', action='store_true',
                    help="Don't print any error messages to the console.")
parser.add_argument('--ignore-downloaded', '-i', action='store_true',
                    help='Ignore videos that have already been downloaded and disable checks. Let youtube-dl handle everything.')
parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.')
parser.add_argument('--ratelimit-sleep', type=int, default=5,
                    help='How many seconds to sleep between items to prevent rate-limiting. Does not affect the time between videos, which should be fine since it takes a few seconds to merge everything and clean up.')
parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto',
                    help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the first line. '
                         'If it is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.')
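# For reference, a YAML input file maps each output directory to a list of URLs,
# e.g. (paths and playlist IDs here are illustrative):
#   /mnt/videos/music:
#     - 'https://www.youtube.com/playlist?list=PL...'
#   /mnt/videos/lectures:
#     - 'https://www.youtube.com/playlist?list=PL...'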
parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.')
parser.add_argument('--verbose', '-v', action='store_true')
parser.add_argument('--verify', '-z', action='store_true', help='Run ffprobe on the downloaded files.')

args = parser.parse_args()

if args.threads <= 0:
    print("Can't have 0 threads!")
    sys.exit(1)

if args.output:
    args.output = resolve_path(args.output)

if args.log_dir:
    args.log_dir = resolve_path(args.log_dir)
elif not args.output and not args.log_dir:
    args.log_dir = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_logs'))
    # print('Must set --log-dir when --output is not.')
    # sys.exit(1)
else:
    args.log_dir = args.output / 'logs'

args.download_cache_file_directory = resolve_path(args.download_cache_file_directory)

# TODO: use logging for this
if args.verbose:
    print('Cache directory:', args.download_cache_file_directory)

log_time = time.time()


def load_input_file():
    """
    Get the URLs of the videos to download. Is the input a URL or a file?
    """
    url_list = {}
    if not re.match(url_regex, str(args.file)) or args.input_datatype in ('txt', 'yaml'):
        args.file = resolve_path(args.file)
        if not args.file.exists():
            print('Input file does not exist:', args.file)
            sys.exit(1)
        input_file = [line.strip() for line in args.file.read_text().splitlines()]
        if not input_file:
            print('Input file is empty:', args.file)
            sys.exit(1)
        if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'):
            with open(args.file, 'r') as file:
                try:
                    url_list = yaml.safe_load(file)
                except yaml.YAMLError as e:
                    print('Failed to load config file, error:', e)
                    sys.exit(1)
        elif args.input_datatype == 'txt' or (re.match(url_regex, input_file[0]) and args.input_datatype == 'auto'):
            if not args.output:
                args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output'))
                # print('You must specify an output path with --output when the input datatype is a text file.')
                # sys.exit(1)
            url_list[str(args.output)] = input_file
        else:
            print('Unknown file type:', args.input_datatype)
            print(input_file)
            sys.exit(1)
        del input_file  # No longer needed.
        # Verify each line in the file is a valid URL and resolve the output paths.
        resolved_paths = {}
        for directory, urls in url_list.items():
            for item in urls:
                if not re.match(url_regex, str(item)):
                    print('Not a URL:', item)
                    sys.exit(1)
            resolved_paths[resolve_path(directory)] = urls
        url_list = resolved_paths
    else:
        # They gave us just a URL.
        if not args.output:
            # Set a default path.
            args.output = resolve_path(Path(os.getcwd(), 'automated-youtube-dl_output'))
            # print('You must specify an output path with --output when the input is a URL.')
            # sys.exit(1)
        url_list[str(args.output)] = [args.file]
    return url_list


url_list = load_input_file()
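# url_list maps each resolved output directory to the list of URLs downloaded into it.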

# Create directories AFTER loading the file.
create_directories(*url_list.keys(), args.download_cache_file_directory)


def do_update():
    if not args.no_update:
        print('Updating yt-dlp...')
        updated = update_ytdlp()
        if updated:
            print('Restarting program...')
            restart_program()
        else:
            print('Up to date.')

if args.rm_cache:
    subprocess.run('yt-dlp --rm-cache-dir', shell=True)

# TODO: compress old log files

if args.daemon:
    print('Running in daemon mode.')

create_directories(args.log_dir)

# TODO: log file rotation https://www.blog.pythonlibrary.org/2014/02/11/python-how-to-create-rotating-logs/
# TODO: log to one file instead of one for each run
file_logger = setup_file_logger('youtube_dl', args.log_dir / f'{int(log_time)}.log', level=logging.INFO)
video_error_logger = setup_file_logger('video_errors', args.log_dir / f'{int(log_time)}-errors.log', level=logging.INFO)
logger = get_silent_logger('yt-dl', silent=not args.daemon)


def log_info_twice(msg):
    # Log to the console logger and to the log file, stripping ANSI colors from the file copy.
    logger.info(msg)
    file_logger.info(ansi_escape_regex.sub('', msg))


log_info_twice('Starting process.')

start_time = time.time()
manager = Manager()

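# NOTE: load_existing_videos() reads the module-level download_archive_file, which the
# main loop points at the current playlist's archive before each call.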

def load_existing_videos():
    # Find existing videos.
    output = set()
    if not download_archive_file.exists():
        download_archive_file.touch()
    with open(download_archive_file, 'r') as file:
        output.update(line.rstrip() for line in file)
    # Remove duplicate lines.
    # Something may have gone wrong in the past so we want to make sure everything is cleaned up.
    with open(download_archive_file) as file:
        uniqlines = set(file.readlines())
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as tmp:
        tmp.writelines(uniqlines)
    shutil.move(path, download_archive_file)
    return output


status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon, leave=False)


def log_bar(log_msg, level):
    status_bar.write(f'[{level}] {log_msg}')
    if level == 'warning':
        logger.warning(log_msg)
    elif level == 'error':
        logger.error(log_msg)
    else:
        logger.info(log_msg)


# def log_with_video_id(log_msg, video_id, level, logger_obj):
#     log_msg = f'{video_id} - {log_msg}'
#     if level == 'warning':
#         logger_obj.warning(log_msg)
#     elif level == 'error':
#         logger_obj.error(log_msg)
#     else:
#         logger_obj.info(log_msg)


def print_without_paths(msg):
    """
    Remove any filepaths or other stuff we don't want in the message.
    """
    # Cut the message off at the first '/' so filesystem paths don't end up in the status bar.
    m = re.match(r'(^[^\/]+(?:\\.[^\/]*)*)', msg)
    if m:
        msg = m.group(1)
    msg = msg.strip(' to "').strip(' to:').strip()
    if args.daemon:
        log_info_twice(msg)
    else:
        status_bar.set_description_str(msg)

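
# yt-dlp accepts any object with debug/info/warning/error methods as its logger.
# This one mirrors yt-dlp's output to the log file (ANSI colors stripped) and the console/status bar.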

class ytdl_logger(object):
    def debug(self, msg):
        file_logger.debug(self.__clean_msg(msg))
        # if msg.startswith('[debug] '):
        #     pass
        if '[download]' not in msg:
            print_without_paths(msg)

    def info(self, msg):
        file_logger.info(self.__clean_msg(msg))
        print_without_paths(msg)

    def warning(self, msg):
        file_logger.warning(self.__clean_msg(msg))
        if args.daemon:
            logger.warning(msg)
        else:
            status_bar.write(msg)

    def error(self, msg):
        file_logger.error(self.__clean_msg(msg))
        if args.daemon:
            logger.error(msg)
        else:
            status_bar.write(msg)

    def __clean_msg(self, msg):
        return ansi_escape_regex.sub('', msg)


# TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Scripts-Type.md#archivist-scripts
# https://github.com/yt-dlp/yt-dlp#embedding-examples
ydl_opts = {
    # TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Details.md
    # https://old.reddit.com/r/DataHoarder/comments/c6fh4x/after_hoarding_over_50k_youtube_videos_here_is/
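    # The format cascade below prefers AV1, then VP9.2, then VP9, first at >=1080p60,
    # then >=1080p, >=720p60, and >=720p, each rung capped at --max-size, before falling
    # back to the best video under the cap. Audio prefers opus; streams are merged into MKV.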
    'format': f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
    'merge_output_format': 'mkv',
    'logtostderr': True,
    'embedchapters': True,
    # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
    'writethumbnail': True,
    'embedthumbnail': True,
    'embeddescription': True,
    'writesubtitles': True,
    # 'allsubtitles': True,  # Download every language.
    'subtitlesformat': 'vtt',
    'subtitleslangs': ['en'],
    'writeautomaticsub': True,
    'writedescription': True,
    'ignoreerrors': True,
    'continuedl': False,
    'addmetadata': True,
    'writeinfojson': True,
    'verbose': args.verbose,
    'postprocessors': [
        {'key': 'FFmpegEmbedSubtitle'},
        {'key': 'FFmpegMetadata', 'add_metadata': True},
        {'key': 'EmbedThumbnail', 'already_have_thumbnail': True},
        {'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', 'when': 'before_dl'},
        # {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'}
    ],
    # 'external_downloader': 'aria2c',
    # 'external_downloader_args': ['-j 32', '-s 32', '-x 16', '--file-allocation=none', '--optimize-concurrent-downloads=true', '--http-accept-gzip=true', '--continue=true'],
}

yt_dlp = YDL(dict(ydl_opts, logger=ytdl_logger()))

url_count = sum(len(urls) for urls in url_list.values())

# Init bars.
video_bars = manager.list()
if not args.daemon:
    for i in range(args.threads):
        video_bars.append([3 + i, manager.Lock()])

encountered_errors = 0
errored_videos = 0

# The video progress bars have an issue where when a bar is closed it
# will shift its position back 1 then return to the correct position.
# This thread will clear empty spots.
if not args.daemon:
    eraser_exit = manager.Value(bool, False)
    Thread(target=bar_eraser, args=(video_bars, eraser_exit)).start()

already_erased_downloaded_tracker = False

while True:
    # do_update()  # This doesn't work very well; it freezes.
    progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon,
                        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
    for output_path, urls in url_list.items():
        for target_url in urls:
            logger.info('Fetching playlist...')
            playlist = yt_dlp.playlist_contents(str(target_url))
            if not playlist:
                progress_bar.update()
                continue
            # Reload the input file so the next pass picks up any edits.
            # Rebinding url_list does not affect the iteration already in progress.
            url_list = load_input_file()
            download_archive_file = args.download_cache_file_directory / (str(playlist['id']) + '.log')
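            # The archive file is one plain-text video ID per line, one file per playlist ID.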
            if args.erase_downloaded_tracker and not already_erased_downloaded_tracker:
                if download_archive_file.exists():
                    os.remove(download_archive_file)
                already_erased_downloaded_tracker = True
            downloaded_videos = load_existing_videos()

            msg = f'Found {len(downloaded_videos)} downloaded videos for playlist "{playlist["title"]}" ({playlist["id"]}). {"Ignoring." if args.ignore_downloaded else ""}'
            if args.daemon:
                logger.info(msg)
            else:
                progress_bar.write(msg)
            download_archive_logger = setup_file_logger('download_archive', download_archive_file,
                                                        format_str='%(message)s')

            playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
            log_info_twice(f'Downloading item: "{playlist["title"]}" ({playlist["id"]}) {target_url}')

            # Remove already downloaded videos from the to-do list.
            download_queue = []
            queued_ids = set()  # The queue holds dicts, so track queued IDs separately for the membership test.
            for video in playlist['entries']:
                if video['id'] not in queued_ids:
                    if args.ignore_downloaded or video['id'] not in downloaded_videos:
                        download_queue.append(video)
                        queued_ids.add(video['id'])

            playlist_bar = tqdm(total=len(playlist['entries']), position=1,
                                desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False)
            if not args.ignore_downloaded:
                playlist_bar.update(len(downloaded_videos))

            playlist_ydl_opts = ydl_opts.copy()
            # playlist_ydl_opts['outtmpl'] = f'{output_path}/{get_output_templ()}'
            if len(download_queue):  # Don't mess with multiprocessing if all videos are already downloaded.
                with Pool(processes=args.threads) as pool:
                    if sys.stdout.isatty():
                        # Doesn't work if not connected to a terminal:
                        # OSError: [Errno 25] Inappropriate ioctl for device
                        status_bar.set_description_str('=' * os.get_terminal_size()[0])
                    logger.info('Starting downloads...')
                    for result in pool.imap_unordered(download_video,
                                                      ((video, {
                                                          'bars': video_bars,
                                                          'ydl_opts': playlist_ydl_opts,
                                                          'output_dir': Path(output_path),
                                                          'ignore_downloaded': args.ignore_downloaded,
                                                          'verify': args.verify
                                                      }) for video in download_queue)):
                        # Save the video ID to the archive file.
                        if result['downloaded_video_id']:
                            download_archive_logger.info(result['downloaded_video_id'])

                        # Print short error messages.
                        # An error should never be added to both video_critical_err_msg_short and video_critical_err_msg.
                        for line in result['video_critical_err_msg_short']:
                            # file_msg = f"{result['video_id']} - {ansi_escape_regex.sub('', line)}"
                            # term_msg = f"{result['video_id']} - {line}"
                            msg = f"{result['video_id']} - {line}"
                            video_error_logger.error(msg)
                            file_logger.error(msg)
                            encountered_errors += 1
                            if args.daemon:
                                logger.error(msg)
                            else:
                                status_bar.write(msg)

                        # Print longer error messages.
                        # Won't print anything to console if the silence_errors arg is set.
                        for line in result['video_critical_err_msg']:
                            msg = f"{result['video_id']} - {line}"
                            video_error_logger.error(msg)
                            file_logger.error(msg)
                            encountered_errors += 1
                            if not args.silence_errors:
                                if args.daemon:
                                    logger.error(msg)
                                else:
                                    status_bar.write(msg)

                        # if len(result['video_critical_err_msg']):
                        #     errored_videos += 1
                        #     if args.silence_errors and args.daemon:
                        #         logger.error(f"{result['video_id']} - Failed due to error.")

                        for line in result['logger_msg']:
                            log_info_twice(f"{result['video_id']} - {line}")

                        # TODO: if no error, launch a verify multiprocess.
                        # if kwargs['verify']:
                        #     try:
                        #         info = yt_dlp.extract_info(video['url'])
                        #     except Exception as e:
                        #         output_dict['video_critical_err_msg'].append(f'Failed to verify video, extract_info failed: {e}')
                        #     file_path = base_path + info['ext']
                        #     result = ffprobe(file_path)
                        #     if not result[0]:
                        #         output_dict['video_critical_err_msg'].append(f'Failed to verify video: {result[4]}')

                        playlist_bar.update()
            else:
                msg = f'All videos already downloaded for "{playlist["title"]}".'
                if args.daemon:
                    logger.info(msg)
                else:
                    status_bar.write(msg)

            log_info_twice(f'Finished item: "{playlist["title"]}" {target_url}')

            # Sleep a bit to prevent rate-limiting.
            if progress_bar.n < url_count - 1:
                status_bar.set_description_str(f'Sleeping {args.ratelimit_sleep}s...')
                time.sleep(args.ratelimit_sleep)
            progress_bar.update()

    error_msg = f'Encountered {encountered_errors} errors on {errored_videos} videos.'
    if args.daemon:
        logger.info(error_msg)
    else:
        status_bar.write(error_msg)

    log_info_twice(f'Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.')

    if not args.daemon:
        break
    else:
        logger.info(f'Sleeping for {args.sleep} min.')
        try:
            time.sleep(args.sleep * 60)
        except KeyboardInterrupt:
            sys.exit(0)
        # downloaded_videos = load_existing_videos()  # Reload the videos that have already been downloaded.

# Clean up the remaining bars. Have to close them in order.
# These variables may be undefined so we just ignore any errors.
# Separate try blocks because we don't want one failure to skip the rest.
try:
    eraser_exit.value = True
except (NameError, AttributeError):
    pass
try:
    playlist_bar.close()
except (NameError, AttributeError):
    pass
try:
    status_bar.close()
except (NameError, AttributeError):
    pass