better config file, better printing
This commit is contained in:
parent
915d7f89f0
commit
cc29f29ae9
|
@ -1,4 +1,6 @@
|
||||||
.idea
|
.idea
|
||||||
|
targets.*
|
||||||
|
!targets.sample.*
|
||||||
|
|
||||||
# ---> Python
|
# ---> Python
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
|
21
README.md
21
README.md
|
@ -53,17 +53,18 @@ Output Directory/
|
||||||
Videos will be saved using this name format:
|
Videos will be saved using this name format:
|
||||||
|
|
||||||
```
|
```
|
||||||
%(title)s --- %(uploader)s --- %(uploader_id)s --- %(id)s
|
[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s]
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Arguments
|
#### Arguments
|
||||||
|
|
||||||
| Argument | Flag | Help |
|
| Argument | Flag | Help |
|
||||||
| ------------- | ---- | ------------------------------------------------------------ |
|
| --------------------- | ---- | ------------------------------------------------------------ |
|
||||||
| `--no-update` | `-n` | Don't update yt-dlp at launch. |
|
| `--no-update` | `-n` | Don't update yt-dlp at launch. |
|
||||||
| `--max-size` | | Max allowed size of a video in MB. Default: 1100. |
|
| `--max-size` | | Max allowed size of a video in MB. Default: 1100. |
|
||||||
| `--rm-cache` | `-r` | Delete the yt-dlp cache on start. |
|
| `--rm-cache` | `-r` | Delete the yt-dlp cache on start. |
|
||||||
| `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. |
|
| `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. |
|
||||||
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in `--sleep`. |
|
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in `--sleep`. |
|
||||||
| `--sleep` | | How many minutes to sleep when in daemon mode. |
|
| `--sleep` | | How many minutes to sleep when in daemon mode. |
|
||||||
| `--silence-errors` | `-s` | Don't print any error messages to the console. |
|
| `--silence-errors` | `-s` | Don't print any error messages to the console. |
|
||||||
|
| `--ignore-downloaded` | `-i` | Ignore videos that have been already downloaded and let youtube-dl handle everything. Videos will not be re-downloaded, but metadata will be updated. |
|
237
downloader.py
237
downloader.py
|
@ -8,36 +8,44 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from multiprocessing import Manager, Pool, cpu_count
|
from multiprocessing import Manager, Pool, cpu_count
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from appdirs import user_data_dir
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
import ydl.yt_dlp as ydl
|
import ydl.yt_dlp as ydl
|
||||||
from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger
|
from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger
|
||||||
from process.threads import download_video
|
from process.threads import bar_eraser, download_video
|
||||||
from ydl.files import create_directories, resolve_path
|
from ydl.files import create_directories, resolve_path
|
||||||
|
|
||||||
# logging.basicConfig(level=1000)
|
# logging.basicConfig(level=1000)
|
||||||
# logging.getLogger().setLevel(1000)
|
# logging.getLogger().setLevel(1000)
|
||||||
|
|
||||||
urlRegex = re.compile(
|
urlRegex = re.compile(r'^(?:http|ftp)s?://' # http:// or https://
|
||||||
r'^(?:http|ftp)s?://' # http:// or https://
|
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
|
||||||
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
|
r'localhost|' # localhost...
|
||||||
r'localhost|' # localhost...
|
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
|
||||||
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
|
r'(?::\d+)?' # optional port
|
||||||
r'(?::\d+)?' # optional port
|
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
||||||
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('file', help='URL to download or path of a file containing the URLs of the videos to download.')
|
parser.add_argument('file', help='URL to download or path of a file containing the URLs of the videos to download.')
|
||||||
parser.add_argument('output', help='Output directory.')
|
parser.add_argument('output', help='Output directory. Ignored paths specified in a YAML file.')
|
||||||
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
|
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
|
||||||
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
|
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
|
||||||
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
|
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
|
||||||
parser.add_argument('--threads', type=int, default=cpu_count(), help='How many download processes to use.')
|
parser.add_argument('--threads', type=int, default=cpu_count(), help='How many download processes to use.')
|
||||||
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.")
|
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.")
|
||||||
parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
|
parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
|
||||||
|
parser.add_argument('--download-cache-file-directory', default=user_data_dir('automated-youtube-dl', 'cyberes'), help='The path to the directory to track downloaded videos. Defaults to your appdata path.')
|
||||||
parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.")
|
parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.")
|
||||||
parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and let YouTubeDL handle everything.')
|
parser.add_argument('--ignore-downloaded', '-i', action='store_true', help='Ignore videos that have been already downloaded and let youtube-dl handle everything.')
|
||||||
|
parser.add_argument('--erase-downloaded-tracker', '-e', action='store_true', help='Erase the tracked video file.')
|
||||||
|
parser.add_argument('--ratelimit-sleep', type=int, default=5, help='How many seconds to sleep to prevent rate-limiting.')
|
||||||
|
parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto', help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the firstline.'
|
||||||
|
'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.threads <= 0:
|
if args.threads <= 0:
|
||||||
|
@ -45,22 +53,42 @@ if args.threads <= 0:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
args.output = resolve_path(args.output)
|
args.output = resolve_path(args.output)
|
||||||
|
args.download_cache_file_directory = resolve_path(args.download_cache_file_directory)
|
||||||
log_time = time.time()
|
log_time = time.time()
|
||||||
|
|
||||||
# Get the URLs of the videos to download. Is the input a URL or file?
|
# Get the URLs of the videos to download. Is the input a URL or file?
|
||||||
if not re.match(urlRegex, str(args.file)):
|
url_list = {}
|
||||||
|
if not re.match(urlRegex, str(args.file)) or args.input_datatype in ('txt', 'yaml'):
|
||||||
args.file = resolve_path(args.file)
|
args.file = resolve_path(args.file)
|
||||||
if not args.file.exists():
|
if not args.file.exists():
|
||||||
print('Input file does not exist:', args.file)
|
print('Input file does not exist:', args.file)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
url_list = [x.strip().strip('\n') for x in list(args.file.open())]
|
input_file = [x.strip().strip('\n') for x in list(args.file.open())]
|
||||||
|
if args.input_datatype == 'yaml' or (re.match(r'^.*?:\w*', input_file[0]) and args.input_datatype == 'auto'):
|
||||||
|
with open(args.file, 'r') as file:
|
||||||
|
try:
|
||||||
|
url_list = yaml.safe_load(file)
|
||||||
|
except yaml.YAMLError as e:
|
||||||
|
print('Failed to load config file, error:', e)
|
||||||
|
sys.exit(1)
|
||||||
|
elif args.input_datatype == 'txt' or (re.match(urlRegex, input_file[0]) and args.input_datatype == 'auto'):
|
||||||
|
url_list[str(args.output)] = input_file
|
||||||
|
else:
|
||||||
|
print('Unknown file type:', args.input_datatype)
|
||||||
|
print(input_file)
|
||||||
|
sys.exit(1)
|
||||||
|
del input_file # release file object
|
||||||
# Verify each line in the file is a valid URL.
|
# Verify each line in the file is a valid URL.
|
||||||
for i, line in enumerate(url_list):
|
for directory, urls in url_list.items():
|
||||||
if not re.match(urlRegex, line):
|
for item in urls:
|
||||||
print(f'Line {i} not a url:', line)
|
if not re.match(urlRegex, str(item)):
|
||||||
sys.exit(1)
|
print(f'Not a url:', item)
|
||||||
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
url_list = [args.file]
|
url_list[str(args.output)] = [args.file]
|
||||||
|
|
||||||
|
# Create directories AFTER loading the file
|
||||||
|
create_directories(*url_list.keys(), args.download_cache_file_directory)
|
||||||
|
|
||||||
|
|
||||||
def do_update():
|
def do_update():
|
||||||
|
@ -98,8 +126,6 @@ start_time = time.time()
|
||||||
|
|
||||||
manager = Manager()
|
manager = Manager()
|
||||||
|
|
||||||
download_archive_file = args.output / 'download-archive.log'
|
|
||||||
|
|
||||||
|
|
||||||
def load_existing_videos():
|
def load_existing_videos():
|
||||||
# Find existing videos.
|
# Find existing videos.
|
||||||
|
@ -111,13 +137,7 @@ def load_existing_videos():
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
downloaded_videos = load_existing_videos()
|
status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon, leave=False)
|
||||||
print('Found', len(downloaded_videos), 'downloaded videos.')
|
|
||||||
|
|
||||||
# Create this object AFTER reading in the download_archive.
|
|
||||||
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s')
|
|
||||||
|
|
||||||
status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon)
|
|
||||||
|
|
||||||
|
|
||||||
def log_bar(msg, level):
|
def log_bar(msg, level):
|
||||||
|
@ -195,94 +215,118 @@ ydl_opts = {
|
||||||
}
|
}
|
||||||
|
|
||||||
main_opts = dict(ydl_opts, **{'logger': ytdl_logger()})
|
main_opts = dict(ydl_opts, **{'logger': ytdl_logger()})
|
||||||
# thread_opts = dict(ydl_opts, **{'logger': ydl.ytdl_no_logger()})
|
|
||||||
yt_dlp = ydl.YDL(main_opts)
|
yt_dlp = ydl.YDL(main_opts)
|
||||||
|
|
||||||
|
url_count = 0
|
||||||
|
for k, v in url_list.items():
|
||||||
|
for item in v:
|
||||||
|
url_count += 1
|
||||||
|
|
||||||
# Init bars
|
# Init bars
|
||||||
playlist_bar = tqdm(position=1, desc='Playlist', disable=args.daemon)
|
progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon)
|
||||||
video_bars = manager.list()
|
video_bars = manager.list()
|
||||||
if not args.daemon:
|
if not args.daemon:
|
||||||
for i in range(args.threads):
|
for i in range(args.threads):
|
||||||
video_bars.append([
|
video_bars.append([3 + i, manager.Lock()])
|
||||||
3 + i,
|
|
||||||
manager.Lock()
|
encountered_errors = 0
|
||||||
])
|
errored_videos = 0
|
||||||
|
|
||||||
|
# The video progress bars have an issue where when a bar is closed it will shift its position back 1 then return to the correct position.
|
||||||
|
# This thread will clear empty spots.
|
||||||
|
if not args.daemon:
|
||||||
|
eraser_exit = manager.Value(bool, False)
|
||||||
|
Thread(target=bar_eraser, args=(video_bars, eraser_exit,)).start()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
do_update()
|
do_update()
|
||||||
for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs', disable=args.daemon):
|
for output_path, urls in url_list.items():
|
||||||
logger.info('Fetching playlist...')
|
for target_url in urls:
|
||||||
playlist = yt_dlp.playlist_contents(target_url)
|
logger.info('Fetching playlist...')
|
||||||
playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
|
playlist = yt_dlp.playlist_contents(str(target_url))
|
||||||
encountered_errors = 0
|
if not playlist:
|
||||||
errored_videos = 0
|
progress_bar.update()
|
||||||
|
continue
|
||||||
|
|
||||||
log_info_twice(f"Downloading item: '{playlist['title']}' {target_url}")
|
download_archive_file = args.download_cache_file_directory / (str(playlist['id']) + '.log')
|
||||||
|
if args.erase_downloaded_tracker:
|
||||||
|
if download_archive_file.exists():
|
||||||
|
os.remove(download_archive_file)
|
||||||
|
downloaded_videos = load_existing_videos()
|
||||||
|
|
||||||
playlist_bar.total = len(playlist['entries'])
|
msg = f'Found {len(downloaded_videos)} downloaded videos for playlist "{playlist["title"]}" ({playlist["id"]}). {"Ignoring." if args.ignore_downloaded else ""}'
|
||||||
playlist_bar.set_description(playlist['title'])
|
if args.daemon:
|
||||||
|
print(msg)
|
||||||
|
else:
|
||||||
|
status_bar.write(msg)
|
||||||
|
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s')
|
||||||
|
|
||||||
# print(playlist['entries'][0])
|
playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
|
||||||
# sys.exit()
|
|
||||||
|
|
||||||
# Remove already downloaded files from the to-do list.
|
log_info_twice(f'Downloading item: "{playlist["title"]}" ({playlist["id"]}) {target_url}')
|
||||||
if not args.ignore_downloaded:
|
|
||||||
|
# Remove already downloaded files from the to-do list.
|
||||||
download_queue = []
|
download_queue = []
|
||||||
s = set()
|
|
||||||
for p, video in enumerate(playlist['entries']):
|
for p, video in enumerate(playlist['entries']):
|
||||||
if video['id'] not in downloaded_videos and video['id'] not in s:
|
if video['id'] not in download_queue:
|
||||||
download_queue.append(video)
|
if not args.ignore_downloaded and video['id'] not in downloaded_videos:
|
||||||
s.add(video['id'])
|
download_queue.append(video)
|
||||||
playlist_bar.update(len(downloaded_videos))
|
# downloaded_videos.add(video['id'])
|
||||||
|
elif args.ignore_downloaded:
|
||||||
|
download_queue.append(video)
|
||||||
|
|
||||||
if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded
|
playlist_bar = tqdm(total=len(playlist['entries']), position=1, desc=f'"{playlist["title"]}" ({playlist["id"]})', disable=args.daemon, leave=False)
|
||||||
with Pool(processes=args.threads) as pool:
|
if not args.ignore_downloaded:
|
||||||
if sys.stdout.isatty():
|
playlist_bar.update(len(downloaded_videos))
|
||||||
# Doesn't work if not connected to a terminal:
|
|
||||||
# OSError: [Errno 25] Inappropriate ioctl for device
|
|
||||||
status_bar.set_description_str('=' * os.get_terminal_size()[0])
|
|
||||||
logger.info('Starting downloads...')
|
|
||||||
for result in pool.imap_unordered(download_video,
|
|
||||||
((video, {
|
|
||||||
'bars': video_bars,
|
|
||||||
'ydl_opts': ydl_opts,
|
|
||||||
'output_dir': args.output,
|
|
||||||
}) for video in download_queue)):
|
|
||||||
# Save the video ID to the file
|
|
||||||
if result['downloaded_video_id']:
|
|
||||||
download_archive_logger.info(result['downloaded_video_id'])
|
|
||||||
|
|
||||||
# Print stuff
|
if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded
|
||||||
for line in result['video_error_logger_msg']:
|
with Pool(processes=args.threads) as pool:
|
||||||
video_error_logger.info(line)
|
if sys.stdout.isatty():
|
||||||
file_logger.error(line)
|
# Doesn't work if not connected to a terminal:
|
||||||
encountered_errors += 1
|
# OSError: [Errno 25] Inappropriate ioctl for device
|
||||||
if not args.silence_errors:
|
status_bar.set_description_str('=' * os.get_terminal_size()[0])
|
||||||
if args.daemon:
|
logger.info('Starting downloads...')
|
||||||
logger.error(line)
|
for result in pool.imap_unordered(download_video, ((video, {'bars': video_bars, 'ydl_opts': ydl_opts, 'output_dir': Path(output_path), }) for video in download_queue)):
|
||||||
else:
|
# Save the video ID to the file
|
||||||
playlist_bar.write(line)
|
if result['downloaded_video_id']:
|
||||||
|
download_archive_logger.info(result['downloaded_video_id'])
|
||||||
|
|
||||||
if len(result['video_error_logger_msg']):
|
# Print stuff
|
||||||
errored_videos += 1
|
for line in result['video_error_logger_msg']:
|
||||||
if args.silence_errors and args.daemon:
|
video_error_logger.info(line)
|
||||||
logger.error(f"{result['video_id']} failed due to error.")
|
file_logger.error(line)
|
||||||
|
encountered_errors += 1
|
||||||
|
if not args.silence_errors:
|
||||||
|
if args.daemon:
|
||||||
|
logger.error(line)
|
||||||
|
else:
|
||||||
|
status_bar.write(line)
|
||||||
|
|
||||||
# for line in result['status_msg']:
|
if len(result['video_error_logger_msg']):
|
||||||
# playlist_bar.write(line)
|
errored_videos += 1
|
||||||
for line in result['logger_msg']:
|
if args.silence_errors and args.daemon:
|
||||||
log_info_twice(line)
|
logger.error(f"{result['video_id']} failed due to error.")
|
||||||
playlist_bar.update()
|
|
||||||
else:
|
|
||||||
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'.")
|
|
||||||
|
|
||||||
error_msg = f'Encountered {encountered_errors} errors on {errored_videos} videos.'
|
# for line in result['status_msg']:
|
||||||
if args.daemon:
|
# playlist_bar.write(line)
|
||||||
logger.info(error_msg)
|
for line in result['logger_msg']:
|
||||||
else:
|
log_info_twice(line)
|
||||||
playlist_bar.write(error_msg)
|
playlist_bar.update()
|
||||||
|
else:
|
||||||
|
status_bar.write(f"All videos already downloaded for '{playlist['title']}'.")
|
||||||
|
log_info_twice(f"Finished item: '{playlist['title']}' {target_url}")
|
||||||
|
|
||||||
log_info_twice(f"Finished item: '{playlist['title']}' {target_url}")
|
# Sleep a bit to prevent rate-limiting
|
||||||
|
if progress_bar.n < len(url_list.keys()) - 1:
|
||||||
|
status_bar.set_description_str(f'Sleeping {args.ratelimit_sleep}s...')
|
||||||
|
time.sleep(args.ratelimit_sleep)
|
||||||
|
|
||||||
|
progress_bar.update()
|
||||||
|
error_msg = f'Encountered {encountered_errors} errors on {errored_videos} videos.'
|
||||||
|
if args.daemon:
|
||||||
|
logger.info(error_msg)
|
||||||
|
else:
|
||||||
|
status_bar.write(error_msg)
|
||||||
log_info_twice(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
|
log_info_twice(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
|
||||||
if not args.daemon:
|
if not args.daemon:
|
||||||
break
|
break
|
||||||
|
@ -292,12 +336,9 @@ while True:
|
||||||
time.sleep(args.sleep * 60)
|
time.sleep(args.sleep * 60)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
sys.exit()
|
sys.exit()
|
||||||
downloaded_videos = load_existing_videos() # reload the videos that have already been downloaded
|
# downloaded_videos = load_existing_videos() # reload the videos that have already been downloaded
|
||||||
|
|
||||||
# Erase the status bar.
|
|
||||||
status_bar.set_description_str('\x1b[2KDone!')
|
|
||||||
status_bar.refresh()
|
|
||||||
|
|
||||||
# Clean up the remaining bars. Have to close them in order.
|
# Clean up the remaining bars. Have to close them in order.
|
||||||
|
eraser_exit.value = True
|
||||||
playlist_bar.close()
|
playlist_bar.close()
|
||||||
status_bar.close()
|
status_bar.close()
|
||||||
|
|
|
@ -22,7 +22,7 @@ def restart_program():
|
||||||
os.execl(python, python, *sys.argv)
|
os.execl(python, python, *sys.argv)
|
||||||
|
|
||||||
|
|
||||||
def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a', no_console: bool = True):
|
def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a'):
|
||||||
formatter = logging.Formatter(format_str)
|
formatter = logging.Formatter(format_str)
|
||||||
|
|
||||||
logger = logging.getLogger(name)
|
logger = logging.getLogger(name)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
from multiprocessing import Manager
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
@ -12,21 +14,25 @@ from process.funcs import setup_file_logger
|
||||||
class ytdl_logger(object):
|
class ytdl_logger(object):
|
||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
def __init__(self, logger):
|
def __init__(self, logger=None):
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
|
|
||||||
def debug(self, msg):
|
def debug(self, msg):
|
||||||
self.logger.info(msg)
|
if self.logger:
|
||||||
|
self.logger.info(msg)
|
||||||
|
|
||||||
def info(self, msg):
|
def info(self, msg):
|
||||||
self.logger.info(msg)
|
if self.logger:
|
||||||
|
self.logger.info(msg)
|
||||||
|
|
||||||
def warning(self, msg):
|
def warning(self, msg):
|
||||||
self.logger.warning(msg)
|
if self.logger:
|
||||||
|
self.logger.warning(msg)
|
||||||
|
|
||||||
def error(self, msg):
|
def error(self, msg):
|
||||||
self.logger.error(msg)
|
if self.logger:
|
||||||
self.errors.append(msg)
|
self.logger.error(msg)
|
||||||
|
self.errors.append(msg)
|
||||||
|
|
||||||
|
|
||||||
def is_manager_lock_locked(lock) -> bool:
|
def is_manager_lock_locked(lock) -> bool:
|
||||||
|
@ -57,6 +63,7 @@ def download_video(args) -> dict:
|
||||||
bar.set_postfix({
|
bar.set_postfix({
|
||||||
'speed': d['_speed_str'],
|
'speed': d['_speed_str'],
|
||||||
'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
|
'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
|
||||||
|
'offset': offset
|
||||||
})
|
})
|
||||||
|
|
||||||
video = args[0]
|
video = args[0]
|
||||||
|
@ -69,21 +76,28 @@ def download_video(args) -> dict:
|
||||||
while not locked:
|
while not locked:
|
||||||
for item in kwargs['bars']:
|
for item in kwargs['bars']:
|
||||||
if not is_manager_lock_locked(item[1]):
|
if not is_manager_lock_locked(item[1]):
|
||||||
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
|
locked = item[1].acquire(timeout=0.2) # get the lock ASAP and don't wait if we didn't get it.
|
||||||
offset = item[0]
|
offset = item[0]
|
||||||
bar_lock = item[1]
|
bar_lock = item[1]
|
||||||
break
|
break
|
||||||
kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
|
kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
|
||||||
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
|
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
|
||||||
bar = tqdm(total=100, position=(offset if locked else None), desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
|
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
|
||||||
|
|
||||||
ylogger = ytdl_logger(setup_file_logger(video['id'], kwargs['output_dir'] / f"[{video['id']}].log"))
|
|
||||||
kwargs['ydl_opts']['logger'] = ylogger
|
|
||||||
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
|
|
||||||
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
|
||||||
|
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
|
||||||
|
try:
|
||||||
|
base_path = os.path.splitext(yt_dlp.prepare_filename(yt_dlp.extract_info(video['url'], download=False)))[0]
|
||||||
|
except AttributeError:
|
||||||
|
# Sometimes we won't be able to pull the video info so just use the video's ID
|
||||||
|
base_path = kwargs['output_dir'] / video['id']
|
||||||
|
ylogger = ytdl_logger(setup_file_logger(video['id'], str(base_path) + '.log'))
|
||||||
|
kwargs['ydl_opts']['logger'] = ylogger
|
||||||
|
yt_dlp = ydl.YDL(kwargs['ydl_opts']) # recreate the object with the correct logging path
|
||||||
error_code = yt_dlp(video['url']) # Do the download
|
error_code = yt_dlp(video['url']) # Do the download
|
||||||
if not error_code:
|
if not error_code:
|
||||||
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
|
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
|
||||||
|
@ -96,7 +110,43 @@ def download_video(args) -> dict:
|
||||||
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
|
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}")
|
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}")
|
||||||
|
bar.update(100 - bar.n)
|
||||||
if locked:
|
if locked:
|
||||||
bar.close()
|
bar.close()
|
||||||
bar_lock.release()
|
bar_lock.release()
|
||||||
return output_dict
|
return output_dict
|
||||||
|
|
||||||
|
|
||||||
|
def bar_eraser(video_bars, eraser_exit):
|
||||||
|
manager = Manager()
|
||||||
|
queue = manager.dict()
|
||||||
|
queue_lock = manager.Lock()
|
||||||
|
|
||||||
|
def eraser():
|
||||||
|
nonlocal queue
|
||||||
|
while not eraser_exit.value:
|
||||||
|
for i in queue.keys():
|
||||||
|
if eraser_exit.value:
|
||||||
|
return
|
||||||
|
i = int(i)
|
||||||
|
lock = video_bars[i][1].acquire(timeout=0.1)
|
||||||
|
bar_lock = video_bars[i][1]
|
||||||
|
if lock:
|
||||||
|
bar = tqdm(position=video_bars[i][0], leave=False, bar_format='\x1b[2K')
|
||||||
|
bar.close()
|
||||||
|
with queue_lock:
|
||||||
|
del queue_dict[i]
|
||||||
|
queue = queue_dict
|
||||||
|
bar_lock.release()
|
||||||
|
|
||||||
|
Thread(target=eraser).start()
|
||||||
|
|
||||||
|
while not eraser_exit.value:
|
||||||
|
for i, item in enumerate(video_bars):
|
||||||
|
if eraser_exit.value:
|
||||||
|
return
|
||||||
|
if is_manager_lock_locked(item[1]):
|
||||||
|
with queue_lock:
|
||||||
|
queue_dict = queue
|
||||||
|
queue_dict[i] = True
|
||||||
|
queue = queue_dict
|
||||||
|
|
|
@ -2,4 +2,6 @@ yt-dlp
|
||||||
psutil
|
psutil
|
||||||
tqdm
|
tqdm
|
||||||
mergedeep
|
mergedeep
|
||||||
numpy
|
numpy
|
||||||
|
pyyaml
|
||||||
|
appdirs
|
|
@ -0,0 +1 @@
|
||||||
|
https://www.youtube.com/playlist?list=example1234
|
|
@ -0,0 +1,5 @@
|
||||||
|
/path/to/storage/Example Playlist:
|
||||||
|
- https://www.youtube.com/playlist?list=ExamplePlaylist1234
|
||||||
|
|
||||||
|
/path/to/storage/Music:
|
||||||
|
- https://www.youtube.com/MyMusicPlaylist1234
|
|
@ -29,13 +29,15 @@ class YDL:
|
||||||
sizes.append(d)
|
sizes.append(d)
|
||||||
return tuple(sizes)
|
return tuple(sizes)
|
||||||
|
|
||||||
def playlist_contents(self, url: str) -> dict:
|
def playlist_contents(self, url: str) -> dict | bool:
|
||||||
ydl_opts = merge({
|
ydl_opts = merge({
|
||||||
'extract_flat': True,
|
'extract_flat': True,
|
||||||
'skip_download': True
|
'skip_download': True
|
||||||
}, self.ydl_opts)
|
}, self.ydl_opts)
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
info = ydl.sanitize_info(ydl.extract_info(url, download=False))
|
info = ydl.sanitize_info(ydl.extract_info(url, download=False))
|
||||||
|
if not info:
|
||||||
|
return False
|
||||||
entries = []
|
entries = []
|
||||||
if info['_type'] == 'playlist':
|
if info['_type'] == 'playlist':
|
||||||
if 'entries' in info.keys():
|
if 'entries' in info.keys():
|
||||||
|
@ -53,14 +55,23 @@ class YDL:
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
|
||||||
return self.yt_dlp.download(*args, **kwargs)
|
|
||||||
|
|
||||||
# def filter_filesize(self, info, *, incomplete):
|
# def filter_filesize(self, info, *, incomplete):
|
||||||
# duration = info.get('duration')
|
# duration = info.get('duration')
|
||||||
# if duration and duration < 60:
|
# if duration and duration < 60:
|
||||||
# return 'The video is too short'
|
# return 'The video is too short'
|
||||||
|
|
||||||
|
def extract_info(self, *args, **kwargs):
|
||||||
|
return self.yt_dlp.extract_info(*args, **kwargs)
|
||||||
|
|
||||||
|
def prepare_filename(self, *args, **kwargs):
|
||||||
|
return self.yt_dlp.prepare_filename(*args, **kwargs)
|
||||||
|
|
||||||
|
def process_info(self, *args, **kwargs):
|
||||||
|
return self.yt_dlp.process_info(*args, **kwargs)
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
return self.yt_dlp.download(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def update_ytdlp():
|
def update_ytdlp():
|
||||||
old = subprocess.check_output('pip freeze | grep yt-dlp', shell=True).decode().strip('\n')
|
old = subprocess.check_output('pip freeze | grep yt-dlp', shell=True).decode().strip('\n')
|
||||||
|
|
Reference in New Issue