diff --git a/README.md b/README.md
index ca5bba7..9af7747 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ I have a single, very large playlist that I add any videos I like to. On my NAS
 ### Installation
 
 ```bash
-sudo apt update && sudo apt install ffmpeg atomicparsley
+sudo apt update && sudo apt install ffmpeg atomicparsley phantomjs
 pip install -r requirements.txt
 ```
 
diff --git a/downloader.py b/downloader.py
index b24350d..4d5c533 100755
--- a/downloader.py
+++ b/downloader.py
@@ -4,6 +4,7 @@ import logging.config
 import math
 import os
 import re
+import signal
 import subprocess
 import sys
 import time
@@ -20,8 +21,15 @@ from process.funcs import get_silent_logger, remove_duplicates_from_playlist, re
 from process.threads import bar_eraser, download_video
 from ydl.files import create_directories, resolve_path
 
-# logging.basicConfig(level=1000)
-# logging.getLogger().setLevel(1000)
+
+def signal_handler(sig, frame):
+    # TODO: https://www.g-loaded.eu/2016/11/24/how-to-terminate-running-python-threads-using-signals/
+    # raise ServiceExit
+    sys.exit(0)
+
+
+signal.signal(signal.SIGTERM, signal_handler)
+signal.signal(signal.SIGINT, signal_handler)
 
 urlRegex = re.compile(r'^(?:http|ftp)s?://'  # http:// or https://
                       r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
@@ -47,6 +55,7 @@ parser.add_argument('--ratelimit-sleep', type=int, default=5, help='How many sec
 parser.add_argument('--input-datatype', choices=['auto', 'txt', 'yaml'], default='auto', help='The datatype of the input file. If set to auto, the file will be scanned for a URL on the firstline.'
                                                                                               'If is a URL, the filetype will be set to txt. If it is a key: value pair then the filetype will be set to yaml.')
 parser.add_argument('--log-dir', default=None, help='Where to store the logs. Must be set when --output is not.')
+parser.add_argument('--verbose', '-v', action='store_true')
 args = parser.parse_args()
 
 if args.threads <= 0:
@@ -64,6 +73,11 @@ else:
     args.log_dir = args.output / 'logs'
 
 args.download_cache_file_directory = resolve_path(args.download_cache_file_directory)
+
+# TODO: use logging for this
+if args.verbose:
+    print('Cache directory:', args.download_cache_file_directory)
+
 log_time = time.time()
 
 # Get the URLs of the videos to download. Is the input a URL or file?
@@ -114,6 +128,8 @@ def do_update():
     if updated:
         print('Restarting program...')
         restart_program()
+    else:
+        print('Up to date.')
 
 
 if args.rm_cache:
@@ -225,8 +241,11 @@ ydl_opts = {
         {'key': 'FFmpegEmbedSubtitle'},
         {'key': 'FFmpegMetadata', 'add_metadata': True},
         {'key': 'EmbedThumbnail', 'already_have_thumbnail': True},
+        {'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', 'when': 'before_dl'},
         # {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'}
     ],
+    # 'external_downloader': 'aria2c',
+    # 'external_downloader_args': ['-j 32', '-s 32', '-x 16', '--file-allocation=none', '--optimize-concurrent-downloads=true', '--http-accept-gzip=true', '--continue=true'],
 }
 
 yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': ytdl_logger()}))
@@ -251,21 +270,25 @@ if not args.daemon:
     eraser_exit = manager.Value(bool, False)
     Thread(target=bar_eraser, args=(video_bars, eraser_exit,)).start()
 
+already_erased_downloaded_tracker = False
+
 while True:
     do_update()
 
-    progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon)
+    progress_bar = tqdm(total=url_count, position=0, desc='Inputs', disable=args.daemon, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
     for output_path, urls in url_list.items():
         for target_url in urls:
             logger.info('Fetching playlist...')
            playlist = yt_dlp.playlist_contents(str(target_url))
+
             if not playlist:
                 progress_bar.update()
                 continue
             download_archive_file = args.download_cache_file_directory / (str(playlist['id']) + '.log')
-            if args.erase_downloaded_tracker:
+            if args.erase_downloaded_tracker and not already_erased_downloaded_tracker:
                 if download_archive_file.exists():
                     os.remove(download_archive_file)
+                already_erased_downloaded_tracker = True
             downloaded_videos = load_existing_videos()
 
             msg = f'Found {len(downloaded_videos)} downloaded videos for playlist "{playlist["title"]}" ({playlist["id"]}). {"Ignoring." if args.ignore_downloaded else ""}'
@@ -352,7 +375,7 @@ while True:
         try:
             time.sleep(args.sleep * 60)
         except KeyboardInterrupt:
-            sys.exit()
+            sys.exit(0)
         # downloaded_videos = load_existing_videos()  # reload the videos that have already been downloaded
 
     # Clean up the remaining bars. Have to close them in order.
diff --git a/process/threads.py b/process/threads.py
index adadf8d..d97094f 100644
--- a/process/threads.py
+++ b/process/threads.py
@@ -1,5 +1,7 @@
 import math
+import multiprocessing
 import os
+import sys
 import time
 from multiprocessing import Manager
 from threading import Thread
@@ -81,7 +83,7 @@ def download_video(args) -> dict:
                    break
    kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
    desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
-    bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
+    bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
    output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}  # empty object
 
    start_time = time.time()
@@ -120,29 +122,50 @@ def bar_eraser(video_bars, eraser_exit):
 
    def eraser():
        nonlocal queue
+        try:
+            while not eraser_exit.value:
+                for i in queue.keys():
+                    if eraser_exit.value:
+                        return
+                    i = int(i)
+                    lock = video_bars[i][1].acquire(timeout=0.1)
+                    bar_lock = video_bars[i][1]
+                    if lock:
+                        bar = tqdm(position=video_bars[i][0], leave=False, bar_format='\x1b[2K')
+                        bar.close()
+                        with queue_lock:
+                            del queue_dict[i]
+                            queue = queue_dict
+                        bar_lock.release()
+        except KeyboardInterrupt:
+            sys.exit(0)
+        except multiprocessing.managers.RemoteError:
+            sys.exit(0)
+        except SystemExit:
+            sys.exit(0)
+
+    try:
+        Thread(target=eraser).start()
        while not eraser_exit.value:
-            for i in queue.keys():
+            for i, item in enumerate(video_bars):
                if eraser_exit.value:
                    return
-                i = int(i)
-                lock = video_bars[i][1].acquire(timeout=0.1)
-                bar_lock = video_bars[i][1]
-                if lock:
-                    bar = tqdm(position=video_bars[i][0], leave=False, bar_format='\x1b[2K')
-                    bar.close()
+                if is_manager_lock_locked(item[1]):
                    with queue_lock:
-                        del queue_dict[i]
+                        queue_dict = queue
+                        queue_dict[i] = True
                        queue = queue_dict
-                    bar_lock.release()
+    except KeyboardInterrupt:
+        sys.exit(0)
+    except multiprocessing.managers.RemoteError:
+        sys.exit(0)
+    except SystemExit:
+        sys.exit(0)
 
-    Thread(target=eraser).start()
-
-    while not eraser_exit.value:
-        for i, item in enumerate(video_bars):
-            if eraser_exit.value:
-                return
-            if is_manager_lock_locked(item[1]):
-                with queue_lock:
-                    queue_dict = queue
-                    queue_dict[i] = True
-                    queue = queue_dict
+
+class ServiceExit(Exception):
+    """
+    Custom exception which is used to trigger the clean exit
+    of all running threads and the main program.
+    """
+    pass
diff --git a/requirements.txt b/requirements.txt
index 297a27f..3775d92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ tqdm
 mergedeep
 numpy
 pyyaml
-appdirs
\ No newline at end of file
+appdirs
+phantomjs
\ No newline at end of file
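Note on the signal-handling TODO above: the commit installs SIGTERM/SIGINT handlers that call `sys.exit(0)` and adds a `ServiceExit` exception, but the two are not yet wired together. Below is a minimal sketch of the pattern from the linked g-loaded.eu article, assuming a `threading.Event`-based shutdown flag; the `worker` and `shutdown_flag` names are illustrative and not part of this commit.

```python
import signal
import threading
import time


class ServiceExit(Exception):
    """Raised from the signal handler to unwind the main thread cleanly."""
    pass


def signal_handler(sig, frame):
    # Instead of sys.exit(0), raise an exception the main loop can catch.
    raise ServiceExit


def worker(shutdown_flag):
    # Worker threads poll a shared flag instead of being terminated abruptly.
    while not shutdown_flag.is_set():
        time.sleep(0.5)


def main():
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    shutdown_flag = threading.Event()
    t = threading.Thread(target=worker, args=(shutdown_flag,))
    t.start()

    try:
        while True:
            time.sleep(0.5)
    except ServiceExit:
        shutdown_flag.set()  # ask workers to stop
        t.join()             # then wait for them before exiting


if __name__ == '__main__':
    main()
```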