diff --git a/Example systemd Service.md b/Example systemd Service.md index c7c75a6..ef2572a 100644 --- a/Example systemd Service.md +++ b/Example systemd Service.md @@ -3,13 +3,7 @@ `/home/user/youtubedl-daemon.sh` ```bash #!/bin/bash -SLEEP_TIME="60m" -while true; do - /usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/" - echo -e "\nSleeping for $SLEEP_TIME" - sleep $SLEEP_TIME - echo -e "\n" -done +/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/" ``` diff --git a/README.md b/README.md index dad83ca..2225f98 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ I have a single, very large playlist that I add any videos I like to. On my NAS - Simple display using `tqdm`. - Limit the size of the downloaded videos. - Parallel downloads. +- Daemon mode. ### Installation @@ -30,6 +31,12 @@ pip install -r requirements.txt `./downloader.py ` +To run as a daemon, do: + +`/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 ` + +`--sleep` is how many minutes to sleep after completing all downloads. + #### Folder Structure ``` @@ -56,7 +63,6 @@ Videos will be saved using this name format: | `--no-update` | `-n` | Don\'t update yt-dlp at launch. | | `--max-size` | | Max allowed size of a video in MB. Default: 1100. | | `--rm-cache` | `-r` | Delete the yt-dlp cache on start. | -| `--backwards` | `-b` | Reverse all playlists and download in backwards order. | | `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. | -| `--daemon` | `-d` | Run in daemon mode. Disables progress bars. | - \ No newline at end of file +| `--daemon` | `-d` | Run in daemon mode. 
Disables progress bars and sleeps for the amount of time specified in `--sleep`. | +| `--sleep` | | How many minutes to sleep when in daemon mode. | \ No newline at end of file diff --git a/downloader.py b/downloader.py index b5445fc..184118b 100755 --- a/downloader.py +++ b/downloader.py @@ -11,10 +11,13 @@ from multiprocessing import Manager, Pool, cpu_count from tqdm.auto import tqdm -import automated_youtube_dl.yt_dlp as ydl -from automated_youtube_dl.files import create_directories, resolve_path -from process.funcs import restart_program, setup_file_logger +import ydl.yt_dlp as ydl +from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger from process.threads import download_video +from ydl.files import create_directories, resolve_path + +# logging.basicConfig(level=1000) +# logging.getLogger().setLevel(1000) urlRegex = re.compile( r'^(?:http|ftp)s?://' # http:// or https:// @@ -30,9 +33,10 @@ parser.add_argument('output', help='Output directory.') parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.') parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.') parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.') -parser.add_argument('--backwards', '-b', action='store_true', help='Reverse all playlists and download in backwards order.') parser.add_argument('--threads', type=int, default=cpu_count(), help='How many download processes to use.') -parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars and prints output that's good for journalctl.") +parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. 
Disables progress bars sleeps for the amount of time specified in --sleep.") +parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.') +parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.") args = parser.parse_args() if args.threads <= 0: @@ -67,33 +71,57 @@ if not args.no_update: if args.rm_cache: subprocess.run('yt-dlp --rm-cache-dir', shell=True) +if args.daemon: + print('Running in daemon mode.') + log_dir = args.output / 'logs' create_directories(args.output, log_dir) -logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO) +file_logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO) video_error_logger = setup_file_logger('youtube_dl_video_errors', log_dir / f'youtube_dl-errors-{int(log_time)}.log', level=logging.INFO) +logger = get_silent_logger('yt-dl', silent=not args.daemon) -logger.info(f'Starting process.') + +def log_info_twice(msg): + logger.info(msg) + file_logger.info(msg) + + +log_info_twice('Starting process.') start_time = time.time() manager = Manager() -# Find existing videos to skip. download_archive_file = args.output / 'download-archive.log' -if not download_archive_file.exists(): - download_archive_file.touch() -with open(download_archive_file, 'r') as file: - download_archive = manager.list([line.rstrip() for line in file]) -print('Found', len(download_archive), 'downloaded videos.') + + +def load_existing_videos(): + # Find existing videos. + output = set() + if not download_archive_file.exists(): + download_archive_file.touch() + with open(download_archive_file, 'r') as file: + output.update(([line.rstrip() for line in file])) + return output + + +downloaded_videos = load_existing_videos() +print('Found', len(downloaded_videos), 'downloaded videos.') # Create this object AFTER reading in the download_archive. 
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s') -status_bar = tqdm(position=2, bar_format='{desc}') +status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon) def log_bar(msg, level): status_bar.write(f'[{level}] {msg}') + if level == 'warning': + logger.warning(msg) + elif level == 'error': + logger.error(msg) + else: + logger.info(msg) def print_without_paths(msg): @@ -104,127 +132,158 @@ def print_without_paths(msg): if m: msg = m.group(1) m1 = re.match(r'^(.*?): ', msg) - status_bar.set_description_str(msg.strip('to "').strip('to: ').strip()) + msg = msg.strip('to "').strip('to: ').strip() + if args.daemon: + log_info_twice(msg) + else: + status_bar.set_description_str(msg) class ytdl_logger(object): def debug(self, msg): - logger.debug(msg) + file_logger.debug(msg) # if msg.startswith('[debug] '): # pass if '[download]' not in msg: print_without_paths(msg) def info(self, msg): - logger.info(msg) + file_logger.info(msg) print_without_paths(msg) def warning(self, msg): - logger.warning(msg) + file_logger.warning(msg) log_bar(msg, 'warning') def error(self, msg): - logger.error(msg) + file_logger.error(msg) log_bar(msg, 'error') -class ytdl_no_logger(object): - def debug(self, msg): - return - - def info(self, msg): - return - - def warning(self, msg): - return - - def error(self, msg): - return - - # https://github.com/yt-dlp/yt-dlp#embedding-examples ydl_opts = { 'format': 
f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best', - 'outtmpl': f'{args.output}/%(title)s --- %(uploader)s --- %(uploader_id)s --- %(id)s', + 'outtmpl': f'{args.output}/[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s].%(ext)s', # leading dash can cause issues due to bash args so we surround the variables in brackets 'merge_output_format': 'mkv', 'logtostderr': True, 'embedchapters': True, - # 'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative. + 'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative. 'embedthumbnail': True, + 'embeddescription': True, 'writesubtitles': True, # 'allsubtitles': True, # Download every language. 
'subtitlesformat': 'vtt', 'subtitleslangs': ['en'], 'writeautomaticsub': True, + # 'writedescription': True, + 'ignoreerrors': True, + 'continuedl': False, + 'addmetadata': True, + 'writeinfojson': True, 'postprocessors': [ {'key': 'FFmpegEmbedSubtitle'}, {'key': 'FFmpegMetadata', 'add_metadata': True}, {'key': 'EmbedThumbnail', 'already_have_thumbnail': True}, + # {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'} ], - } main_opts = dict(ydl_opts, **{'logger': ytdl_logger()}) -thread_opts = dict(ydl_opts, **{'logger': ytdl_no_logger()}) +# thread_opts = dict(ydl_opts, **{'logger': ydl.ytdl_no_logger()}) yt_dlp = ydl.YDL(main_opts) # Init bars -playlist_bar = tqdm(position=1, desc='Playlist') +playlist_bar = tqdm(position=1, desc='Playlist', disable=args.daemon) video_bars = manager.list() -for i in range(args.threads): - video_bars.append([ - 3 + i, - manager.Lock() - ]) +if not args.daemon: + for i in range(args.threads): + video_bars.append([ + 3 + i, + manager.Lock() + ]) -for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs'): - playlist = yt_dlp.playlist_contents(target_url) - logger.info(f"Downloading item: '{playlist['title']}' {target_url}") - playlist_bar.total = len(playlist['entries']) - playlist_bar.set_description(playlist['title']) +while True: + for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs', disable=args.daemon): + logger.info('Fetching playlist...') + playlist = yt_dlp.playlist_contents(target_url) + playlist['entries'] = remove_duplicates_from_playlist(playlist['entries']) + encountered_errors = 0 + errored_videos = 0 - # Remove already downloaded files from the to-do list. 
- download_queue = [] - for video in playlist['entries']: - if video['id'] not in download_archive: - download_queue.append(video) - else: - logger.info(f"{video['id']} already downloaded.") - playlist_bar.update(len(playlist['entries']) - len(download_queue)) + log_info_twice(f"Downloading item: '{playlist['title']}' {target_url}") - if args.backwards: - download_queue.reverse() + playlist_bar.total = len(playlist['entries']) + playlist_bar.set_description(playlist['title']) - if len(download_queue): # Don't mess with multiprocessing if the list is empty - with Pool(processes=args.threads) as pool: - status_bar.set_description_str('=' * os.get_terminal_size()[0]) - for result in pool.imap_unordered(download_video, - ((video, { - 'bars': video_bars, - 'download_archive': download_archive, - 'ydl_opts': thread_opts, - }) for video in download_queue)): - # Print stuff - if result['downloaded_video_id']: - download_archive_logger.info(result['downloaded_video_id']) - if len(result['video_error_logger_msg']): + # print(playlist['entries'][0]) + # sys.exit() + + # Remove already downloaded files from the to-do list. 
+ download_queue = [] + s = set() + for p, video in enumerate(playlist['entries']): + if video['id'] not in downloaded_videos and video['id'] not in s: + download_queue.append(video) + s.add(video['id']) + playlist_bar.update(len(downloaded_videos)) + + if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded + with Pool(processes=args.threads) as pool: + status_bar.set_description_str('=' * os.get_terminal_size()[0]) + logger.info('Starting downloads...') + for result in pool.imap_unordered(download_video, + ((video, { + 'bars': video_bars, + 'ydl_opts': ydl_opts, + 'output_dir': args.output, + }) for video in download_queue)): + # Save the video ID to the file + if result['downloaded_video_id']: + download_archive_logger.info(result['downloaded_video_id']) + + # Print stuff for line in result['video_error_logger_msg']: video_error_logger.info(line) - if len(result['status_msg']): - for line in result['status_msg']: - playlist_bar.write(f"{result['downloaded_video_id']}: {line}") - if len(result['logger_msg']): + file_logger.error(line) + encountered_errors += 1 + if not args.silence_errors: + if args.daemon: + logger.error(line) + else: + playlist_bar.write(line) + if len(result['video_error_logger_msg']): + errored_videos += 1 + + # for line in result['status_msg']: + # playlist_bar.write(line) for line in result['logger_msg']: - logger.info(line) - playlist_bar.update() + log_info_twice(line) + playlist_bar.update() + else: + playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'.") + + error_msg = f'Encountered {encountered_errors} errors on {errored_videos} videos.' 
+ if args.daemon: + logger.info(error_msg) + else: + playlist_bar.write(error_msg) + + log_info_twice(f"Finished item: '{playlist['title']}' {target_url}") + log_info_twice(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.") + if not args.daemon: + break else: - playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'") - # playlist_bar.update(playlist_bar.total - playlist_bar.n) - logger.info(f"Finished item: '{playlist['title']}' {target_url}") -logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.") + logger.info(f'Sleeping for {args.sleep} min.') + try: + time.sleep(args.sleep * 60) + except KeyboardInterrupt: + sys.exit() + downloaded_videos = load_existing_videos() # reload the videos that have already been downloaded + +# Erase the status bar. +status_bar.set_description_str('\x1b[2KDone!') +status_bar.refresh() # Clean up the remaining bars. Have to close them in order. -status_bar.set_description_str('\x1b[2KDone!') # erase the status bar -status_bar.refresh() playlist_bar.close() status_bar.close() diff --git a/process/funcs.py b/process/funcs.py index 3301509..045338e 100644 --- a/process/funcs.py +++ b/process/funcs.py @@ -22,17 +22,41 @@ def restart_program(): os.execl(python, python, *sys.argv) -def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a'): +def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a', no_console: bool = True): formatter = logging.Formatter(format_str) - handler = logging.FileHandler(log_file, mode=filemode) - handler.setFormatter(formatter) logger = logging.getLogger(name) logger.setLevel(level) + handler = logging.FileHandler(log_file, mode=filemode) + handler.setLevel(level) + handler.setFormatter(formatter) logger.addHandler(handler) # Silence console logging - 
console = logging.StreamHandler(sys.stdout) - console.setLevel(100) + # if no_console: + # console = logging.StreamHandler() + # console.setLevel(100) return logger + + +def get_silent_logger(name, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', silent: bool = True): + logger = logging.getLogger(name) + console = logging.StreamHandler() + console.setFormatter(logging.Formatter(format_str)) + logger.addHandler(console) + if silent: + logger.setLevel(100) + else: + logger.setLevel(level) + return logger + + +def remove_duplicates_from_playlist(entries): + videos = [] + s = set() + for p, video in enumerate(entries): + if video['id'] not in s: + videos.append(video) + s.add(video['id']) + return videos diff --git a/process/threads.py b/process/threads.py index 2d4c6dd..c1f1984 100644 --- a/process/threads.py +++ b/process/threads.py @@ -5,7 +5,28 @@ import time import numpy as np from tqdm.auto import tqdm -import automated_youtube_dl.yt_dlp as ydl +import ydl.yt_dlp as ydl +from process.funcs import setup_file_logger + + +class ytdl_logger(object): + errors = [] + + def __init__(self, logger): + self.logger = logger + + def debug(self, msg): + self.logger.info(msg) + + def info(self, msg): + self.logger.info(msg) + + def warning(self, msg): + self.logger.warning(msg) + + def error(self, msg): + self.logger.error(msg) + self.errors.append(msg) def is_manager_lock_locked(lock) -> bool: @@ -26,59 +47,56 @@ def download_video(args) -> dict: def progress_hook(d): # downloaded_bytes and total_bytes can be None if the download hasn't started yet. 
- if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'): - downloaded_bytes = int(d['downloaded_bytes']) - total_bytes = int(d['total_bytes']) - if total_bytes > 0: - percent = (downloaded_bytes / total_bytes) * 100 - bar.update(int(np.round(percent - bar.n))) # If the progress bar doesn't end at 100% then round to 1 decimal place - bar.set_postfix({ - 'speed': d['_speed_str'], - 'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}", - }) + if d['status'] == 'downloading': + if d.get('downloaded_bytes') and d.get('total_bytes'): + downloaded_bytes = int(d['downloaded_bytes']) + total_bytes = int(d['total_bytes']) + if total_bytes > 0: + percent = (downloaded_bytes / total_bytes) * 100 + bar.update(int(np.round(percent - bar.n))) # If the progress bar doesn't end at 100% then round to 1 decimal place + bar.set_postfix({ + 'speed': d['_speed_str'], + 'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}", + }) video = args[0] kwargs = args[1] - bars = kwargs['bars'] - download_archive = kwargs['download_archive'] - - ydl_opts = kwargs['ydl_opts'] - ydl_opts['progress_hooks'] = [progress_hook] - yt_dlp = ydl.YDL(ydl_opts) + # Get a bar locked = False - # We're going to wait until a bar is available for us to use. - while not locked: - for item in bars: - if not is_manager_lock_locked(item[1]): - locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it. - offset = item[0] - bar_lock = item[1] - break + if len(kwargs['bars']): + # We're going to wait until a bar is available for us to use. + while not locked: + for item in kwargs['bars']: + if not is_manager_lock_locked(item[1]): + locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it. 
+ offset = item[0] + bar_lock = item[1] + break + kwargs['ydl_opts']['progress_hooks'] = [progress_hook] + desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4))) + bar = tqdm(total=100, position=(offset if locked else None), desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False) - # with bar_lock: - width, _ = os.get_terminal_size() - desc_with = int(np.round(width * (1 / 4))) - bar = tqdm(total=100, position=offset, desc=video['title'].ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False) - output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} + ylogger = ytdl_logger(setup_file_logger(video['id'], kwargs['output_dir'] / f"[{video['id']}].log")) + kwargs['ydl_opts']['logger'] = ylogger + yt_dlp = ydl.YDL(kwargs['ydl_opts']) + output_dict = {'downloaded_video_id': None, 'blacklist_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object start_time = time.time() - # if video['id'] in download_archive: - # output_dict['logger_msg'].append(f"{video['id']} already downloaded.") - # else: try: error_code = yt_dlp(video['url']) # Do the download if not error_code: - download_archive.append(video['id']) elapsed = round(math.ceil(time.time() - start_time) / 60, 2) - output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.") + output_dict['logger_msg'].append(f"{video['id']} '{video['title']}' downloaded in {elapsed} min.") output_dict['downloaded_video_id'] = video['id'] else: - m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}' - output_dict['status_msg'].append(m) - output_dict['video_error_logger_msg'].append(m) + # m = f'{video["id"]} {video["title"]} -> Failed to download, error code: {error_code}' + # output_dict['status_msg'].append(m) 
+ # output_dict['video_error_logger_msg'].append(m) + output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors except Exception as e: - output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}") - bar.close() - bar_lock.release() + output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}") + if locked: + bar.close() + bar_lock.release() return output_dict diff --git a/automated_youtube_dl/__init__.py b/ydl/__init__.py similarity index 100% rename from automated_youtube_dl/__init__.py rename to ydl/__init__.py diff --git a/automated_youtube_dl/files.py b/ydl/files.py similarity index 100% rename from automated_youtube_dl/files.py rename to ydl/files.py diff --git a/automated_youtube_dl/yt_dlp.py b/ydl/yt_dlp.py similarity index 88% rename from automated_youtube_dl/yt_dlp.py rename to ydl/yt_dlp.py index 226b6ea..640bf58 100644 --- a/automated_youtube_dl/yt_dlp.py +++ b/ydl/yt_dlp.py @@ -43,7 +43,8 @@ class YDL: elif info['_type'] == 'video': # `info` doesn't seem to contain the `url` key so we'll add it manually. # If any issues arise in the future make sure to double check there isn't any weirdness going on here. - entries.append(dict(**info, **{'url': f"https://www.youtube.com/watch?v={info['id']}"})) + entries.append(info) + entries[0]['url'] = f"https://www.youtube.com/watch?v={info['id']}" else: raise ValueError(f"Unknown media type: {info['_type']}") return { @@ -66,3 +67,17 @@ def update_ytdlp(): subprocess.run('if pip list --outdated | grep -q yt-dlp; then pip install --upgrade yt-dlp; fi', shell=True) new = subprocess.check_output('pip freeze | grep yt-dlp', shell=True).decode().strip('\n') return old != new + + +class ytdl_no_logger(object): + def debug(self, msg): + return + + def info(self, msg): + return + + def warning(self, msg): + return + + def error(self, msg): + return