daemon mode, fix filename issues, log to individual files, better logging

This commit is contained in:
Cyberes 2023-01-21 18:19:03 -07:00
parent 1a2746ce1e
commit 6929e36c57
8 changed files with 256 additions and 140 deletions

View File

@ -3,13 +3,7 @@
`/home/user/youtubedl-daemon.sh`
```bash
#!/bin/bash
SLEEP_TIME="60m"
while true; do
/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/"
echo -e "\nSleeping for $SLEEP_TIME"
sleep $SLEEP_TIME
echo -e "\n"
done
/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/"
```

View File

@ -18,6 +18,7 @@ I have a single, very large playlist that I add any videos I like to. On my NAS
- Simple display using `tqdm`.
- Limit the size of the downloaded videos.
- Parallel downloads.
- Daemon mode.
### Installation
@ -30,6 +31,12 @@ pip install -r requirements.txt
`./downloader.py <URL to download or path of a file containing the URLs of the videos to download> <output directory>`
To run as a daemon, do:
`/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 <url> <output folder>`
`--sleep` is how many minutes to sleep after completing all downloads.
#### Folder Structure
```
@ -56,7 +63,6 @@ Videos will be saved using this name format:
| `--no-update` | `-n` | Don\'t update yt-dlp at launch. |
| `--max-size` | | Max allowed size of a video in MB. Default: 1100. |
| `--rm-cache` | `-r` | Delete the yt-dlp cache on start. |
| `--backwards` | `-b` | Reverse all playlists and download in backwards order. |
| `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. |
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars. |
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in `--sleep`. |
| `--sleep` | | How many minutes to sleep when in daemon mode. |

View File

@ -11,10 +11,13 @@ from multiprocessing import Manager, Pool, cpu_count
from tqdm.auto import tqdm
import automated_youtube_dl.yt_dlp as ydl
from automated_youtube_dl.files import create_directories, resolve_path
from process.funcs import restart_program, setup_file_logger
import ydl.yt_dlp as ydl
from process.funcs import get_silent_logger, remove_duplicates_from_playlist, restart_program, setup_file_logger
from process.threads import download_video
from ydl.files import create_directories, resolve_path
# logging.basicConfig(level=1000)
# logging.getLogger().setLevel(1000)
urlRegex = re.compile(
r'^(?:http|ftp)s?://' # http:// or https://
@ -30,9 +33,10 @@ parser.add_argument('output', help='Output directory.')
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
parser.add_argument('--backwards', '-b', action='store_true', help='Reverse all playlists and download in backwards order.')
parser.add_argument('--threads', type=int, default=cpu_count(), help='How many download processes to use.')
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars and prints output that's good for journalctl.")
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.")
parser.add_argument('--sleep', type=float, default=60, help='How many minutes to sleep when in daemon mode.')
parser.add_argument('--silence-errors', '-s', action='store_true', help="Don't print any error messages to the console.")
args = parser.parse_args()
if args.threads <= 0:
@ -67,33 +71,57 @@ if not args.no_update:
if args.rm_cache:
subprocess.run('yt-dlp --rm-cache-dir', shell=True)
if args.daemon:
print('Running in daemon mode.')
log_dir = args.output / 'logs'
create_directories(args.output, log_dir)
logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
file_logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
video_error_logger = setup_file_logger('youtube_dl_video_errors', log_dir / f'youtube_dl-errors-{int(log_time)}.log', level=logging.INFO)
logger = get_silent_logger('yt-dl', silent=not args.daemon)
logger.info(f'Starting process.')
def log_info_twice(msg):
    """Emit ``msg`` at INFO level on ``logger`` and then on ``file_logger``."""
    for sink in (logger, file_logger):
        sink.info(msg)
log_info_twice('Starting process.')
start_time = time.time()
manager = Manager()
# Find existing videos to skip.
download_archive_file = args.output / 'download-archive.log'
if not download_archive_file.exists():
download_archive_file.touch()
with open(download_archive_file, 'r') as file:
download_archive = manager.list([line.rstrip() for line in file])
print('Found', len(download_archive), 'downloaded videos.')
def load_existing_videos():
    """Read ``download_archive_file`` into a set, one entry per line.

    The file is created empty first when it does not exist yet. Trailing
    whitespace (including the newline) is stripped from every line.
    """
    if not download_archive_file.exists():
        download_archive_file.touch()
    with open(download_archive_file, 'r') as archive:
        return {entry.rstrip() for entry in archive}
downloaded_videos = load_existing_videos()
print('Found', len(downloaded_videos), 'downloaded videos.')
# Create this object AFTER reading in the download_archive.
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s')
status_bar = tqdm(position=2, bar_format='{desc}')
status_bar = tqdm(position=2, bar_format='{desc}', disable=args.daemon)
def log_bar(msg, level):
    """Write ``[level] msg`` through the status bar, then log ``msg``.

    ``level`` of ``'warning'`` or ``'error'`` selects the matching method on
    ``logger``; any other value is logged with ``logger.info``.
    """
    status_bar.write(f'[{level}] {msg}')
    emit = (
        logger.warning if level == 'warning'
        else logger.error if level == 'error'
        else logger.info
    )
    emit(msg)
def print_without_paths(msg):
@ -104,127 +132,158 @@ def print_without_paths(msg):
if m:
msg = m.group(1)
m1 = re.match(r'^(.*?): ', msg)
status_bar.set_description_str(msg.strip('to "').strip('to: ').strip())
msg = msg.strip('to "').strip('to: ').strip()
if args.daemon:
log_info_twice(msg)
else:
status_bar.set_description_str(msg)
class ytdl_logger(object):
def debug(self, msg):
logger.debug(msg)
file_logger.debug(msg)
# if msg.startswith('[debug] '):
# pass
if '[download]' not in msg:
print_without_paths(msg)
def info(self, msg):
logger.info(msg)
file_logger.info(msg)
print_without_paths(msg)
def warning(self, msg):
logger.warning(msg)
file_logger.warning(msg)
log_bar(msg, 'warning')
def error(self, msg):
logger.error(msg)
file_logger.error(msg)
log_bar(msg, 'error')
class ytdl_no_logger(object):
    """Logger stand-in whose methods accept a message and do nothing with it."""

    def debug(self, msg):
        """Discard a debug message."""

    def info(self, msg):
        """Discard an info message."""

    def warning(self, msg):
        """Discard a warning message."""

    def error(self, msg):
        """Discard an error message."""
# https://github.com/yt-dlp/yt-dlp#embedding-examples
ydl_opts = {
'format': f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
'outtmpl': f'{args.output}/%(title)s --- %(uploader)s --- %(uploader_id)s --- %(id)s',
'outtmpl': f'{args.output}/[%(id)s] [%(title)s] [%(uploader)s] [%(uploader_id)s].%(ext)s', # leading dash can cause issues due to bash args so we surround the variables in brackets
'merge_output_format': 'mkv',
'logtostderr': True,
'embedchapters': True,
# 'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
'writethumbnail': True, # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
'embedthumbnail': True,
'embeddescription': True,
'writesubtitles': True,
# 'allsubtitles': True, # Download every language.
'subtitlesformat': 'vtt',
'subtitleslangs': ['en'],
'writeautomaticsub': True,
# 'writedescription': True,
'ignoreerrors': True,
'continuedl': False,
'addmetadata': True,
'writeinfojson': True,
'postprocessors': [
{'key': 'FFmpegEmbedSubtitle'},
{'key': 'FFmpegMetadata', 'add_metadata': True},
{'key': 'EmbedThumbnail', 'already_have_thumbnail': True},
# {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'}
],
}
main_opts = dict(ydl_opts, **{'logger': ytdl_logger()})
thread_opts = dict(ydl_opts, **{'logger': ytdl_no_logger()})
# thread_opts = dict(ydl_opts, **{'logger': ydl.ytdl_no_logger()})
yt_dlp = ydl.YDL(main_opts)
# Init bars
playlist_bar = tqdm(position=1, desc='Playlist')
playlist_bar = tqdm(position=1, desc='Playlist', disable=args.daemon)
video_bars = manager.list()
for i in range(args.threads):
video_bars.append([
3 + i,
manager.Lock()
])
if not args.daemon:
for i in range(args.threads):
video_bars.append([
3 + i,
manager.Lock()
])
for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs'):
playlist = yt_dlp.playlist_contents(target_url)
logger.info(f"Downloading item: '{playlist['title']}' {target_url}")
playlist_bar.total = len(playlist['entries'])
playlist_bar.set_description(playlist['title'])
while True:
for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs', disable=args.daemon):
logger.info('Fetching playlist...')
playlist = yt_dlp.playlist_contents(target_url)
playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
encountered_errors = 0
errored_videos = 0
# Remove already downloaded files from the to-do list.
download_queue = []
for video in playlist['entries']:
if video['id'] not in download_archive:
download_queue.append(video)
else:
logger.info(f"{video['id']} already downloaded.")
playlist_bar.update(len(playlist['entries']) - len(download_queue))
log_info_twice(f"Downloading item: '{playlist['title']}' {target_url}")
if args.backwards:
download_queue.reverse()
playlist_bar.total = len(playlist['entries'])
playlist_bar.set_description(playlist['title'])
if len(download_queue): # Don't mess with multiprocessing if the list is empty
with Pool(processes=args.threads) as pool:
status_bar.set_description_str('=' * os.get_terminal_size()[0])
for result in pool.imap_unordered(download_video,
((video, {
'bars': video_bars,
'download_archive': download_archive,
'ydl_opts': thread_opts,
}) for video in download_queue)):
# Print stuff
if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id'])
if len(result['video_error_logger_msg']):
# print(playlist['entries'][0])
# sys.exit()
# Remove already downloaded files from the to-do list.
download_queue = []
s = set()
for p, video in enumerate(playlist['entries']):
if video['id'] not in downloaded_videos and video['id'] not in s:
download_queue.append(video)
s.add(video['id'])
playlist_bar.update(len(downloaded_videos))
if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded
with Pool(processes=args.threads) as pool:
status_bar.set_description_str('=' * os.get_terminal_size()[0])
logger.info('Starting downloads...')
for result in pool.imap_unordered(download_video,
((video, {
'bars': video_bars,
'ydl_opts': ydl_opts,
'output_dir': args.output,
}) for video in download_queue)):
# Save the video ID to the file
if result['downloaded_video_id']:
download_archive_logger.info(result['downloaded_video_id'])
# Print stuff
for line in result['video_error_logger_msg']:
video_error_logger.info(line)
if len(result['status_msg']):
for line in result['status_msg']:
playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
if len(result['logger_msg']):
file_logger.error(line)
encountered_errors += 1
if not args.silence_errors:
if args.daemon:
logger.error(line)
else:
playlist_bar.write(line)
if len(result['video_error_logger_msg']):
errored_videos += 1
# for line in result['status_msg']:
# playlist_bar.write(line)
for line in result['logger_msg']:
logger.info(line)
playlist_bar.update()
log_info_twice(line)
playlist_bar.update()
else:
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'.")
error_msg = f'Encountered {encountered_errors} errors on {errored_videos} videos.'
if args.daemon:
logger.info(error_msg)
else:
playlist_bar.write(error_msg)
log_info_twice(f"Finished item: '{playlist['title']}' {target_url}")
log_info_twice(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
if not args.daemon:
break
else:
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'")
# playlist_bar.update(playlist_bar.total - playlist_bar.n)
logger.info(f"Finished item: '{playlist['title']}' {target_url}")
logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
logger.info(f'Sleeping for {args.sleep} min.')
try:
time.sleep(args.sleep * 60)
except KeyboardInterrupt:
sys.exit()
downloaded_videos = load_existing_videos() # reload the videos that have already been downloaded
# Erase the status bar.
status_bar.set_description_str('\x1b[2KDone!')
status_bar.refresh()
# Clean up the remaining bars. Have to close them in order.
status_bar.set_description_str('\x1b[2KDone!') # erase the status bar
status_bar.refresh()
playlist_bar.close()
status_bar.close()

View File

@ -22,17 +22,41 @@ def restart_program():
os.execl(python, python, *sys.argv)
def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a', no_console: bool = True):
    """Return the logger called ``name``, configured to write to ``log_file``.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Path (``str`` or path-like) of the file to log to.
        level: Threshold applied to both the logger and its file handler.
        format_str: ``logging.Formatter`` format for the file handler.
        filemode: Mode used to open ``log_file`` when a handler is created.
        no_console: Accepted for compatibility; it currently has no effect
            because this function never attaches a console handler.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # FileHandler stores the absolute path in ``baseFilename``; normalise the
    # requested path the same way so an existing handler can be recognised.
    target = os.path.abspath(os.fspath(log_file))
    handler = next(
        (
            h for h in logger.handlers
            if isinstance(h, logging.FileHandler) and h.baseFilename == target
        ),
        None,
    )
    if handler is None:
        # Create exactly one handler per (logger, file): a second handler would
        # write every record twice, and an unattached one would leak its open file.
        handler = logging.FileHandler(log_file, mode=filemode)
        logger.addHandler(handler)

    handler.setLevel(level)
    handler.setFormatter(logging.Formatter(format_str))
    return logger
def get_silent_logger(name, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', silent: bool = True):
    """Return a stream-backed logger that can be muted.

    Args:
        name: Name passed to ``logging.getLogger``.
        level: Threshold used when ``silent`` is false.
        format_str: ``logging.Formatter`` format for the stream handler.
        silent: When true the logger level is set to 100, which is above
            ``logging.CRITICAL`` (50), so no standard-level record is emitted.

    Returns:
        The configured ``logging.Logger``. Calling this again with the same
        ``name`` reuses the existing stream handler instead of stacking a new
        one, so records are not printed more than once.
    """
    logger = logging.getLogger(name)

    # FileHandler subclasses StreamHandler, so exclude it explicitly when
    # looking for an already-attached console handler.
    console = next(
        (
            h for h in logger.handlers
            if isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
        ),
        None,
    )
    if console is None:
        console = logging.StreamHandler()
        logger.addHandler(console)
    console.setFormatter(logging.Formatter(format_str))

    logger.setLevel(100 if silent else level)
    return logger
def remove_duplicates_from_playlist(entries):
    """Return ``entries`` without repeats, keyed on each entry's ``'id'``.

    The first entry seen for an ID is kept and the original ordering of those
    first occurrences is preserved.
    """
    first_by_id = {}
    for entry in entries:
        # setdefault only stores the entry when the ID has not been seen yet.
        first_by_id.setdefault(entry['id'], entry)
    return list(first_by_id.values())

View File

@ -5,7 +5,28 @@ import time
import numpy as np
from tqdm.auto import tqdm
import automated_youtube_dl.yt_dlp as ydl
import ydl.yt_dlp as ydl
from process.funcs import setup_file_logger
class ytdl_logger(object):
    """Forward debug/info/warning/error messages to a wrapped logger.

    Every message passed to ``error`` is also appended to ``self.errors`` so
    the caller can read the collected errors afterwards.
    """

    def __init__(self, logger):
        """Wrap ``logger`` and start with an empty error list."""
        self.logger = logger
        # Per-instance list: a class-level list would be shared by every
        # instance, mixing errors from unrelated downloads together.
        self.errors = []

    def debug(self, msg):
        """Log ``msg`` at INFO level (debug messages are not demoted)."""
        self.logger.info(msg)

    def info(self, msg):
        """Log ``msg`` at INFO level."""
        self.logger.info(msg)

    def warning(self, msg):
        """Log ``msg`` at WARNING level."""
        self.logger.warning(msg)

    def error(self, msg):
        """Log ``msg`` at ERROR level and remember it in ``self.errors``."""
        self.logger.error(msg)
        self.errors.append(msg)
def is_manager_lock_locked(lock) -> bool:
@ -26,59 +47,56 @@ def download_video(args) -> dict:
def progress_hook(d):
# downloaded_bytes and total_bytes can be None if the download hasn't started yet.
if d['status'] == 'downloading' and d.get('downloaded_bytes') and d.get('total_bytes'):
downloaded_bytes = int(d['downloaded_bytes'])
total_bytes = int(d['total_bytes'])
if total_bytes > 0:
percent = (downloaded_bytes / total_bytes) * 100
bar.update(int(np.round(percent - bar.n))) # If the progress bar doesn't end at 100% then round to 1 decimal place
bar.set_postfix({
'speed': d['_speed_str'],
'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
})
if d['status'] == 'downloading':
if d.get('downloaded_bytes') and d.get('total_bytes'):
downloaded_bytes = int(d['downloaded_bytes'])
total_bytes = int(d['total_bytes'])
if total_bytes > 0:
percent = (downloaded_bytes / total_bytes) * 100
bar.update(int(np.round(percent - bar.n))) # If the progress bar doesn't end at 100% then round to 1 decimal place
bar.set_postfix({
'speed': d['_speed_str'],
'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
})
video = args[0]
kwargs = args[1]
bars = kwargs['bars']
download_archive = kwargs['download_archive']
ydl_opts = kwargs['ydl_opts']
ydl_opts['progress_hooks'] = [progress_hook]
yt_dlp = ydl.YDL(ydl_opts)
# Get a bar
locked = False
# We're going to wait until a bar is available for us to use.
while not locked:
for item in bars:
if not is_manager_lock_locked(item[1]):
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
offset = item[0]
bar_lock = item[1]
break
if len(kwargs['bars']):
# We're going to wait until a bar is available for us to use.
while not locked:
for item in kwargs['bars']:
if not is_manager_lock_locked(item[1]):
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
offset = item[0]
bar_lock = item[1]
break
kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
bar = tqdm(total=100, position=(offset if locked else None), desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
# with bar_lock:
width, _ = os.get_terminal_size()
desc_with = int(np.round(width * (1 / 4)))
bar = tqdm(total=100, position=offset, desc=video['title'].ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}
ylogger = ytdl_logger(setup_file_logger(video['id'], kwargs['output_dir'] / f"[{video['id']}].log"))
kwargs['ydl_opts']['logger'] = ylogger
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
output_dict = {'downloaded_video_id': None, 'blacklist_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
start_time = time.time()
# if video['id'] in download_archive:
# output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
# else:
try:
error_code = yt_dlp(video['url']) # Do the download
if not error_code:
download_archive.append(video['id'])
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
output_dict['logger_msg'].append(f"{video['id']} '{video['title']}' downloaded in {elapsed} min.")
output_dict['downloaded_video_id'] = video['id']
else:
m = f'Failed to download {video["id"]} {video["title"]}, error code: {error_code}'
output_dict['status_msg'].append(m)
output_dict['video_error_logger_msg'].append(m)
# m = f'{video["id"]} {video["title"]} -> Failed to download, error code: {error_code}'
# output_dict['status_msg'].append(m)
# output_dict['video_error_logger_msg'].append(m)
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
except Exception as e:
output_dict['video_error_logger_msg'].append(f"Error on video {video['id']} '{video['title']}' -> {e}")
bar.close()
bar_lock.release()
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {e}")
if locked:
bar.close()
bar_lock.release()
return output_dict

View File

@ -43,7 +43,8 @@ class YDL:
elif info['_type'] == 'video':
# `info` doesn't seem to contain the `url` key so we'll add it manually.
# If any issues arise in the future make sure to double check there isn't any weirdness going on here.
entries.append(dict(**info, **{'url': f"https://www.youtube.com/watch?v={info['id']}"}))
entries.append(info)
entries[0]['url'] = f"https://www.youtube.com/watch?v={info['id']}"
else:
raise ValueError(f"Unknown media type: {info['_type']}")
return {
@ -66,3 +67,17 @@ def update_ytdlp():
subprocess.run('if pip list --outdated | grep -q yt-dlp; then pip install --upgrade yt-dlp; fi', shell=True)
new = subprocess.check_output('pip freeze | grep yt-dlp', shell=True).decode().strip('\n')
return old != new
class ytdl_no_logger(object):
    """Logger stand-in whose methods accept a message and do nothing with it."""

    def debug(self, msg):
        """Discard a debug message."""

    def info(self, msg):
        """Discard an info message."""

    def warning(self, msg):
        """Discard a warning message."""

    def error(self, msg):
        """Discard an error message."""