This commit is contained in:
parent
ad1d57d443
commit
b38532926b
|
@ -3,13 +3,7 @@
|
|||
`/home/user/youtubedl-daemon.sh`
|
||||
```bash
|
||||
#!/bin/bash
|
||||
SLEEP_TIME="60m"
|
||||
while true; do
|
||||
/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/"
|
||||
echo -e "\nSleeping for $SLEEP_TIME"
|
||||
sleep $SLEEP_TIME
|
||||
echo -e "\n"
|
||||
done
|
||||
/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 "https://www.youtube.com/playlist?list=example12345" "/mnt/nfs/archive/YouTube/Example Playlist/"
|
||||
```
|
||||
|
||||
|
||||
|
|
12
README.md
12
README.md
|
@ -18,6 +18,7 @@ I have a single, very large playlist that I add any videos I like to. On my NAS
|
|||
- Simple display using `tqdm`.
|
||||
- Limit the size of the downloaded videos.
|
||||
- Parallel downloads.
|
||||
- Daemon mode.
|
||||
|
||||
### Installation
|
||||
|
||||
|
@ -30,6 +31,12 @@ pip install -r requirements.txt
|
|||
|
||||
`./downloader.py <URL to download or path of a file containing the URLs of the videos to download> <output directory>`
|
||||
|
||||
To run as a daemon, do:
|
||||
|
||||
`/usr/bin/python3 /home/user/automated-youtube-dl/downloader.py --daemon --sleep 60 <url> <output folder>`
|
||||
|
||||
`--sleep` is how many minutes to sleep after completing all downloads.
|
||||
|
||||
#### Folder Structure
|
||||
|
||||
```
|
||||
|
@ -56,7 +63,6 @@ Videos will be saved using this name format:
|
|||
| `--no-update` | `-n` | Don\'t update yt-dlp at launch. |
|
||||
| `--max-size` | | Max allowed size of a video in MB. Default: 1100. |
|
||||
| `--rm-cache` | `-r` | Delete the yt-dlp cache on start. |
|
||||
| `--backwards` | `-b` | Reverse all playlists and download in backwards order. |
|
||||
| `--threads` | | How many download processes to use (threads). Default is how many CPU cores you have. You will want to find a good value that doesn't overload your connection. |
|
||||
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars. |
|
||||
|
||||
| `--daemon` | `-d` | Run in daemon mode. Disables progress bars and sleeps for the amount of time specified in `--sleep`. |
|
||||
| `--sleep` | | How many minutes to sleep when in daemon mode. |
|
114
downloader.py
114
downloader.py
|
@ -11,10 +11,10 @@ from multiprocessing import Manager, Pool, cpu_count
|
|||
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
import automated_youtube_dl.yt_dlp as ydl
|
||||
from automated_youtube_dl.files import create_directories, resolve_path
|
||||
from process.funcs import restart_program, setup_file_logger
|
||||
import ydl.yt_dlp as ydl
|
||||
from process.funcs import get_silent_logger, restart_program, setup_file_logger
|
||||
from process.threads import download_video
|
||||
from ydl.files import create_directories, resolve_path
|
||||
|
||||
urlRegex = re.compile(
|
||||
r'^(?:http|ftp)s?://' # http:// or https://
|
||||
|
@ -30,9 +30,9 @@ parser.add_argument('output', help='Output directory.')
|
|||
parser.add_argument('--no-update', '-n', action='store_true', help='Don\'t update yt-dlp at launch.')
|
||||
parser.add_argument('--max-size', type=int, default=1100, help='Max allowed size of a video in MB.')
|
||||
parser.add_argument('--rm-cache', '-r', action='store_true', help='Delete the yt-dlp cache on start.')
|
||||
parser.add_argument('--backwards', '-b', action='store_true', help='Reverse all playlists and download in backwards order.')
|
||||
parser.add_argument('--threads', type=int, default=cpu_count(), help='How many download processes to use.')
|
||||
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars and prints output that's good for journalctl.")
|
||||
parser.add_argument('--daemon', '-d', action='store_true', help="Run in daemon mode. Disables progress bars sleeps for the amount of time specified in --sleep.")
|
||||
parser.add_argument('--sleep', type=int, default=60, help='How many minutes to sleep when in daemon mode.')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.threads <= 0:
|
||||
|
@ -70,32 +70,39 @@ if args.rm_cache:
|
|||
log_dir = args.output / 'logs'
|
||||
create_directories(args.output, log_dir)
|
||||
|
||||
logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
|
||||
file_logger = setup_file_logger('youtube_dl', log_dir / f'youtube_dl-{str(int(log_time))}.log', level=logging.INFO)
|
||||
video_error_logger = setup_file_logger('youtube_dl_video_errors', log_dir / f'youtube_dl-errors-{int(log_time)}.log', level=logging.INFO)
|
||||
logger = get_silent_logger('yt-dl', silent=not args.daemon)
|
||||
|
||||
logger.info(f'Starting process.')
|
||||
file_logger.info(f'Starting process.')
|
||||
start_time = time.time()
|
||||
|
||||
manager = Manager()
|
||||
|
||||
# Find existing videos to skip.
|
||||
downloaded_videos = set()
|
||||
download_archive_file = args.output / 'download-archive.log'
|
||||
if not download_archive_file.exists():
|
||||
download_archive_file.touch()
|
||||
with open(download_archive_file, 'r') as file:
|
||||
download_archive = manager.list([line.rstrip() for line in file])
|
||||
print('Found', len(download_archive), 'downloaded videos.')
|
||||
downloaded_videos.update(([line.rstrip() for line in file]))
|
||||
print('Found', len(downloaded_videos), 'downloaded videos.')
|
||||
|
||||
# Create this object AFTER reading in the download_archive.
|
||||
download_archive_logger = setup_file_logger('download_archive', download_archive_file, format_str='%(message)s')
|
||||
|
||||
status_bar = tqdm(position=2, bar_format='{desc}')
|
||||
status_bar = tqdm(position=2, bar_format='{desc}', disable=not args.daemon)
|
||||
|
||||
|
||||
def log_bar(msg, level):
    """
    Write ``msg`` via ``status_bar.write``, prefixed with ``[level]``.
    """
    line = f'[{level}] {msg}'
    status_bar.write(line)
|
||||
|
||||
|
||||
def log_info_twice(msg):
    """
    Emit ``msg`` at INFO level on ``logger`` first, then on ``file_logger``.
    """
    for target in (logger, file_logger):
        target.info(msg)
|
||||
|
||||
|
||||
def print_without_paths(msg):
|
||||
"""
|
||||
Remove any filepaths or other stuff we don't want in the message.
|
||||
|
@ -104,44 +111,34 @@ def print_without_paths(msg):
|
|||
if m:
|
||||
msg = m.group(1)
|
||||
m1 = re.match(r'^(.*?): ', msg)
|
||||
status_bar.set_description_str(msg.strip('to "').strip('to: ').strip())
|
||||
msg = msg.strip('to "').strip('to: ').strip()
|
||||
if args.daemon:
|
||||
log_info_twice(msg)
|
||||
else:
|
||||
status_bar.set_description_str(msg)
|
||||
|
||||
|
||||
class ytdl_logger(object):
|
||||
def debug(self, msg):
|
||||
logger.debug(msg)
|
||||
file_logger.debug(msg)
|
||||
# if msg.startswith('[debug] '):
|
||||
# pass
|
||||
if '[download]' not in msg:
|
||||
print_without_paths(msg)
|
||||
|
||||
def info(self, msg):
|
||||
logger.info(msg)
|
||||
file_logger.info(msg)
|
||||
print_without_paths(msg)
|
||||
|
||||
def warning(self, msg):
|
||||
logger.warning(msg)
|
||||
file_logger.warning(msg)
|
||||
log_bar(msg, 'warning')
|
||||
|
||||
def error(self, msg):
|
||||
logger.error(msg)
|
||||
file_logger.error(msg)
|
||||
log_bar(msg, 'error')
|
||||
|
||||
|
||||
class ytdl_no_logger(object):
    """
    Logger stand-in whose four level methods accept a message and discard it.
    """

    def debug(self, msg):
        """Discard a debug message."""
        return None

    def info(self, msg):
        """Discard an info message."""
        return None

    def warning(self, msg):
        """Discard a warning message."""
        return None

    def error(self, msg):
        """Discard an error message."""
        return None
|
||||
|
||||
|
||||
# https://github.com/yt-dlp/yt-dlp#embedding-examples
|
||||
ydl_opts = {
|
||||
'format': f'(bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{args.max_size}M][height>=1080]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{args.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{args.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{args.max_size}M][height>=720]/bestvideo[filesize<{args.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
|
||||
|
@ -165,48 +162,48 @@ ydl_opts = {
|
|||
}
|
||||
|
||||
main_opts = dict(ydl_opts, **{'logger': ytdl_logger()})
|
||||
thread_opts = dict(ydl_opts, **{'logger': ytdl_no_logger()})
|
||||
thread_opts = dict(ydl_opts, **{'logger': ydl.ytdl_no_logger()})
|
||||
yt_dlp = ydl.YDL(main_opts)
|
||||
|
||||
# Init bars
|
||||
playlist_bar = tqdm(position=1, desc='Playlist')
|
||||
playlist_bar = tqdm(position=1, desc='Playlist', disable=not args.daemon)
|
||||
video_bars = manager.list()
|
||||
for i in range(args.threads):
|
||||
video_bars.append([
|
||||
3 + i,
|
||||
manager.Lock()
|
||||
])
|
||||
if args.daemon:
|
||||
for i in range(args.threads):
|
||||
video_bars.append([
|
||||
3 + i,
|
||||
manager.Lock()
|
||||
])
|
||||
|
||||
for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs'):
|
||||
for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0, desc='Inputs', disable=not args.daemon):
|
||||
playlist = yt_dlp.playlist_contents(target_url)
|
||||
logger.info(f"Downloading item: '{playlist['title']}' {target_url}")
|
||||
playlist_bar.total = len(playlist['entries'])
|
||||
download_queue = playlist['entries']
|
||||
|
||||
file_logger.info(f"Downloading item: '{playlist['title']}' {target_url}")
|
||||
|
||||
playlist_bar.total = len(download_queue)
|
||||
playlist_bar.set_description(playlist['title'])
|
||||
|
||||
# Remove already downloaded files from the to-do list.
|
||||
download_queue = []
|
||||
for video in playlist['entries']:
|
||||
if video['id'] not in download_archive:
|
||||
download_queue.append(video)
|
||||
else:
|
||||
logger.info(f"{video['id']} already downloaded.")
|
||||
playlist_bar.update(len(playlist['entries']) - len(download_queue))
|
||||
for p, video in enumerate(playlist):
|
||||
if video['id'] in downloaded_videos:
|
||||
del playlist[p]
|
||||
playlist_bar.update(len(download_queue))
|
||||
|
||||
if args.backwards:
|
||||
download_queue.reverse()
|
||||
|
||||
if len(download_queue): # Don't mess with multiprocessing if the list is empty
|
||||
if len(download_queue): # Don't mess with multiprocessing if all videos are already downloaded
|
||||
with Pool(processes=args.threads) as pool:
|
||||
status_bar.set_description_str('=' * os.get_terminal_size()[0])
|
||||
for result in pool.imap_unordered(download_video,
|
||||
((video, {
|
||||
'bars': video_bars,
|
||||
'download_archive': download_archive,
|
||||
'download_archive': downloaded_videos,
|
||||
'ydl_opts': thread_opts,
|
||||
}) for video in download_queue)):
|
||||
# Print stuff
|
||||
# Save the video ID to the file
|
||||
if result['downloaded_video_id']:
|
||||
download_archive_logger.info(result['downloaded_video_id'])
|
||||
|
||||
# Print stuff
|
||||
if len(result['video_error_logger_msg']):
|
||||
for line in result['video_error_logger_msg']:
|
||||
video_error_logger.info(line)
|
||||
|
@ -215,16 +212,17 @@ for i, target_url in tqdm(enumerate(url_list), total=len(url_list), position=0,
|
|||
playlist_bar.write(f"{result['downloaded_video_id']}: {line}")
|
||||
if len(result['logger_msg']):
|
||||
for line in result['logger_msg']:
|
||||
logger.info(line)
|
||||
log_info_twice(line)
|
||||
playlist_bar.update()
|
||||
else:
|
||||
playlist_bar.write(f"All videos already downloaded for '{playlist['title']}'")
|
||||
# playlist_bar.update(playlist_bar.total - playlist_bar.n)
|
||||
logger.info(f"Finished item: '{playlist['title']}' {target_url}")
|
||||
logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
|
||||
file_logger.info(f"Finished item: '{playlist['title']}' {target_url}")
|
||||
file_logger.info(f"Finished process in {round(math.ceil(time.time() - start_time) / 60, 2)} min.")
|
||||
|
||||
# Erase the status bar.
|
||||
status_bar.set_description_str('\x1b[2KDone!')
|
||||
status_bar.refresh()
|
||||
|
||||
# Clean up the remaining bars. Have to close them in order.
|
||||
status_bar.set_description_str('\x1b[2KDone!') # erase the status bar
|
||||
status_bar.refresh()
|
||||
playlist_bar.close()
|
||||
status_bar.close()
|
||||
|
|
|
@ -22,7 +22,7 @@ def restart_program():
|
|||
os.execl(python, python, *sys.argv)
|
||||
|
||||
|
||||
def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a'):
|
||||
def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s', filemode='a', no_console: bool = True):
|
||||
formatter = logging.Formatter(format_str)
|
||||
handler = logging.FileHandler(log_file, mode=filemode)
|
||||
handler.setFormatter(formatter)
|
||||
|
@ -32,7 +32,18 @@ def setup_file_logger(name, log_file, level=logging.INFO, format_str: str = '%(a
|
|||
logger.addHandler(handler)
|
||||
|
||||
# Silence console logging
|
||||
console = logging.StreamHandler(sys.stdout)
|
||||
console.setLevel(100)
|
||||
if no_console:
|
||||
console = logging.StreamHandler(sys.stdout)
|
||||
console.setLevel(100)
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
def get_silent_logger(name, level=logging.INFO, silent: bool = True):
    """
    Return the logger called ``name``, muted when ``silent`` is true.

    :param name: name handed to ``logging.getLogger``.
    :param level: threshold applied when the logger is not silenced.
    :param silent: when true the threshold is set to 100, which is above
        ``logging.CRITICAL`` (50), so debug/info/warning/error/critical
        calls emit nothing.
    :return: always a ``logging.Logger``.
    """
    # A Logger must be returned in both modes. The previous silent branch
    # returned a StreamHandler, which has no .info()/.debug()/.warning()/
    # .error() methods, so the first log call raised AttributeError.
    logger = logging.getLogger(name)
    logger.setLevel(100 if silent else level)
    return logger
|
||||
|
|
|
@ -5,7 +5,7 @@ import time
|
|||
import numpy as np
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
import automated_youtube_dl.yt_dlp as ydl
|
||||
import ydl.yt_dlp as ydl
|
||||
|
||||
|
||||
def is_manager_lock_locked(lock) -> bool:
|
||||
|
@ -40,36 +40,32 @@ def download_video(args) -> dict:
|
|||
video = args[0]
|
||||
kwargs = args[1]
|
||||
bars = kwargs['bars']
|
||||
download_archive = kwargs['download_archive']
|
||||
|
||||
ydl_opts = kwargs['ydl_opts']
|
||||
ydl_opts['progress_hooks'] = [progress_hook]
|
||||
yt_dlp = ydl.YDL(ydl_opts)
|
||||
|
||||
locked = False
|
||||
# We're going to wait until a bar is available for us to use.
|
||||
while not locked:
|
||||
for item in bars:
|
||||
if not is_manager_lock_locked(item[1]):
|
||||
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
|
||||
offset = item[0]
|
||||
bar_lock = item[1]
|
||||
break
|
||||
# Get a bar
|
||||
if len(bars):
|
||||
locked = False
|
||||
# We're going to wait until a bar is available for us to use.
|
||||
while not locked:
|
||||
for item in bars:
|
||||
if not is_manager_lock_locked(item[1]):
|
||||
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
|
||||
offset = item[0]
|
||||
bar_lock = item[1]
|
||||
break
|
||||
|
||||
# with bar_lock:
|
||||
width, _ = os.get_terminal_size()
|
||||
desc_with = int(np.round(width * (1 / 4)))
|
||||
bar = tqdm(total=100, position=offset, desc=video['title'].ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False)
|
||||
output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []}
|
||||
bar = tqdm(total=100, position=(offset if len(bars) else None), desc=video['title'].ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]', leave=False, disable=not len(bars))
|
||||
output_dict = {'downloaded_video_id': None, 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
||||
start_time = time.time()
|
||||
|
||||
# if video['id'] in download_archive:
|
||||
# output_dict['logger_msg'].append(f"{video['id']} already downloaded.")
|
||||
# else:
|
||||
try:
|
||||
error_code = yt_dlp(video['url']) # Do the download
|
||||
if not error_code:
|
||||
download_archive.append(video['id'])
|
||||
elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
|
||||
output_dict['logger_msg'].append(f"{video['id']} downloaded in {elapsed} min.")
|
||||
output_dict['downloaded_video_id'] = video['id']
|
||||
|
|
|
@ -66,3 +66,17 @@ def update_ytdlp():
|
|||
subprocess.run('if pip list --outdated | grep -q yt-dlp; then pip install --upgrade yt-dlp; fi', shell=True)
|
||||
new = subprocess.check_output('pip freeze | grep yt-dlp', shell=True).decode().strip('\n')
|
||||
return old != new
|
||||
|
||||
|
||||
class ytdl_no_logger(object):
    """
    Logger stand-in whose four level methods accept a message and discard it.
    """

    def debug(self, msg):
        """Discard a debug message."""
        return None

    def info(self, msg):
        """Discard an info message."""
        return None

    def warning(self, msg):
        """Discard a warning message."""
        return None

    def error(self, msg):
        """Discard an error message."""
        return None
|
Reference in New Issue