2023-01-20 21:42:36 -07:00
|
|
|
import math
|
2023-02-03 01:10:04 -07:00
|
|
|
import multiprocessing
|
2023-01-20 21:42:36 -07:00
|
|
|
import os
|
2023-05-06 15:30:34 -06:00
|
|
|
import subprocess
|
2023-02-03 01:10:04 -07:00
|
|
|
import sys
|
2023-01-20 21:42:36 -07:00
|
|
|
import time
|
2023-02-06 23:01:47 -07:00
|
|
|
import traceback
|
2023-02-02 20:35:37 -07:00
|
|
|
from multiprocessing import Manager
|
2023-02-07 20:52:13 -07:00
|
|
|
from pathlib import Path
|
2023-02-02 20:35:37 -07:00
|
|
|
from threading import Thread
|
2023-01-20 21:42:36 -07:00
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
from tqdm.auto import tqdm
|
2023-02-07 19:45:11 -07:00
|
|
|
from unidecode import unidecode
|
2023-01-20 21:42:36 -07:00
|
|
|
|
2023-01-21 18:19:03 -07:00
|
|
|
import ydl.yt_dlp as ydl
|
2023-05-06 14:23:20 -06:00
|
|
|
from process.funcs import remove_special_chars_linux, setup_file_logger
|
2023-01-21 18:19:03 -07:00
|
|
|
|
|
|
|
|
|
|
|
class ytdl_logger(object):
    """
    Logger adapter passed to yt-dlp through the ``logger`` option.

    Messages are forwarded to an optional wrapped logger. Every error message
    is additionally recorded in ``self.errors`` so the caller can report them
    after a download attempt.
    """

    def __init__(self, logger=None):
        """
        :param logger: object exposing ``info``/``warning``/``error``; when it
            is falsy (the default) nothing is forwarded, but errors are still collected.
        """
        self.logger = logger
        # Per-instance list. A class-level list would be shared by every
        # instance, so errors from one video would show up in the next one's report.
        self.errors = []

    def debug(self, msg):
        # Debug messages are forwarded at INFO level (kept from the original behaviour).
        if self.logger:
            self.logger.info(msg)

    def info(self, msg):
        if self.logger:
            self.logger.info(msg)

    def warning(self, msg):
        if self.logger:
            self.logger.warning(msg)

    def error(self, msg):
        if self.logger:
            self.logger.error(msg)
        # Recorded even when no logger is attached.
        self.errors.append(msg)
|
2023-01-20 21:42:36 -07:00
|
|
|
|
|
|
|
|
|
|
|
def is_manager_lock_locked(lock) -> bool:
    """
    Report whether ``lock`` is currently held by probing it with a non-blocking acquire.

    Manager().Lock().acquire() takes ``blocking``, not ``block``.
    If the probe succeeds, the lock is released again straight away.
    """
    probe_succeeded = lock.acquire(blocking=False)
    if probe_succeeded:
        # We only wanted to peek; give the lock back.
        lock.release()
    return not probe_succeeded
|
|
|
|
|
|
|
|
|
2023-05-06 18:24:47 -06:00
|
|
|
# Maximum filename length on the root filesystem, minus 30 characters of headroom
# (presumably room for extensions/suffixes appended later -- TODO confirm).
# os.pathconf reads the same limit as `getconf NAME_MAX /` without spawning a shell at import time.
name_max = os.pathconf('/', 'PC_NAME_MAX') - 30
|
2023-05-06 15:30:34 -06:00
|
|
|
|
|
|
|
|
2023-01-20 21:42:36 -07:00
|
|
|
def _render_video_filename(video, title) -> str:
    """Render the extension-less output filename for ``video`` with the given ``title``."""
    return remove_special_chars_linux(
        ydl.get_output_templ(
            video_id=video['id'],
            title=title,
            uploader=video['uploader'],
            uploader_id=video['uploader_id'],
            include_ext=False,
        ),
        special_chars=['/'],
    )


def download_video(args) -> dict:
    """
    Download a single video and report the outcome.

    :param args: a two-item sequence ``(video, kwargs)``.
        ``video`` is a dict read for the keys ``id``, ``title``, ``uploader``, ``url``,
        ``channel_id``, ``channel``, ``channel_url``, ``duration`` and ``view_count``.
        ``kwargs`` is a dict read for ``ignore_downloaded``, ``bars`` (sequence of
        ``(position, lock)`` pairs used for progress bars), ``ydl_opts`` (mutated in place:
        ``progress_hooks``, ``logger`` and ``outtmpl`` are set) and ``output_dir``.
    :return: dict with the keys ``downloaded_video_id`` (the ID on success, else ``None``),
        ``video_id``, ``video_critical_err_msg``, ``video_critical_err_msg_short``,
        ``status_msg`` and ``logger_msg``.
    """

    def progress_hook(d):
        # downloaded_bytes and total_bytes can be None if the download hasn't started yet.
        if d['status'] != 'downloading':
            return
        if not (d.get('downloaded_bytes') and d.get('total_bytes')):
            return
        downloaded_bytes = int(d['downloaded_bytes'])
        total_bytes = int(d['total_bytes'])
        if total_bytes > 0:
            percent = (downloaded_bytes / total_bytes) * 100
            # The bar counts whole percent points; advance it by the rounded distance from where it is now.
            bar.update(int(np.round(percent - bar.n)))
            bar.set_postfix({
                'speed': d['_speed_str'],
                'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
            })

    video = args[0]
    kwargs = args[1]

    # Clean the strings of foreign languages
    video['title'] = unidecode(video['title'])
    video['uploader'] = unidecode(video['uploader'])

    output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_critical_err_msg': [], 'video_critical_err_msg_short': [], 'status_msg': [], 'logger_msg': []}  # empty object

    # NOTE(review): the parentheses spell out how Python already parsed the original
    # un-parenthesised condition (`and` binds tighter than `or`). If the intent was
    # "not ignore_downloaded and (any channel field missing)", this needs changing -- confirm.
    if (not kwargs['ignore_downloaded'] and not video['channel_id']) or not video['channel'] or not video['channel_url']:
        # Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
        if not (video['duration'] or isinstance(video['view_count'], int)):
            output_dict['video_critical_err_msg_short'].append('unavailable.')
            return output_dict

    # Get a bar
    locked = False
    if len(kwargs['bars']):
        while not locked:  # We're going to wait until a bar is available for us to use.
            for item in kwargs['bars']:
                if not is_manager_lock_locked(item[1]):
                    locked = item[1].acquire(timeout=0.01)  # get the lock ASAP and don't wait if we didn't get it.
                    offset = item[0]
                    bar_lock = item[1]
                    break
        kwargs['ydl_opts']['progress_hooks'] = [progress_hook]
        # The description column takes a quarter of the terminal width.
        desc_width = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
        bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_width)[:desc_width], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)

    start_time = time.time()

    try:
        kwargs['ydl_opts']['logger'] = ytdl_logger()  # dummy silent logger
        yt_dlp = ydl.YDL(kwargs['ydl_opts'])
        video_n = yt_dlp.get_info(video['url'])

        if not video_n:
            output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
            return output_dict  # the finally clause below still hands the bar back

        video_n['url'] = video['url']
        video = video_n
        del video_n

        # We created a new dict
        video['title'] = unidecode(video['title'])
        video['uploader'] = unidecode(video['uploader'])

        video_filename = _render_video_filename(video, video['title'])

        # Make sure the video title isn't too long.
        # Stop once the title is empty: nothing is left to trim, and looping on would never end.
        while len(video_filename) >= name_max - 3 and video['title']:  # -3 so that I can add ...
            video['title'] = video['title'][:-1]
            video_filename = _render_video_filename(video, video['title'] + '...')

        base_path = str(Path(kwargs['output_dir'], video_filename))

        kwargs['ydl_opts']['outtmpl'] = f"{base_path}.%(ext)s"

        ylogger = ytdl_logger(setup_file_logger(video['id'], base_path + '.log'))
        kwargs['ydl_opts']['logger'] = ylogger
        yt_dlp = ydl.YDL(kwargs['ydl_opts'])  # recreate the object with the correct logging path
        error_code = yt_dlp(video['url'])  # Do the download
        if not error_code:
            elapsed = round(math.ceil(time.time() - start_time) / 60, 2)
            output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
            output_dict['downloaded_video_id'] = video['id']
        else:
            output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
    except Exception:
        # Report the failure to the caller instead of crashing the worker.
        output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
        if locked:
            bar.update(100 - bar.n)
    finally:
        # Runs on every exit path (including the early return above) so the
        # progress-bar slot is never left locked.
        if locked:
            try:
                bar.close()
            finally:
                bar_lock.release()
    return output_dict
|
2023-02-02 20:35:37 -07:00
|
|
|
|
|
|
|
|
|
|
|
def bar_eraser(video_bars, eraser_exit):
    """
    Blank out progress-bar lines once the bar that occupied them has been released.

    The calling thread repeatedly scans ``video_bars`` and records the index of every
    bar whose lock is currently held. A background thread walks the recorded indexes,
    and whenever it manages to take a bar's lock it draws an empty tqdm bar at that
    position (ANSI "erase line"), forgets the index and releases the lock.

    :param video_bars: sequence of items where ``item[0]`` is the tqdm position and
        ``item[1]`` is a lock supporting ``acquire(timeout=...)`` and ``release()``.
    :param eraser_exit: object whose ``.value`` becomes truthy to stop both loops.
    """
    manager = Manager()
    queue = manager.dict()  # bar index -> True, for bars seen in use and awaiting erasure
    queue_lock = manager.Lock()

    def eraser():
        try:
            while not eraser_exit.value:
                for i in queue.keys():
                    if eraser_exit.value:
                        return
                    i = int(i)
                    position, bar_lock = video_bars[i][0], video_bars[i][1]
                    if not bar_lock.acquire(timeout=0.1):
                        # Still in use; try again on a later pass.
                        continue
                    try:
                        # '\x1b[2K' clears the whole terminal line at this bar's position.
                        tqdm(position=position, leave=False, bar_format='\x1b[2K').close()
                        with queue_lock:
                            queue.pop(i, None)
                    finally:
                        # Never keep the bar's lock, even if erasing failed.
                        bar_lock.release()
        except (KeyboardInterrupt, multiprocessing.managers.RemoteError, SystemExit):
            sys.exit(0)

    try:
        Thread(target=eraser).start()
        while not eraser_exit.value:
            for i, item in enumerate(video_bars):
                if eraser_exit.value:
                    return
                if is_manager_lock_locked(item[1]):
                    with queue_lock:
                        queue[i] = True
    except (KeyboardInterrupt, multiprocessing.managers.RemoteError, SystemExit):
        sys.exit(0)
|
|
|
|
|
|
|
|
|
|
|
|
class ServiceExit(Exception):
    """Raised to trigger a clean exit of all running threads and of the main program."""
|