133 lines
6.8 KiB
Python
133 lines
6.8 KiB
Python
import datetime
|
|
import math
|
|
import subprocess
|
|
import time
|
|
import traceback
|
|
from pathlib import Path
|
|
|
|
import yt_dlp as ydl_ydl
|
|
from hurry.filesize import size
|
|
from unidecode import unidecode
|
|
|
|
import ydl.yt_dlp as ydl
|
|
from server.mysql import db_logger
|
|
from server.process.funcs import remove_special_chars_linux, sanitize_colored_string
|
|
from server.process.ytlogging import YtdlLogger
|
|
|
|
# Longest filename (in bytes) allowed on the root filesystem, as reported by
# `getconf NAME_MAX /`, minus 30 -- presumably headroom for extensions and
# suffixes appended to the base name later (TODO confirm).
# The command is passed as an argv list so no shell is spawned.
name_max = int(subprocess.check_output(["getconf", "NAME_MAX", "/"]).decode()) - 30
|
|
|
|
|
|
def download_video(video, ydl_opts, output_dir, ignore_downloaded, job) -> dict:
    """Download one video with yt-dlp and return a summary of what happened.

    Args:
        video: Mapping describing the video. The keys ``id``, ``url``,
            ``title``, ``channel_id``, ``channel``, ``channel_url``,
            ``duration`` and ``view_count`` are read before the full info is
            fetched; afterwards it is replaced by the dict returned from
            ``get_info`` (with ``url`` carried over).
        ydl_opts: yt-dlp options dict. It is mutated in place: ``outtmpl``,
            ``logger`` and ``progress_hooks`` are overwritten.
        output_dir: Directory the output template is rooted in.
        ignore_downloaded: Flag that takes part in the availability check
            below (see the review note there).
        job: Object providing ``new_progress_thread``, ``del_progress_thread``,
            ``add_completed`` and an ``id`` attribute.

    Returns:
        A dict with the keys ``downloaded_video_id`` (the video id on success,
        otherwise ``None``), ``video_id``, ``video_url``,
        ``video_critical_err_msg``, ``video_critical_err_msg_short``,
        ``status_msg`` and ``logger_msg``. The dict is returned on every path,
        including when an unexpected exception was caught and logged.
    """
    output_dict = {
        'downloaded_video_id': None,
        'video_id': video['id'],
        'video_url': video['url'],
        'video_critical_err_msg': [],
        'video_critical_err_msg_short': [],
        'status_msg': [],
        'logger_msg': [],
    }
    try:
        job_progress = job.new_progress_thread(video['id'])
        # Stored as epoch *milliseconds*; convert back before mixing with time.time().
        job_progress['start_time'] = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e3)

        def progress_hook(d):
            """Copy yt-dlp progress data into this video's job progress entry."""
            # Variables can be None if the download hasn't started yet.
            if d['status'] != 'downloading' or not d.get('downloaded_bytes'):
                return

            # We want total_bytes but it may not exist so total_bytes_estimate is good too.
            if d.get('total_bytes'):
                job_progress['total'] = d.get('total_bytes')
            elif d.get('total_bytes_estimate'):
                job_progress['total'] = d.get('total_bytes_estimate')

            # NOTE(review): nesting was reconstructed from a whitespace-stripped
            # source; confirm this else belongs to the "total" check.
            if job_progress['total']:  # yt-dlp gave us a usable total
                job_progress['downloaded_bytes'] = int(d['downloaded_bytes'])
                if job_progress['total'] > 0:
                    job_progress['percent'] = (job_progress['downloaded_bytes'] / job_progress['total']) * 100
                job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
                job_progress['size'] = f"{size(d.get('downloaded_bytes'))}/{size(job_progress['total'])}"
            else:  # otherwise just use yt-dlp's preformatted strings
                job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
                job_progress['size'] = f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}"

        # NOTE(review): `and` binds tighter than `or`, so `ignore_downloaded`
        # only gates the channel_id test. The parentheses make the existing
        # behaviour explicit; confirm whether
        # `not ignore_downloaded and (a or b or c)` was intended.
        missing_channel_info = (
            (not ignore_downloaded and not video['channel_id'])
            or not video['channel']
            or not video['channel_url']
        )
        if missing_channel_info:
            # Sometimes videos don't have channel_id, channel, or channel_url
            # but are actually valid. Like shorts.
            if not (video['duration'] or isinstance(video['view_count'], int)):
                output_dict['video_critical_err_msg_short'].append('unavailable.')
                # NOTE(review): returns without del_progress_thread/add_completed.
                return output_dict

        # Clean of foreign languages
        video['title'] = unidecode(video['title'])

        try:
            # Get the video info
            yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': YtdlLogger(name=video['id'], table='jobs', job_id=job.id)}))
            video_n = yt_dlp.get_info(video['url'])

            if not video_n:
                output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
                # NOTE(review): returns without del_progress_thread/add_completed.
                return output_dict

            video_n['url'] = video['url']
            video = video_n
            del video_n

            # We created a new dict
            video['title'] = unidecode(video['title'])
            video['uploader'] = unidecode(video['uploader'])  # now this info is present since we fetched it

            def build_filename(title):
                """Render the output template for `title` and strip '/' from it."""
                return remove_special_chars_linux(
                    ydl.get_output_templ(
                        video_id=video['id'],
                        title=title,
                        uploader=video['uploader'],
                        uploader_id=video['uploader_id'],
                        include_ext=False,
                    ),
                    special_chars=['/'],
                )

            # TODO: do we also need to remove the @ char?
            video_filename = build_filename(video['title'])

            # Make sure the video title isn't too long. Stop once the title is
            # exhausted: if the rest of the template alone is too long, trimming
            # an empty title would otherwise loop forever.
            while len(video_filename) >= name_max - 3 and video['title']:  # -3 so that I can add ...
                video['title'] = video['title'][:-1]
                video_filename = build_filename(video['title'] + '...')

            base_path = str(Path(output_dir, video_filename))

            # NOTE(review): these assignments mutate the caller's ydl_opts dict.
            ydl_opts['outtmpl'] = f"{base_path}.%(ext)s"
            ylogger = YtdlLogger(name=video['id'], table='jobs', job_id=job.id)
            ydl_opts['logger'] = ylogger
            ydl_opts['progress_hooks'] = [progress_hook]
            with ydl_ydl.YoutubeDL(ydl_opts) as y:
                error_code = y.download(video['url'])

            if not error_code:
                # start_time is in milliseconds, time.time() in seconds.
                elapsed_seconds = time.time() - job_progress['start_time'] / 1e3
                elapsed = round(math.ceil(elapsed_seconds) / 60, 2)
                output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
                output_dict['downloaded_video_id'] = video['id']
            else:
                output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
        except Exception as e:
            # Best effort: record the failure and still mark the video as processed below.
            output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
            # NOTE(review): this handler logs to 'log', the outer one to 'logs' -- confirm which is right.
            logger = db_logger('DOWNLOADER', 'log', console=True)
            logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
        job.del_progress_thread(video['id'])
        job.add_completed(video['id'])
        return output_dict
    except Exception as e:
        output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
        logger = db_logger('DOWNLOADER', 'logs', console=True)
        logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
        # Honour the `-> dict` contract instead of implicitly returning None.
        return output_dict
|