This repository has been archived on 2023-11-11. You can view files and clone it, but cannot push or open issues or pull requests.
automated-youtube-dl/server/process/threads.py

133 lines
6.8 KiB
Python

import datetime
import math
import subprocess
import time
import traceback
from pathlib import Path
import yt_dlp as ydl_ydl
from hurry.filesize import size
from unidecode import unidecode
import ydl.yt_dlp as ydl
from server.mysql import db_logger
from server.process.funcs import remove_special_chars_linux, sanitize_colored_string
from server.process.ytlogging import YtdlLogger
# Longest file name the root filesystem allows (as reported by `getconf NAME_MAX /`),
# minus 30 characters of headroom (presumably for the extension and temporary
# download suffixes -- TODO confirm the exact reason for 30).
# The command is passed as an argv list so no shell is spawned.
name_max = int(subprocess.check_output(["getconf", "NAME_MAX", "/"]).decode()) - 30
def download_video(video, ydl_opts, output_dir, ignore_downloaded, job) -> dict:
    """Download one video with yt-dlp and report the outcome as a dict.

    While the download runs, progress (total, downloaded bytes, percent,
    speed, size) is written into the mapping returned by
    ``job.new_progress_thread(video['id'])``.

    Args:
        video: Mapping describing the video. The keys ``id``, ``url``,
            ``channel_id``, ``channel``, ``channel_url``, ``duration``,
            ``view_count`` and ``title`` are read here.
        ydl_opts: Base yt-dlp options. They are copied, not modified; the
            per-download ``outtmpl``, ``logger`` and ``progress_hooks`` are
            set on the copy only.
        output_dir: Directory the downloaded file is written to.
        ignore_downloaded: Only consulted by the availability check below.
        job: Object providing ``id``, ``new_progress_thread()``,
            ``del_progress_thread()`` and ``add_completed()``.

    Returns:
        A dict with the keys ``downloaded_video_id`` (the video ID on
        success, otherwise ``None``), ``video_id``, ``video_url``,
        ``video_critical_err_msg``, ``video_critical_err_msg_short``,
        ``status_msg`` (initialised empty and not populated by this
        function) and ``logger_msg``.
    """
    output_dict = {
        'downloaded_video_id': None,
        'video_id': video['id'],
        'video_url': video['url'],
        'video_critical_err_msg': [],
        'video_critical_err_msg_short': [],
        'status_msg': [],
        'logger_msg': [],
    }
    try:
        job_progress = job.new_progress_thread(video['id'])
        # Epoch timestamp in *milliseconds*.
        job_progress['start_time'] = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e3)

        def progress_hook(d):
            """Mirror yt-dlp's progress report into ``job_progress``."""
            # Variables can be None if the download hasn't started yet.
            if d['status'] != 'downloading' or not d.get('downloaded_bytes'):
                return

            # We want total_bytes but it may not exist so total_bytes_estimate is good too.
            if d.get('total_bytes'):
                job_progress['total'] = d.get('total_bytes')
            elif d.get('total_bytes_estimate'):
                job_progress['total'] = d.get('total_bytes_estimate')

            if job_progress['total']:  # If yt-dlp has this data
                job_progress['downloaded_bytes'] = int(d['downloaded_bytes'])
                if job_progress['total'] > 0:
                    job_progress['percent'] = (job_progress['downloaded_bytes'] / job_progress['total']) * 100
                job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
                job_progress['size'] = f"{size(d.get('downloaded_bytes'))}/{size(job_progress['total'])}"
            else:  # otherwise just use their internal variables
                job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
                job_progress['size'] = f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}"

        # NOTE(review): the parentheses spell out how Python already evaluated the
        # original un-parenthesised condition (`and` binds tighter than `or`), so
        # `ignore_downloaded` only gates the channel_id test. Confirm whether it
        # was meant to gate all three tests.
        missing_channel_info = (
            (not ignore_downloaded and not video['channel_id'])
            or not video['channel']
            or not video['channel_url']
        )
        # Sometimes videos don't have channel_id, channel, or channel_url but are
        # actually valid (like shorts), so a duration or an integer view count
        # is accepted as proof that the video exists.
        if missing_channel_info and not (video['duration'] or isinstance(video['view_count'], int)):
            # NOTE(review): this early return (and the one below) skips
            # job.del_progress_thread()/job.add_completed() -- confirm the
            # caller handles that.
            output_dict['video_critical_err_msg_short'].append('unavailable.')
            return output_dict

        # Transliterate non-ASCII characters in the title.
        video['title'] = unidecode(video['title'])

        try:
            # Get the video info
            yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': YtdlLogger(name=video['id'], table='jobs', job_id=job.id)}))
            video_n = yt_dlp.get_info(video['url'])

            if not video_n:
                output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
                return output_dict

            video_n['url'] = video['url']
            video = video_n
            del video_n

            # We created a new dict, so transliterate again.
            video['title'] = unidecode(video['title'])
            video['uploader'] = unidecode(video['uploader'])  # now this info is present since we fetched it

            def build_filename(title):
                """Return the extension-less output file name for ``title``."""
                # TODO: do we also need to remove the @ char?
                return remove_special_chars_linux(
                    ydl.get_output_templ(
                        video_id=video['id'],
                        title=title,
                        uploader=video['uploader'],
                        uploader_id=video['uploader_id'],
                        include_ext=False
                    ), special_chars=['/'])

            video_filename = build_filename(video['title'])

            # Make sure the video title isn't too long: trim one character at a
            # time (-3 leaves room for the appended '...'). Stop once the title
            # is empty, otherwise the loop could never terminate when the rest
            # of the name alone is already too long.
            while len(video_filename) >= name_max - 3 and video['title']:
                video['title'] = video['title'][:-1]
                video_filename = build_filename(video['title'] + '...')

            base_path = str(Path(output_dir, video_filename))

            ylogger = YtdlLogger(name=video['id'], table='jobs', job_id=job.id)
            # Work on a copy so the caller's options (possibly shared between
            # concurrent downloads) never receive this video's settings.
            download_opts = dict(
                ydl_opts,
                outtmpl=f"{base_path}.%(ext)s",
                logger=ylogger,
                progress_hooks=[progress_hook],
            )

            with ydl_ydl.YoutubeDL(download_opts) as y:
                error_code = y.download([video['url']])

            if not error_code:
                # start_time is in milliseconds while time.time() is in seconds.
                elapsed_seconds = time.time() - job_progress['start_time'] / 1e3
                elapsed = round(math.ceil(elapsed_seconds) / 60, 2)
                output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
                output_dict['downloaded_video_id'] = video['id']
            else:
                output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
        except Exception as e:
            output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
            # NOTE(review): this handler passes 'log' while the outer one passes
            # 'logs' -- confirm which value db_logger expects.
            logger = db_logger('DOWNLOADER', 'log', console=True)
            logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
        job.del_progress_thread(video['id'])
        job.add_completed(video['id'])
        return output_dict
    except Exception as e:
        output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
        logger = db_logger('DOWNLOADER', 'logs', console=True)
        logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
        # Always hand the result dict back so callers get the recorded error
        # instead of None.
        return output_dict