This repository has been archived on 2023-11-11. You can view files and clone it, but cannot push or open issues or pull requests.

133 lines
6.8 KiB

import datetime
import math
import subprocess
import time
import traceback
from pathlib import Path
import yt_dlp as ydl_ydl
from hurry.filesize import size
from unidecode import unidecode
import ydl.yt_dlp as ydl
from server.mysql import db_logger
from server.process.funcs import remove_special_chars_linux, sanitize_colored_string
from server.process.ytlogging import YtdlLogger
# Upper bound used when trimming generated video filenames (it is compared against
# len(video_filename) in download_video). The filesystem's NAME_MAX for "/" is queried
# once, at import time, through the `getconf` utility; the command is passed as an
# argument list so no intermediate shell is spawned.
# NOTE(review): the 30 subtracted here is presumably headroom for file extensions and
# temporary suffixes added during download -- confirm the intended margin.
name_max = int(subprocess.check_output(["getconf", "NAME_MAX", "/"]).decode()) - 30
def download_video(video, ydl_opts, output_dir, ignore_downloaded, job) -> dict:
# Download a single video with yt-dlp and report the outcome as a dict.
#
# NOTE(review): this copy of the function has lost its leading indentation and several
# statements look truncated (each one is flagged inline below). The nesting and the
# damaged statements must be restored from the upstream file before this can run.
#
# Parameters, as used by the visible code:
#   video             - mapping read for 'id', 'url', 'title', 'channel_id', 'channel',
#                       'channel_url', 'duration' and 'view_count'; it is later replaced by
#                       the result of yt_dlp.get_info().
#   ydl_opts          - yt-dlp options dict. It is mutated here: 'outtmpl', 'logger' and
#                       'progress_hooks' are overwritten on the caller's object.
#   output_dir        - directory joined with the generated filename to form the output path.
#   ignore_downloaded - in the visible code, only consulted by the channel-metadata check.
#   job               - object providing new_progress_thread(video_id).
#
# Returns output_dict (built on the next line). 'downloaded_video_id' stays None unless the
# download reports a falsy error code; error text is collected in the 'video_critical_err_msg*'
# lists and informational text in 'logger_msg'.
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_url': video['url'], 'video_critical_err_msg': [], 'video_critical_err_msg_short': [], 'status_msg': [], 'logger_msg': []} # empty object
# Per-video progress record; the hook below writes 'total', 'downloaded_bytes', 'percent',
# 'speed' and 'size' into it.
job_progress = job.new_progress_thread(video['id'])
# NOTE(review): the expression inside int() is truncated in this copy -- presumably
# time.time() was lost, which would make start_time a millisecond timestamp. If so, the
# elapsed-time computation further down subtracts milliseconds from seconds -- verify.
job_progress['start_time'] = int( * 1e3)
# yt-dlp progress callback (registered via ydl_opts['progress_hooks'] below): copies byte
# counts, percentage, speed and a human-readable size from the status dict `d` into job_progress.
def progress_hook(d):
if d['status'] == 'downloading': # Variables can be None if the download hasn't started yet.
if d.get('downloaded_bytes'):
# We want total_bytes but it may not exist so total_bytes_estimate is good too
if d.get('total_bytes'):
job_progress['total'] = d.get('total_bytes')
elif d.get('total_bytes_estimate'):
job_progress['total'] = d.get('total_bytes_estimate')
# NOTE(review): 'total' is read here even when neither key above was present in `d`;
# this assumes new_progress_thread() pre-populates job_progress['total'] -- TODO confirm.
if job_progress['total']: # If yt-dlp has this data
job_progress['downloaded_bytes'] = int(d['downloaded_bytes'])
if job_progress['total'] > 0:
job_progress['percent'] = (job_progress['downloaded_bytes'] / job_progress['total']) * 100
# bar.update(int(np.round(percent - bar.n))) # If the progress bar doesn't end at 100% then round to 1 decimal place
# NOTE(review): d['_speed_str'] is indexed directly; presumably yt-dlp always supplies it
# while downloading -- confirm, otherwise this raises KeyError inside the hook.
job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
job_progress['size'] = f"{size(d.get('downloaded_bytes'))}/{size(job_progress['total'])}"
# bar.set_postfix({
# 'speed': d['_speed_str'],
# 'size': f"{size(d.get('downloaded_bytes'))}/{size(total)}",
# })
# NOTE(review): with indentation lost it is unclear which `if` this `else` pairs with;
# the trailing comment suggests the `if job_progress['total']` test -- confirm.
else: # otherwise just use their internal variables
# bar.set_postfix({
# 'speed': d['_speed_str'],
# 'size': f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}",
# })
job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
job_progress['size'] = f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}"
# Channel-metadata sanity check on the playlist entry.
# NOTE(review): by operator precedence this parses as
#   (not ignore_downloaded and not channel_id) or (not channel) or (not channel_url)
# so ignore_downloaded only guards the first test -- confirm whether parentheses are missing.
if not ignore_downloaded and not video['channel_id'] or not video['channel'] or not video['channel_url']:
if video['duration'] or isinstance(video['view_count'], int):
# Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
# NOTE(review): the comment above says these videos are valid, yet the only visible statement
# is an early return; a pass/else branch appears to be missing from this copy -- verify.
return output_dict
# Transliterate the title to ASCII (cleans out foreign-language characters)
video['title'] = unidecode(video['title'])
# Get the video info
# NOTE(review): the YtdlLogger(...) call on the next line is unbalanced (an argument and a
# closing parenthesis appear to be missing after table='jobs',).
yt_dlp = ydl.YDL(dict(ydl_opts, **{'logger': YtdlLogger(name=video['id'], table='jobs',}))
video_n = yt_dlp.get_info(video['url'])
if not video_n:
output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
return output_dict
# Carry the original URL over, then use the freshly fetched info dict from here on.
video_n['url'] = video['url']
video = video_n
del video_n
# We created a new dict
video['title'] = unidecode(video['title'])
video['uploader'] = unidecode(video['uploader']) # now this info is present since we fetched it
# TODO: do we also need to remove the @ char?
# Build the output filename (without extension) from the project's output template and strip '/'.
# NOTE(review): the closing parenthesis of this remove_special_chars_linux(...) call is missing.
video_filename = remove_special_chars_linux(
ydl.get_output_templ(video_id=video['id'], title=video['title'], uploader=video['uploader'], uploader_id=video['uploader_id'], include_ext=False), special_chars=['/']
# Make sure the video title isn't too long
# Drops one trailing title character per iteration and rebuilds the filename with '...' appended
# until it is shorter than name_max - 3.
while len(video_filename) >= name_max - 3: # -3 so that I can add ...
video['title'] = video['title'][:-1]
# NOTE(review): the inner call (presumably ydl.get_output_templ(...), as used above) is
# missing here; only its title= argument survives.
video_filename = remove_special_chars_linux(
title=video['title'] + '...',
), special_chars=['/'])
base_path = str(Path(output_dir, video_filename))
# Mutates the caller's ydl_opts: yt-dlp fills in %(ext)s itself.
ydl_opts['outtmpl'] = f"{base_path}.%(ext)s"
# try:
# base_path = os.path.splitext(Path(output_dir, yt_dlp.prepare_filename(video)))[0]
# except AttributeError:
# # Sometimes we won't be able to pull the video info so just use the video's ID.
# base_path = output_dir / video['id']
# Logger for the actual download; its .errors list is merged into the result below.
# NOTE(review): this call is unclosed (argument(s) and ')' missing after table='jobs',).
ylogger = YtdlLogger(name=video['id'], table='jobs',
ydl_opts['logger'] = ylogger
ydl_opts['progress_hooks'] = [progress_hook]
# NOTE(review): no `try:` is visible for the first `except` below; presumably it opened here.
with ydl_ydl.YoutubeDL(ydl_opts) as y:
# NOTE(review): right-hand side truncated; presumably a download call on `y` with video['url'].
error_code =['url'])
# yt_dlp = ydl.YDL(ydl_opts) # recreate the object with the correct logging path
# error_code = yt_dlp(video['url']) # Do the download
# A falsy error code is treated as success.
if not error_code:
# NOTE(review): time.time() is in seconds; if start_time holds milliseconds (see above) this
# difference is not a duration -- verify the units.
elapsed = round(math.ceil(time.time() - job_progress['start_time']) / 60, 2)
output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
output_dict['downloaded_video_id'] = video['id']
# Append whatever errors the download logger collected.
output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
# Failure during the download step: record the traceback in the result and log it.
except Exception as e:
output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
logger = db_logger('DOWNLOADER', 'log', console=True)
logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
return output_dict
# NOTE(review): second handler with no visible `try:`; presumably an outer try wrapping the whole
# function body. Its db_logger second argument is 'logs' while the handler above uses 'log' --
# confirm which is intended. The function may also continue past this point (e.g. a final return).
except Exception as e:
output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
logger = db_logger('DOWNLOADER', 'logs', console=True)
logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')