# automated-youtube-dl/server/process/threads.py

import datetime
import math
import subprocess
import time
import traceback
from pathlib import Path

import yt_dlp as ydl_ydl
from hurry.filesize import size
from unidecode import unidecode

import ydl.yt_dlp as ydl
from server.mysql import db_logger
from server.process.funcs import remove_special_chars_linux, sanitize_colored_string
from server.process.ytlogging import YtdlLogger

# Longest filename (as reported by `getconf NAME_MAX /`) minus a 30-character
# safety margin. The command is passed as an argv list so no shell is spawned
# just to run a constant command.
name_max = int(subprocess.check_output(["getconf", "NAME_MAX", "/"]).decode()) - 30


def _make_progress_hook(job_progress):
    """Return a yt-dlp progress hook that copies download progress into *job_progress*."""

    def progress_hook(d):
        if d['status'] != 'downloading':
            return

        # Values can be None if the download hasn't started yet.
        downloaded = d.get('downloaded_bytes')
        if downloaded:
            # We want total_bytes but it may not exist, so total_bytes_estimate is good too.
            if d.get('total_bytes'):
                job_progress['total'] = d.get('total_bytes')
            elif d.get('total_bytes_estimate'):
                job_progress['total'] = d.get('total_bytes_estimate')

        if job_progress['total']:  # If yt-dlp has this data
            # A total may be known from an earlier callback while this callback
            # carries no byte count; treat that as 0 instead of raising inside the hook.
            downloaded = int(downloaded or 0)
            job_progress['downloaded_bytes'] = downloaded
            if job_progress['total'] > 0:
                job_progress['percent'] = (downloaded / job_progress['total']) * 100
            job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
            job_progress['size'] = f"{size(downloaded)}/{size(job_progress['total'])}"
        else:  # otherwise just use yt-dlp's pre-formatted strings
            job_progress['speed'] = sanitize_colored_string(d['_speed_str']).strip(' ')
            job_progress['size'] = f"{d['_downloaded_bytes_str'].strip()}/{d['_total_bytes_str'].strip()}"

    return progress_hook


def _build_video_filename(video):
    """Return the extension-less output filename for *video*, shortening its title until it fits name_max.

    ``video['title']`` is trimmed in place while shortening.
    """

    def render(title):
        # TODO: do we also need to remove the @ char?
        return remove_special_chars_linux(
            ydl.get_output_templ(
                video_id=video['id'],
                title=title,
                uploader=video['uploader'],
                uploader_id=video['uploader_id'],
                include_ext=False,
            ),
            special_chars=['/'],
        )

    video_filename = render(video['title'])
    # -3 so that '...' can be appended. Stop once the title is empty: trimming an
    # empty string changes nothing, so the loop would otherwise never terminate
    # when the uploader/id alone exceed the limit.
    while len(video_filename) >= name_max - 3 and video['title']:
        video['title'] = video['title'][:-1]
        video_filename = render(video['title'] + '...')
    return video_filename


def download_video(video, ydl_opts, output_dir, ignore_downloaded, job) -> dict:
    """Download a single video with yt-dlp and report the outcome.

    Args:
        video: Dict describing the video. Reads 'id', 'url', 'title', 'channel_id',
            'channel', 'channel_url', 'duration' and 'view_count'. Its 'title' is
            transliterated in place.
        ydl_opts: Base yt-dlp options. Not modified; per-call copies are made.
        output_dir: Directory the output filename is joined onto.
        ignore_downloaded: Takes part in the availability pre-check (see the
            review note in the body).
        job: Object providing ``id``, ``new_progress_thread``,
            ``del_progress_thread`` and ``add_completed``.

    Returns:
        A dict with keys 'downloaded_video_id' (set only on success), 'video_id',
        'video_url', 'video_critical_err_msg', 'video_critical_err_msg_short',
        'status_msg' and 'logger_msg'. The dict is returned on every path,
        including when an unexpected exception was caught and recorded.
    """
    # The progress entry is registered under this id, so the same id is used to
    # remove it even after `video` is rebound to the freshly fetched info dict.
    video_id = video['id']
    output_dict = {
        'downloaded_video_id': None,
        'video_id': video_id,
        'video_url': video['url'],
        'video_critical_err_msg': [],
        'video_critical_err_msg_short': [],
        'status_msg': [],
        'logger_msg': [],
    }
    try:
        job_progress = job.new_progress_thread(video_id)
        try:
            # Epoch milliseconds (UTC) for progress consumers.
            job_progress['start_time'] = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e3)
            # Separate monotonic clock, in seconds, for the elapsed-time message.
            started = time.monotonic()

            # NOTE(review): `and` binds tighter than `or`, so ignore_downloaded only
            # gates the channel_id check. The original evaluation order is kept
            # here with explicit parentheses -- confirm whether it was meant to
            # gate all three fields.
            lacks_channel_info = (
                (not ignore_downloaded and not video['channel_id'])
                or not video['channel']
                or not video['channel_url']
            )
            # Sometimes videos don't have channel_id, channel, or channel_url but
            # are actually valid (like shorts), so only give up when duration and
            # view_count are missing too.
            if lacks_channel_info and not (video['duration'] or isinstance(video['view_count'], int)):
                output_dict['video_critical_err_msg_short'].append('unavailable.')
                return output_dict

            # Transliterate non-ASCII characters in the title.
            video['title'] = unidecode(video['title'])

            try:
                # Get the video info.
                info_opts = dict(ydl_opts, logger=YtdlLogger(name=video['id'], table='jobs', job_id=job.id))
                fetched = ydl.YDL(info_opts).get_info(video['url'])
                if not fetched:
                    output_dict['video_critical_err_msg_short'].append('failed to get info. Unavailable?')
                    return output_dict

                fetched['url'] = video['url']
                video = fetched

                # This is a new dict, so transliterate again.
                video['title'] = unidecode(video['title'])
                video['uploader'] = unidecode(video['uploader'])  # present now that the info was fetched

                base_path = str(Path(output_dir, _build_video_filename(video)))

                ylogger = YtdlLogger(name=video['id'], table='jobs', job_id=job.id)
                # Per-call copy: writing these keys into the caller's dict would let
                # concurrent downloads sharing it overwrite each other's settings.
                download_opts = dict(
                    ydl_opts,
                    outtmpl=f"{base_path}.%(ext)s",
                    logger=ylogger,
                    progress_hooks=[_make_progress_hook(job_progress)],
                )
                with ydl_ydl.YoutubeDL(download_opts) as y:
                    error_code = y.download([video['url']])

                if not error_code:
                    elapsed = round(math.ceil(time.monotonic() - started) / 60, 2)
                    output_dict['logger_msg'].append(f"'{video['title']}' - Downloaded in {elapsed} min.")
                    output_dict['downloaded_video_id'] = video['id']
                else:
                    output_dict['video_critical_err_msg'] = output_dict['video_critical_err_msg'] + ylogger.errors
            except Exception as e:
                trace = traceback.format_exc()
                output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {trace}")
                # NOTE(review): this handler logs to 'log' while the outer one logs
                # to 'logs' -- confirm which name db_logger expects.
                logger = db_logger('DOWNLOADER', 'log', console=True)
                logger.fatal(f'failed with {e.__class__.__name__}: {e}. {trace}')
        finally:
            # Always drop the progress entry, including on the early returns above.
            job.del_progress_thread(video_id)
        job.add_completed(video_id)
    except Exception as e:
        trace = traceback.format_exc()
        output_dict['video_critical_err_msg'].append(f"EXCEPTION -> {trace}")
        logger = db_logger('DOWNLOADER', 'logs', console=True)
        logger.fatal(f'failed with {e.__class__.__name__}: {e}. {trace}')
    return output_dict