automated-youtube-dl/server/process/main.py


import concurrent.futures
import datetime
import traceback
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from queue import Empty
from server import opts
from server.api.jobs.queue import job_queue, job_status, queued_jobs
from server.mysql import db_logger
from server.process.funcs import remove_duplicates_from_playlist
from server.process.mysql import insert_video
from server.process.threads import download_video
from server.process.ytlogging import YtdlLogger
from ydl.yt_dlp import YDL
# TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Scripts-Type.md#archivist-scripts
# https://github.com/yt-dlp/yt-dlp#embedding-examples
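# Options handed to yt-dlp for every job. The 'format' selector below walks a fallback chain:
# AV1, then VP9.2, then VP9, first at >=1080p with >30 fps, then >=1080p at any framerate, then
# the same ladder at >=720p, and finally any best video; every step is capped at opts.max_size,
# and Opus audio is preferred for the MKV merge.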
ydl_opts = {
    # TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Details.md
    # https://old.reddit.com/r/DataHoarder/comments/c6fh4x/after_hoarding_over_50k_youtube_videos_here_is/
    'format': f'(bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{opts.max_size}M][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{opts.max_size}M][height>=720]/bestvideo[filesize<{opts.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
    'merge_output_format': 'mkv',
    'logtostderr': True,
    'embedchapters': True,
    'writethumbnail': True,  # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
    'embedthumbnail': True,
    'embeddescription': True,
    'writesubtitles': True,
    # 'allsubtitles': True,  # Download every language.
    'subtitlesformat': 'vtt',
    'subtitleslangs': ['en'],
    'writeautomaticsub': True,
    'writedescription': True,
    'ignoreerrors': True,
    'continuedl': False,
    'addmetadata': True,
    'writeinfojson': True,
    'verbose': opts.ydlp_verbose,
    'postprocessors': [
        {'key': 'FFmpegEmbedSubtitle'},
        {'key': 'FFmpegMetadata', 'add_metadata': True},
        {'key': 'EmbedThumbnail', 'already_have_thumbnail': True},
        {'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', 'when': 'before_dl'},
        # {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'},
    ],
    # 'external_downloader': 'aria2c',
    # 'external_downloader_args': ['-j 32', '-s 32', '-x 16', '--file-allocation=none', '--optimize-concurrent-downloads=true', '--http-accept-gzip=true', '--continue=true'],
}
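
# For reference, a minimal standalone sketch of how an options dict like this one is consumed,
# based on the yt-dlp embedding examples linked above (the URL below is just a placeholder; this
# module itself routes everything through the project's YDL wrapper instead of calling yt_dlp
# directly):
#
#   import yt_dlp
#   with yt_dlp.YoutubeDL(ydl_opts) as ydl:
#       ydl.download(['https://www.youtube.com/watch?v=...'])
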
def do_download():
    while True:
        try:
            # Get a job from the queue.
            job, l_id, url, base_output, ignore_downloaded = job_queue.get(timeout=5)

            # Remove the job from the list of queued jobs and mark it as running.
            queued_jobs.remove(job.id())
            job_status[job.id()] = 'running'

            start_time = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e3)
            encountered_errors = 0

            logger = db_logger('DOWNLOADER', 'jobs', job_id=job.id())
            logger.info('Starting job')

            # Build a yt-dlp wrapper for this job, injecting a logger that writes to the job's log.
            ydl = YDL(ydl_opts=dict(ydl_opts, **{'logger': YtdlLogger('DOWNLOADER', 'jobs', job.id())}))

            playlist = ydl.playlist_contents(str(url))
            if not playlist:
                logger.fatal('URL is not a playlist!')
                quit(1)
            playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])
            logger.info(f'Downloading item: "{playlist["title"]}" ({playlist["id"]}) {url}')

            download_queue = list(playlist['entries'])
            playlist_ydl_opts = ydl_opts.copy()

            if len(download_queue):
                # Download the playlist entries in parallel, one worker process per video.
                with ProcessPoolExecutor(max_workers=opts.threads) as executor:
                    futures = {executor.submit(download_video, video, ydl_opts=playlist_ydl_opts, output_dir=Path(base_output), ignore_downloaded=ignore_downloaded, job=job) for video in download_queue}
                    for future in concurrent.futures.as_completed(futures):
                        try:
                            result = future.result()
                            error = False
                            if result['downloaded_video_id']:
                                logger.info(result['downloaded_video_id'])
                            for line in result['video_critical_err_msg_short']:
                                encountered_errors += 1
                                error = True
                                logger.error(f"{result['video_id']} - {line}")
                            for line in result['video_critical_err_msg']:
                                encountered_errors += 1
                                error = True
                                logger.error(f"{result['video_id']} - {line}")
                            for line in result['logger_msg']:
                                logger.info(f"{result['video_id']} - {line}")
                            if not error:
                                # Only record the video as downloaded if it finished without critical errors.
                                insert_video(l_id, result['video_id'], result['url'])
                        except Exception as exc:
                            logger.error(f'Video download generated an exception: {exc}')

            # The job succeeded only if no video hit a critical error.
            job.success(encountered_errors == 0)
            job.finish()

            # Update the job status.
            job_status[job.id()] = 'finished'
            print('======================================================= finished =============')
        except Empty:
            # The queue stayed empty for the full timeout, so this worker is done.
            break
        except Exception as e:
            logger = db_logger(name='DOWNLOADER', table='logs', console=True)
            logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
            break
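
# do_download() is a long-lived worker loop: it pulls jobs off job_queue until the queue stays
# empty for the full timeout, then returns. How it gets launched lives outside this file;
# presumably the server starts it on a background thread or process (an assumption based on the
# queue imports above, not something this module does itself).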