import concurrent
|
|
import datetime
|
|
import traceback
|
|
from concurrent.futures import ProcessPoolExecutor
|
|
from pathlib import Path
|
|
from queue import Empty
|
|
|
|
from server import opts
|
|
from server.api.jobs.queue import job_queue, job_status, queued_jobs
|
|
from server.mysql import db_logger
|
|
from server.process.funcs import remove_duplicates_from_playlist
|
|
from server.process.mysql import insert_video
|
|
from server.process.threads import download_video
|
|
from server.process.ytlogging import YtdlLogger
|
|
from ydl.yt_dlp import YDL
|
|
|
|
# TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Scripts-Type.md#archivist-scripts

# https://github.com/yt-dlp/yt-dlp#embedding-examples
# Base yt-dlp option set shared by every download job.  do_download() layers a
# per-job logger on top with dict(ydl_opts, **{...}) and copies it per playlist,
# so this module-level dict itself must never be mutated in place.
ydl_opts = {
    # TODO: https://github.com/TheFrenchGhosty/TheFrenchGhostys-Ultimate-YouTube-DL-Scripts-Collection/blob/master/docs/Details.md
    # https://old.reddit.com/r/DataHoarder/comments/c6fh4x/after_hoarding_over_50k_youtube_videos_here_is/
    # Format ladder: prefer AV1, then VP9.2, then VP9; try >=1080p with >30fps
    # first, then >=1080p, then the same codec ladder at >=720p, every rung
    # capped at opts.max_size MB; pair with Opus audio when available, and fall
    # back to plain 'best' as a last resort.
    'format': f'(bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=1080][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=1080]/bestvideo[filesize<{opts.max_size}M][height>=1080]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=720][fps>30]/bestvideo[filesize<{opts.max_size}M][vcodec^=av01][height>=720]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9.2][height>=720]/bestvideo[filesize<{opts.max_size}M][vcodec=vp9][height>=720]/bestvideo[filesize<{opts.max_size}M][height>=720]/bestvideo[filesize<{opts.max_size}M])+(bestaudio[acodec=opus]/bestaudio)/best',
    # Merge video+audio streams into an MKV container.
    'merge_output_format': 'mkv',
    'logtostderr': True,
    'embedchapters': True,
    'writethumbnail': True,
    # Save the thumbnail to a file. Embedding seems to be broken right now so this is an alternative.
    'embedthumbnail': True,
    'embeddescription': True,
    'writesubtitles': True,
    # 'allsubtitles': True, # Download every language.
    'subtitlesformat': 'vtt',
    'subtitleslangs': ['en'],
    # Also grab YouTube's auto-generated captions when no manual subs exist.
    'writeautomaticsub': True,
    'writedescription': True,
    # Keep going past per-video failures; errors surface via the job logger.
    'ignoreerrors': True,
    # Restart partial downloads from scratch rather than resuming .part files.
    'continuedl': False,
    'addmetadata': True,
    # Sidecar .info.json per video, for archival completeness.
    'writeinfojson': True,
    'verbose': opts.ydlp_verbose,
    'postprocessors': [
        {'key': 'FFmpegEmbedSubtitle'},
        {'key': 'FFmpegMetadata', 'add_metadata': True},
        # 'already_have_thumbnail' keeps the on-disk thumbnail after embedding.
        {'key': 'EmbedThumbnail', 'already_have_thumbnail': True},
        # Convert the thumbnail before download so EmbedThumbnail gets a jpg.
        {'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', 'when': 'before_dl'},
        # {'key': 'FFmpegSubtitlesConvertor', 'format': 'srt'}
    ],
    # 'external_downloader': 'aria2c',
    # 'external_downloader_args': ['-j 32', '-s 32', '-x 16', '--file-allocation=none', '--optimize-concurrent-downloads=true', '--http-accept-gzip=true', '--continue=true'],
}
def do_download():
    """Worker loop: pull download jobs off ``job_queue`` and process them.

    For each job this resolves the playlist behind the job's URL, fans the
    individual videos out to a ``ProcessPoolExecutor`` with ``opts.threads``
    workers, logs per-video results, records each successfully downloaded
    video via ``insert_video()``, and finally marks the job finished.  The
    loop exits when the queue has been empty for 5 seconds, or on an
    unexpected error.
    """
    while True:
        try:
            # Block up to 5 seconds for the next job; queue.Empty ends the loop.
            job, l_id, url, base_output, ignore_downloaded = job_queue.get(timeout=5)

            # The job is no longer merely queued — it is now running.
            queued_jobs.remove(job.id())
            job_status[job.id()] = 'running'

            encountered_errors = 0
            logger = db_logger('DOWNLOADER', 'jobs', job_id=job.id())
            logger.info('Starting job')

            # Attach a per-job yt-dlp logger without mutating the shared module-level opts.
            ydl = YDL(ydl_opts=dict(ydl_opts, **{'logger': YtdlLogger('DOWNLOADER', 'jobs', job.id())}))
            playlist = ydl.playlist_contents(str(url))

            if not playlist:
                # Bug fix: this used to call quit(1), which killed the entire
                # worker process.  Fail just this job and move on to the next.
                logger.fatal('URL is not a playlist!')
                job.success(False)
                job.finish()
                job_status[job.id()] = 'finished'
                continue

            playlist['entries'] = remove_duplicates_from_playlist(playlist['entries'])

            logger.info(f'Downloading item: "{playlist["title"]}" ({playlist["id"]}) {url}')

            # The old enumerate/append loop was just a list copy with an unused index.
            download_queue = list(playlist['entries'])

            # Copy so per-playlist tweaks can never leak into the module-level dict.
            playlist_ydl_opts = ydl_opts.copy()

            if download_queue:
                with ProcessPoolExecutor(max_workers=opts.threads) as executor:
                    futures = {
                        executor.submit(
                            download_video,
                            video,
                            ydl_opts=playlist_ydl_opts,
                            output_dir=Path(base_output),
                            ignore_downloaded=ignore_downloaded,
                            job=job,
                        )
                        for video in download_queue
                    }
                    for future in concurrent.futures.as_completed(futures):
                        try:
                            result = future.result()
                            error = False

                            if result['downloaded_video_id']:
                                logger.info(result['downloaded_video_id'])

                            # Either critical-error channel marks this video (and the job) failed.
                            for line in result['video_critical_err_msg_short']:
                                encountered_errors += 1
                                error = True
                                logger.error(f"{result['video_id']} - {line}")

                            for line in result['video_critical_err_msg']:
                                encountered_errors += 1
                                error = True
                                logger.error(f"{result['video_id']} - {line}")

                            for line in result['logger_msg']:
                                logger.info(f"{result['video_id']} - {line}")

                            # Only record videos that downloaded without a critical error.
                            if not error:
                                insert_video(l_id, result['video_id'], result['url'])
                        except Exception as exc:
                            # Bug fix: a crashed worker must count as an error,
                            # otherwise the job could still be marked successful.
                            encountered_errors += 1
                            logger.error(f'Video download generated an exception: {exc}')

            job.success(encountered_errors == 0)
            job.finish()  # bug fix: finish() was previously called twice

            # Update the job status
            job_status[job.id()] = 'finished'

            print('======================================================= finished =============')
        except Empty:
            # Queue drained — this worker is done.
            break
        except Exception as e:
            logger = db_logger(name='DOWNLOADER', table='logs', console=True)
            logger.fatal(f'failed with {e.__class__.__name__}: {e}. {traceback.format_exc()}')
            break