better handling of unavailable videos

This commit is contained in:
Cyberes 2023-02-07 00:13:44 -07:00
parent 4e3d62c879
commit 4b391b9c9c
3 changed files with 43 additions and 14 deletions

View File

@ -4,9 +4,11 @@ import logging.config
import math
import os
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import time
from multiprocessing import Manager, Pool, cpu_count
from pathlib import Path
@ -171,6 +173,15 @@ def load_existing_videos():
download_archive_file.touch()
with open(download_archive_file, 'r') as file:
output.update(([line.rstrip() for line in file]))
# Remove duplicate lines.
# Something may have gone wrong in the past so we want to make sure everything is cleaned up.
with open(download_archive_file) as file:
uniqlines = set(file.readlines())
fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'w') as tmp:
tmp.writelines(set(uniqlines))
shutil.move(path, download_archive_file)
return output

View File

@ -70,15 +70,23 @@ def download_video(args) -> dict:
video = args[0]
kwargs = args[1]
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
if not video['channel_id'] or not video['channel'] or not video['channel_url']:
if video['duration'] or isinstance(video['view_count'], int):
# Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
pass
else:
output_dict['video_error_logger_msg'].append(f"{video['id']} unavailable.")
return output_dict
# Get a bar
locked = False
if len(kwargs['bars']):
# We're going to wait until a bar is available for us to use.
while not locked:
while not locked: # We're going to wait until a bar is available for us to use.
for item in kwargs['bars']:
if not is_manager_lock_locked(item[1]):
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it.
offset = item[0]
bar_lock = item[1]
break
@ -86,16 +94,19 @@ def download_video(args) -> dict:
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
start_time = time.time()
try:
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
url = video['url']
video = yt_dlp.sanitize_info(yt_dlp.extract_info(video['url'], download=False))
video['url'] = url
del url
video_n = yt_dlp.get_info(video['url'])
if not video_n:
output_dict['video_error_logger_msg'].append(f"{video['id']} failed to get info.")
return output_dict
video_n['url'] = video['url']
video = video_n
del video_n
try:
base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0]
except AttributeError:
@ -111,7 +122,7 @@ def download_video(args) -> dict:
output_dict['downloaded_video_id'] = video['id']
else:
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
except Exception as e:
except Exception:
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
if locked:
bar.update(100 - bar.n)

View File

@ -40,19 +40,19 @@ class YDL:
'logger': self.ydl_opts['logger'],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.sanitize_info(ydl.extract_info(url, download=False))
info = self.get_info(url)
if not info:
return False
entries = []
if info['_type'] == 'playlist':
if 'entries' in info.keys():
# When downloading a channel youtube-dl returns a playlist for videos and another for shorts.
# We need to combine all the videos into one list
# We need to combine all the videos into one list.
for item in info['entries']:
if item['_type'] in ('video', 'url'):
entries.append(item)
elif item['_type'] == 'playlist':
for video in ydl.sanitize_info(ydl.extract_info(item['webpage_url'], download=False))['entries']:
for video in self.get_info(item['webpage_url'])['entries']:
entries.append(video)
else:
raise ValueError(f"Unknown sub-media type: {item['_type']}")
@ -83,8 +83,15 @@ class YDL:
def process_info(self, *args, **kwargs):
    """Forward the call to ``self.yt_dlp.process_info``.

    Every positional and keyword argument is passed through untouched, and
    whatever the wrapped object returns is handed straight back.
    """
    delegate = self.yt_dlp.process_info
    return delegate(*args, **kwargs)
def sanitize_info(self, *args, **kwargs):
    """Forward the call to ``self.yt_dlp.sanitize_info``.

    Arguments are relayed unchanged and the wrapped object's return value is
    returned as-is.
    """
    delegate = self.yt_dlp.sanitize_info
    return delegate(*args, **kwargs)
def get_info(self, url):
    """Fetch sanitized metadata for ``url`` without downloading any media.

    A fresh ``yt_dlp.YoutubeDL`` is built for each call with flat extraction
    enabled, downloading skipped, errors ignored, and the logger taken from
    ``self.ydl_opts``.

    :param url: address passed to ``extract_info``.
    :return: the value of ``sanitize_info`` applied to the ``extract_info``
        result. Callers in this file treat a falsy return as a failure
        (presumably ``None`` when extraction fails under ``ignoreerrors`` --
        confirm against the yt-dlp documentation).
    """
    ydl_opts = {
        'extract_flat': True,
        'skip_download': True,
        'ignoreerrors': True,
        'logger': self.ydl_opts['logger'],
    }
    # Use the downloader as a context manager, as the playlist code in this
    # class already does, so it is closed once the metadata has been fetched.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.sanitize_info(ydl.extract_info(url, download=False))
def __call__(self, *args, **kwargs):
    """Make the object callable by forwarding to ``self.yt_dlp.download``.

    Arguments are relayed unchanged and the wrapped ``download`` call's
    return value is returned as-is.
    """
    download = self.yt_dlp.download
    return download(*args, **kwargs)