better handling of unavailable videos

This commit is contained in:
Cyberes 2023-02-07 00:13:44 -07:00
parent 4e3d62c879
commit 4b391b9c9c
3 changed files with 43 additions and 14 deletions

View File

@ -4,9 +4,11 @@ import logging.config
import math import math
import os import os
import re import re
import shutil
import signal import signal
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
from multiprocessing import Manager, Pool, cpu_count from multiprocessing import Manager, Pool, cpu_count
from pathlib import Path from pathlib import Path
@ -171,6 +173,15 @@ def load_existing_videos():
download_archive_file.touch() download_archive_file.touch()
with open(download_archive_file, 'r') as file: with open(download_archive_file, 'r') as file:
output.update(([line.rstrip() for line in file])) output.update(([line.rstrip() for line in file]))
# Remove duplicate lines.
# Something may have gone wrong in the past so we want to make sure everything is cleaned up.
with open(download_archive_file) as file:
uniqlines = set(file.readlines())
fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'w') as tmp:
tmp.writelines(set(uniqlines))
shutil.move(path, download_archive_file)
return output return output

View File

@ -70,15 +70,23 @@ def download_video(args) -> dict:
video = args[0] video = args[0]
kwargs = args[1] kwargs = args[1]
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
if not video['channel_id'] or not video['channel'] or not video['channel_url']:
if video['duration'] or isinstance(video['view_count'], int):
# Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
pass
else:
output_dict['video_error_logger_msg'].append(f"{video['id']} unavailable.")
return output_dict
# Get a bar # Get a bar
locked = False locked = False
if len(kwargs['bars']): if len(kwargs['bars']):
# We're going to wait until a bar is available for us to use. while not locked: # We're going to wait until a bar is available for us to use.
while not locked:
for item in kwargs['bars']: for item in kwargs['bars']:
if not is_manager_lock_locked(item[1]): if not is_manager_lock_locked(item[1]):
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it. locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it.
offset = item[0] offset = item[0]
bar_lock = item[1] bar_lock = item[1]
break break
@ -86,16 +94,19 @@ def download_video(args) -> dict:
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4))) desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False) bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
start_time = time.time() start_time = time.time()
try: try:
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
yt_dlp = ydl.YDL(kwargs['ydl_opts']) yt_dlp = ydl.YDL(kwargs['ydl_opts'])
url = video['url'] video_n = yt_dlp.get_info(video['url'])
video = yt_dlp.sanitize_info(yt_dlp.extract_info(video['url'], download=False)) if not video_n:
video['url'] = url output_dict['video_error_logger_msg'].append(f"{video['id']} failed to get info.")
del url return output_dict
video_n['url'] = video['url']
video = video_n
del video_n
try: try:
base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0] base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0]
except AttributeError: except AttributeError:
@ -111,7 +122,7 @@ def download_video(args) -> dict:
output_dict['downloaded_video_id'] = video['id'] output_dict['downloaded_video_id'] = video['id']
else: else:
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
except Exception as e: except Exception:
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}") output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
if locked: if locked:
bar.update(100 - bar.n) bar.update(100 - bar.n)

View File

@ -40,19 +40,19 @@ class YDL:
'logger': self.ydl_opts['logger'], 'logger': self.ydl_opts['logger'],
} }
with yt_dlp.YoutubeDL(ydl_opts) as ydl: with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.sanitize_info(ydl.extract_info(url, download=False)) info = self.get_info(url)
if not info: if not info:
return False return False
entries = [] entries = []
if info['_type'] == 'playlist': if info['_type'] == 'playlist':
if 'entries' in info.keys(): if 'entries' in info.keys():
# When downloading a channel youtube-dl returns a playlist for videos and another for shorts. # When downloading a channel youtube-dl returns a playlist for videos and another for shorts.
# We need to combine all the videos into one list # We need to combine all the videos into one list.
for item in info['entries']: for item in info['entries']:
if item['_type'] in ('video', 'url'): if item['_type'] in ('video', 'url'):
entries.append(item) entries.append(item)
elif item['_type'] == 'playlist': elif item['_type'] == 'playlist':
for video in ydl.sanitize_info(ydl.extract_info(item['webpage_url'], download=False))['entries']: for video in self.get_info(item['webpage_url'])['entries']:
entries.append(video) entries.append(video)
else: else:
raise ValueError(f"Unknown sub-media type: {item['_type']}") raise ValueError(f"Unknown sub-media type: {item['_type']}")
@ -83,8 +83,15 @@ class YDL:
def process_info(self, *args, **kwargs):
    """
    Pass-through to the wrapped downloader's ``process_info``.

    All positional and keyword arguments are forwarded unchanged to
    ``self.yt_dlp.process_info`` and its return value is handed back as-is.
    """
    return self.yt_dlp.process_info(*args, **kwargs)
def get_info(self, url):
    """
    Extract sanitized metadata for ``url`` without downloading any media.

    A short-lived ``yt_dlp.YoutubeDL`` instance is created with flat
    extraction, download skipping and ``ignoreerrors`` enabled; it reuses the
    logger stored in ``self.ydl_opts``.

    :param url: the address passed to ``extract_info``.
    :return: the value of ``sanitize_info(extract_info(url, download=False))``.
        Callers in this project test the result for falsiness, so it is
        presumably empty/None when extraction fails under ``ignoreerrors`` --
        TODO confirm against the yt-dlp documentation.
    """
    ydl_opts = {
        'extract_flat': True,
        'skip_download': True,
        'ignoreerrors': True,
        'logger': self.ydl_opts['logger'],
    }
    # Use the context manager (as the playlist code in this class does) so the
    # temporary YoutubeDL instance is cleaned up instead of being left open.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.sanitize_info(ydl.extract_info(url, download=False))
def __call__(self, *args, **kwargs):
    """
    Make the wrapper callable.

    Calling the instance forwards every positional and keyword argument to
    ``self.yt_dlp.download`` and returns whatever that call returns.
    """
    return self.yt_dlp.download(*args, **kwargs)