better handling of unavailable videos
This commit is contained in:
parent
4e3d62c879
commit
4b391b9c9c
|
@ -4,9 +4,11 @@ import logging.config
|
|||
import math
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from multiprocessing import Manager, Pool, cpu_count
|
||||
from pathlib import Path
|
||||
|
@ -171,6 +173,15 @@ def load_existing_videos():
|
|||
download_archive_file.touch()
|
||||
with open(download_archive_file, 'r') as file:
|
||||
output.update(([line.rstrip() for line in file]))
|
||||
|
||||
# Remove duplicate lines.
|
||||
# Something may have gone wrong in the past so we want to make sure everything is cleaned up.
|
||||
with open(download_archive_file) as file:
|
||||
uniqlines = set(file.readlines())
|
||||
fd, path = tempfile.mkstemp()
|
||||
with os.fdopen(fd, 'w') as tmp:
|
||||
tmp.writelines(set(uniqlines))
|
||||
shutil.move(path, download_archive_file)
|
||||
return output
|
||||
|
||||
|
||||
|
|
|
@ -70,15 +70,23 @@ def download_video(args) -> dict:
|
|||
|
||||
video = args[0]
|
||||
kwargs = args[1]
|
||||
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
||||
|
||||
if not video['channel_id'] or not video['channel'] or not video['channel_url']:
|
||||
if video['duration'] or isinstance(video['view_count'], int):
|
||||
# Sometimes videos don't have channel_id, channel, or channel_url but are actually valid. Like shorts.
|
||||
pass
|
||||
else:
|
||||
output_dict['video_error_logger_msg'].append(f"{video['id']} unavailable.")
|
||||
return output_dict
|
||||
|
||||
# Get a bar
|
||||
locked = False
|
||||
if len(kwargs['bars']):
|
||||
# We're going to wait until a bar is available for us to use.
|
||||
while not locked:
|
||||
while not locked: # We're going to wait until a bar is available for us to use.
|
||||
for item in kwargs['bars']:
|
||||
if not is_manager_lock_locked(item[1]):
|
||||
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
|
||||
locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it.
|
||||
offset = item[0]
|
||||
bar_lock = item[1]
|
||||
break
|
||||
|
@ -86,16 +94,19 @@ def download_video(args) -> dict:
|
|||
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
|
||||
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
|
||||
|
||||
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
|
||||
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
|
||||
url = video['url']
|
||||
video = yt_dlp.sanitize_info(yt_dlp.extract_info(video['url'], download=False))
|
||||
video['url'] = url
|
||||
del url
|
||||
video_n = yt_dlp.get_info(video['url'])
|
||||
if not video_n:
|
||||
output_dict['video_error_logger_msg'].append(f"{video['id']} failed to get info.")
|
||||
return output_dict
|
||||
video_n['url'] = video['url']
|
||||
video = video_n
|
||||
del video_n
|
||||
|
||||
try:
|
||||
base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0]
|
||||
except AttributeError:
|
||||
|
@ -111,7 +122,7 @@ def download_video(args) -> dict:
|
|||
output_dict['downloaded_video_id'] = video['id']
|
||||
else:
|
||||
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
|
||||
if locked:
|
||||
bar.update(100 - bar.n)
|
||||
|
|
|
@ -40,19 +40,19 @@ class YDL:
|
|||
'logger': self.ydl_opts['logger'],
|
||||
}
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.sanitize_info(ydl.extract_info(url, download=False))
|
||||
info = self.get_info(url)
|
||||
if not info:
|
||||
return False
|
||||
entries = []
|
||||
if info['_type'] == 'playlist':
|
||||
if 'entries' in info.keys():
|
||||
# When downloading a channel youtube-dl returns a playlist for videos and another for shorts.
|
||||
# We need to combine all the videos into one list
|
||||
# We need to combine all the videos into one list.
|
||||
for item in info['entries']:
|
||||
if item['_type'] in ('video', 'url'):
|
||||
entries.append(item)
|
||||
elif item['_type'] == 'playlist':
|
||||
for video in ydl.sanitize_info(ydl.extract_info(item['webpage_url'], download=False))['entries']:
|
||||
for video in self.get_info(item['webpage_url'])['entries']:
|
||||
entries.append(video)
|
||||
else:
|
||||
raise ValueError(f"Unknown sub-media type: {item['_type']}")
|
||||
|
@ -83,8 +83,15 @@ class YDL:
|
|||
def process_info(self, *args, **kwargs):
    """Forward the call to ``process_info`` on the object held in ``self.yt_dlp``.

    Every positional and keyword argument is passed through unchanged, and
    the delegate's return value is handed back as-is.
    """
    delegate = self.yt_dlp.process_info
    return delegate(*args, **kwargs)
|
||||
|
||||
def sanitize_info(self, *args, **kwargs):
    """Forward the call to ``sanitize_info`` on the object held in ``self.yt_dlp``.

    Every positional and keyword argument is passed through unchanged, and
    the delegate's return value is handed back as-is.
    """
    delegate = self.yt_dlp.sanitize_info
    return delegate(*args, **kwargs)
|
||||
def get_info(self, url):
    """Fetch metadata for ``url`` without downloading any media.

    A short-lived ``yt_dlp.YoutubeDL`` is created with flat extraction,
    ``skip_download`` and ``ignoreerrors`` enabled, reusing the logger
    stored in ``self.ydl_opts``.

    Args:
        url: The URL handed to ``extract_info``.

    Returns:
        Whatever ``sanitize_info`` produces for the extracted info.
        NOTE(review): with ``ignoreerrors`` set, a failed extraction
        presumably yields a falsy value rather than raising -- callers
        in this file guard with ``if not info`` -- confirm against yt-dlp.
    """
    ydl_opts = {
        'extract_flat': True,
        'skip_download': True,
        'ignoreerrors': True,
        'logger': self.ydl_opts['logger'],
    }
    # Use the context manager so the temporary YoutubeDL instance is closed
    # once the lookup finishes, instead of being left for garbage collection.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.sanitize_info(ydl.extract_info(url, download=False))
|
||||
|
||||
def __call__(self, *args, **kwargs):
    """Make the instance callable by forwarding to ``self.yt_dlp.download``.

    Every positional and keyword argument is passed through unchanged, and
    the delegate's return value is handed back as-is.
    """
    download = self.yt_dlp.download
    return download(*args, **kwargs)
|
||||
|
|
Reference in New Issue