better handling of unavailable videos
This commit is contained in:
parent
4e3d62c879
commit
4b391b9c9c
|
@ -4,9 +4,11 @@ import logging.config
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import signal
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from multiprocessing import Manager, Pool, cpu_count
|
from multiprocessing import Manager, Pool, cpu_count
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -171,6 +173,15 @@ def load_existing_videos():
|
||||||
download_archive_file.touch()
|
download_archive_file.touch()
|
||||||
with open(download_archive_file, 'r') as file:
|
with open(download_archive_file, 'r') as file:
|
||||||
output.update(([line.rstrip() for line in file]))
|
output.update(([line.rstrip() for line in file]))
|
||||||
|
|
||||||
|
# Remove duplicate lines.
|
||||||
|
# Earlier runs may have left duplicate entries in the archive file, so rewrite it with each line kept only once.
|
||||||
|
with open(download_archive_file) as file:
|
||||||
|
uniqlines = set(file.readlines())
|
||||||
|
fd, path = tempfile.mkstemp()
|
||||||
|
with os.fdopen(fd, 'w') as tmp:
|
||||||
|
tmp.writelines(set(uniqlines))
|
||||||
|
shutil.move(path, download_archive_file)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -70,15 +70,23 @@ def download_video(args) -> dict:
|
||||||
|
|
||||||
video = args[0]
|
video = args[0]
|
||||||
kwargs = args[1]
|
kwargs = args[1]
|
||||||
|
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
||||||
|
|
||||||
|
if not video['channel_id'] or not video['channel'] or not video['channel_url']:
|
||||||
|
if video['duration'] or isinstance(video['view_count'], int):
|
||||||
|
# Some valid videos (e.g. Shorts) come back without channel_id, channel, or channel_url, so only treat the video as unavailable when it also has no duration and no integer view_count.
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
output_dict['video_error_logger_msg'].append(f"{video['id']} unavailable.")
|
||||||
|
return output_dict
|
||||||
|
|
||||||
# Get a bar
|
# Get a bar
|
||||||
locked = False
|
locked = False
|
||||||
if len(kwargs['bars']):
|
if len(kwargs['bars']):
|
||||||
# We're going to wait until a bar is available for us to use.
|
while not locked: # We're going to wait until a bar is available for us to use.
|
||||||
while not locked:
|
|
||||||
for item in kwargs['bars']:
|
for item in kwargs['bars']:
|
||||||
if not is_manager_lock_locked(item[1]):
|
if not is_manager_lock_locked(item[1]):
|
||||||
locked = item[1].acquire(timeout=0.1) # get the lock ASAP and don't wait if we didn't get it.
|
locked = item[1].acquire(timeout=0.01) # get the lock ASAP and don't wait if we didn't get it.
|
||||||
offset = item[0]
|
offset = item[0]
|
||||||
bar_lock = item[1]
|
bar_lock = item[1]
|
||||||
break
|
break
|
||||||
|
@ -86,16 +94,19 @@ def download_video(args) -> dict:
|
||||||
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
|
desc_with = int(np.round(os.get_terminal_size()[0] * (1 / 4)))
|
||||||
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
|
bar = tqdm(total=100, position=offset, desc=f"{video['id']} - {video['title']}".ljust(desc_with)[:desc_with], bar_format='{l_bar}{bar}| {elapsed}<{remaining}{postfix}', leave=False)
|
||||||
|
|
||||||
output_dict = {'downloaded_video_id': None, 'video_id': video['id'], 'video_error_logger_msg': [], 'status_msg': [], 'logger_msg': []} # empty object
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
|
kwargs['ydl_opts']['logger'] = ytdl_logger() # dummy silent logger
|
||||||
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
|
yt_dlp = ydl.YDL(kwargs['ydl_opts'])
|
||||||
url = video['url']
|
video_n = yt_dlp.get_info(video['url'])
|
||||||
video = yt_dlp.sanitize_info(yt_dlp.extract_info(video['url'], download=False))
|
if not video_n:
|
||||||
video['url'] = url
|
output_dict['video_error_logger_msg'].append(f"{video['id']} failed to get info.")
|
||||||
del url
|
return output_dict
|
||||||
|
video_n['url'] = video['url']
|
||||||
|
video = video_n
|
||||||
|
del video_n
|
||||||
|
|
||||||
try:
|
try:
|
||||||
base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0]
|
base_path = os.path.splitext(yt_dlp.prepare_filename(video))[0]
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
@ -111,7 +122,7 @@ def download_video(args) -> dict:
|
||||||
output_dict['downloaded_video_id'] = video['id']
|
output_dict['downloaded_video_id'] = video['id']
|
||||||
else:
|
else:
|
||||||
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
|
output_dict['video_error_logger_msg'] = output_dict['video_error_logger_msg'] + ylogger.errors
|
||||||
except Exception as e:
|
except Exception:
|
||||||
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
|
output_dict['video_error_logger_msg'].append(f"EXCEPTION -> {traceback.format_exc()}")
|
||||||
if locked:
|
if locked:
|
||||||
bar.update(100 - bar.n)
|
bar.update(100 - bar.n)
|
||||||
|
|
|
@ -40,19 +40,19 @@ class YDL:
|
||||||
'logger': self.ydl_opts['logger'],
|
'logger': self.ydl_opts['logger'],
|
||||||
}
|
}
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
info = ydl.sanitize_info(ydl.extract_info(url, download=False))
|
info = self.get_info(url)
|
||||||
if not info:
|
if not info:
|
||||||
return False
|
return False
|
||||||
entries = []
|
entries = []
|
||||||
if info['_type'] == 'playlist':
|
if info['_type'] == 'playlist':
|
||||||
if 'entries' in info.keys():
|
if 'entries' in info.keys():
|
||||||
# When downloading a channel youtube-dl returns a playlist for videos and another for shorts.
|
# When downloading a channel youtube-dl returns a playlist for videos and another for shorts.
|
||||||
# We need to combine all the videos into one list
|
# We need to combine all the videos into one list.
|
||||||
for item in info['entries']:
|
for item in info['entries']:
|
||||||
if item['_type'] in ('video', 'url'):
|
if item['_type'] in ('video', 'url'):
|
||||||
entries.append(item)
|
entries.append(item)
|
||||||
elif item['_type'] == 'playlist':
|
elif item['_type'] == 'playlist':
|
||||||
for video in ydl.sanitize_info(ydl.extract_info(item['webpage_url'], download=False))['entries']:
|
for video in self.get_info(item['webpage_url'])['entries']:
|
||||||
entries.append(video)
|
entries.append(video)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown sub-media type: {item['_type']}")
|
raise ValueError(f"Unknown sub-media type: {item['_type']}")
|
||||||
|
@ -83,8 +83,15 @@ class YDL:
|
||||||
def process_info(self, *args, **kwargs):
|
def process_info(self, *args, **kwargs):
|
||||||
return self.yt_dlp.process_info(*args, **kwargs)
|
return self.yt_dlp.process_info(*args, **kwargs)
|
||||||
|
|
||||||
def sanitize_info(self, *args, **kwargs):
|
def get_info(self, url):
|
||||||
return self.yt_dlp.sanitize_info(*args, **kwargs)
|
ydl_opts = {
|
||||||
|
'extract_flat': True,
|
||||||
|
'skip_download': True,
|
||||||
|
'ignoreerrors': True,
|
||||||
|
'logger': self.ydl_opts['logger'],
|
||||||
|
}
|
||||||
|
ydl = yt_dlp.YoutubeDL(ydl_opts)
|
||||||
|
return ydl.sanitize_info(ydl.extract_info(url, download=False))
|
||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
return self.yt_dlp.download(*args, **kwargs)
|
return self.yt_dlp.download(*args, **kwargs)
|
||||||
|
|
Reference in New Issue