From 0a473f2f0fd2629f009edb8bf127c4eed1738bf6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Mar 2021 20:56:24 +0530 Subject: [PATCH] More improvements to HLS/DASH external downloader code * Fix error when there is no `protocol` in `info_dict` * Move HLS byte range detection to `Aria2cFD` so that the download will fall back to the native downloader instead of ffmpeg * Fix bug with getting no fragments in DASH * Convert `check_results` in `can_download` to a generator --- yt_dlp/YoutubeDL.py | 3 ++- yt_dlp/downloader/common.py | 6 ++++++ yt_dlp/downloader/dash.py | 9 +++++---- yt_dlp/downloader/external.py | 13 +++++++++++-- yt_dlp/downloader/hls.py | 35 ++++++++++++++++++++--------------- 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5e3c015ba..7e0a69528 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2437,7 +2437,8 @@ class YoutubeDL(object): else: assert fixup_policy in ('ignore', 'never') - if get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD': + if ('protocol' in info_dict + and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'): if fixup_policy == 'warn': self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 2a9a62df4..8b16ad854 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -326,6 +326,12 @@ class FileDownloader(object): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. """ + pass + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 99acc8db2..6eae5bf0a 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -12,7 +12,8 @@ from ..utils import ( class DashSegmentsFD(FragmentFD): """ - Download segments in a DASH manifest + Download segments in a DASH manifest. External downloaders can take over + the fragment downloads by supporting the 'frag_urls' protocol """ FD_NAME = 'dashsegments' @@ -37,7 +38,7 @@ class DashSegmentsFD(FragmentFD): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - fragments = [] + fragments_to_download = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 @@ -49,7 +50,7 @@ class DashSegmentsFD(FragmentFD): fragment_url = urljoin(fragment_base_url, fragment['path']) if real_downloader: - fragments.append({ + fragments_to_download.append({ 'url': fragment_url, }) continue @@ -92,7 +93,7 @@ class DashSegmentsFD(FragmentFD): if real_downloader: info_copy = info_dict.copy() - info_copy['fragments'] = fragments + info_copy['fragments'] = fragments_to_download fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 026a4e382..c315deb2e 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -125,7 +125,7 @@ class ExternalFD(FileDownloader): if 'fragments' in info_dict: file_list = [] dest, _ = sanitize_open(tmpfilename, 'wb') - for [i, fragment] in enumerate(info_dict['fragments']): + for i, fragment in enumerate(info_dict['fragments']): file = '%s_%s.frag' % (tmpfilename, i) decrypt_info = fragment.get('decrypt_info') src, _ = sanitize_open(file, 'rb') @@ -242,6 +242,15 @@ class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls') + @staticmethod + def supports_manifest(manifest): + UNSUPPORTED_FEATURES = [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + ] + check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + return all(check_results) + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c'] dn = os.path.dirname(tmpfilename) @@ -264,7 +273,7 @@ class Aria2cFD(ExternalFD): cmd += ['--uri-selector', 'inorder', '--download-result=hide'] url_list_file = '%s.frag.urls' % tmpfilename url_list = [] - for [i, fragment] in enumerate(info_dict['fragments']): + for i, fragment in enumerate(info_dict['fragments']): tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i) url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname)) stream, _ = sanitize_open(url_list_file, 'wb') diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 29be6bdf9..77606b0ed 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -24,12 +24,16 @@ from ..utils import ( class HlsFD(FragmentFD): - """ A limited implementation that does not require ffmpeg """ + """ + Download segments in a m3u8 manifest. External downloaders can take over + the fragment downloads by supporting the 'frag_urls' protocol and + re-defining 'supports_manifest' function + """ FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict, allow_unplayable_formats=False, real_downloader=None, with_crypto=can_decrypt_frag): + def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -53,16 +57,15 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES += [ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] ] - if real_downloader: - UNSUPPORTED_FEATURES += [ - r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] - ] - check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest - check_results.append(with_crypto or not is_aes128_enc) - check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) - check_results.append(not info_dict.get('is_live')) - return all(check_results) + + def check_results(): + yield not info_dict.get('is_live') + is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest + yield with_crypto or not is_aes128_enc + yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest) + for feature in UNSUPPORTED_FEATURES: + yield not re.search(feature, manifest) + return all(check_results()) def real_download(self, filename, info_dict): man_url = info_dict['url'] @@ -72,9 +75,7 @@ class HlsFD(FragmentFD): man_url = urlh.geturl() s = urlh.read().decode('utf-8', 'ignore') - real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None) - - if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'), real_downloader): + if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')): if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): self.report_error('pycryptodome not found. Please install it.') return False @@ -89,6 +90,10 @@ class HlsFD(FragmentFD): # fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) + real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None) + if real_downloader and not real_downloader.supports_manifest(s): + real_downloader = None + def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))