mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor] Prevent unnecessary download of hls manifests
and refactor `hls_split_discontinuity` code
This commit is contained in:
parent
723d44b92b
commit
60755938b3
|
@ -1979,24 +1979,33 @@ class InfoExtractor(object):
|
||||||
preference=None, quality=None, m3u8_id=None, live=False, note=None,
|
preference=None, quality=None, m3u8_id=None, live=False, note=None,
|
||||||
errnote=None, fatal=True, data=None, headers={}, query={},
|
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||||
video_id=None):
|
video_id=None):
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
|
||||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||||
return [], {}
|
return formats, subtitles
|
||||||
|
|
||||||
if (not self.get_param('allow_unplayable_formats')
|
if (not self.get_param('allow_unplayable_formats')
|
||||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||||
return [], {}
|
return formats, subtitles
|
||||||
|
|
||||||
formats = []
|
def format_url(url):
|
||||||
|
return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
|
||||||
|
|
||||||
subtitles = {}
|
if self.get_param('hls_split_discontinuity', False):
|
||||||
|
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||||
|
if not m3u8_doc:
|
||||||
|
if not manifest_url:
|
||||||
|
return []
|
||||||
|
m3u8_doc = self._download_webpage(
|
||||||
|
manifest_url, video_id, fatal=fatal, data=data, headers=headers,
|
||||||
|
note=False, errnote='Failed to download m3u8 playlist information')
|
||||||
|
if m3u8_doc is False:
|
||||||
|
return []
|
||||||
|
return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
|
||||||
|
|
||||||
format_url = lambda u: (
|
else:
|
||||||
u
|
def _extract_m3u8_playlist_indices(*args, **kwargs):
|
||||||
if re.match(r'^https?://', u)
|
return [None]
|
||||||
else compat_urlparse.urljoin(m3u8_url, u))
|
|
||||||
|
|
||||||
split_discontinuity = self.get_param('hls_split_discontinuity', False)
|
|
||||||
|
|
||||||
# References:
|
# References:
|
||||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||||
|
@ -2014,68 +2023,16 @@ class InfoExtractor(object):
|
||||||
# media playlist and MUST NOT appear in master playlist thus we can
|
# media playlist and MUST NOT appear in master playlist thus we can
|
||||||
# clearly detect media playlist with this criterion.
|
# clearly detect media playlist with this criterion.
|
||||||
|
|
||||||
def _extract_m3u8_playlist_formats(format_url=None, m3u8_doc=None, video_id=None,
|
|
||||||
fatal=True, data=None, headers={}):
|
|
||||||
if not m3u8_doc:
|
|
||||||
if not format_url:
|
|
||||||
return []
|
|
||||||
res = self._download_webpage_handle(
|
|
||||||
format_url, video_id,
|
|
||||||
note=False,
|
|
||||||
errnote='Failed to download m3u8 playlist information',
|
|
||||||
fatal=fatal, data=data, headers=headers)
|
|
||||||
|
|
||||||
if res is False:
|
|
||||||
return []
|
|
||||||
|
|
||||||
m3u8_doc, urlh = res
|
|
||||||
format_url = urlh.geturl()
|
|
||||||
|
|
||||||
playlist_formats = []
|
|
||||||
i = (
|
|
||||||
0
|
|
||||||
if split_discontinuity
|
|
||||||
else None)
|
|
||||||
format_info = {
|
|
||||||
'index': i,
|
|
||||||
'key_data': None,
|
|
||||||
'files': [],
|
|
||||||
}
|
|
||||||
for line in m3u8_doc.splitlines():
|
|
||||||
if not line.startswith('#'):
|
|
||||||
format_info['files'].append(line)
|
|
||||||
elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
|
|
||||||
i += 1
|
|
||||||
playlist_formats.append(format_info)
|
|
||||||
format_info = {
|
|
||||||
'index': i,
|
|
||||||
'url': format_url,
|
|
||||||
'files': [],
|
|
||||||
}
|
|
||||||
playlist_formats.append(format_info)
|
|
||||||
return playlist_formats
|
|
||||||
|
|
||||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||||
|
formats = [{
|
||||||
playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc=m3u8_doc)
|
'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
|
||||||
|
'format_index': idx,
|
||||||
for format in playlist_formats:
|
|
||||||
format_id = []
|
|
||||||
if m3u8_id:
|
|
||||||
format_id.append(m3u8_id)
|
|
||||||
format_index = format.get('index')
|
|
||||||
if format_index:
|
|
||||||
format_id.append(str(format_index))
|
|
||||||
f = {
|
|
||||||
'format_id': '-'.join(format_id),
|
|
||||||
'format_index': format_index,
|
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
}
|
} for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
|
@ -2115,21 +2072,10 @@ class InfoExtractor(object):
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
if media_url:
|
if media_url:
|
||||||
manifest_url = format_url(media_url)
|
manifest_url = format_url(media_url)
|
||||||
format_id = []
|
formats.extend({
|
||||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
|
||||||
fatal=fatal, data=data, headers=headers)
|
|
||||||
|
|
||||||
for format in playlist_formats:
|
|
||||||
format_index = format.get('index')
|
|
||||||
for v in (m3u8_id, group_id, name):
|
|
||||||
if v:
|
|
||||||
format_id.append(v)
|
|
||||||
if format_index:
|
|
||||||
format_id.append(str(format_index))
|
|
||||||
f = {
|
|
||||||
'format_id': '-'.join(format_id),
|
|
||||||
'format_note': name,
|
'format_note': name,
|
||||||
'format_index': format_index,
|
'format_index': idx,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'manifest_url': m3u8_url,
|
'manifest_url': m3u8_url,
|
||||||
'language': media.get('LANGUAGE'),
|
'language': media.get('LANGUAGE'),
|
||||||
|
@ -2137,10 +2083,8 @@ class InfoExtractor(object):
|
||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
}
|
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||||
if media_type == 'AUDIO':
|
} for idx in _extract_m3u8_playlist_indices(manifest_url))
|
||||||
f['vcodec'] = 'none'
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
def build_stream_name():
|
def build_stream_name():
|
||||||
# Despite specification does not mention NAME attribute for
|
# Despite specification does not mention NAME attribute for
|
||||||
|
@ -2179,25 +2123,17 @@ class InfoExtractor(object):
|
||||||
or last_stream_inf.get('BANDWIDTH'), scale=1000)
|
or last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||||
manifest_url = format_url(line.strip())
|
manifest_url = format_url(line.strip())
|
||||||
|
|
||||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url, video_id=video_id,
|
for idx in _extract_m3u8_playlist_indices(manifest_url):
|
||||||
fatal=fatal, data=data, headers=headers)
|
format_id = [m3u8_id, None, idx]
|
||||||
|
|
||||||
for frmt in playlist_formats:
|
|
||||||
format_id = []
|
|
||||||
if m3u8_id:
|
|
||||||
format_id.append(m3u8_id)
|
|
||||||
format_index = frmt.get('index')
|
|
||||||
stream_name = build_stream_name()
|
|
||||||
# Bandwidth of live streams may differ over time thus making
|
# Bandwidth of live streams may differ over time thus making
|
||||||
# format_id unpredictable. So it's better to keep provided
|
# format_id unpredictable. So it's better to keep provided
|
||||||
# format_id intact.
|
# format_id intact.
|
||||||
if not live:
|
if not live:
|
||||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
stream_name = build_stream_name()
|
||||||
if format_index:
|
format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
|
||||||
format_id.append(str(format_index))
|
|
||||||
f = {
|
f = {
|
||||||
'format_id': '-'.join(format_id),
|
'format_id': '-'.join(map(str, filter(None, format_id))),
|
||||||
'format_index': format_index,
|
'format_index': idx,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'manifest_url': m3u8_url,
|
'manifest_url': m3u8_url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
|
|
Loading…
Reference in New Issue