mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor/common] Extract HLS subtitle tracks
_extract_m3u8_formats is renamed to _extract_m3u8_formats_and_subtitles and extended to handle subtitle tracks instead of skipping them; a wrapper with the old name is provided for compatibility. _parse_m3u8_formats is likewise renamed (to _parse_m3u8_formats_and_subtitles) and extended, but no compatibility wrapper is added for it; the test suite is adjusted to test the enhanced method instead.
This commit is contained in:
parent
19bb39202d
commit
a0c3b2d5cf
|
@ -684,17 +684,19 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'width': 1920,
|
'width': 1920,
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
'vcodec': 'avc1.64002a',
|
'vcodec': 'avc1.64002a',
|
||||||
}]
|
}],
|
||||||
|
{}
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
|
||||||
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_m3u8_formats(
|
formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
|
||||||
f.read(), m3u8_url, ext='mp4')
|
f.read(), m3u8_url, ext='mp4')
|
||||||
self.ie._sort_formats(formats)
|
self.ie._sort_formats(formats)
|
||||||
expect_value(self, formats, expected_formats, None)
|
expect_value(self, formats, expected_formats, None)
|
||||||
|
expect_value(self, subs, expected_subs, None)
|
||||||
|
|
||||||
def test_parse_mpd_formats(self):
|
def test_parse_mpd_formats(self):
|
||||||
_TEST_CASES = [
|
_TEST_CASES = [
|
||||||
|
|
|
@ -1879,11 +1879,21 @@ class InfoExtractor(object):
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, *args, **kwargs):
|
||||||
entry_protocol='m3u8', preference=None, quality=None,
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
|
||||||
m3u8_id=None, note=None, errnote=None,
|
if subs:
|
||||||
fatal=True, live=False, data=None, headers={},
|
self.report_warning(bug_reports_message(
|
||||||
|
"Ignoring subtitle tracks found in the HLS manifest; "
|
||||||
|
"if any subtitle tracks are missing,"
|
||||||
|
))
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _extract_m3u8_formats_and_subtitles(
|
||||||
|
self, m3u8_url, video_id, ext=None, entry_protocol='m3u8',
|
||||||
|
preference=None, quality=None, m3u8_id=None, note=None,
|
||||||
|
errnote=None, fatal=True, live=False, data=None, headers={},
|
||||||
query={}):
|
query={}):
|
||||||
|
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note=note or 'Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
|
@ -1891,30 +1901,34 @@ class InfoExtractor(object):
|
||||||
fatal=fatal, data=data, headers=headers, query=query)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
m3u8_doc, urlh = res
|
m3u8_doc, urlh = res
|
||||||
m3u8_url = urlh.geturl()
|
m3u8_url = urlh.geturl()
|
||||||
|
|
||||||
return self._parse_m3u8_formats(
|
return self._parse_m3u8_formats_and_subtitles(
|
||||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||||
preference=preference, quality=quality, m3u8_id=m3u8_id,
|
preference=preference, quality=quality, m3u8_id=m3u8_id,
|
||||||
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
|
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
|
||||||
headers=headers, query=query, video_id=video_id)
|
headers=headers, query=query, video_id=video_id)
|
||||||
|
|
||||||
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
def _parse_m3u8_formats_and_subtitles(
|
||||||
entry_protocol='m3u8', preference=None, quality=None,
|
self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8',
|
||||||
m3u8_id=None, live=False, note=None, errnote=None,
|
preference=None, quality=None, m3u8_id=None, live=False, note=None,
|
||||||
fatal=True, data=None, headers={}, query={}, video_id=None):
|
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||||
|
video_id=None):
|
||||||
|
|
||||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
if (not self._downloader.params.get('allow_unplayable_formats')
|
if (not self._downloader.params.get('allow_unplayable_formats')
|
||||||
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)): # Apple FairPlay
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
format_url = lambda u: (
|
format_url = lambda u: (
|
||||||
u
|
u
|
||||||
if re.match(r'^https?://', u)
|
if re.match(r'^https?://', u)
|
||||||
|
@ -2001,7 +2015,7 @@ class InfoExtractor(object):
|
||||||
}
|
}
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
return formats
|
return formats, subtitles
|
||||||
|
|
||||||
groups = {}
|
groups = {}
|
||||||
last_stream_inf = {}
|
last_stream_inf = {}
|
||||||
|
@ -2013,6 +2027,15 @@ class InfoExtractor(object):
|
||||||
if not (media_type and group_id and name):
|
if not (media_type and group_id and name):
|
||||||
return
|
return
|
||||||
groups.setdefault(group_id, []).append(media)
|
groups.setdefault(group_id, []).append(media)
|
||||||
|
# <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
|
||||||
|
if media_type == 'SUBTITLES':
|
||||||
|
lang = media['LANGUAGE'] # XXX: normalise?
|
||||||
|
url = format_url(media['URI'])
|
||||||
|
sub_info = {
|
||||||
|
'url': url,
|
||||||
|
'ext': determine_ext(url),
|
||||||
|
}
|
||||||
|
subtitles.setdefault(lang, []).append(sub_info)
|
||||||
if media_type not in ('VIDEO', 'AUDIO'):
|
if media_type not in ('VIDEO', 'AUDIO'):
|
||||||
return
|
return
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
|
@ -2160,7 +2183,7 @@ class InfoExtractor(object):
|
||||||
formats.append(http_f)
|
formats.append(http_f)
|
||||||
|
|
||||||
last_stream_inf = {}
|
last_stream_inf = {}
|
||||||
return formats
|
return formats, subtitles
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _xpath_ns(path, namespace=None):
|
def _xpath_ns(path, namespace=None):
|
||||||
|
|
Loading…
Reference in New Issue