[extractor/common] Fix url meta field for unfragmented DASH formats (closes #20346)

This commit is contained in:
Sergey M․ 2019-03-15 00:42:14 +07:00
parent 6db03a29d1
commit 79d2077edc
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 17 additions and 8 deletions

View File

@ -108,10 +108,13 @@ class InfoExtractor(object):
for RTMP - RTMP URL, for RTMP - RTMP URL,
for HLS - URL of the M3U8 media playlist, for HLS - URL of the M3U8 media playlist,
for HDS - URL of the F4M manifest, for HDS - URL of the F4M manifest,
for DASH - URL of the MPD manifest or for DASH
base URL representing the media - HTTP URL to plain file media (in case of
if MPD manifest is parsed from unfragmented media)
a string, - URL of the MPD manifest or base URL
representing the media if MPD manifest
is parsed from a string (in case of
fragmented media)
for MSS - URL of the ISM manifest. for MSS - URL of the ISM manifest.
* manifest_url * manifest_url
The URL of the manifest file in case of The URL of the manifest file in case of
@ -2137,8 +2140,6 @@ class InfoExtractor(object):
bandwidth = int_or_none(representation_attrib.get('bandwidth')) bandwidth = int_or_none(representation_attrib.get('bandwidth'))
f = { f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
# NB: mpd_url may be empty when MPD manifest is parsed from a string
'url': mpd_url or base_url,
'manifest_url': mpd_url, 'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type), 'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')), 'width': int_or_none(representation_attrib.get('width')),
@ -2277,10 +2278,14 @@ class InfoExtractor(object):
fragment['duration'] = segment_duration fragment['duration'] = segment_duration
fragments.append(fragment) fragments.append(fragment)
representation_ms_info['fragments'] = fragments representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media. # If there is a fragments key available then we correctly recognized fragmented media.
# No fragments key is present in this case. # Otherwise we will assume unfragmented media with direct access. Technically, such
# assumption is not necessarily correct since we may simply have no support for
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
if 'fragments' in representation_ms_info: if 'fragments' in representation_ms_info:
f.update({ f.update({
# NB: mpd_url may be empty when MPD manifest is parsed from a string
'url': mpd_url or base_url,
'fragment_base_url': base_url, 'fragment_base_url': base_url,
'fragments': [], 'fragments': [],
'protocol': 'http_dash_segments', 'protocol': 'http_dash_segments',
@ -2291,6 +2296,10 @@ class InfoExtractor(object):
f['url'] = initialization_url f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments']) f['fragments'].extend(representation_ms_info['fragments'])
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
# is not necessarily unique within a Period thus formats with # is not necessarily unique within a Period thus formats with
# the same `format_id` are quite possible. There are numerous examples # the same `format_id` are quite possible. There are numerous examples