mirror of https://github.com/yt-dlp/yt-dlp.git
Generalize XML manifest processing code and improve XSPF parsing (closes #15794)
This commit is contained in:
parent
e0d198c18d
commit
47a5cb7734
|
@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
_TEST_CASES = [
|
_TEST_CASES = [
|
||||||
(
|
(
|
||||||
'foo_xspf',
|
'foo_xspf',
|
||||||
'https://example.org/src/',
|
'https://example.org/src/foo_xspf.xspf',
|
||||||
[{
|
[{
|
||||||
|
'id': 'foo_xspf',
|
||||||
|
'title': 'Pandemonium',
|
||||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
'duration': 202.416,
|
'duration': 202.416,
|
||||||
'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}],
|
'formats': [{
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.org/src/cd1/track%201.mp3',
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
'id': 'foo_xspf',
|
'id': 'foo_xspf',
|
||||||
'title': 'Pandemonium'
|
'title': 'Final Cartridge (Nichico Twelve Remix)',
|
||||||
},
|
|
||||||
{
|
|
||||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
'duration': 255.857,
|
'duration': 255.857,
|
||||||
'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}],
|
'formats': [{
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
'id': 'foo_xspf',
|
'id': 'foo_xspf',
|
||||||
'title': 'Final Cartridge (Nichico Twelve Remix)'
|
'title': 'Rebuilding Nightingale',
|
||||||
},
|
|
||||||
{
|
|
||||||
'description': 'Visit http://bigbrother404.bandcamp.com',
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
'duration': 287.915,
|
'duration': 287.915,
|
||||||
'formats': [
|
'formats': [{
|
||||||
{'url': 'https://example.org/src/track3.mp3'},
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
{'url': 'https://example.com/track3.mp3'}
|
'url': 'https://example.org/src/track3.mp3',
|
||||||
],
|
}, {
|
||||||
'id': 'foo_xspf',
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
'title': 'Rebuilding Nightingale'
|
'url': 'https://example.com/track3.mp3',
|
||||||
|
}]
|
||||||
}]
|
}]
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
for xspf_file, xspf_base_url, expected_entries in _TEST_CASES:
|
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||||
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
entries = self.ie._parse_xspf(
|
entries = self.ie._parse_xspf(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
xspf_file, xspf_base_url)
|
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
||||||
expect_value(self, entries, expected_entries, None)
|
expect_value(self, entries, expected_entries, None)
|
||||||
for i in range(len(entries)):
|
for i in range(len(entries)):
|
||||||
expect_dict(self, entries[i], expected_entries[i])
|
expect_dict(self, entries[i], expected_entries[i])
|
||||||
|
|
|
@ -1706,22 +1706,24 @@ class InfoExtractor(object):
|
||||||
})
|
})
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
||||||
xspf = self._download_xml(
|
xspf = self._download_xml(
|
||||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
xspf_url, playlist_id, 'Downloading xpsf playlist',
|
||||||
'Unable to download xspf manifest', fatal=fatal)
|
'Unable to download xspf manifest', fatal=fatal)
|
||||||
if xspf is False:
|
if xspf is False:
|
||||||
return []
|
return []
|
||||||
return self._parse_xspf(xspf, playlist_id, base_url(playlist_url))
|
return self._parse_xspf(
|
||||||
|
xspf, playlist_id, xspf_url=xspf_url,
|
||||||
|
xspf_base_url=base_url(xspf_url))
|
||||||
|
|
||||||
def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''):
|
def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
|
||||||
NS_MAP = {
|
NS_MAP = {
|
||||||
'xspf': 'http://xspf.org/ns/0/',
|
'xspf': 'http://xspf.org/ns/0/',
|
||||||
's1': 'http://static.streamone.nl/player/ns/0',
|
's1': 'http://static.streamone.nl/player/ns/0',
|
||||||
}
|
}
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||||
title = xpath_text(
|
title = xpath_text(
|
||||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||||
description = xpath_text(
|
description = xpath_text(
|
||||||
|
@ -1731,12 +1733,18 @@ class InfoExtractor(object):
|
||||||
duration = float_or_none(
|
duration = float_or_none(
|
||||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'url': urljoin(playlist_base_url, location.text),
|
for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
|
||||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
format_url = urljoin(xspf_base_url, location.text)
|
||||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
if not format_url:
|
||||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
continue
|
||||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'manifest_url': xspf_url,
|
||||||
|
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||||
|
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||||
|
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
|
@ -1750,18 +1758,18 @@ class InfoExtractor(object):
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note=note or 'Downloading MPD manifest',
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote=errnote or 'Failed to download MPD manifest',
|
||||||
fatal=fatal)
|
fatal=fatal)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
mpd, urlh = res
|
mpd_doc, urlh = res
|
||||||
mpd_base_url = base_url(urlh.geturl())
|
mpd_base_url = base_url(urlh.geturl())
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
return self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||||
|
@ -2035,17 +2043,16 @@ class InfoExtractor(object):
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_xml_handle(
|
||||||
ism_url, video_id,
|
ism_url, video_id,
|
||||||
note=note or 'Downloading ISM manifest',
|
note=note or 'Downloading ISM manifest',
|
||||||
errnote=errnote or 'Failed to download ISM manifest',
|
errnote=errnote or 'Failed to download ISM manifest',
|
||||||
fatal=fatal)
|
fatal=fatal)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
ism, urlh = res
|
ism_doc, urlh = res
|
||||||
|
|
||||||
return self._parse_ism_formats(
|
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
|
||||||
|
|
||||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor):
|
||||||
return smil
|
return smil
|
||||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._parse_xspf(doc, video_id, compat_str(full_response.geturl())),
|
self._parse_xspf(
|
||||||
|
doc, video_id, xspf_url=url,
|
||||||
|
xspf_base_url=compat_str(full_response.geturl())),
|
||||||
video_id)
|
video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
info_dict['formats'] = self._parse_mpd_formats(
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
|
|
Loading…
Reference in New Issue