mirror of https://github.com/yt-dlp/yt-dlp.git
[ie/ARDBetaMediathek] Fix series extraction (#8687)
Closes #7666. Authored by: lstrojny
This commit is contained in:
parent
00cdda4f6f
commit
1f8bd8eba8
|
@ -292,7 +292,7 @@ class ARDIE(InfoExtractor):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 7.12.2023
|
# available till 7.12.2023
|
||||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
'md5': '94812e6438488fb923c361a44469614b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'maischberger-video-424',
|
'id': 'maischberger-video-424',
|
||||||
'display_id': 'maischberger-video-424',
|
'display_id': 'maischberger-video-424',
|
||||||
|
@ -403,26 +403,25 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'''(?x)https://
|
_VALID_URL = r'''(?x)https://
|
||||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||||
(?:(?P<client>[^/]+)/)?
|
(?:(?P<client>[^/]+)/)?
|
||||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
(?:player|live|video|(?P<playlist>sendung|serie|sammlung))/
|
||||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
|
||||||
'id': '12172961',
|
'id': '12939099',
|
||||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
'title': 'Liebe auf vier Pfoten',
|
||||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||||
'duration': 5241,
|
'duration': 5222,
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||||
'timestamp': 1670710500,
|
'timestamp': 1701343800,
|
||||||
'upload_date': '20221210',
|
'upload_date': '20231130',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 12,
|
'episode': 'Liebe auf vier Pfoten',
|
||||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
|
||||||
'series': 'Filme im MDR'
|
'series': 'Filme im MDR'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -454,7 +453,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'duration': 915,
|
'duration': 915,
|
||||||
'episode': 'tagesschau, 20:00 Uhr',
|
'episode': 'tagesschau, 20:00 Uhr',
|
||||||
'series': 'tagesschau',
|
'series': 'tagesschau',
|
||||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||||
|
@ -475,6 +474,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
# playlist of type 'sendung'
|
# playlist of type 'sendung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# playlist of type 'serie'
|
||||||
|
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# playlist of type 'sammlung'
|
# playlist of type 'sammlung'
|
||||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||||
|
@ -487,10 +490,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
|
||||||
""" Query the ARD server for playlist information
|
""" Query the ARD server for playlist information
|
||||||
and return the data in "raw" format """
|
and return the data in "raw" format """
|
||||||
if mode == 'sendung':
|
assert mode in ('sendung', 'serie', 'sammlung')
|
||||||
|
if mode in ('sendung', 'serie'):
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
'query': '''{
|
'query': '''{
|
||||||
showPage(
|
showPage(
|
||||||
|
@ -507,7 +511,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
links { target { id href title } }
|
links { target { id href title } }
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
graphQL = json.dumps({
|
graphQL = json.dumps({
|
||||||
|
@ -528,7 +532,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
type
|
type
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}''' % (client, playlist_id, pageNumber),
|
}}''' % (client, playlist_id, page_number),
|
||||||
}).encode()
|
}).encode()
|
||||||
# Resources for ARD graphQL debugging:
|
# Resources for ARD graphQL debugging:
|
||||||
# https://api-test.ardmediathek.de/public-gateway
|
# https://api-test.ardmediathek.de/public-gateway
|
||||||
|
@ -538,7 +542,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
data=graphQL,
|
data=graphQL,
|
||||||
headers={'Content-Type': 'application/json'})['data']
|
headers={'Content-Type': 'application/json'})['data']
|
||||||
# align the structure of the returned data:
|
# align the structure of the returned data:
|
||||||
if mode == 'sendung':
|
if mode in ('sendung', 'serie'):
|
||||||
show_page = show_page['showPage']
|
show_page = show_page['showPage']
|
||||||
else: # mode == 'sammlung'
|
else: # mode == 'sammlung'
|
||||||
show_page = show_page['morePage']['widget']
|
show_page = show_page['morePage']['widget']
|
||||||
|
@ -546,12 +550,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
||||||
""" Collects all playlist entries and returns them as info dict.
|
""" Collects all playlist entries and returns them as info dict.
|
||||||
Supports playlists of mode 'sendung' and 'sammlung', and also nested
|
Supports playlists of mode 'sendung', 'serie', and 'sammlung',
|
||||||
playlists. """
|
as well as nested playlists. """
|
||||||
entries = []
|
entries = []
|
||||||
pageNumber = 0
|
pageNumber = 0
|
||||||
while True: # iterate by pageNumber
|
while True: # iterate by pageNumber
|
||||||
show_page = self._ARD_load_playlist_snipped(
|
show_page = self._ARD_load_playlist_snippet(
|
||||||
playlist_id, display_id, client, mode, pageNumber)
|
playlist_id, display_id, client, mode, pageNumber)
|
||||||
for teaser in show_page['teasers']: # process playlist items
|
for teaser in show_page['teasers']: # process playlist items
|
||||||
if '/compilation/' in teaser['links']['target']['href']:
|
if '/compilation/' in teaser['links']['target']['href']:
|
||||||
|
|
Loading…
Reference in New Issue