Merge branch 'master' into ndtv

2024-08-22 13:53:03 +05:30 · 2024-08-22 13:53:03 +05:30 · 92a176955f
parent dbb1b746d4 f0bb28504c
commit 92a176955f
5 changed files with 91 additions and 20 deletions
--- a/yt_dlp/extractor/asobistage.py
+++ b/yt_dlp/extractor/asobistage.py
@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
        self._HEADERS['Authorization'] = f'Bearer {token}'
    def _real_extract(self, url):
-        video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
+        webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
        video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
        video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
-        webpage = self._download_webpage(url, video_id)
+
        event_data = traverse_obj(
            self._search_nextjs_data(webpage, video_id, default={}),
            ('props', 'pageProps', 'eventCMSData', {
--- a/yt_dlp/extractor/eurosport.py
+++ b/yt_dlp/extractor/eurosport.py
@ -3,7 +3,12 @@ from ..utils import traverse_obj
 class EurosportIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
+    _VALID_URL = r'''(?x)
        https?://(?:
            (?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
            eurosport\.tvn24\.pl
        )/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
    '''
    _TESTS = [{
        'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
        'info_dict': {
@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
            'duration': 105.0,
            'upload_date': '20230518',
        },
    }, {
        'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
        'only_matching': True,
    }, {
        'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
        'only_matching': True,
    }]
    _TOKEN = None
@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
    # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
    # but this method requires getting the sha256 hash
    _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR']  # Not complete list but it should work
    _GEO_BYPASS = False
    def _real_initialize(self):
        if EurosportIE._TOKEN is None:
@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
        for stream_type in json_data['attributes']['streaming']:
            if stream_type == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
            elif stream_type == 'dash':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
            elif stream_type == 'mss':
                fmts, subs = self._extract_ism_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    join_nonempty,
    time_seconds,
    try_call,
    unified_timestamp,
@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
 class RadikoIE(RadikoBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
    _TESTS = [{
        # QRR (文化放送) station provides <desc>
@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
    }]
    def _real_extract(self, url):
-        station, video_id = self._match_valid_url(url).groups()
+        station, timestring = self._match_valid_url(url).group('station', 'timestring')
-        vid_int = unified_timestamp(video_id, False)
+        video_id = join_nonempty(station, timestring)
        vid_int = unified_timestamp(timestring, False)
        prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
        auth_token, area_id = self._auth_client()
@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
                    'ft': radio_begin,
                    'end_at': radio_end,
                    'to': radio_end,
-                    'seek': video_id,
+                    'seek': timestring,
                },
            ),
        }
--- a/yt_dlp/extractor/tvn24.py
+++ b/yt_dlp/extractor/tvn24.py
@ -8,7 +8,7 @@ from ..utils import (
 class TVN24IE(InfoExtractor):
    _WORKING = False
-    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@ -1764,7 +1764,7 @@ class TwitterSpacesIE(TwitterBaseIE):
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
-        'params': {'skip_download': 'm3u8'},
+        'skip': 'No longer available',
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@ -1780,6 +1780,8 @@ class TwitterSpacesIE(TwitterBaseIE):
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
@ -1790,11 +1792,31 @@ class TwitterSpacesIE(TwitterBaseIE):
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
-            'uploader': '息根とめる🔪Twitchで復活',
+            'uploader': '息根とめる',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Video Space
        'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
        'info_dict': {
            'id': '1DXGydznBYWKM',
            'ext': 'mp4',
            'title': 'America and Israel’s “special relationship”',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': 'Candace Owens',
            'uploader_id': 'RealCandaceO',
            'live_status': 'was_live',
            'timestamp': 1723931351,
            'upload_date': '20240817',
            'release_timestamp': 1723932000,
            'release_date': '20240817',
            'protocol': 'm3u8_native',  # not ffmpeg, detected as video space
        },
        'params': {'skip_download': 'm3u8'},
    }]
@ -1854,13 +1876,17 @@ class TwitterSpacesIE(TwitterBaseIE):
            source = traverse_obj(
                self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
-            formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
+            is_audio_space = source and 'audio-space' in source
-                source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
+            formats = self._extract_m3u8_formats(
-                headers=headers, fatal=False) if source else []
+                source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
-            for fmt in formats:
+                # XXX: Some audio-only Spaces need ffmpeg as downloader
-                fmt.update({'vcodec': 'none', 'acodec': 'aac'})
+                entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
-                if not is_live:
+                live=is_live, headers=headers, fatal=False) if source else []
-                    fmt['container'] = 'm4a_dash'
+            if is_audio_space:
                for fmt in formats:
                    fmt.update({'vcodec': 'none', 'acodec': 'aac'})
                    if not is_live:
                        fmt['container'] = 'm4a_dash'
        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'