From c6737310619022248f5d0fd13872073cac168453 Mon Sep 17 00:00:00 2001
From: Subrat Lima <74418100+subrat-lima@users.noreply.github.com>
Date: Tue, 12 Nov 2024 00:38:18 +0530
Subject: [PATCH] [ie/spreaker] Support podcast and feed pages (#10968)

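Extend SpreakerShowIE to also match `spreaker.com/podcast/<slug>--<id>`
and `spreaker.com/show/<id>/episodes/feed` URLs, and forward an optional
`key` URL query parameter to the show episodes API request.

Remove SpreakerShowPageIE, which scraped the show ID out of
`spreaker.com/show/<slug>` webpages and redirected to SpreakerShowIE.

Closes #10925
Authored by: subrat-lima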
---
 yt_dlp/extractor/_extractors.py |  1 -
 yt_dlp/extractor/spreaker.py    | 50 +++++++++++++++++----------------
 2 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4543c8587..0b935fe3a 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1940,7 +1940,6 @@ from .spotify import (
 from .spreaker import (
     SpreakerIE,
     SpreakerShowIE,
-    SpreakerShowPageIE,
 )
 from .springboardplatform import SpringboardPlatformIE
 from .sprout import SproutIE
diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py
index ff6e1f423..c64c2fcd2 100644
--- a/yt_dlp/extractor/spreaker.py
+++ b/yt_dlp/extractor/spreaker.py
@@ -2,6 +2,7 @@ import itertools
 
 from .common import InfoExtractor
 from ..utils import (
+    filter_dict,
     float_or_none,
     int_or_none,
     parse_qs,
@@ -119,29 +120,46 @@ class SpreakerIE(InfoExtractor):
     def _real_extract(self, url):
         episode_id = self._match_id(url)
         data = self._download_json(
-            f'https://api.spreaker.com/v2/episodes/{episode_id}',
-            episode_id, query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
+            f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
+            query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
         return _extract_episode(data, episode_id)
 
 
 class SpreakerShowIE(InfoExtractor):
-    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
+    _VALID_URL = [
+        r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
+        r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
+        r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
+    ]
     _TESTS = [{
         'url': 'https://api.spreaker.com/show/4652058',
         'info_dict': {
             'id': '4652058',
         },
         'playlist_mincount': 118,
+    }, {
+        'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
+        'info_dict': {
+            'id': '5918323',
+        },
+        'playlist_mincount': 60,
+    }, {
+        'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
+        'info_dict': {
+            'id': '5887186',
+        },
+        'playlist_mincount': 290,
     }]
 
-    def _entries(self, show_id):
+    def _entries(self, show_id, key=None):
         for page_num in itertools.count(1):
             episodes = self._download_json(
                 f'https://api.spreaker.com/show/{show_id}/episodes',
-                show_id, note=f'Downloading JSON page {page_num}', query={
+                show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({
                     'page': page_num,
                     'max_per_page': 100,
-                })
+                    'key': key,
+                }))
             pager = try_get(episodes, lambda x: x['response']['pager'], dict)
             if not pager:
                 break
@@ -157,21 +175,5 @@ class SpreakerShowIE(InfoExtractor):
 
     def _real_extract(self, url):
         show_id = self._match_id(url)
-        return self.playlist_result(self._entries(show_id), playlist_id=show_id)
-
-
-class SpreakerShowPageIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://www.spreaker.com/show/success-with-music',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        show_id = self._search_regex(
-            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
-        return self.url_result(
-            f'https://api.spreaker.com/show/{show_id}',
-            ie=SpreakerShowIE.ie_key(), video_id=show_id)
+        key = traverse_obj(parse_qs(url), ('key', 0))
+        return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)