[extractor/doodstream] Remove extractor

It was added in youtube-dlc, likely without sufficient scrutiny. Closes #3808, Closes #5251, Closes #5403
2022-11-09 15:48:25 +05:30 · 2022-11-09 15:48:25 +05:30 · ed6bec168d
parent 0d8affc17f
commit ed6bec168d
3 changed files with 51 additions and 89 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -477,7 +477,6 @@ from .digitalconcerthall import DigitalConcertHallIE
 from .discovery import DiscoveryIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
-from .doodstream import DoodStreamIE
 from .dropbox import DropboxIE
 from .dropout import (
    DropoutSeasonIE,
@@ -2023,7 +2022,7 @@ from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
 from .unscripted import UnscriptedNewsVideoIE
-from .unsupported import KnownDRMIE
+from .unsupported import KnownDRMIE, KnownPiracyIE
 from .uol import UOLIE
 from .uplynk import (
    UplynkIE,
--- a/yt_dlp/extractor/doodstream.py
+++ b/yt_dlp/extractor/doodstream.py
@@ -1,77 +0,0 @@
-import string
-import random
-import time
-
-from .common import InfoExtractor
-
-
-class DoodStreamIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)'
-    _TESTS = [{
-        'url': 'http://dood.to/e/5s1wmbdacezb',
-        'md5': '4568b83b31e13242b3f1ff96c55f0595',
-        'info_dict': {
-            'id': '5s1wmbdacezb',
-            'ext': 'mp4',
-            'title': 'Kat Wonders - Monthly May 2020',
-            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
-            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
-        }
-    }, {
-        'url': 'http://dood.watch/d/5s1wmbdacezb',
-        'md5': '4568b83b31e13242b3f1ff96c55f0595',
-        'info_dict': {
-            'id': '5s1wmbdacezb',
-            'ext': 'mp4',
-            'title': 'Kat Wonders - Monthly May 2020',
-            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
-            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
-        }
-    }, {
-        'url': 'https://dood.to/d/jzrxn12t2s7n',
-        'md5': '3207e199426eca7c2aa23c2872e6728a',
-        'info_dict': {
-            'id': 'jzrxn12t2s7n',
-            'ext': 'mp4',
-            'title': 'Stacy Cruz Cute ALLWAYSWELL',
-            'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
-            'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
-        }
-    }, {
-        'url': 'https://dood.so/d/jzrxn12t2s7n',
-        'only_matching': True
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url = f'https://dood.to/e/{video_id}'
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_meta(
-            ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage)
-        thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
-        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
-        description = self._html_search_meta(
-            ['og:description', 'description', 'twitter:description'], webpage, default=None)
-
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
-            'referer': url
-        }
-
-        pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
-        final_url = ''.join((
-            self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
-            *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
-            f'?token={token}&expiry={int(time.time() * 1000)}',
-        ))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'url': final_url,
-            'http_headers': headers,
-            'ext': 'mp4',
-            'description': description,
-            'thumbnail': thumb,
-        }
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -1,11 +1,32 @@
 from .common import InfoExtractor
-from ..utils import classproperty, ExtractorError
+from ..utils import ExtractorError, classproperty, remove_start


-class KnownDRMIE(InfoExtractor):
+class UnsupportedInfoExtractor(InfoExtractor):
    IE_DESC = False
-    IE_NAME = 'unsupported:drm'
-    UNSUPPORTED_URLS = (
+    URLS = ()  # Redefine in subclasses
+
+    @classproperty
+    def IE_NAME(cls):
+        return remove_start(super().IE_NAME, 'Known')
+
+    @classproperty
+    def _VALID_URL(cls):
+        return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})'
+
+
+LF = '\n       '
+
+
+class KnownDRMIE(UnsupportedInfoExtractor):
+    """Sites that are known to use DRM for all their videos
+
+    Add to this list only if:
+    * You are reasonably certain that the site uses DRM for ALL their videos
+    * Multiple users have asked about this site on github/reddit/discord
+    """
+
+    URLS = (
        r'play\.hbomax\.com',
        r'channel(?:4|5)\.com',
        r'peacocktv\.com',
@@ -82,12 +103,31 @@ class KnownDRMIE(InfoExtractor):
        'only_matching': True,
    }]

-    @classproperty
-    def _VALID_URL(cls):
-        return rf'https?://(?:www\.)?(?:{"|".join(cls.UNSUPPORTED_URLS)})'
+    def _real_extract(self, url):
+        raise ExtractorError(
+            f'The requested site is known to use DRM protection. '
+            f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}'
+            f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, '
+            'unless you have evidence that the video is not DRM protected', expected=True)
+
+
+class KnownPiracyIE(UnsupportedInfoExtractor):
+    """Sites that have been deemed to be piracy
+
+    In order for this to not end up being a catalog of piracy sites,
+    only sites that were once supported should be added to this list
+    """
+
+    URLS = (
+        r'dood\.(?:to|watch|so|pm|wf|ru)',
+    )
+
+    _TESTS = [{
+        'url': 'http://dood.to/e/5s1wmbdacezb',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        raise ExtractorError(
-            f'The requested site is known to use DRM protection. It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported by yt-dlp. '
-            f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, unless you have evidence that it is not DRM protected.',
-            expected=True)
+            f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}'
+            f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True)