From 0fa9a1e23625f0ba4516e5107ce447ac693e7ec1 Mon Sep 17 00:00:00 2001
From: The Hatsune Daishi
Date: Sun, 2 May 2021 23:13:37 +0900
Subject: [PATCH] [whowatch] Add extractor #292

closes #223

Authored by: nao20010128nao

Modified from: https://github.com/nao20010128nao/ytdl-patched/blob/9e4a0e061a558cdb05a618e27f47ca0ac56ece94/youtube_dl/extractor/whowatch.py
---
 yt_dlp/downloader/__init__.py  |   3 +
 yt_dlp/downloader/external.py  |   2 +-
 yt_dlp/extractor/common.py     |   2 +-
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/whowatch.py   | 110 +++++++++++++++++++++++++++++++++
 5 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 yt_dlp/extractor/whowatch.py

diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
index 510c7b601..c7ba91862 100644
--- a/yt_dlp/downloader/__init__.py
+++ b/yt_dlp/downloader/__init__.py
@@ -31,6 +31,7 @@ from .external import (
 
 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
+    'rtmp_ffmpeg': FFmpegFD,
     'm3u8_native': HlsFD,
     'm3u8': FFmpegFD,
     'mms': RtspFD,
@@ -46,6 +47,7 @@ PROTOCOL_MAP = {
 def shorten_protocol_name(proto, simplify=False):
     short_protocol_names = {
         'm3u8_native': 'm3u8_n',
+        'rtmp_ffmpeg': 'rtmp_f',
         'http_dash_segments': 'dash',
         'niconico_dmc': 'dmc',
     }
@@ -54,6 +56,7 @@ def shorten_protocol_name(proto, simplify=False):
             'https': 'http',
             'ftps': 'ftp',
             'm3u8_native': 'm3u8',
+            'rtmp_ffmpeg': 'rtmp',
             'm3u8_frag_urls': 'm3u8',
             'dash_frag_urls': 'dash',
         })
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index d879bc66d..89f3ef28d 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -338,7 +338,7 @@ class HttpieFD(ExternalFD):
 
 
 class FFmpegFD(ExternalFD):
-    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'mms')
+    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms')
 
     @classmethod
     def available(cls, path=None):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2ca25951b..642c94930 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -157,7 +157,7 @@ class InfoExtractor(object):
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
-                                 "http", "https", "rtsp", "rtmp", "rtmpe",
+                                 "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe",
                                  "m3u8", "m3u8_native" or "http_dash_segments".
                     * fragment_base_url
                                  Base URL for fragments. Each fragment's path
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 46a401e89..79f9c74a3 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1602,6 +1602,7 @@ from .weibo import (
 )
 from .weiqitv import WeiqiTVIE
 from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
 from .wistia import (
     WistiaIE,
     WistiaPlaylistIE,
diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py
new file mode 100644
index 000000000..8080f289a
--- /dev/null
+++ b/yt_dlp/extractor/whowatch.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+    try_get,
+    ExtractorError,
+)
+from ..compat import compat_str
+
+
+class WhoWatchIE(InfoExtractor):
+    IE_NAME = 'whowatch'
+    _VALID_URL = r'https?://whowatch\.tv/viewer/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://whowatch.tv/viewer/21450171',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict (formats and metadata) for a WhoWatch live URL."""
+        video_id = self._match_id(url)
+        # NOTE(review): the page body is discarded; presumably fetched only for
+        # its side effects (e.g. cookies) - confirm
+        self._download_webpage(url, video_id)
+        metadata = self._download_json('https://api.whowatch.tv/lives/%s' % video_id, video_id)
+        live_data = self._download_json('https://api.whowatch.tv/lives/%s/play' % video_id, video_id)
+
+        # Candidates in order: share title without its first and last characters, then the live title
+        title = try_get(None, (
+            lambda x: live_data['share_info']['live_title'][1:-1],
+            lambda x: metadata['live']['title'],
+        ), compat_str)
+
+        hls_url = live_data.get('hls_url')
+        if not hls_url:
+            raise ExtractorError(live_data.get('error_message') or 'The user is offline.', expected=True)
+
+        QUALITIES = qualities(['low', 'medium', 'high', 'veryhigh'])
+        formats = []
+
+        for i, fmt in enumerate(live_data.get('streams') or []):
+            name = fmt.get('quality') or fmt.get('name') or compat_str(i)
+            # Keep this separate from the top-level hls_url, which is still
+            # needed for the fallback manifest after the loop
+            stream_hls_url = fmt.get('hls_url')
+            rtmp_url = fmt.get('rtmp_url')
+            audio_only = fmt.get('audio_only')
+            quality = QUALITIES(fmt.get('quality'))
+
+            if stream_hls_url:
+                hls_fmts = self._extract_m3u8_formats(
+                    stream_hls_url, video_id, ext='mp4', entry_protocol='m3u8',
+                    m3u8_id='hls-%s' % name, quality=quality)
+                formats.extend(hls_fmts)
+            else:
+                hls_fmts = []
+
+            # RTMP url for audio_only is same as high format, so skip it
+            if rtmp_url and not audio_only:
+                formats.append({
+                    'url': rtmp_url,
+                    'format_id': 'rtmp-%s' % name,
+                    'ext': 'mp4',
+                    'protocol': 'rtmp_ffmpeg',  # ffmpeg can, while rtmpdump can't
+                    'vcodec': 'h264',
+                    'acodec': 'aac',
+                    'quality': quality,
+                    'format_note': fmt.get('label'),
+                    # note: HLS and RTMP have same resolution for now, so it's acceptable
+                    'width': try_get(hls_fmts, lambda x: x[0]['width'], int),
+                    'height': try_get(hls_fmts, lambda x: x[0]['height'], int),
+                })
+
+        # This contains the same formats as the above manifests and is used only as a fallback
+        formats.extend(self._extract_m3u8_formats(
+            hls_url, video_id, ext='mp4', entry_protocol='m3u8',
+            m3u8_id='hls'))
+        self._remove_duplicate_formats(formats)
+        self._sort_formats(formats)
+
+        uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str)
+        if uploader_url:
+            uploader_url = 'https://whowatch.tv/profile/%s' % uploader_url
+        # Only stringify a real id; compat_str(None) would yield the text 'None'
+        uploader_id = try_get(metadata, lambda x: x['live']['user']['id'], int)
+        if uploader_id is not None:
+            uploader_id = compat_str(uploader_id)
+        uploader = try_get(metadata, lambda x: x['live']['user']['name'], compat_str)
+        thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], compat_str)
+        timestamp = int_or_none(try_get(metadata, lambda x: x['live']['started_at'], int), scale=1000)
+        view_count = try_get(metadata, lambda x: x['live']['total_view_count'], int)
+        comment_count = try_get(metadata, lambda x: x['live']['comment_count'], int)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'uploader_id': uploader_id,
+            'uploader_url': uploader_url,
+            'uploader': uploader,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'is_live': True,
+        }