mirror of https://github.com/yt-dlp/yt-dlp.git
parent
9073ae6458
commit
954e57e405
|
@ -1784,8 +1784,7 @@ The following extractors use this feature:
|
|||
* `version`: The video version to extract - `uncut` or `simulcast`
|
||||
|
||||
#### crunchyrollbeta (Crunchyroll)
|
||||
* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
|
||||
* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
|
||||
* `hardsub`: One or more hardsub versions to extract (in order of preference), or `all` (default: `None` = no hardsubs will be extracted), e.g. `crunchyrollbeta:hardsub=en-US,de-DE`
|
||||
|
||||
#### vikichannel
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import base64
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
|
@ -7,12 +8,11 @@ from ..utils import (
|
|||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
remove_start,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
|
@ -27,6 +27,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_AUTH_HEADERS = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_IS_PREMIUM = None
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
|
@ -84,11 +85,16 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
|
||||
auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
|
||||
if self.is_logged_in:
|
||||
grant_type = 'etp_rt_cookie'
|
||||
else:
|
||||
grant_type = 'client_id'
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
headers=auth_headers, data=f'grant_type={grant_type}'.encode())
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
|
@ -97,6 +103,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
|
@ -135,62 +142,72 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
raise ExtractorError(f'Unexpected response when downloading {note} JSON')
|
||||
return result
|
||||
|
||||
def _extract_formats(self, stream_response, display_id=None):
|
||||
requested_formats = self._configuration_arg('format') or ['vo_adaptive_hls']
|
||||
available_formats = {}
|
||||
for stream_type, streams in traverse_obj(
|
||||
stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
|
||||
if stream_type not in requested_formats:
|
||||
def _extract_chapters(self, internal_id):
|
||||
# if no skip events are available, a 403 xml error is returned
|
||||
skip_events = self._download_json(
|
||||
f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
|
||||
internal_id, note='Downloading chapter info', fatal=False, errnote=False)
|
||||
if not skip_events:
|
||||
return None
|
||||
|
||||
chapters = []
|
||||
for event in ('recap', 'intro', 'credits', 'preview'):
|
||||
start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
|
||||
end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
|
||||
# some chapters have no start and/or ending time, they will just be ignored
|
||||
if start is None or end is None:
|
||||
continue
|
||||
for stream in traverse_obj(streams, lambda _, v: v['url']):
|
||||
hardsub_lang = stream.get('hardsub_locale') or ''
|
||||
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
|
||||
available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
|
||||
chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})
|
||||
|
||||
return chapters
|
||||
|
||||
def _extract_stream(self, identifier, display_id=None):
|
||||
if not display_id:
|
||||
display_id = identifier
|
||||
|
||||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])
|
||||
|
||||
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
|
||||
if '' in available_formats and 'all' not in requested_hardsubs:
|
||||
hardsub_langs = [lang for lang in available_formats if lang]
|
||||
if hardsub_langs and 'all' not in requested_hardsubs:
|
||||
full_format_langs = set(requested_hardsubs)
|
||||
self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
|
||||
self.to_screen(
|
||||
'To get all formats of a hardsub language, use '
|
||||
'To extract formats of a hardsub language, use '
|
||||
'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
|
||||
'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
|
||||
only_once=True)
|
||||
else:
|
||||
full_format_langs = set(map(str.lower, available_formats))
|
||||
|
||||
audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
|
||||
audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
|
||||
hardsub_preference = qualities(requested_hardsubs[::-1])
|
||||
formats = []
|
||||
for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
|
||||
if stream_type.endswith('hls'):
|
||||
if hardsub_lang.lower() in full_format_langs:
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
stream_url, display_id, 'mp4', m3u8_id=format_id,
|
||||
fatal=False, note=f'Downloading {format_id} HLS manifest')
|
||||
else:
|
||||
adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
|
||||
elif stream_type.endswith('dash'):
|
||||
adaptive_formats = self._extract_mpd_formats(
|
||||
stream_url, display_id, mpd_id=format_id,
|
||||
fatal=False, note=f'Downloading {format_id} MPD manifest')
|
||||
formats, subtitles = [], {}
|
||||
for format_id, hardsub_lang, stream_url in available_formats.values():
|
||||
if hardsub_lang.lower() in full_format_langs:
|
||||
adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
|
||||
stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
|
||||
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
||||
self._merge_subtitles(dash_subs, target=subtitles)
|
||||
else:
|
||||
self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
|
||||
continue
|
||||
continue # XXX: Update this if/when meta mpd formats are working
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_locale
|
||||
f['quality'] = hardsub_preference(hardsub_lang.lower())
|
||||
formats.extend(adaptive_formats)
|
||||
|
||||
return formats
|
||||
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
||||
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
||||
|
||||
def _extract_subtitles(self, data):
|
||||
subtitles = {}
|
||||
|
||||
for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
|
||||
subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
|
||||
|
||||
return subtitles
|
||||
return formats, subtitles
|
||||
|
||||
|
||||
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
|
||||
|
@ -245,7 +262,11 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
|
||||
'format': 'bv[format_id~=hardsub]',
|
||||
},
|
||||
}, {
|
||||
# Premium only
|
||||
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
|
||||
|
@ -306,6 +327,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'no longer exists',
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
|
||||
'info_dict': {
|
||||
|
@ -359,31 +381,15 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
else:
|
||||
raise ExtractorError(f'Unknown object type {object_type}')
|
||||
|
||||
# There might be multiple audio languages for one object (`<object>_metadata.versions`),
|
||||
# so we need to get the id from `streams_link` instead or we dont know which language to choose
|
||||
streams_link = response.get('streams_link')
|
||||
if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
# We need go from unsigned to signed api to avoid getting soft banned
|
||||
stream_response = self._call_cms_api_signed(remove_start(
|
||||
streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
|
||||
result['formats'] = self._extract_formats(stream_response, internal_id)
|
||||
result['subtitles'] = self._extract_subtitles(stream_response)
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
# if no intro chapter is available, a 403 without usable data is returned
|
||||
intro_chapter = self._download_json(
|
||||
f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
|
||||
internal_id, note='Downloading chapter info', fatal=False, errnote=False)
|
||||
if isinstance(intro_chapter, dict):
|
||||
result['chapters'] = [{
|
||||
'title': 'Intro',
|
||||
'start_time': float_or_none(intro_chapter.get('startTime')),
|
||||
'end_time': float_or_none(intro_chapter.get('endTime')),
|
||||
}]
|
||||
result['chapters'] = self._extract_chapters(internal_id)
|
||||
|
||||
def calculate_count(item):
|
||||
return parse_count(''.join((item['displayed'], item.get('unit') or '')))
|
||||
|
@ -512,7 +518,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'display_id': 'egaono-hana',
|
||||
'title': 'Egaono Hana',
|
||||
'track': 'Egaono Hana',
|
||||
'artist': 'Goose house',
|
||||
'artists': ['Goose house'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genres': ['J-Pop'],
|
||||
},
|
||||
|
@ -525,11 +531,12 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'display_id': 'crossing-field',
|
||||
'title': 'Crossing Field',
|
||||
'track': 'Crossing Field',
|
||||
'artist': 'LiSA',
|
||||
'artists': ['LiSA'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genres': ['Anime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'no longer exists',
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
|
||||
'info_dict': {
|
||||
|
@ -538,7 +545,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'artist': 'LiSA',
|
||||
'artists': ['LiSA'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
|
||||
'genres': ['J-Pop'],
|
||||
|
@ -566,16 +573,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
streams_link = response.get('streams_link')
|
||||
if not streams_link and response.get('isPremiumOnly'):
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
|
||||
result['formats'] = self._extract_formats(stream_response, internal_id)
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
||||
return result
|
||||
|
||||
|
@ -587,7 +592,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
'display_id': 'slug',
|
||||
'title': 'title',
|
||||
'track': 'title',
|
||||
'artist': ('artist', 'name'),
|
||||
'artists': ('artist', 'name', all),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
|
||||
'thumbnails': ('images', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
|
@ -611,7 +616,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
|||
'info_dict': {
|
||||
'id': 'MA179CB50D',
|
||||
'title': 'LiSA',
|
||||
'genres': ['J-Pop', 'Anime', 'Rock'],
|
||||
'genres': ['Anime', 'J-Pop', 'Rock'],
|
||||
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
|
||||
},
|
||||
'playlist_mincount': 83,
|
||||
|
|
Loading…
Reference in New Issue