mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor] Add `default` parameter to `_search_json` (#4057)
Authored by: pukkandan, coletdjnz
This commit is contained in:
parent
9fde8a6b12
commit
f0bc6e2019
|
@ -486,9 +486,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||||
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
|
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
|
||||||
player_response = self._search_json(
|
player_response = self._search_json(
|
||||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
|
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
|
||||||
video_id, fatal=False)
|
video_id, default={})
|
||||||
initial_data = self._search_json(
|
initial_data = self._search_json(
|
||||||
self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, fatal=False)
|
self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
|
||||||
|
|
||||||
initial_data_video = traverse_obj(
|
initial_data_video = traverse_obj(
|
||||||
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
|
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
|
||||||
|
|
|
@ -1188,13 +1188,32 @@ class InfoExtractor:
|
||||||
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='', contains_pattern='(?s:.+)', fatal=True, **kwargs):
|
def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
|
||||||
|
contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
|
||||||
"""Searches string for the JSON object specified by start_pattern"""
|
"""Searches string for the JSON object specified by start_pattern"""
|
||||||
# NB: end_pattern is only used to reduce the size of the initial match
|
# NB: end_pattern is only used to reduce the size of the initial match
|
||||||
return self._parse_json(
|
if default is NO_DEFAULT:
|
||||||
self._search_regex(rf'{start_pattern}\s*(?P<json>{{{contains_pattern}}})\s*{end_pattern}',
|
default, has_default = {}, False
|
||||||
string, name, group='json', fatal=fatal) or '{}',
|
else:
|
||||||
video_id, fatal=fatal, ignore_extra=True, **kwargs) or {}
|
fatal, has_default = False, True
|
||||||
|
|
||||||
|
json_string = self._search_regex(
|
||||||
|
rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
|
||||||
|
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
|
||||||
|
if not json_string:
|
||||||
|
return default
|
||||||
|
|
||||||
|
_name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
|
||||||
|
try:
|
||||||
|
return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(
|
||||||
|
f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
|
||||||
|
elif not has_default:
|
||||||
|
self.report_warning(
|
||||||
|
f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
|
||||||
|
return default
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue