[youtube] Fix playlist and feed extraction (closes #25675)

This commit is contained in:
Sergey M․ 2020-06-16 01:59:46 +07:00
parent 48bd042ce7
commit d84b21b427
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 9 additions and 2 deletions

View File

@ -70,6 +70,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}' _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
'x-youtube-client-version': '1.20200609.04.02',
}
def _set_language(self): def _set_language(self):
self._set_cookie( self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&hl=en', '.youtube.com', 'PREF', 'f1=50000000&hl=en',
@ -301,7 +306,8 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
'Downloading page #%s%s' 'Downloading page #%s%s'
% (page_num, ' (retry #%d)' % count if count else ''), % (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape) transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS)
break break
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@ -3250,7 +3256,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
more = self._download_json( more = self._download_json(
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE, 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num, 'Downloading page #%s' % page_num,
transform_source=uppercase_escape) transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS)
content_html = more['content_html'] content_html = more['content_html']
more_widget_html = more['load_more_widget_html'] more_widget_html = more['load_more_widget_html']