From 876f1c17fff194cbed3595bb2a8497ea9e479bf7 Mon Sep 17 00:00:00 2001 From: Ali Sherief Date: Mon, 9 Nov 2020 16:06:48 +0000 Subject: [PATCH] Fix #93 YoutubePlaylistsIE --- youtube_dlc/extractor/youtube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 3ec2581dc..35ac67b49 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -300,11 +300,12 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): # Extract entries from page with "Load more" button def _entries(self, page, playlist_id): more_widget_html = content_html = page + mobj_reg = r'(?:(?:data-uix-load-more-href="[^"]+?;continuation=)|(?:"continuation":"))(?P[^"]+)"' for page_num in itertools.count(1): for entry in self._process_page(content_html): yield entry - mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html) + mobj = re.search(mobj_reg, more_widget_html) if not mobj: break @@ -315,7 +316,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor): # Downloading page may result in intermittent 5xx HTTP error # that is usually worked around with a retry more = self._download_json( - 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id, + 'https://www.youtube.com/browse_ajax?ctoken=%s' % mobj.group('more'), playlist_id, 'Downloading page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''), transform_source=uppercase_escape, @@ -372,7 +373,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): for playlist_id in orderedSet(re.findall( - r']+class="[^"]*yt-lockup-title[^"]*"[^>]*>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', + r'"/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')