[youtube:feed] Implement lazy playlist extraction (closes #10184)

This commit is contained in:
Sergey M․ 2018-04-22 06:07:32 +07:00
parent 6cdaaf7031
commit 3853309fe2
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 10 additions and 6 deletions

View File

@ -2699,10 +2699,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _real_extract(self, url):
page = self._download_webpage(
'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
def _entries(self, page):
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
@ -2713,12 +2710,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
if not new_ids:
break
ids.extend(new_ids)
for entry in self._ids_to_results(new_ids):
yield entry
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
@ -2730,8 +2730,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
def _real_extract(self, url):
page = self._download_webpage(
'https://www.youtube.com/feed/%s' % self._FEED_NAME,
self._PLAYLIST_TITLE)
return self.playlist_result(
self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):