[youtube] Improve _VALID_URLs (closes #12538)

This commit is contained in:
Sergey M․ 2017-03-25 01:17:17 +07:00
parent 54b960f340
commit d0ba55871e
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 15 additions and 4 deletions

View File

@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided # If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
def _set_language(self): def _set_language(self):
self._set_cookie( self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&hl=en', '.youtube.com', 'PREF', 'f1=50000000&hl=en',
@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
) )
)? # all until now is optional -> you can pass the naked ID )? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE (?!.*?\blist=
(?:
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
WL # WL are handled by the watch later IE
)
)
(?(1).+)? # if we found the ID, everything can follow (?(1).+)? # if we found the ID, everything can follow
$""" $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_formats = { _formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'sJL6WA-aGkQ', 'url': 'sJL6WA-aGkQ',
'only_matching': True, 'only_matching': True,
}, },
{
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True,
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
) )
.* .*
| |
((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}) (%(playlist_id)s)
)""" )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'