[extractor/common] Move workaround for applying first Set-Cookie header into a separate method

This commit is contained in:
Sergey M․ 2019-05-18 03:17:15 +07:00
parent 82e91d20a0
commit e3c1266f49
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 26 additions and 19 deletions

View File

@ -2817,6 +2817,29 @@ class InfoExtractor(object):
self._downloader.cookiejar.add_cookie_header(req) self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie')) return compat_cookies.SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
    """
    Re-apply the first value served for `cookie` in a Set-Cookie header.

    Some sites (e.g. [1-3]) serve two cookies under the same name in the
    Set-Cookie header and expect the first (old) one to be kept rather
    than the second (new) one. Per RFC 6265 the newer cookie replaces the
    older one in the cookie store, which is what normally happens.
    This method works around the issue by manually resetting the cookie
    to the first value found.

    1. https://new.vk.com/
    2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
    3. https://learning.oreilly.com/

    url_handle -- response object; its `headers.items()` is scanned for
                  Set-Cookie headers
    cookie     -- name of the cookie to reset (matched literally)

    Nothing is returned. When a matching cookie with a Domain attribute
    is found, it is stored via self._set_cookie(domain, cookie, value);
    otherwise nothing is set.
    """
    # Loop-invariant, so build it once. The cookie name is escaped so
    # that regex metacharacters in it are matched literally, and the
    # lookbehind requires the name to start the header or follow a
    # separator so that it cannot match the tail of a longer cookie
    # name (e.g. `lhk` inside `remixlhk`).
    cookie_re = r'(?<![^\s,;])%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % re.escape(cookie)
    for header, cookies in url_handle.headers.items():
        if header.lower() != 'set-cookie':
            continue
        if sys.version_info[0] >= 3:
            # Round-trip the header text from ISO-8859-1 to UTF-8
            cookies = cookies.encode('iso-8859-1')
        cookies = cookies.decode('utf-8')
        cookie_value = re.search(cookie_re, cookies)
        if cookie_value:
            value, domain = cookie_value.groups()
            self._set_cookie(domain, cookie, value)
            # Only the first matching header is applied
            break
def get_testcases(self, include_onlymatching=False): def get_testcases(self, include_onlymatching=False):
t = getattr(self, '_TEST', None) t = getattr(self, '_TEST', None)
if t: if t:

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import collections import collections
import re import re
import sys
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
'pass': password.encode('cp1251'), 'pass': password.encode('cp1251'),
}) })
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header # vk serves two same remixlhk cookies in Set-Cookie header and expects
# and expects the first one to be set rather than second (see # first one to be actually set
# https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201). self._apply_first_set_cookie_header(url_handle, 'remixlhk')
# As of RFC6265 the newer one cookie should be set into cookie store
# what actually happens.
# We will workaround this VK issue by resetting the remixlhk cookie to
# the first one manually.
for header, cookies in url_handle.headers.items():
if header.lower() != 'set-cookie':
continue
if sys.version_info[0] >= 3:
cookies = cookies.encode('iso-8859-1')
cookies = cookies.decode('utf-8')
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
if remixlhk:
value, domain = remixlhk.groups()
self._set_cookie(domain, 'remixlhk', value)
break
login_page = self._download_webpage( login_page = self._download_webpage(
'https://login.vk.com/?act=login', None, 'https://login.vk.com/?act=login', None,