def _apply_first_set_cookie_header(self, url_handle, cookie):
    """
    Reset a cookie to the first value served for it in a response.

    Some sites (e.g. [1-3]) may serve two cookies under the same name
    in the Set-Cookie header and expect the first (old) one to be set
    rather than the second (new) one. As of RFC 6265 the newer cookie
    is the one that ends up in the cookie store, which is what actually
    happens. This works around the issue by manually resetting the
    cookie to the first value found.

    1. https://new.vk.com/
    2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
    3. https://learning.oreilly.com/

    @param url_handle  response object; only url_handle.headers.items()
                       is read
    @param cookie      name of the cookie to reset (matched literally)

    Returns nothing. When a Set-Cookie header contains the cookie together
    with a Domain attribute, self._set_cookie(domain, cookie, value) is
    called once for the first such header; otherwise nothing happens.
    """
    # Escape the name: cookie names may legally contain characters such as
    # '.', '+' or '*' that would otherwise be interpreted as regex syntax
    # and match the wrong cookie or fail to compile.
    cookie_re = re.compile(
        r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % re.escape(cookie))
    for header, cookies in url_handle.headers.items():
        if header.lower() != 'set-cookie':
            continue
        # On Python 3 the header value is text; round-trip it through
        # ISO-8859-1 to get bytes back before decoding them as UTF-8
        # (presumably because the HTTP layer decoded it as ISO-8859-1).
        if sys.version_info[0] >= 3:
            cookies = cookies.encode('iso-8859-1')
        cookies = cookies.decode('utf-8')
        mobj = cookie_re.search(cookies)
        if mobj:
            value, domain = mobj.groups()
            self._set_cookie(domain, cookie, value)
            # Only the first matching header is applied
            break
- for header, cookies in url_handle.headers.items(): - if header.lower() != 'set-cookie': - continue - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') - remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) - if remixlhk: - value, domain = remixlhk.groups() - self._set_cookie(domain, 'remixlhk', value) - break + # vk serves two same remixlhk cookies in Set-Cookie header and expects + # first one to be actually set + self._apply_first_set_cookie_header(url_handle, 'remixlhk') login_page = self._download_webpage( 'https://login.vk.com/?act=login', None,