mirror of https://github.com/yt-dlp/yt-dlp.git
[cookies] Parse cookies leniently (#4780)
Closes #4776, #3778 Authored by: Grub4K
This commit is contained in:
parent
5736d79172
commit
8817a80d3a
|
@ -3,6 +3,7 @@ from datetime import datetime, timezone
|
||||||
|
|
||||||
from yt_dlp import cookies
|
from yt_dlp import cookies
|
||||||
from yt_dlp.cookies import (
|
from yt_dlp.cookies import (
|
||||||
|
LenientSimpleCookie,
|
||||||
LinuxChromeCookieDecryptor,
|
LinuxChromeCookieDecryptor,
|
||||||
MacChromeCookieDecryptor,
|
MacChromeCookieDecryptor,
|
||||||
WindowsChromeCookieDecryptor,
|
WindowsChromeCookieDecryptor,
|
||||||
|
@ -137,3 +138,148 @@ class TestCookies(unittest.TestCase):
|
||||||
def test_pbkdf2_sha1(self):
|
def test_pbkdf2_sha1(self):
|
||||||
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
|
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
|
||||||
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
|
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
|
||||||
|
|
||||||
|
|
||||||
|
class TestLenientSimpleCookie(unittest.TestCase):
    """Checks LenientSimpleCookie against well-formed and malformed cookie strings."""

    def _run_tests(self, *cases):
        """Parse every raw cookie string and compare the result with its expectation.

        Each case is a ``(message, raw_cookie, expected)`` tuple. ``expected``
        maps a cookie name either to its value, or to a ``(value, attributes)``
        tuple where ``attributes`` lists the morsel attributes expected to be
        different from the empty string.
        """
        for message, raw_cookie, expected in cases:
            with self.subTest(message, expected=expected):
                # Parse inside the subTest so that an exception raised while
                # parsing is reported for this case and later cases still run
                cookie = LenientSimpleCookie(raw_cookie)
                self.assertEqual(cookie.keys(), expected.keys(), message)

                for key, expected_value in expected.items():
                    morsel = cookie[key]
                    if isinstance(expected_value, tuple):
                        expected_value, expected_attributes = expected_value
                    else:
                        expected_attributes = {}

                    # Only compare the attributes that are not the empty string
                    attributes = {
                        name: value
                        for name, value in dict(morsel).items()
                        if value != ""
                    }
                    self.assertEqual(attributes, expected_attributes, message)

                    self.assertEqual(morsel.value, expected_value, message)

    def test_parsing(self):
        """Inputs that the CPython parser handles must parse the same way here."""
        self._run_tests(
            # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
            (
                "Test basic cookie",
                "chips=ahoy; vienna=finger",
                {"chips": "ahoy", "vienna": "finger"},
            ),
            (
                "Test quoted cookie",
                'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
                {"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
            ),
            (
                "Allow '=' in an unquoted value",
                "keebler=E=mc2",
                {"keebler": "E=mc2"},
            ),
            (
                "Allow cookies with ':' in their name",
                "key:term=value:term",
                {"key:term": "value:term"},
            ),
            (
                "Allow '[' and ']' in cookie values",
                "a=b; c=[; d=r; f=h",
                {"a": "b", "c": "[", "d": "r", "f": "h"},
            ),
            (
                "Test basic cookie attributes",
                'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
                {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
            ),
            (
                "Test flag only cookie attributes",
                'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
                {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
            ),
            (
                "Test flag only attribute with values",
                "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
                {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
            ),
            (
                "Test special case for 'expires' attribute, 4 digit year",
                'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
                {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
            ),
            (
                "Test special case for 'expires' attribute, 2 digit year",
                'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
                {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
            ),
            (
                "Test extra spaces in keys and values",
                "eggs = scrambled ; secure ; path = bar ; foo=foo ",
                {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
            ),
            (
                "Test quoted attributes",
                'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
                {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}
            ),
            # Our own tests that CPython passes
            (
                "Allow ';' in quoted value",
                'chips="a;hoy"; vienna=finger',
                {"chips": "a;hoy", "vienna": "finger"},
            ),
            (
                "Keep only the last set value",
                "a=c; a=b",
                {"a": "b"},
            ),
        )

    def test_lenient_parsing(self):
        """Malformed cookies must be skipped without losing the valid ones."""
        self._run_tests(
            (
                "Ignore and try to skip invalid cookies",
                'chips={"ahoy;": 1}; vienna="finger;"',
                {"vienna": "finger;"},
            ),
            (
                "Ignore cookies without a name",
                "a=b; unnamed; c=d",
                {"a": "b", "c": "d"},
            ),
            (
                "Ignore '\"' cookie without name",
                'a=b; "; c=d',
                {"a": "b", "c": "d"},
            ),
            (
                "Skip all space separated values",
                "x a=b c=d x; e=f",
                {"a": "b", "c": "d", "e": "f"},
            ),
            (
                "Skip all space separated values",
                'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
                {"a": "b", "c": "d"},
            ),
            (
                "Expect quote mending",
                'a=b; invalid="; c=d',
                {"a": "b", "c": "d"},
            ),
            (
                "Reset morsel after invalid to not capture attributes",
                "a=b; invalid; Version=1; c=d",
                {"a": "b", "c": "d"},
            ),
            (
                "Continue after non-flag attribute without value",
                "a=b; path; Version=1; c=d",
                {"a": "b", "c": "d"},
            ),
        )
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import base64
|
import base64
|
||||||
import contextlib
|
import contextlib
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
|
import http.cookies
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
|
||||||
if profile is not None and _is_path(profile):
|
if profile is not None and _is_path(profile):
|
||||||
profile = os.path.expanduser(profile)
|
profile = os.path.expanduser(profile)
|
||||||
return browser_name, profile, keyring, container
|
return browser_name, profile, keyring, container
|
||||||
|
|
||||||
|
|
||||||
|
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie

    Cookies that cannot be parsed are skipped, together with the attributes
    that directly follow them, instead of aborting the whole parse.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # Keys are limited to the characters http.cookies.Morsel accepts as a key,
    # so that Morsel.set() does not raise CookieError for a key matched below
    _LEGAL_KEY_CHARS = r"\w\d" + re.escape("!#$%&'*+-.:^_`|~")
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape("(),/<=>?@[]{}")

    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`, skipping whatever cannot be parsed.

        Non-string input is passed on unchanged to SimpleCookie.load().
        For strings, every match of the cookie pattern is handled in order:
        a reserved key is stored as an attribute of the most recent valid
        cookie, any other key with a value starts a new cookie. Anything
        invalid resets the current cookie, so the attributes following it
        are not attached to an earlier cookie.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that following attributes get attached to, if any
        morsel = None
        # The pattern needs at least one key character, so it never matches
        # the empty string and the iteration always advances
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group("bad"):
                morsel = None
                continue

            key, value = match.group("key", "val")

            # "$"-prefixed keys (e.g. "$Path") are attributes; strip the
            # prefix and handle them like their unprefixed counterpart
            is_attribute = key.startswith("$")
            if is_attribute:
                key = key[1:]

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        # Only flags may come without a value
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # Unknown "$" attribute; setting it on the morsel would raise
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                morsel = None
|
||||||
|
|
|
@ -22,6 +22,7 @@ import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..compat import functools # isort: split
|
from ..compat import functools # isort: split
|
||||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
||||||
|
from ..cookies import LenientSimpleCookie
|
||||||
from ..downloader import FileDownloader
|
from ..downloader import FileDownloader
|
||||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -3632,7 +3633,7 @@ class InfoExtractor:
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
""" Return a http.cookies.SimpleCookie with the cookies for the url """
|
""" Return a http.cookies.SimpleCookie with the cookies for the url """
|
||||||
return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
|
return LenientSimpleCookie(self._downloader._calc_cookies(url))
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue