mirror of https://github.com/yt-dlp/yt-dlp.git
[networking] Rewrite architecture (#2861)
New networking interface consists of a `RequestDirector` that directs each `Request` to appropriate `RequestHandler` and returns the `Response` or raises `RequestError`. The handlers define adapters to transform its internal Request/Response/Errors to our interfaces. User-facing changes: - Fix issues with per request proxies on redirects for urllib - Support for `ALL_PROXY` environment variable for proxy setting - Support for `socks5h` proxy - Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093 - Raise error when using `https` proxy instead of silently converting it to `http` Authored by: coletdjnz
This commit is contained in:
parent
c365dba843
commit
227bf1a33b
|
@ -10,10 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import hashlib
|
import hashlib
|
||||||
import http.client
|
|
||||||
import json
|
import json
|
||||||
import socket
|
|
||||||
import urllib.error
|
|
||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
assertGreaterEqual,
|
assertGreaterEqual,
|
||||||
|
@ -29,6 +26,7 @@ from test.helper import (
|
||||||
|
|
||||||
import yt_dlp.YoutubeDL # isort: split
|
import yt_dlp.YoutubeDL # isort: split
|
||||||
from yt_dlp.extractor import get_info_extractor
|
from yt_dlp.extractor import get_info_extractor
|
||||||
|
from yt_dlp.networking.exceptions import HTTPError, TransportError
|
||||||
from yt_dlp.utils import (
|
from yt_dlp.utils import (
|
||||||
DownloadError,
|
DownloadError,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -162,8 +160,7 @@ def generator(test_case, tname):
|
||||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
|
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].code == 503):
|
||||||
or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
|
|
||||||
err.msg = f'{getattr(err, "msg", err)} ({tname})'
|
err.msg = f'{getattr(err, "msg", err)} ({tname})'
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
@ -249,7 +246,7 @@ def generator(test_case, tname):
|
||||||
# extractor returns full results even with extract_flat
|
# extractor returns full results even with extract_flat
|
||||||
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
||||||
try_rm_tcs_files(res_tcs)
|
try_rm_tcs_files(res_tcs)
|
||||||
|
ydl.close()
|
||||||
return test_template
|
return test_template
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,239 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import io
|
||||||
|
import platform
|
||||||
|
import random
|
||||||
|
import ssl
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
|
from yt_dlp.dependencies import certifi
|
||||||
|
from yt_dlp.networking import Response
|
||||||
|
from yt_dlp.networking._helper import (
|
||||||
|
InstanceStoreMixin,
|
||||||
|
add_accept_encoding_header,
|
||||||
|
get_redirect_method,
|
||||||
|
make_socks_proxy_opts,
|
||||||
|
select_proxy,
|
||||||
|
ssl_load_certs,
|
||||||
|
)
|
||||||
|
from yt_dlp.networking.exceptions import (
|
||||||
|
HTTPError,
|
||||||
|
IncompleteRead,
|
||||||
|
_CompatHTTPError,
|
||||||
|
)
|
||||||
|
from yt_dlp.socks import ProxyType
|
||||||
|
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
class TestNetworkingUtils:
|
||||||
|
|
||||||
|
def test_select_proxy(self):
|
||||||
|
proxies = {
|
||||||
|
'all': 'socks5://example.com',
|
||||||
|
'http': 'http://example.com:1080',
|
||||||
|
'no': 'bypass.example.com,yt-dl.org'
|
||||||
|
}
|
||||||
|
|
||||||
|
assert select_proxy('https://example.com', proxies) == proxies['all']
|
||||||
|
assert select_proxy('http://example.com', proxies) == proxies['http']
|
||||||
|
assert select_proxy('http://bypass.example.com', proxies) is None
|
||||||
|
assert select_proxy('https://yt-dl.org', proxies) is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('socks_proxy,expected', [
|
||||||
|
('socks5h://example.com', {
|
||||||
|
'proxytype': ProxyType.SOCKS5,
|
||||||
|
'addr': 'example.com',
|
||||||
|
'port': 1080,
|
||||||
|
'rdns': True,
|
||||||
|
'username': None,
|
||||||
|
'password': None
|
||||||
|
}),
|
||||||
|
('socks5://user:@example.com:5555', {
|
||||||
|
'proxytype': ProxyType.SOCKS5,
|
||||||
|
'addr': 'example.com',
|
||||||
|
'port': 5555,
|
||||||
|
'rdns': False,
|
||||||
|
'username': 'user',
|
||||||
|
'password': ''
|
||||||
|
}),
|
||||||
|
('socks4://u%40ser:pa%20ss@127.0.0.1:1080', {
|
||||||
|
'proxytype': ProxyType.SOCKS4,
|
||||||
|
'addr': '127.0.0.1',
|
||||||
|
'port': 1080,
|
||||||
|
'rdns': False,
|
||||||
|
'username': 'u@ser',
|
||||||
|
'password': 'pa ss'
|
||||||
|
}),
|
||||||
|
('socks4a://:pa%20ss@127.0.0.1', {
|
||||||
|
'proxytype': ProxyType.SOCKS4A,
|
||||||
|
'addr': '127.0.0.1',
|
||||||
|
'port': 1080,
|
||||||
|
'rdns': True,
|
||||||
|
'username': '',
|
||||||
|
'password': 'pa ss'
|
||||||
|
})
|
||||||
|
])
|
||||||
|
def test_make_socks_proxy_opts(self, socks_proxy, expected):
|
||||||
|
assert make_socks_proxy_opts(socks_proxy) == expected
|
||||||
|
|
||||||
|
def test_make_socks_proxy_unknown(self):
|
||||||
|
with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
|
||||||
|
make_socks_proxy_opts('socks://127.0.0.1')
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not certifi, reason='certifi is not installed')
|
||||||
|
def test_load_certifi(self):
|
||||||
|
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||||
|
context2 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||||
|
ssl_load_certs(context, use_certifi=True)
|
||||||
|
context2.load_verify_locations(cafile=certifi.where())
|
||||||
|
assert context.get_ca_certs() == context2.get_ca_certs()
|
||||||
|
|
||||||
|
# Test load normal certs
|
||||||
|
# XXX: could there be a case where system certs are the same as certifi?
|
||||||
|
context3 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||||
|
ssl_load_certs(context3, use_certifi=False)
|
||||||
|
assert context3.get_ca_certs() != context.get_ca_certs()
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('method,status,expected', [
|
||||||
|
('GET', 303, 'GET'),
|
||||||
|
('HEAD', 303, 'HEAD'),
|
||||||
|
('PUT', 303, 'GET'),
|
||||||
|
('POST', 301, 'GET'),
|
||||||
|
('HEAD', 301, 'HEAD'),
|
||||||
|
('POST', 302, 'GET'),
|
||||||
|
('HEAD', 302, 'HEAD'),
|
||||||
|
('PUT', 302, 'PUT'),
|
||||||
|
('POST', 308, 'POST'),
|
||||||
|
('POST', 307, 'POST'),
|
||||||
|
('HEAD', 308, 'HEAD'),
|
||||||
|
('HEAD', 307, 'HEAD'),
|
||||||
|
])
|
||||||
|
def test_get_redirect_method(self, method, status, expected):
|
||||||
|
assert get_redirect_method(method, status) == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('headers,supported_encodings,expected', [
|
||||||
|
({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
|
||||||
|
({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
|
||||||
|
({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
|
||||||
|
])
|
||||||
|
def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
|
||||||
|
headers = HTTPHeaderDict(headers)
|
||||||
|
add_accept_encoding_header(headers, supported_encodings)
|
||||||
|
assert headers == HTTPHeaderDict(expected)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInstanceStoreMixin:
|
||||||
|
|
||||||
|
class FakeInstanceStoreMixin(InstanceStoreMixin):
|
||||||
|
def _create_instance(self, **kwargs):
|
||||||
|
return random.randint(0, 1000000)
|
||||||
|
|
||||||
|
def _close_instance(self, instance):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_mixin(self):
|
||||||
|
mixin = self.FakeInstanceStoreMixin()
|
||||||
|
assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
|
||||||
|
|
||||||
|
assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
|
||||||
|
|
||||||
|
assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}} != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}}))
|
||||||
|
|
||||||
|
assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])
|
||||||
|
|
||||||
|
assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])
|
||||||
|
|
||||||
|
cookiejar = YoutubeDLCookieJar()
|
||||||
|
assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)
|
||||||
|
|
||||||
|
assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())
|
||||||
|
|
||||||
|
# Different order
|
||||||
|
assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)
|
||||||
|
|
||||||
|
m = mixin._get_instance(t=1234)
|
||||||
|
assert mixin._get_instance(t=1234) == m
|
||||||
|
mixin._clear_instances()
|
||||||
|
assert mixin._get_instance(t=1234) != m
|
||||||
|
|
||||||
|
|
||||||
|
class TestNetworkingExceptions:
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def create_response(status):
|
||||||
|
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
|
||||||
|
def test_http_error(self, http_error_class):
|
||||||
|
|
||||||
|
response = self.create_response(403)
|
||||||
|
error = http_error_class(response)
|
||||||
|
|
||||||
|
assert error.status == 403
|
||||||
|
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
|
||||||
|
assert error.reason == response.reason
|
||||||
|
assert error.response is response
|
||||||
|
|
||||||
|
data = error.response.read()
|
||||||
|
assert data == b'test'
|
||||||
|
assert repr(error) == '<HTTPError 403: Forbidden>'
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
|
||||||
|
def test_redirect_http_error(self, http_error_class):
|
||||||
|
response = self.create_response(301)
|
||||||
|
error = http_error_class(response, redirect_loop=True)
|
||||||
|
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
|
||||||
|
assert error.reason == 'Moved Permanently'
|
||||||
|
|
||||||
|
def test_compat_http_error(self):
|
||||||
|
response = self.create_response(403)
|
||||||
|
error = _CompatHTTPError(HTTPError(response))
|
||||||
|
assert isinstance(error, HTTPError)
|
||||||
|
assert isinstance(error, urllib.error.HTTPError)
|
||||||
|
|
||||||
|
assert error.code == 403
|
||||||
|
assert error.getcode() == 403
|
||||||
|
assert error.hdrs is error.response.headers
|
||||||
|
assert error.info() is error.response.headers
|
||||||
|
assert error.headers is error.response.headers
|
||||||
|
assert error.filename == error.response.url
|
||||||
|
assert error.url == error.response.url
|
||||||
|
assert error.geturl() == error.response.url
|
||||||
|
|
||||||
|
# Passthrough file operations
|
||||||
|
assert error.read() == b'test'
|
||||||
|
assert not error.closed
|
||||||
|
# Technically Response operations are also passed through, which should not be used.
|
||||||
|
assert error.get_header('test') == 'test'
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
|
||||||
|
def test_compat_http_error_autoclose(self):
|
||||||
|
# Compat HTTPError should not autoclose response
|
||||||
|
response = self.create_response(403)
|
||||||
|
_CompatHTTPError(HTTPError(response))
|
||||||
|
assert not response.closed
|
||||||
|
|
||||||
|
def test_incomplete_read_error(self):
|
||||||
|
error = IncompleteRead(b'test', 3, cause='test')
|
||||||
|
assert isinstance(error, IncompleteRead)
|
||||||
|
assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
|
||||||
|
assert str(error) == error.msg == '4 bytes read, 3 more expected'
|
||||||
|
assert error.partial == b'test'
|
||||||
|
assert error.expected == 3
|
||||||
|
assert error.cause == 'test'
|
||||||
|
|
||||||
|
error = IncompleteRead(b'aaa')
|
||||||
|
assert repr(error) == '<IncompleteRead: 3 bytes read>'
|
||||||
|
assert str(error) == '3 bytes read'
|
|
@ -51,6 +51,7 @@ from yt_dlp.utils import (
|
||||||
escape_url,
|
escape_url,
|
||||||
expand_path,
|
expand_path,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
extract_basic_auth,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -103,7 +104,6 @@ from yt_dlp.utils import (
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
sanitize_url,
|
sanitize_url,
|
||||||
sanitized_Request,
|
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -132,6 +132,7 @@ from yt_dlp.utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
|
|
||||||
class TestUtil(unittest.TestCase):
|
class TestUtil(unittest.TestCase):
|
||||||
|
@ -2315,14 +2316,43 @@ Line 1
|
||||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||||
msg='function on a `re.Match` should give group name as well')
|
msg='function on a `re.Match` should give group name as well')
|
||||||
|
|
||||||
|
def test_http_header_dict(self):
|
||||||
|
headers = HTTPHeaderDict()
|
||||||
|
headers['ytdl-test'] = 1
|
||||||
|
self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')])
|
||||||
|
headers['Ytdl-test'] = '2'
|
||||||
|
self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')])
|
||||||
|
self.assertTrue('ytDl-Test' in headers)
|
||||||
|
self.assertEqual(str(headers), str(dict(headers)))
|
||||||
|
self.assertEqual(repr(headers), str(dict(headers)))
|
||||||
|
|
||||||
|
headers.update({'X-dlp': 'data'})
|
||||||
|
self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
|
||||||
|
self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
|
||||||
|
self.assertEqual(len(headers), 2)
|
||||||
|
self.assertEqual(headers.copy(), headers)
|
||||||
|
headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'})
|
||||||
|
self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
|
||||||
|
self.assertEqual(len(headers2), 2)
|
||||||
|
headers2.clear()
|
||||||
|
self.assertEqual(len(headers2), 0)
|
||||||
|
|
||||||
|
# ensure we prefer latter headers
|
||||||
|
headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
|
||||||
|
self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')})
|
||||||
|
del headers3['ytdl-tesT']
|
||||||
|
self.assertEqual(dict(headers3), {})
|
||||||
|
|
||||||
|
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
|
||||||
|
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
|
||||||
|
|
||||||
def test_extract_basic_auth(self):
|
def test_extract_basic_auth(self):
|
||||||
auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
|
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
|
||||||
self.assertFalse(auth_header('http://foo.bar'))
|
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
|
||||||
self.assertFalse(auth_header('http://:foo.bar'))
|
assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
|
||||||
self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
|
assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
|
||||||
self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
|
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
|
||||||
self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
|
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
|
||||||
self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -4,7 +4,6 @@ import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
import functools
|
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -25,8 +24,8 @@ import traceback
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .compat import urllib # isort: split
|
from .compat import functools, urllib # isort: split
|
||||||
from .compat import compat_os_name, compat_shlex_quote
|
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
|
||||||
from .cookies import LenientSimpleCookie, load_cookies
|
from .cookies import LenientSimpleCookie, load_cookies
|
||||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||||
from .downloader.rtmp import rtmpdump_version
|
from .downloader.rtmp import rtmpdump_version
|
||||||
|
@ -34,6 +33,15 @@ from .extractor import gen_extractor_classes, get_info_extractor
|
||||||
from .extractor.common import UnsupportedURLIE
|
from .extractor.common import UnsupportedURLIE
|
||||||
from .extractor.openload import PhantomJSwrapper
|
from .extractor.openload import PhantomJSwrapper
|
||||||
from .minicurses import format_text
|
from .minicurses import format_text
|
||||||
|
from .networking import Request, RequestDirector
|
||||||
|
from .networking.common import _REQUEST_HANDLERS
|
||||||
|
from .networking.exceptions import (
|
||||||
|
HTTPError,
|
||||||
|
NoSupportingHandlers,
|
||||||
|
RequestError,
|
||||||
|
SSLError,
|
||||||
|
_CompatHTTPError,
|
||||||
|
)
|
||||||
from .plugins import directories as plugin_directories
|
from .plugins import directories as plugin_directories
|
||||||
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
|
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
|
||||||
from .postprocessor import (
|
from .postprocessor import (
|
||||||
|
@ -78,7 +86,6 @@ from .utils import (
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
Namespace,
|
Namespace,
|
||||||
PagedList,
|
PagedList,
|
||||||
PerRequestProxyHandler,
|
|
||||||
PlaylistEntries,
|
PlaylistEntries,
|
||||||
Popen,
|
Popen,
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
|
@ -87,9 +94,6 @@ from .utils import (
|
||||||
SameFileError,
|
SameFileError,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
YoutubeDLCookieProcessor,
|
|
||||||
YoutubeDLHandler,
|
|
||||||
YoutubeDLRedirectHandler,
|
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
|
@ -102,6 +106,7 @@ from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
escapeHTML,
|
escapeHTML,
|
||||||
expand_path,
|
expand_path,
|
||||||
|
extract_basic_auth,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
@ -117,8 +122,6 @@ from .utils import (
|
||||||
locked_file,
|
locked_file,
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
make_dir,
|
make_dir,
|
||||||
make_HTTPS_handler,
|
|
||||||
merge_headers,
|
|
||||||
network_exceptions,
|
network_exceptions,
|
||||||
number_of_digits,
|
number_of_digits,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -132,7 +135,6 @@ from .utils import (
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
sanitize_url,
|
sanitize_url,
|
||||||
sanitized_Request,
|
|
||||||
std_headers,
|
std_headers,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strftime_or_none,
|
strftime_or_none,
|
||||||
|
@ -151,7 +153,12 @@ from .utils import (
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
from .utils.networking import clean_headers
|
from .utils._utils import _YDLLogger
|
||||||
|
from .utils.networking import (
|
||||||
|
HTTPHeaderDict,
|
||||||
|
clean_headers,
|
||||||
|
clean_proxies,
|
||||||
|
)
|
||||||
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
|
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||||
|
|
||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
|
@ -673,7 +680,9 @@ class YoutubeDL:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||||||
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
|
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||||
|
self._request_director = self.build_request_director(
|
||||||
|
sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
|
||||||
if auto_init and auto_init != 'no_verbose_header':
|
if auto_init and auto_init != 'no_verbose_header':
|
||||||
self.print_debug_header()
|
self.print_debug_header()
|
||||||
|
|
||||||
|
@ -763,8 +772,6 @@ class YoutubeDL:
|
||||||
get_postprocessor(pp_def.pop('key'))(self, **pp_def),
|
get_postprocessor(pp_def.pop('key'))(self, **pp_def),
|
||||||
when=when)
|
when=when)
|
||||||
|
|
||||||
self._setup_opener()
|
|
||||||
|
|
||||||
def preload_download_archive(fn):
|
def preload_download_archive(fn):
|
||||||
"""Preload the archive, if any is specified"""
|
"""Preload the archive, if any is specified"""
|
||||||
archive = set()
|
archive = set()
|
||||||
|
@ -946,7 +953,11 @@ class YoutubeDL:
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self.restore_console_title()
|
self.restore_console_title()
|
||||||
|
self.close()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
self.save_cookies()
|
self.save_cookies()
|
||||||
|
self._request_director.close()
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None, is_error=True):
|
def trouble(self, message=None, tb=None, is_error=True):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
|
@ -2468,7 +2479,7 @@ class YoutubeDL:
|
||||||
return _build_selector_function(parsed_selector)
|
return _build_selector_function(parsed_selector)
|
||||||
|
|
||||||
def _calc_headers(self, info_dict):
|
def _calc_headers(self, info_dict):
|
||||||
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
|
res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
|
||||||
clean_headers(res)
|
clean_headers(res)
|
||||||
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
||||||
if cookies:
|
if cookies:
|
||||||
|
@ -3943,13 +3954,8 @@ class YoutubeDL:
|
||||||
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
||||||
})) or 'none'))
|
})) or 'none'))
|
||||||
|
|
||||||
self._setup_opener()
|
write_debug(f'Proxy map: {self.proxies}')
|
||||||
proxy_map = {}
|
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
|
||||||
for handler in self._opener.handlers:
|
|
||||||
if hasattr(handler, 'proxies'):
|
|
||||||
proxy_map.update(handler.proxies)
|
|
||||||
write_debug(f'Proxy map: {proxy_map}')
|
|
||||||
|
|
||||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||||
display_list = ['%s%s' % (
|
display_list = ['%s%s' % (
|
||||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||||
|
@ -3977,53 +3983,21 @@ class YoutubeDL:
|
||||||
'See https://yt-dl.org/update if you need help updating.' %
|
'See https://yt-dl.org/update if you need help updating.' %
|
||||||
latest_version)
|
latest_version)
|
||||||
|
|
||||||
def _setup_opener(self):
|
@functools.cached_property
|
||||||
if hasattr(self, '_opener'):
|
def proxies(self):
|
||||||
return
|
"""Global proxy configuration"""
|
||||||
timeout_val = self.params.get('socket_timeout')
|
|
||||||
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
|
|
||||||
opts_proxy = self.params.get('proxy')
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
|
||||||
if opts_proxy is not None:
|
if opts_proxy is not None:
|
||||||
if opts_proxy == '':
|
if opts_proxy == '':
|
||||||
proxies = {}
|
opts_proxy = '__noproxy__'
|
||||||
else:
|
proxies = {'all': opts_proxy}
|
||||||
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
|
||||||
else:
|
else:
|
||||||
proxies = urllib.request.getproxies()
|
proxies = urllib.request.getproxies()
|
||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
|
# compat. Set HTTPS_PROXY to __noproxy__ to revert
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
proxies['https'] = proxies['http']
|
proxies['https'] = proxies['http']
|
||||||
proxy_handler = PerRequestProxyHandler(proxies)
|
|
||||||
|
|
||||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
return proxies
|
||||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
|
||||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
|
||||||
redirect_handler = YoutubeDLRedirectHandler()
|
|
||||||
data_handler = urllib.request.DataHandler()
|
|
||||||
|
|
||||||
# When passing our own FileHandler instance, build_opener won't add the
|
|
||||||
# default FileHandler and allows us to disable the file protocol, which
|
|
||||||
# can be used for malicious purposes (see
|
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/8227)
|
|
||||||
file_handler = urllib.request.FileHandler()
|
|
||||||
|
|
||||||
if not self.params.get('enable_file_urls'):
|
|
||||||
def file_open(*args, **kwargs):
|
|
||||||
raise urllib.error.URLError(
|
|
||||||
'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
|
|
||||||
'Use --enable-file-urls to enable at your own risk.')
|
|
||||||
file_handler.file_open = file_open
|
|
||||||
|
|
||||||
opener = urllib.request.build_opener(
|
|
||||||
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
|
||||||
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
|
||||||
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
|
|
||||||
opener.addheaders = []
|
|
||||||
self._opener = opener
|
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def cookiejar(self):
|
def cookiejar(self):
|
||||||
|
@ -4031,11 +4005,84 @@ class YoutubeDL:
|
||||||
return load_cookies(
|
return load_cookies(
|
||||||
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
|
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _opener(self):
|
||||||
|
"""
|
||||||
|
Get a urllib OpenerDirector from the Urllib handler (deprecated).
|
||||||
|
"""
|
||||||
|
self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
|
||||||
|
handler = self._request_director.handlers['Urllib']
|
||||||
|
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
|
||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
if isinstance(req, str):
|
if isinstance(req, str):
|
||||||
req = sanitized_Request(req)
|
req = Request(req)
|
||||||
return self._opener.open(req, timeout=self._socket_timeout)
|
elif isinstance(req, urllib.request.Request):
|
||||||
|
req = urllib_req_to_req(req)
|
||||||
|
assert isinstance(req, Request)
|
||||||
|
|
||||||
|
# compat: Assume user:pass url params are basic auth
|
||||||
|
url, basic_auth_header = extract_basic_auth(req.url)
|
||||||
|
if basic_auth_header:
|
||||||
|
req.headers['Authorization'] = basic_auth_header
|
||||||
|
req.url = sanitize_url(url)
|
||||||
|
|
||||||
|
clean_proxies(proxies=req.proxies, headers=req.headers)
|
||||||
|
clean_headers(req.headers)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return self._request_director.send(req)
|
||||||
|
except NoSupportingHandlers as e:
|
||||||
|
for ue in e.unsupported_errors:
|
||||||
|
if not (ue.handler and ue.msg):
|
||||||
|
continue
|
||||||
|
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
|
||||||
|
raise RequestError(
|
||||||
|
'file:// URLs are disabled by default in yt-dlp for security reasons. '
|
||||||
|
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
|
||||||
|
raise
|
||||||
|
except SSLError as e:
|
||||||
|
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
|
||||||
|
raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
|
||||||
|
elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
|
||||||
|
raise RequestError(
|
||||||
|
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
|
||||||
|
'Try using --legacy-server-connect', cause=e) from e
|
||||||
|
raise
|
||||||
|
except HTTPError as e: # TODO: Remove in a future release
|
||||||
|
raise _CompatHTTPError(e) from e
|
||||||
|
|
||||||
|
def build_request_director(self, handlers):
|
||||||
|
logger = _YDLLogger(self)
|
||||||
|
headers = self.params.get('http_headers').copy()
|
||||||
|
proxies = self.proxies.copy()
|
||||||
|
clean_headers(headers)
|
||||||
|
clean_proxies(proxies, headers)
|
||||||
|
|
||||||
|
director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
|
||||||
|
for handler in handlers:
|
||||||
|
director.add_handler(handler(
|
||||||
|
logger=logger,
|
||||||
|
headers=headers,
|
||||||
|
cookiejar=self.cookiejar,
|
||||||
|
proxies=proxies,
|
||||||
|
prefer_system_certs='no-certifi' in self.params['compat_opts'],
|
||||||
|
verify=not self.params.get('nocheckcertificate'),
|
||||||
|
**traverse_obj(self.params, {
|
||||||
|
'verbose': 'debug_printtraffic',
|
||||||
|
'source_address': 'source_address',
|
||||||
|
'timeout': 'socket_timeout',
|
||||||
|
'legacy_ssl_support': 'legacy_server_connect',
|
||||||
|
'enable_file_urls': 'enable_file_urls',
|
||||||
|
'client_cert': {
|
||||||
|
'client_certificate': 'client_certificate',
|
||||||
|
'client_certificate_key': 'client_certificate_key',
|
||||||
|
'client_certificate_password': 'client_certificate_password',
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
return director
|
||||||
|
|
||||||
def encode(self, s):
|
def encode(self, s):
|
||||||
if isinstance(s, bytes):
|
if isinstance(s, bytes):
|
||||||
|
@ -4188,7 +4235,7 @@ class YoutubeDL:
|
||||||
else:
|
else:
|
||||||
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
|
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
|
||||||
try:
|
try:
|
||||||
uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
|
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
||||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||||
shutil.copyfileobj(uf, thumbf)
|
shutil.copyfileobj(uf, thumbf)
|
||||||
|
|
|
@ -70,3 +70,13 @@ if compat_os_name in ('nt', 'ce'):
|
||||||
return userhome + path[i:]
|
return userhome + path[i:]
|
||||||
else:
|
else:
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
|
||||||
|
def urllib_req_to_req(urllib_request):
|
||||||
|
"""Convert urllib Request to a networking Request"""
|
||||||
|
from ..networking import Request
|
||||||
|
from ..utils.networking import HTTPHeaderDict
|
||||||
|
return Request(
|
||||||
|
urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
|
||||||
|
headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
|
||||||
|
extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
import http.client
|
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import socket
|
|
||||||
import ssl
|
|
||||||
import time
|
import time
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
|
from ..networking.exceptions import CertificateVerifyError, TransportError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
RetryManager,
|
RetryManager,
|
||||||
|
@ -21,14 +19,6 @@ from ..utils import (
|
||||||
write_xattr,
|
write_xattr,
|
||||||
)
|
)
|
||||||
|
|
||||||
RESPONSE_READ_EXCEPTIONS = (
|
|
||||||
TimeoutError,
|
|
||||||
socket.timeout, # compat: py < 3.10
|
|
||||||
ConnectionError,
|
|
||||||
ssl.SSLError,
|
|
||||||
http.client.HTTPException
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class HttpFD(FileDownloader):
|
class HttpFD(FileDownloader):
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
|
@ -196,13 +186,9 @@ class HttpFD(FileDownloader):
|
||||||
# Unexpected HTTP error
|
# Unexpected HTTP error
|
||||||
raise
|
raise
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
except urllib.error.URLError as err:
|
except CertificateVerifyError:
|
||||||
if isinstance(err.reason, ssl.CertificateError):
|
raise
|
||||||
raise
|
except TransportError as err:
|
||||||
raise RetryDownload(err)
|
|
||||||
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
|
|
||||||
# Any errors that occur during this will not be wrapped by URLError
|
|
||||||
except RESPONSE_READ_EXCEPTIONS as err:
|
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
|
|
||||||
def close_stream():
|
def close_stream():
|
||||||
|
@ -258,7 +244,7 @@ class HttpFD(FileDownloader):
|
||||||
try:
|
try:
|
||||||
# Download and write
|
# Download and write
|
||||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||||
except RESPONSE_READ_EXCEPTIONS as err:
|
except TransportError as err:
|
||||||
retry(err)
|
retry(err)
|
||||||
|
|
||||||
byte_counter += len(data_block)
|
byte_counter += len(data_block)
|
||||||
|
|
|
@ -17,16 +17,22 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import types
|
import types
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..compat import functools # isort: split
|
from ..compat import functools # isort: split
|
||||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
from ..compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_expanduser,
|
||||||
|
compat_os_name,
|
||||||
|
urllib_req_to_req,
|
||||||
|
)
|
||||||
from ..cookies import LenientSimpleCookie
|
from ..cookies import LenientSimpleCookie
|
||||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||||
from ..downloader.hls import HlsFD
|
from ..downloader.hls import HlsFD
|
||||||
|
from ..networking.common import HEADRequest, Request
|
||||||
|
from ..networking.exceptions import network_exceptions
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
IDENTITY,
|
IDENTITY,
|
||||||
JSON_LD_RE,
|
JSON_LD_RE,
|
||||||
|
@ -35,7 +41,6 @@ from ..utils import (
|
||||||
FormatSorter,
|
FormatSorter,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
GeoUtils,
|
GeoUtils,
|
||||||
HEADRequest,
|
|
||||||
LenientJSONDecoder,
|
LenientJSONDecoder,
|
||||||
Popen,
|
Popen,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
@ -61,7 +66,6 @@ from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
netrc_from_content,
|
netrc_from_content,
|
||||||
network_exceptions,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_bitrate,
|
parse_bitrate,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
|
@ -71,7 +75,6 @@ from ..utils import (
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_url,
|
sanitize_url,
|
||||||
sanitized_Request,
|
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
@ -83,8 +86,6 @@ from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_Request,
|
|
||||||
update_url_query,
|
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlhandle_detect_ext,
|
urlhandle_detect_ext,
|
||||||
|
@ -797,10 +798,12 @@ class InfoExtractor:
|
||||||
|
|
||||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
||||||
if isinstance(url_or_request, urllib.request.Request):
|
if isinstance(url_or_request, urllib.request.Request):
|
||||||
return update_Request(url_or_request, data=data, headers=headers, query=query)
|
url_or_request = urllib_req_to_req(url_or_request)
|
||||||
if query:
|
elif not isinstance(url_or_request, Request):
|
||||||
url_or_request = update_url_query(url_or_request, query)
|
url_or_request = Request(url_or_request)
|
||||||
return sanitized_Request(url_or_request, data, headers or {})
|
|
||||||
|
url_or_request.update(data=data, headers=headers, query=query)
|
||||||
|
return url_or_request
|
||||||
|
|
||||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
||||||
"""
|
"""
|
||||||
|
@ -838,12 +841,7 @@ class InfoExtractor:
|
||||||
except network_exceptions as err:
|
except network_exceptions as err:
|
||||||
if isinstance(err, urllib.error.HTTPError):
|
if isinstance(err, urllib.error.HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
# Retain reference to error to prevent file object from
|
return err.response
|
||||||
# being closed before it can be read. Works around the
|
|
||||||
# effects of <https://bugs.python.org/issue15002>
|
|
||||||
# introduced in Python 3.4.1.
|
|
||||||
err.fp._error = err
|
|
||||||
return err.fp
|
|
||||||
|
|
||||||
if errnote is False:
|
if errnote is False:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# flake8: noqa: 401
|
||||||
|
from .common import (
|
||||||
|
HEADRequest,
|
||||||
|
PUTRequest,
|
||||||
|
Request,
|
||||||
|
RequestDirector,
|
||||||
|
RequestHandler,
|
||||||
|
Response,
|
||||||
|
)
|
||||||
|
|
||||||
|
# isort: split
|
||||||
|
# TODO: all request handlers should be safely imported
|
||||||
|
from . import _urllib
|
|
@ -1,13 +1,22 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import functools
|
||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from .exceptions import RequestError, UnsupportedRequest
|
||||||
from ..dependencies import certifi
|
from ..dependencies import certifi
|
||||||
from ..socks import ProxyType
|
from ..socks import ProxyType
|
||||||
from ..utils import YoutubeDLError
|
from ..utils import format_field, traverse_obj
|
||||||
|
|
||||||
|
if typing.TYPE_CHECKING:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from ..utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
|
|
||||||
def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
|
def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
|
||||||
|
@ -23,11 +32,11 @@ def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
|
||||||
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
|
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
|
||||||
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
|
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
|
||||||
for storename in ('CA', 'ROOT'):
|
for storename in ('CA', 'ROOT'):
|
||||||
_ssl_load_windows_store_certs(context, storename)
|
ssl_load_windows_store_certs(context, storename)
|
||||||
context.set_default_verify_paths()
|
context.set_default_verify_paths()
|
||||||
|
|
||||||
|
|
||||||
def _ssl_load_windows_store_certs(ssl_context, storename):
|
def ssl_load_windows_store_certs(ssl_context, storename):
|
||||||
# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
|
# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
|
||||||
try:
|
try:
|
||||||
certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
|
certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
|
||||||
|
@ -44,10 +53,18 @@ def make_socks_proxy_opts(socks_proxy):
|
||||||
url_components = urllib.parse.urlparse(socks_proxy)
|
url_components = urllib.parse.urlparse(socks_proxy)
|
||||||
if url_components.scheme.lower() == 'socks5':
|
if url_components.scheme.lower() == 'socks5':
|
||||||
socks_type = ProxyType.SOCKS5
|
socks_type = ProxyType.SOCKS5
|
||||||
elif url_components.scheme.lower() in ('socks', 'socks4'):
|
rdns = False
|
||||||
|
elif url_components.scheme.lower() == 'socks5h':
|
||||||
|
socks_type = ProxyType.SOCKS5
|
||||||
|
rdns = True
|
||||||
|
elif url_components.scheme.lower() == 'socks4':
|
||||||
socks_type = ProxyType.SOCKS4
|
socks_type = ProxyType.SOCKS4
|
||||||
|
rdns = False
|
||||||
elif url_components.scheme.lower() == 'socks4a':
|
elif url_components.scheme.lower() == 'socks4a':
|
||||||
socks_type = ProxyType.SOCKS4A
|
socks_type = ProxyType.SOCKS4A
|
||||||
|
rdns = True
|
||||||
|
else:
|
||||||
|
raise ValueError(f'Unknown SOCKS proxy version: {url_components.scheme.lower()}')
|
||||||
|
|
||||||
def unquote_if_non_empty(s):
|
def unquote_if_non_empty(s):
|
||||||
if not s:
|
if not s:
|
||||||
|
@ -57,12 +74,25 @@ def make_socks_proxy_opts(socks_proxy):
|
||||||
'proxytype': socks_type,
|
'proxytype': socks_type,
|
||||||
'addr': url_components.hostname,
|
'addr': url_components.hostname,
|
||||||
'port': url_components.port or 1080,
|
'port': url_components.port or 1080,
|
||||||
'rdns': True,
|
'rdns': rdns,
|
||||||
'username': unquote_if_non_empty(url_components.username),
|
'username': unquote_if_non_empty(url_components.username),
|
||||||
'password': unquote_if_non_empty(url_components.password),
|
'password': unquote_if_non_empty(url_components.password),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def select_proxy(url, proxies):
|
||||||
|
"""Unified proxy selector for all backends"""
|
||||||
|
url_components = urllib.parse.urlparse(url)
|
||||||
|
if 'no' in proxies:
|
||||||
|
hostport = url_components.hostname + format_field(url_components.port, None, ':%s')
|
||||||
|
if urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']}):
|
||||||
|
return
|
||||||
|
elif urllib.request.proxy_bypass(hostport): # check system settings
|
||||||
|
return
|
||||||
|
|
||||||
|
return traverse_obj(proxies, url_components.scheme or 'http', 'all')
|
||||||
|
|
||||||
|
|
||||||
def get_redirect_method(method, status):
|
def get_redirect_method(method, status):
|
||||||
"""Unified redirect method handling"""
|
"""Unified redirect method handling"""
|
||||||
|
|
||||||
|
@ -126,14 +156,53 @@ def make_ssl_context(
|
||||||
client_certificate, keyfile=client_certificate_key,
|
client_certificate, keyfile=client_certificate_key,
|
||||||
password=client_certificate_password)
|
password=client_certificate_password)
|
||||||
except ssl.SSLError:
|
except ssl.SSLError:
|
||||||
raise YoutubeDLError('Unable to load client certificate')
|
raise RequestError('Unable to load client certificate')
|
||||||
|
|
||||||
|
if getattr(context, 'post_handshake_auth', None) is not None:
|
||||||
|
context.post_handshake_auth = True
|
||||||
return context
|
return context
|
||||||
|
|
||||||
|
|
||||||
def add_accept_encoding_header(headers, supported_encodings):
|
class InstanceStoreMixin:
|
||||||
if supported_encodings and 'Accept-Encoding' not in headers:
|
def __init__(self, **kwargs):
|
||||||
headers['Accept-Encoding'] = ', '.join(supported_encodings)
|
self.__instances = []
|
||||||
|
super().__init__(**kwargs) # So that both MRO works
|
||||||
|
|
||||||
elif 'Accept-Encoding' not in headers:
|
@staticmethod
|
||||||
headers['Accept-Encoding'] = 'identity'
|
def _create_instance(**kwargs):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def _get_instance(self, **kwargs):
|
||||||
|
for key, instance in self.__instances:
|
||||||
|
if key == kwargs:
|
||||||
|
return instance
|
||||||
|
|
||||||
|
instance = self._create_instance(**kwargs)
|
||||||
|
self.__instances.append((kwargs, instance))
|
||||||
|
return instance
|
||||||
|
|
||||||
|
def _close_instance(self, instance):
|
||||||
|
if callable(getattr(instance, 'close', None)):
|
||||||
|
instance.close()
|
||||||
|
|
||||||
|
def _clear_instances(self):
|
||||||
|
for _, instance in self.__instances:
|
||||||
|
self._close_instance(instance)
|
||||||
|
self.__instances.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def add_accept_encoding_header(headers: HTTPHeaderDict, supported_encodings: Iterable[str]):
|
||||||
|
if 'Accept-Encoding' not in headers:
|
||||||
|
headers['Accept-Encoding'] = ', '.join(supported_encodings) or 'identity'
|
||||||
|
|
||||||
|
|
||||||
|
def wrap_request_errors(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
except UnsupportedRequest as e:
|
||||||
|
if e.handler is None:
|
||||||
|
e.handler = self
|
||||||
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
import gzip
|
import gzip
|
||||||
import http.client
|
import http.client
|
||||||
|
@ -9,26 +11,48 @@ import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.response
|
import urllib.response
|
||||||
import zlib
|
import zlib
|
||||||
|
from urllib.request import (
|
||||||
|
DataHandler,
|
||||||
|
FileHandler,
|
||||||
|
FTPHandler,
|
||||||
|
HTTPCookieProcessor,
|
||||||
|
HTTPDefaultErrorHandler,
|
||||||
|
HTTPErrorProcessor,
|
||||||
|
UnknownHandler,
|
||||||
|
)
|
||||||
|
|
||||||
from ._helper import (
|
from ._helper import (
|
||||||
|
InstanceStoreMixin,
|
||||||
add_accept_encoding_header,
|
add_accept_encoding_header,
|
||||||
get_redirect_method,
|
get_redirect_method,
|
||||||
make_socks_proxy_opts,
|
make_socks_proxy_opts,
|
||||||
|
select_proxy,
|
||||||
|
)
|
||||||
|
from .common import Features, RequestHandler, Response, register
|
||||||
|
from .exceptions import (
|
||||||
|
CertificateVerifyError,
|
||||||
|
HTTPError,
|
||||||
|
IncompleteRead,
|
||||||
|
ProxyError,
|
||||||
|
RequestError,
|
||||||
|
SSLError,
|
||||||
|
TransportError,
|
||||||
)
|
)
|
||||||
from ..dependencies import brotli
|
from ..dependencies import brotli
|
||||||
|
from ..socks import ProxyError as SocksProxyError
|
||||||
from ..socks import sockssocket
|
from ..socks import sockssocket
|
||||||
from ..utils import escape_url, update_url_query
|
from ..utils import escape_url, update_url_query
|
||||||
from ..utils.networking import clean_headers, std_headers
|
|
||||||
|
|
||||||
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
|
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
|
||||||
|
CONTENT_DECODE_ERRORS = [zlib.error, OSError]
|
||||||
|
|
||||||
if brotli:
|
if brotli:
|
||||||
SUPPORTED_ENCODINGS.append('br')
|
SUPPORTED_ENCODINGS.append('br')
|
||||||
|
CONTENT_DECODE_ERRORS.append(brotli.error)
|
||||||
|
|
||||||
|
|
||||||
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
def _create_http_connection(http_class, source_address, *args, **kwargs):
|
||||||
hc = http_class(*args, **kwargs)
|
hc = http_class(*args, **kwargs)
|
||||||
source_address = ydl_handler._params.get('source_address')
|
|
||||||
|
|
||||||
if source_address is not None:
|
if source_address is not None:
|
||||||
# This is to workaround _create_connection() from socket where it will try all
|
# This is to workaround _create_connection() from socket where it will try all
|
||||||
|
@ -73,7 +97,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||||
return hc
|
return hc
|
||||||
|
|
||||||
|
|
||||||
class HTTPHandler(urllib.request.HTTPHandler):
|
class HTTPHandler(urllib.request.AbstractHTTPHandler):
|
||||||
"""Handler for HTTP requests and responses.
|
"""Handler for HTTP requests and responses.
|
||||||
|
|
||||||
This class, when installed with an OpenerDirector, automatically adds
|
This class, when installed with an OpenerDirector, automatically adds
|
||||||
|
@ -88,21 +112,30 @@ class HTTPHandler(urllib.request.HTTPHandler):
|
||||||
public domain.
|
public domain.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, params, *args, **kwargs):
|
def __init__(self, context=None, source_address=None, *args, **kwargs):
|
||||||
urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self._params = params
|
self._source_address = source_address
|
||||||
|
self._context = context
|
||||||
|
|
||||||
def http_open(self, req):
|
@staticmethod
|
||||||
conn_class = http.client.HTTPConnection
|
def _make_conn_class(base, req):
|
||||||
|
conn_class = base
|
||||||
socks_proxy = req.headers.get('Ytdl-socks-proxy')
|
socks_proxy = req.headers.pop('Ytdl-socks-proxy', None)
|
||||||
if socks_proxy:
|
if socks_proxy:
|
||||||
conn_class = make_socks_conn_class(conn_class, socks_proxy)
|
conn_class = make_socks_conn_class(conn_class, socks_proxy)
|
||||||
del req.headers['Ytdl-socks-proxy']
|
return conn_class
|
||||||
|
|
||||||
|
def http_open(self, req):
|
||||||
|
conn_class = self._make_conn_class(http.client.HTTPConnection, req)
|
||||||
return self.do_open(functools.partial(
|
return self.do_open(functools.partial(
|
||||||
_create_http_connection, self, conn_class, False),
|
_create_http_connection, conn_class, self._source_address), req)
|
||||||
req)
|
|
||||||
|
def https_open(self, req):
|
||||||
|
conn_class = self._make_conn_class(http.client.HTTPSConnection, req)
|
||||||
|
return self.do_open(
|
||||||
|
functools.partial(
|
||||||
|
_create_http_connection, conn_class, self._source_address),
|
||||||
|
req, context=self._context)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def deflate(data):
|
def deflate(data):
|
||||||
|
@ -152,14 +185,6 @@ class HTTPHandler(urllib.request.HTTPHandler):
|
||||||
if url != url_escaped:
|
if url != url_escaped:
|
||||||
req = update_Request(req, url=url_escaped)
|
req = update_Request(req, url=url_escaped)
|
||||||
|
|
||||||
for h, v in self._params.get('http_headers', std_headers).items():
|
|
||||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
|
||||||
# The dict keys are capitalized because of this bug by urllib
|
|
||||||
if h.capitalize() not in req.headers:
|
|
||||||
req.add_header(h, v)
|
|
||||||
|
|
||||||
clean_headers(req.headers)
|
|
||||||
add_accept_encoding_header(req.headers, SUPPORTED_ENCODINGS)
|
|
||||||
return super().do_request_(req)
|
return super().do_request_(req)
|
||||||
|
|
||||||
def http_response(self, req, resp):
|
def http_response(self, req, resp):
|
||||||
|
@ -207,16 +232,12 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||||
def connect(self):
|
def connect(self):
|
||||||
self.sock = sockssocket()
|
self.sock = sockssocket()
|
||||||
self.sock.setproxy(**proxy_args)
|
self.sock.setproxy(**proxy_args)
|
||||||
if isinstance(self.timeout, (int, float)):
|
if type(self.timeout) in (int, float): # noqa: E721
|
||||||
self.sock.settimeout(self.timeout)
|
self.sock.settimeout(self.timeout)
|
||||||
self.sock.connect((self.host, self.port))
|
self.sock.connect((self.host, self.port))
|
||||||
|
|
||||||
if isinstance(self, http.client.HTTPSConnection):
|
if isinstance(self, http.client.HTTPSConnection):
|
||||||
if hasattr(self, '_context'): # Python > 2.6
|
self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
|
||||||
self.sock = self._context.wrap_socket(
|
|
||||||
self.sock, server_hostname=self.host)
|
|
||||||
else:
|
|
||||||
self.sock = ssl.wrap_socket(self.sock)
|
|
||||||
|
|
||||||
return SocksConnection
|
return SocksConnection
|
||||||
|
|
||||||
|
@ -260,29 +281,25 @@ class RedirectHandler(urllib.request.HTTPRedirectHandler):
|
||||||
unverifiable=True, method=new_method, data=new_data)
|
unverifiable=True, method=new_method, data=new_data)
|
||||||
|
|
||||||
|
|
||||||
class ProxyHandler(urllib.request.ProxyHandler):
|
class ProxyHandler(urllib.request.BaseHandler):
|
||||||
|
handler_order = 100
|
||||||
|
|
||||||
def __init__(self, proxies=None):
|
def __init__(self, proxies=None):
|
||||||
|
self.proxies = proxies
|
||||||
# Set default handlers
|
# Set default handlers
|
||||||
for type in ('http', 'https'):
|
for type in ('http', 'https', 'ftp'):
|
||||||
setattr(self, '%s_open' % type,
|
setattr(self, '%s_open' % type, lambda r, meth=self.proxy_open: meth(r))
|
||||||
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
|
|
||||||
meth(r, proxy, type))
|
|
||||||
urllib.request.ProxyHandler.__init__(self, proxies)
|
|
||||||
|
|
||||||
def proxy_open(self, req, proxy, type):
|
def proxy_open(self, req):
|
||||||
req_proxy = req.headers.get('Ytdl-request-proxy')
|
proxy = select_proxy(req.get_full_url(), self.proxies)
|
||||||
if req_proxy is not None:
|
if proxy is None:
|
||||||
proxy = req_proxy
|
return
|
||||||
del req.headers['Ytdl-request-proxy']
|
if urllib.parse.urlparse(proxy).scheme.lower() in ('socks4', 'socks4a', 'socks5', 'socks5h'):
|
||||||
|
|
||||||
if proxy == '__noproxy__':
|
|
||||||
return None # No Proxy
|
|
||||||
if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
|
|
||||||
req.add_header('Ytdl-socks-proxy', proxy)
|
req.add_header('Ytdl-socks-proxy', proxy)
|
||||||
# yt-dlp's http/https handlers do wrapping the socket with socks
|
# yt-dlp's http/https handlers do wrapping the socket with socks
|
||||||
return None
|
return None
|
||||||
return urllib.request.ProxyHandler.proxy_open(
|
return urllib.request.ProxyHandler.proxy_open(
|
||||||
self, req, proxy, type)
|
self, req, proxy, None)
|
||||||
|
|
||||||
|
|
||||||
class PUTRequest(urllib.request.Request):
|
class PUTRequest(urllib.request.Request):
|
||||||
|
@ -313,3 +330,129 @@ def update_Request(req, url=None, data=None, headers=None, query=None):
|
||||||
if hasattr(req, 'timeout'):
|
if hasattr(req, 'timeout'):
|
||||||
new_req.timeout = req.timeout
|
new_req.timeout = req.timeout
|
||||||
return new_req
|
return new_req
|
||||||
|
|
||||||
|
|
||||||
|
class UrllibResponseAdapter(Response):
|
||||||
|
"""
|
||||||
|
HTTP Response adapter class for urllib addinfourl and http.client.HTTPResponse
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
|
||||||
|
# addinfourl: In Python 3.9+, .status was introduced and .getcode() was deprecated [1]
|
||||||
|
# HTTPResponse: .getcode() was deprecated, .status always existed [2]
|
||||||
|
# 1. https://docs.python.org/3/library/urllib.request.html#urllib.response.addinfourl.getcode
|
||||||
|
# 2. https://docs.python.org/3.10/library/http.client.html#http.client.HTTPResponse.status
|
||||||
|
super().__init__(
|
||||||
|
fp=res, headers=res.headers, url=res.url,
|
||||||
|
status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
|
||||||
|
|
||||||
|
def read(self, amt=None):
|
||||||
|
try:
|
||||||
|
return self.fp.read(amt)
|
||||||
|
except Exception as e:
|
||||||
|
handle_response_read_exceptions(e)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
def handle_sslerror(e: ssl.SSLError):
|
||||||
|
if not isinstance(e, ssl.SSLError):
|
||||||
|
return
|
||||||
|
if isinstance(e, ssl.SSLCertVerificationError):
|
||||||
|
raise CertificateVerifyError(cause=e) from e
|
||||||
|
raise SSLError(cause=e) from e
|
||||||
|
|
||||||
|
|
||||||
|
def handle_response_read_exceptions(e):
|
||||||
|
if isinstance(e, http.client.IncompleteRead):
|
||||||
|
raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e
|
||||||
|
elif isinstance(e, ssl.SSLError):
|
||||||
|
handle_sslerror(e)
|
||||||
|
elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)):
|
||||||
|
# OSErrors raised here should mostly be network related
|
||||||
|
raise TransportError(cause=e) from e
|
||||||
|
|
||||||
|
|
||||||
|
@register
|
||||||
|
class UrllibRH(RequestHandler, InstanceStoreMixin):
|
||||||
|
_SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
|
||||||
|
_SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
|
||||||
|
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
|
||||||
|
RH_NAME = 'urllib'
|
||||||
|
|
||||||
|
def __init__(self, *, enable_file_urls: bool = False, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self.enable_file_urls = enable_file_urls
|
||||||
|
if self.enable_file_urls:
|
||||||
|
self._SUPPORTED_URL_SCHEMES = (*self._SUPPORTED_URL_SCHEMES, 'file')
|
||||||
|
|
||||||
|
def _create_instance(self, proxies, cookiejar):
|
||||||
|
opener = urllib.request.OpenerDirector()
|
||||||
|
handlers = [
|
||||||
|
ProxyHandler(proxies),
|
||||||
|
HTTPHandler(
|
||||||
|
debuglevel=int(bool(self.verbose)),
|
||||||
|
context=self._make_sslcontext(),
|
||||||
|
source_address=self.source_address),
|
||||||
|
HTTPCookieProcessor(cookiejar),
|
||||||
|
DataHandler(),
|
||||||
|
UnknownHandler(),
|
||||||
|
HTTPDefaultErrorHandler(),
|
||||||
|
FTPHandler(),
|
||||||
|
HTTPErrorProcessor(),
|
||||||
|
RedirectHandler(),
|
||||||
|
]
|
||||||
|
|
||||||
|
if self.enable_file_urls:
|
||||||
|
handlers.append(FileHandler())
|
||||||
|
|
||||||
|
for handler in handlers:
|
||||||
|
opener.add_handler(handler)
|
||||||
|
|
||||||
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
|
||||||
|
opener.addheaders = []
|
||||||
|
return opener
|
||||||
|
|
||||||
|
def _send(self, request):
|
||||||
|
headers = self._merge_headers(request.headers)
|
||||||
|
add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
|
||||||
|
urllib_req = urllib.request.Request(
|
||||||
|
url=request.url,
|
||||||
|
data=request.data,
|
||||||
|
headers=dict(headers),
|
||||||
|
method=request.method
|
||||||
|
)
|
||||||
|
|
||||||
|
opener = self._get_instance(
|
||||||
|
proxies=request.proxies or self.proxies,
|
||||||
|
cookiejar=request.extensions.get('cookiejar') or self.cookiejar
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
res = opener.open(urllib_req, timeout=float(request.extensions.get('timeout') or self.timeout))
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
|
||||||
|
# Prevent file object from being closed when urllib.error.HTTPError is destroyed.
|
||||||
|
e._closer.file = None
|
||||||
|
raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
|
||||||
|
raise # unexpected
|
||||||
|
except urllib.error.URLError as e:
|
||||||
|
cause = e.reason # NOTE: cause may be a string
|
||||||
|
|
||||||
|
# proxy errors
|
||||||
|
if 'tunnel connection failed' in str(cause).lower() or isinstance(cause, SocksProxyError):
|
||||||
|
raise ProxyError(cause=e) from e
|
||||||
|
|
||||||
|
handle_response_read_exceptions(cause)
|
||||||
|
raise TransportError(cause=e) from e
|
||||||
|
except (http.client.InvalidURL, ValueError) as e:
|
||||||
|
# Validation errors
|
||||||
|
# http.client.HTTPConnection raises ValueError in some validation cases
|
||||||
|
# such as if request method contains illegal control characters [1]
|
||||||
|
# 1. https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
|
||||||
|
raise RequestError(cause=e) from e
|
||||||
|
except Exception as e:
|
||||||
|
handle_response_read_exceptions(e)
|
||||||
|
raise # unexpected
|
||||||
|
|
||||||
|
return UrllibResponseAdapter(res)
|
||||||
|
|
|
@ -0,0 +1,522 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import abc
|
||||||
|
import copy
|
||||||
|
import enum
|
||||||
|
import functools
|
||||||
|
import io
|
||||||
|
import typing
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
import urllib.response
|
||||||
|
from collections.abc import Iterable, Mapping
|
||||||
|
from email.message import Message
|
||||||
|
from http import HTTPStatus
|
||||||
|
from http.cookiejar import CookieJar
|
||||||
|
|
||||||
|
from ._helper import make_ssl_context, wrap_request_errors
|
||||||
|
from .exceptions import (
|
||||||
|
NoSupportingHandlers,
|
||||||
|
RequestError,
|
||||||
|
TransportError,
|
||||||
|
UnsupportedRequest,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
bug_reports_message,
|
||||||
|
classproperty,
|
||||||
|
error_to_str,
|
||||||
|
escape_url,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
from ..utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
|
if typing.TYPE_CHECKING:
|
||||||
|
RequestData = bytes | Iterable[bytes] | typing.IO | None
|
||||||
|
|
||||||
|
|
||||||
|
class RequestDirector:
|
||||||
|
"""RequestDirector class
|
||||||
|
|
||||||
|
Helper class that, when given a request, forward it to a RequestHandler that supports it.
|
||||||
|
|
||||||
|
@param logger: Logger instance.
|
||||||
|
@param verbose: Print debug request information to stdout.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, logger, verbose=False):
|
||||||
|
self.handlers: dict[str, RequestHandler] = {}
|
||||||
|
self.logger = logger # TODO(Grub4k): default logger
|
||||||
|
self.verbose = verbose
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
for handler in self.handlers.values():
|
||||||
|
handler.close()
|
||||||
|
|
||||||
|
def add_handler(self, handler: RequestHandler):
|
||||||
|
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
|
||||||
|
assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
|
||||||
|
self.handlers[handler.RH_KEY] = handler
|
||||||
|
|
||||||
|
def _print_verbose(self, msg):
|
||||||
|
if self.verbose:
|
||||||
|
self.logger.stdout(f'director: {msg}')
|
||||||
|
|
||||||
|
def send(self, request: Request) -> Response:
|
||||||
|
"""
|
||||||
|
Passes a request onto a suitable RequestHandler
|
||||||
|
"""
|
||||||
|
if not self.handlers:
|
||||||
|
raise RequestError('No request handlers configured')
|
||||||
|
|
||||||
|
assert isinstance(request, Request)
|
||||||
|
|
||||||
|
unexpected_errors = []
|
||||||
|
unsupported_errors = []
|
||||||
|
# TODO (future): add a per-request preference system
|
||||||
|
for handler in reversed(list(self.handlers.values())):
|
||||||
|
self._print_verbose(f'Checking if "{handler.RH_NAME}" supports this request.')
|
||||||
|
try:
|
||||||
|
handler.validate(request)
|
||||||
|
except UnsupportedRequest as e:
|
||||||
|
self._print_verbose(
|
||||||
|
f'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
|
||||||
|
unsupported_errors.append(e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
|
||||||
|
try:
|
||||||
|
response = handler.send(request)
|
||||||
|
except RequestError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(
|
||||||
|
f'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
|
||||||
|
is_error=False)
|
||||||
|
unexpected_errors.append(e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
assert isinstance(response, Response)
|
||||||
|
return response
|
||||||
|
|
||||||
|
raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
|
||||||
|
|
||||||
|
|
||||||
|
_REQUEST_HANDLERS = {}
|
||||||
|
|
||||||
|
|
||||||
|
def register(handler):
|
||||||
|
"""Register a RequestHandler class"""
|
||||||
|
assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
|
||||||
|
assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
|
||||||
|
_REQUEST_HANDLERS[handler.RH_KEY] = handler
|
||||||
|
return handler
|
||||||
|
|
||||||
|
|
||||||
|
class Features(enum.Enum):
|
||||||
|
ALL_PROXY = enum.auto()
|
||||||
|
NO_PROXY = enum.auto()
|
||||||
|
|
||||||
|
|
||||||
|
class RequestHandler(abc.ABC):
|
||||||
|
|
||||||
|
"""Request Handler class
|
||||||
|
|
||||||
|
Request handlers are class that, given a Request,
|
||||||
|
process the request from start to finish and return a Response.
|
||||||
|
|
||||||
|
Concrete subclasses need to redefine the _send(request) method,
|
||||||
|
which handles the underlying request logic and returns a Response.
|
||||||
|
|
||||||
|
RH_NAME class variable may contain a display name for the RequestHandler.
|
||||||
|
By default, this is generated from the class name.
|
||||||
|
|
||||||
|
The concrete request handler MUST have "RH" as the suffix in the class name.
|
||||||
|
|
||||||
|
All exceptions raised by a RequestHandler should be an instance of RequestError.
|
||||||
|
Any other exception raised will be treated as a handler issue.
|
||||||
|
|
||||||
|
If a Request is not supported by the handler, an UnsupportedRequest
|
||||||
|
should be raised with a reason.
|
||||||
|
|
||||||
|
By default, some checks are done on the request in _validate() based on the following class variables:
|
||||||
|
- `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
|
||||||
|
Any Request with an url scheme not in this list will raise an UnsupportedRequest.
|
||||||
|
|
||||||
|
- `_SUPPORTED_PROXY_SCHEMES`: a tuple of support proxy url schemes. Any Request that contains
|
||||||
|
a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
|
||||||
|
|
||||||
|
- `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
|
||||||
|
The above may be set to None to disable the checks.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
@param logger: logger instance
|
||||||
|
@param headers: HTTP Headers to include when sending requests.
|
||||||
|
@param cookiejar: Cookiejar to use for requests.
|
||||||
|
@param timeout: Socket timeout to use when sending requests.
|
||||||
|
@param proxies: Proxies to use for sending requests.
|
||||||
|
@param source_address: Client-side IP address to bind to for requests.
|
||||||
|
@param verbose: Print debug request and traffic information to stdout.
|
||||||
|
@param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
|
||||||
|
@param client_cert: SSL client certificate configuration.
|
||||||
|
dict with {client_certificate, client_certificate_key, client_certificate_password}
|
||||||
|
@param verify: Verify SSL certificates
|
||||||
|
@param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
|
||||||
|
|
||||||
|
Some configuration options may be available for individual Requests too. In this case,
|
||||||
|
either the Request configuration option takes precedence or they are merged.
|
||||||
|
|
||||||
|
Requests may have additional optional parameters defined as extensions.
|
||||||
|
RequestHandler subclasses may choose to support custom extensions.
|
||||||
|
|
||||||
|
The following extensions are defined for RequestHandler:
|
||||||
|
- `cookiejar`: Cookiejar to use for this request
|
||||||
|
- `timeout`: socket timeout to use for this request
|
||||||
|
|
||||||
|
Apart from the url protocol, proxies dict may contain the following keys:
|
||||||
|
- `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
|
||||||
|
- `no`: comma seperated list of hostnames (optionally with port) to not use a proxy for.
|
||||||
|
Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
_SUPPORTED_URL_SCHEMES = ()
|
||||||
|
_SUPPORTED_PROXY_SCHEMES = ()
|
||||||
|
_SUPPORTED_FEATURES = ()
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self, *,
|
||||||
|
logger, # TODO(Grub4k): default logger
|
||||||
|
headers: HTTPHeaderDict = None,
|
||||||
|
cookiejar: CookieJar = None,
|
||||||
|
timeout: float | int | None = None,
|
||||||
|
proxies: dict = None,
|
||||||
|
source_address: str = None,
|
||||||
|
verbose: bool = False,
|
||||||
|
prefer_system_certs: bool = False,
|
||||||
|
client_cert: dict[str, str | None] = None,
|
||||||
|
verify: bool = True,
|
||||||
|
legacy_ssl_support: bool = False,
|
||||||
|
**_,
|
||||||
|
):
|
||||||
|
|
||||||
|
self._logger = logger
|
||||||
|
self.headers = headers or {}
|
||||||
|
self.cookiejar = cookiejar if cookiejar is not None else CookieJar()
|
||||||
|
self.timeout = float(timeout or 20)
|
||||||
|
self.proxies = proxies or {}
|
||||||
|
self.source_address = source_address
|
||||||
|
self.verbose = verbose
|
||||||
|
self.prefer_system_certs = prefer_system_certs
|
||||||
|
self._client_cert = client_cert or {}
|
||||||
|
self.verify = verify
|
||||||
|
self.legacy_ssl_support = legacy_ssl_support
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def _make_sslcontext(self):
|
||||||
|
return make_ssl_context(
|
||||||
|
verify=self.verify,
|
||||||
|
legacy_support=self.legacy_ssl_support,
|
||||||
|
use_certifi=not self.prefer_system_certs,
|
||||||
|
**self._client_cert,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _merge_headers(self, request_headers):
|
||||||
|
return HTTPHeaderDict(self.headers, request_headers)
|
||||||
|
|
||||||
|
def _check_url_scheme(self, request: Request):
|
||||||
|
scheme = urllib.parse.urlparse(request.url).scheme.lower()
|
||||||
|
if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
|
||||||
|
raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
|
||||||
|
return scheme # for further processing
|
||||||
|
|
||||||
|
def _check_proxies(self, proxies):
|
||||||
|
for proxy_key, proxy_url in proxies.items():
|
||||||
|
if proxy_url is None:
|
||||||
|
continue
|
||||||
|
if proxy_key == 'no':
|
||||||
|
if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
|
||||||
|
raise UnsupportedRequest('"no" proxy is not supported')
|
||||||
|
continue
|
||||||
|
if (
|
||||||
|
proxy_key == 'all'
|
||||||
|
and self._SUPPORTED_FEATURES is not None
|
||||||
|
and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
|
||||||
|
):
|
||||||
|
raise UnsupportedRequest('"all" proxy is not supported')
|
||||||
|
|
||||||
|
# Unlikely this handler will use this proxy, so ignore.
|
||||||
|
# This is to allow a case where a proxy may be set for a protocol
|
||||||
|
# for one handler in which such protocol (and proxy) is not supported by another handler.
|
||||||
|
if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self._SUPPORTED_PROXY_SCHEMES is None:
|
||||||
|
# Skip proxy scheme checks
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Scheme-less proxies are not supported
|
||||||
|
if urllib.request._parse_proxy(proxy_url)[0] is None:
|
||||||
|
raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
|
||||||
|
|
||||||
|
scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
|
||||||
|
if scheme not in self._SUPPORTED_PROXY_SCHEMES:
|
||||||
|
raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')
|
||||||
|
|
||||||
|
def _check_cookiejar_extension(self, extensions):
|
||||||
|
if not extensions.get('cookiejar'):
|
||||||
|
return
|
||||||
|
if not isinstance(extensions['cookiejar'], CookieJar):
|
||||||
|
raise UnsupportedRequest('cookiejar is not a CookieJar')
|
||||||
|
|
||||||
|
def _check_timeout_extension(self, extensions):
|
||||||
|
if extensions.get('timeout') is None:
|
||||||
|
return
|
||||||
|
if not isinstance(extensions['timeout'], (float, int)):
|
||||||
|
raise UnsupportedRequest('timeout is not a float or int')
|
||||||
|
|
||||||
|
def _check_extensions(self, extensions):
|
||||||
|
self._check_cookiejar_extension(extensions)
|
||||||
|
self._check_timeout_extension(extensions)
|
||||||
|
|
||||||
|
def _validate(self, request):
|
||||||
|
self._check_url_scheme(request)
|
||||||
|
self._check_proxies(request.proxies or self.proxies)
|
||||||
|
self._check_extensions(request.extensions)
|
||||||
|
|
||||||
|
@wrap_request_errors
|
||||||
|
def validate(self, request: Request):
|
||||||
|
if not isinstance(request, Request):
|
||||||
|
raise TypeError('Expected an instance of Request')
|
||||||
|
self._validate(request)
|
||||||
|
|
||||||
|
@wrap_request_errors
|
||||||
|
def send(self, request: Request) -> Response:
|
||||||
|
if not isinstance(request, Request):
|
||||||
|
raise TypeError('Expected an instance of Request')
|
||||||
|
return self._send(request)
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def _send(self, request: Request):
|
||||||
|
"""Handle a request from start to finish. Redefine in subclasses."""
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@classproperty
|
||||||
|
def RH_NAME(cls):
|
||||||
|
return cls.__name__[:-2]
|
||||||
|
|
||||||
|
@classproperty
|
||||||
|
def RH_KEY(cls):
|
||||||
|
assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
|
||||||
|
return cls.__name__[:-2]
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
self.close()
|
||||||
|
|
||||||
|
|
||||||
|
class Request:
|
||||||
|
"""
|
||||||
|
Represents a request to be made.
|
||||||
|
Partially backwards-compatible with urllib.request.Request.
|
||||||
|
|
||||||
|
@param url: url to send. Will be sanitized.
|
||||||
|
@param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
|
||||||
|
@param headers: headers to send.
|
||||||
|
@param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
|
||||||
|
@param query: URL query parameters to update the url with.
|
||||||
|
@param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
|
||||||
|
@param extensions: Dictionary of Request extensions to add, as supported by handlers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
data: RequestData = None,
|
||||||
|
headers: typing.Mapping = None,
|
||||||
|
proxies: dict = None,
|
||||||
|
query: dict = None,
|
||||||
|
method: str = None,
|
||||||
|
extensions: dict = None
|
||||||
|
):
|
||||||
|
|
||||||
|
self._headers = HTTPHeaderDict()
|
||||||
|
self._data = None
|
||||||
|
|
||||||
|
if query:
|
||||||
|
url = update_url_query(url, query)
|
||||||
|
|
||||||
|
self.url = url
|
||||||
|
self.method = method
|
||||||
|
if headers:
|
||||||
|
self.headers = headers
|
||||||
|
self.data = data # note: must be done after setting headers
|
||||||
|
self.proxies = proxies or {}
|
||||||
|
self.extensions = extensions or {}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def url(self):
|
||||||
|
return self._url
|
||||||
|
|
||||||
|
@url.setter
|
||||||
|
def url(self, url):
|
||||||
|
if not isinstance(url, str):
|
||||||
|
raise TypeError('url must be a string')
|
||||||
|
elif url.startswith('//'):
|
||||||
|
url = 'http:' + url
|
||||||
|
self._url = escape_url(url)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def method(self):
|
||||||
|
return self._method or ('POST' if self.data is not None else 'GET')
|
||||||
|
|
||||||
|
@method.setter
|
||||||
|
def method(self, method):
|
||||||
|
if method is None:
|
||||||
|
self._method = None
|
||||||
|
elif isinstance(method, str):
|
||||||
|
self._method = method.upper()
|
||||||
|
else:
|
||||||
|
raise TypeError('method must be a string')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def data(self):
|
||||||
|
return self._data
|
||||||
|
|
||||||
|
@data.setter
|
||||||
|
def data(self, data: RequestData):
|
||||||
|
# Try catch some common mistakes
|
||||||
|
if data is not None and (
|
||||||
|
not isinstance(data, (bytes, io.IOBase, Iterable)) or isinstance(data, (str, Mapping))
|
||||||
|
):
|
||||||
|
raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
|
||||||
|
|
||||||
|
if data == self._data and self._data is None:
|
||||||
|
self.headers.pop('Content-Length', None)
|
||||||
|
|
||||||
|
# https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
|
||||||
|
if data != self._data:
|
||||||
|
if self._data is not None:
|
||||||
|
self.headers.pop('Content-Length', None)
|
||||||
|
self._data = data
|
||||||
|
|
||||||
|
if self._data is None:
|
||||||
|
self.headers.pop('Content-Type', None)
|
||||||
|
|
||||||
|
if 'Content-Type' not in self.headers and self._data is not None:
|
||||||
|
self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def headers(self) -> HTTPHeaderDict:
|
||||||
|
return self._headers
|
||||||
|
|
||||||
|
@headers.setter
|
||||||
|
def headers(self, new_headers: Mapping):
|
||||||
|
"""Replaces headers of the request. If not a CaseInsensitiveDict, it will be converted to one."""
|
||||||
|
if isinstance(new_headers, HTTPHeaderDict):
|
||||||
|
self._headers = new_headers
|
||||||
|
elif isinstance(new_headers, Mapping):
|
||||||
|
self._headers = HTTPHeaderDict(new_headers)
|
||||||
|
else:
|
||||||
|
raise TypeError('headers must be a mapping')
|
||||||
|
|
||||||
|
def update(self, url=None, data=None, headers=None, query=None):
|
||||||
|
self.data = data or self.data
|
||||||
|
self.headers.update(headers or {})
|
||||||
|
self.url = update_url_query(url or self.url, query or {})
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
return self.__class__(
|
||||||
|
url=self.url,
|
||||||
|
headers=copy.deepcopy(self.headers),
|
||||||
|
proxies=copy.deepcopy(self.proxies),
|
||||||
|
data=self._data,
|
||||||
|
extensions=copy.copy(self.extensions),
|
||||||
|
method=self._method,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
HEADRequest = functools.partial(Request, method='HEAD')
|
||||||
|
PUTRequest = functools.partial(Request, method='PUT')
|
||||||
|
|
||||||
|
|
||||||
|
class Response(io.IOBase):
|
||||||
|
"""
|
||||||
|
Base class for HTTP response adapters.
|
||||||
|
|
||||||
|
By default, it provides a basic wrapper for a file-like response object.
|
||||||
|
|
||||||
|
Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
|
||||||
|
|
||||||
|
@param fp: Original, file-like, response.
|
||||||
|
@param url: URL that this is a response of.
|
||||||
|
@param headers: response headers.
|
||||||
|
@param status: Response HTTP status code. Default is 200 OK.
|
||||||
|
@param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
fp: typing.IO,
|
||||||
|
url: str,
|
||||||
|
headers: Mapping[str, str],
|
||||||
|
status: int = 200,
|
||||||
|
reason: str = None):
|
||||||
|
|
||||||
|
self.fp = fp
|
||||||
|
self.headers = Message()
|
||||||
|
for name, value in headers.items():
|
||||||
|
self.headers.add_header(name, value)
|
||||||
|
self.status = status
|
||||||
|
self.url = url
|
||||||
|
try:
|
||||||
|
self.reason = reason or HTTPStatus(status).phrase
|
||||||
|
except ValueError:
|
||||||
|
self.reason = None
|
||||||
|
|
||||||
|
def readable(self):
|
||||||
|
return self.fp.readable()
|
||||||
|
|
||||||
|
def read(self, amt: int = None) -> bytes:
|
||||||
|
# Expected errors raised here should be of type RequestError or subclasses.
|
||||||
|
# Subclasses should redefine this method with more precise error handling.
|
||||||
|
try:
|
||||||
|
return self.fp.read(amt)
|
||||||
|
except Exception as e:
|
||||||
|
raise TransportError(cause=e) from e
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self.fp.close()
|
||||||
|
return super().close()
|
||||||
|
|
||||||
|
def get_header(self, name, default=None):
|
||||||
|
"""Get header for name.
|
||||||
|
If there are multiple matching headers, return all seperated by comma."""
|
||||||
|
headers = self.headers.get_all(name)
|
||||||
|
if not headers:
|
||||||
|
return default
|
||||||
|
if name.title() == 'Set-Cookie':
|
||||||
|
# Special case, only get the first one
|
||||||
|
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
|
||||||
|
return headers[0]
|
||||||
|
return ', '.join(headers)
|
||||||
|
|
||||||
|
# The following methods are for compatability reasons and are deprecated
|
||||||
|
@property
|
||||||
|
def code(self):
|
||||||
|
return self.status
|
||||||
|
|
||||||
|
def getcode(self):
|
||||||
|
return self.status
|
||||||
|
|
||||||
|
def geturl(self):
|
||||||
|
return self.url
|
||||||
|
|
||||||
|
def info(self):
|
||||||
|
return self.headers
|
||||||
|
|
||||||
|
def getheader(self, name, default=None):
|
||||||
|
return self.get_header(name, default)
|
|
@ -1,9 +1,197 @@
|
||||||
import http.client
|
from __future__ import annotations
|
||||||
import socket
|
|
||||||
import ssl
|
import typing
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
|
||||||
network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
|
from ..utils import YoutubeDLError
|
||||||
if hasattr(ssl, 'CertificateError'):
|
|
||||||
network_exceptions.append(ssl.CertificateError)
|
if typing.TYPE_CHECKING:
|
||||||
network_exceptions = tuple(network_exceptions)
|
from .common import RequestHandler, Response
|
||||||
|
|
||||||
|
|
||||||
|
class RequestError(YoutubeDLError):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
msg: str | None = None,
|
||||||
|
cause: Exception | str | None = None,
|
||||||
|
handler: RequestHandler = None
|
||||||
|
):
|
||||||
|
self.handler = handler
|
||||||
|
self.cause = cause
|
||||||
|
if not msg and cause:
|
||||||
|
msg = str(cause)
|
||||||
|
super().__init__(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedRequest(RequestError):
|
||||||
|
"""raised when a handler cannot handle a request"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class NoSupportingHandlers(RequestError):
|
||||||
|
"""raised when no handlers can support a request for various reasons"""
|
||||||
|
|
||||||
|
def __init__(self, unsupported_errors: list[UnsupportedRequest], unexpected_errors: list[Exception]):
|
||||||
|
self.unsupported_errors = unsupported_errors or []
|
||||||
|
self.unexpected_errors = unexpected_errors or []
|
||||||
|
|
||||||
|
# Print a quick summary of the errors
|
||||||
|
err_handler_map = {}
|
||||||
|
for err in unsupported_errors:
|
||||||
|
err_handler_map.setdefault(err.msg, []).append(err.handler.RH_NAME)
|
||||||
|
|
||||||
|
reason_str = ', '.join([f'{msg} ({", ".join(handlers)})' for msg, handlers in err_handler_map.items()])
|
||||||
|
if unexpected_errors:
|
||||||
|
reason_str = ' + '.join(filter(None, [reason_str, f'{len(unexpected_errors)} unexpected error(s)']))
|
||||||
|
|
||||||
|
err_str = 'Unable to handle request'
|
||||||
|
if reason_str:
|
||||||
|
err_str += f': {reason_str}'
|
||||||
|
|
||||||
|
super().__init__(msg=err_str)
|
||||||
|
|
||||||
|
|
||||||
|
class TransportError(RequestError):
|
||||||
|
"""Network related errors"""
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPError(RequestError):
|
||||||
|
def __init__(self, response: Response, redirect_loop=False):
|
||||||
|
self.response = response
|
||||||
|
self.status = response.status
|
||||||
|
self.reason = response.reason
|
||||||
|
self.redirect_loop = redirect_loop
|
||||||
|
msg = f'HTTP Error {response.status}: {response.reason}'
|
||||||
|
if redirect_loop:
|
||||||
|
msg += ' (redirect loop detected)'
|
||||||
|
|
||||||
|
super().__init__(msg=msg)
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self.response.close()
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f'<HTTPError {self.status}: {self.reason}>'
|
||||||
|
|
||||||
|
|
||||||
|
class IncompleteRead(TransportError):
|
||||||
|
def __init__(self, partial, expected=None, **kwargs):
|
||||||
|
self.partial = partial
|
||||||
|
self.expected = expected
|
||||||
|
msg = f'{len(partial)} bytes read'
|
||||||
|
if expected is not None:
|
||||||
|
msg += f', {expected} more expected'
|
||||||
|
|
||||||
|
super().__init__(msg=msg, **kwargs)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f'<IncompleteRead: {self.msg}>'
|
||||||
|
|
||||||
|
|
||||||
|
class SSLError(TransportError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class CertificateVerifyError(SSLError):
|
||||||
|
"""Raised when certificate validated has failed"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ProxyError(TransportError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class _CompatHTTPError(urllib.error.HTTPError, HTTPError):
|
||||||
|
"""
|
||||||
|
Provides backwards compatibility with urllib.error.HTTPError.
|
||||||
|
Do not use this class directly, use HTTPError instead.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, http_error: HTTPError):
|
||||||
|
super().__init__(
|
||||||
|
url=http_error.response.url,
|
||||||
|
code=http_error.status,
|
||||||
|
msg=http_error.msg,
|
||||||
|
hdrs=http_error.response.headers,
|
||||||
|
fp=http_error.response
|
||||||
|
)
|
||||||
|
self._closer.file = None # Disable auto close
|
||||||
|
self._http_error = http_error
|
||||||
|
HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def status(self):
|
||||||
|
return self._http_error.status
|
||||||
|
|
||||||
|
@status.setter
|
||||||
|
def status(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
@property
|
||||||
|
def reason(self):
|
||||||
|
return self._http_error.reason
|
||||||
|
|
||||||
|
@reason.setter
|
||||||
|
def reason(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
@property
|
||||||
|
def headers(self):
|
||||||
|
return self._http_error.response.headers
|
||||||
|
|
||||||
|
@headers.setter
|
||||||
|
def headers(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
def info(self):
|
||||||
|
return self.response.headers
|
||||||
|
|
||||||
|
def getcode(self):
|
||||||
|
return self.status
|
||||||
|
|
||||||
|
def geturl(self):
|
||||||
|
return self.response.url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def code(self):
|
||||||
|
return self.status
|
||||||
|
|
||||||
|
@code.setter
|
||||||
|
def code(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
@property
|
||||||
|
def url(self):
|
||||||
|
return self.response.url
|
||||||
|
|
||||||
|
@url.setter
|
||||||
|
def url(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hdrs(self):
|
||||||
|
return self.response.headers
|
||||||
|
|
||||||
|
@hdrs.setter
|
||||||
|
def hdrs(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
@property
|
||||||
|
def filename(self):
|
||||||
|
return self.response.url
|
||||||
|
|
||||||
|
@filename.setter
|
||||||
|
def filename(self, value):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __getattr__(self, name):
|
||||||
|
return super().__getattr__(name)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return str(self._http_error)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return repr(self._http_error)
|
||||||
|
|
||||||
|
|
||||||
|
network_exceptions = (HTTPError, TransportError)
|
||||||
|
|
|
@ -10,16 +10,16 @@ del passthrough_module
|
||||||
|
|
||||||
|
|
||||||
from ._utils import preferredencoding
|
from ._utils import preferredencoding
|
||||||
|
from ..networking._urllib import HTTPHandler
|
||||||
|
|
||||||
# isort: split
|
# isort: split
|
||||||
|
from .networking import random_user_agent, std_headers # noqa: F401
|
||||||
from ..networking._urllib import PUTRequest # noqa: F401
|
from ..networking._urllib import PUTRequest # noqa: F401
|
||||||
from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest # noqa: F401
|
from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest # noqa: F401
|
||||||
from ..networking._urllib import HTTPHandler as YoutubeDLHandler # noqa: F401
|
|
||||||
from ..networking._urllib import ProxyHandler as PerRequestProxyHandler # noqa: F401
|
from ..networking._urllib import ProxyHandler as PerRequestProxyHandler # noqa: F401
|
||||||
from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler # noqa: F401
|
from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler # noqa: F401
|
||||||
from ..networking._urllib import make_socks_conn_class, update_Request # noqa: F401
|
from ..networking._urllib import make_socks_conn_class, update_Request # noqa: F401
|
||||||
from ..networking.exceptions import network_exceptions # noqa: F401
|
from ..networking.exceptions import network_exceptions # noqa: F401
|
||||||
from .networking import random_user_agent, std_headers # noqa: F401
|
|
||||||
|
|
||||||
|
|
||||||
def encodeFilename(s, for_subprocess=False):
|
def encodeFilename(s, for_subprocess=False):
|
||||||
|
@ -47,3 +47,12 @@ def decodeOption(optval):
|
||||||
|
|
||||||
def error_to_compat_str(err):
|
def error_to_compat_str(err):
|
||||||
return str(err)
|
return str(err)
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLHandler(HTTPHandler):
|
||||||
|
def __init__(self, params, *args, **kwargs):
|
||||||
|
self._params = params
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
YoutubeDLHTTPSHandler = YoutubeDLHandler
|
||||||
|
|
|
@ -15,8 +15,6 @@ import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import html.entities
|
import html.entities
|
||||||
import html.parser
|
import html.parser
|
||||||
import http.client
|
|
||||||
import http.cookiejar
|
|
||||||
import inspect
|
import inspect
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -897,6 +895,7 @@ def formatSeconds(secs, delim=':', msec=False):
|
||||||
|
|
||||||
|
|
||||||
def make_HTTPS_handler(params, **kwargs):
|
def make_HTTPS_handler(params, **kwargs):
|
||||||
|
from ._deprecated import YoutubeDLHTTPSHandler
|
||||||
from ..networking._helper import make_ssl_context
|
from ..networking._helper import make_ssl_context
|
||||||
return YoutubeDLHTTPSHandler(params, context=make_ssl_context(
|
return YoutubeDLHTTPSHandler(params, context=make_ssl_context(
|
||||||
verify=not params.get('nocheckcertificate'),
|
verify=not params.get('nocheckcertificate'),
|
||||||
|
@ -1140,38 +1139,6 @@ class XAttrUnavailableError(YoutubeDLError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
|
|
||||||
def __init__(self, params, https_conn_class=None, *args, **kwargs):
|
|
||||||
urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
|
|
||||||
self._https_conn_class = https_conn_class or http.client.HTTPSConnection
|
|
||||||
self._params = params
|
|
||||||
|
|
||||||
def https_open(self, req):
|
|
||||||
kwargs = {}
|
|
||||||
conn_class = self._https_conn_class
|
|
||||||
|
|
||||||
if hasattr(self, '_context'): # python > 2.6
|
|
||||||
kwargs['context'] = self._context
|
|
||||||
if hasattr(self, '_check_hostname'): # python 3.x
|
|
||||||
kwargs['check_hostname'] = self._check_hostname
|
|
||||||
|
|
||||||
socks_proxy = req.headers.get('Ytdl-socks-proxy')
|
|
||||||
if socks_proxy:
|
|
||||||
from ..networking._urllib import make_socks_conn_class
|
|
||||||
conn_class = make_socks_conn_class(conn_class, socks_proxy)
|
|
||||||
del req.headers['Ytdl-socks-proxy']
|
|
||||||
|
|
||||||
from ..networking._urllib import _create_http_connection
|
|
||||||
try:
|
|
||||||
return self.do_open(
|
|
||||||
functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
|
|
||||||
except urllib.error.URLError as e:
|
|
||||||
if (isinstance(e.reason, ssl.SSLError)
|
|
||||||
and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
|
|
||||||
raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def is_path_like(f):
|
def is_path_like(f):
|
||||||
return isinstance(f, (str, bytes, os.PathLike))
|
return isinstance(f, (str, bytes, os.PathLike))
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,9 @@
|
||||||
|
import collections
|
||||||
import random
|
import random
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from ._utils import remove_start
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent():
|
def random_user_agent():
|
||||||
|
@ -46,15 +51,67 @@ def random_user_agent():
|
||||||
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
|
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
|
||||||
|
|
||||||
|
|
||||||
std_headers = {
|
class HTTPHeaderDict(collections.UserDict, dict):
|
||||||
|
"""
|
||||||
|
Store and access keys case-insensitively.
|
||||||
|
The constructor can take multiple dicts, in which keys in the latter are prioritised.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__()
|
||||||
|
for dct in args:
|
||||||
|
if dct is not None:
|
||||||
|
self.update(dct)
|
||||||
|
self.update(kwargs)
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
super().__setitem__(key.title(), str(value))
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
return super().__getitem__(key.title())
|
||||||
|
|
||||||
|
def __delitem__(self, key):
|
||||||
|
super().__delitem__(key.title())
|
||||||
|
|
||||||
|
def __contains__(self, key):
|
||||||
|
return super().__contains__(key.title() if isinstance(key, str) else key)
|
||||||
|
|
||||||
|
|
||||||
|
std_headers = HTTPHeaderDict({
|
||||||
'User-Agent': random_user_agent(),
|
'User-Agent': random_user_agent(),
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Language': 'en-us,en;q=0.5',
|
'Accept-Language': 'en-us,en;q=0.5',
|
||||||
'Sec-Fetch-Mode': 'navigate',
|
'Sec-Fetch-Mode': 'navigate',
|
||||||
}
|
})
|
||||||
|
|
||||||
|
|
||||||
def clean_headers(headers):
|
def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
|
||||||
if 'Youtubedl-no-compression' in headers: # compat
|
req_proxy = headers.pop('Ytdl-Request-Proxy', None)
|
||||||
del headers['Youtubedl-no-compression']
|
if req_proxy:
|
||||||
|
proxies.clear() # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
|
||||||
|
proxies['all'] = req_proxy
|
||||||
|
for proxy_key, proxy_url in proxies.items():
|
||||||
|
if proxy_url == '__noproxy__':
|
||||||
|
proxies[proxy_key] = None
|
||||||
|
continue
|
||||||
|
if proxy_key == 'no': # special case
|
||||||
|
continue
|
||||||
|
if proxy_url is not None:
|
||||||
|
# Ensure proxies without a scheme are http.
|
||||||
|
proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
|
||||||
|
if proxy_scheme is None:
|
||||||
|
proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
|
||||||
|
|
||||||
|
replace_scheme = {
|
||||||
|
'socks5': 'socks5h', # compat: socks5 was treated as socks5h
|
||||||
|
'socks': 'socks4' # compat: non-standard
|
||||||
|
}
|
||||||
|
if proxy_scheme in replace_scheme:
|
||||||
|
proxies[proxy_key] = urllib.parse.urlunparse(
|
||||||
|
urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
|
||||||
|
|
||||||
|
|
||||||
|
def clean_headers(headers: HTTPHeaderDict):
|
||||||
|
if 'Youtubedl-No-Compression' in headers: # compat
|
||||||
|
del headers['Youtubedl-No-Compression']
|
||||||
headers['Accept-Encoding'] = 'identity'
|
headers['Accept-Encoding'] = 'identity'
|
||||||
|
|
Loading…
Reference in New Issue