From 4390d5ec12349e5b5bba30af6b4e7f08678af41a Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 9 Mar 2022 05:44:05 +1300 Subject: [PATCH] Add brotli content-encoding support (#2433) Authored by: coletdjnz --- README.md | 3 ++- pyinst.py | 2 +- requirements.txt | 2 ++ setup.py | 2 +- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/compat.py | 8 ++++++++ yt_dlp/utils.py | 21 ++++++++++++++++++++- 7 files changed, 36 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ce5af129e..81b5d417d 100644 --- a/README.md +++ b/README.md @@ -268,6 +268,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) @@ -284,7 +285,7 @@ The Windows and MacOS standalone release binaries are already built with the pyt ## COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. +To build the Windows executable, you must have pyinstaller (and any of yt-dlp's optional dependencies if needed). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. py -m pip install -U pyinstaller -r requirements.txt py devscripts/make_lazy_extractors.py diff --git a/pyinst.py b/pyinst.py index f135ec90d..ca115fd78 100644 --- a/pyinst.py +++ b/pyinst.py @@ -74,7 +74,7 @@ def version_to_list(version): def dependency_options(): - dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') + dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] yield from (f'--hidden-import={module}' for module in dependencies) diff --git a/requirements.txt b/requirements.txt index cecd08eae..cb0eece46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ mutagen pycryptodomex websockets +brotli; platform_python_implementation=='CPython' +brotlicffi; platform_python_implementation!='CPython' \ No newline at end of file diff --git a/setup.py b/setup.py index f08ae2309..3e599cd95 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ LONG_DESCRIPTION = '\n\n'.join(( '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', open('README.md', 'r', encoding='utf-8').read())) -REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] +REQUIREMENTS = open('requirements.txt').read().splitlines() if sys.argv[1:2] == ['py2exe']: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 57201b6dc..51a89bd23 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,6 +32,7 @@ from string import ascii_letters from .compat import ( compat_basestring, + compat_brotli, compat_get_terminal_size, compat_kwargs, compat_numeric_types, @@ -3675,6 +3676,7 @@ class YoutubeDL(object): from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE lib_str = join_nonempty( + compat_brotli and compat_brotli.__name__, compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], SECRETSTORAGE_AVAILABLE and 'secretstorage', has_mutagen and 'mutagen', diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 2bc6a6b7f..0a0d3b351 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -170,6 +170,13 @@ except ImportError: except ImportError: compat_pycrypto_AES = None +try: + import brotlicffi as compat_brotli +except ImportError: + try: + import brotli as compat_brotli + except ImportError: + compat_brotli = None WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None @@ -258,6 +265,7 @@ __all__ = [ 'compat_asyncio_run', 'compat_b64decode', 'compat_basestring', + 'compat_brotli', 'compat_chr', 'compat_collections_abc', 'compat_cookiejar', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9406eb834..f6e41f837 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_HTMLParser, compat_HTTPError, compat_basestring, + compat_brotli, compat_chr, compat_cookiejar, compat_ctypes_WINFUNCTYPE, @@ -143,10 +144,16 @@ def random_user_agent(): return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) +SUPPORTED_ENCODINGS = [ + 'gzip', 'deflate' +] +if compat_brotli: + SUPPORTED_ENCODINGS.append('br') + std_headers = { 'User-Agent': random_user_agent(), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': 'gzip, deflate', + 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS), 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate', } @@ -1357,6 +1364,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): except zlib.error: return zlib.decompress(data) + @staticmethod + def brotli(data): + if not data: + return data + return compat_brotli.decompress(data) + def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # always respected by websites, some tend to give out URLs with non percent-encoded @@ -1417,6 +1430,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg del resp.headers['Content-encoding'] + # brotli + if resp.headers.get('Content-encoding', '') == 'br': + resp = compat_urllib_request.addinfourl( + io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + del resp.headers['Content-encoding'] # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # https://github.com/ytdl-org/youtube-dl/issues/6457). if 300 <= resp.code < 400: