[downloader/http] Retry on more errors (#3065)

Closes #3056, #2071
Related: #3034, #2969
Authored-by: coletdjnz
This commit is contained in:
coletdev 2022-03-19 11:10:20 +13:00 committed by GitHub
parent 510809f1aa
commit a2e77303e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 14 additions and 26 deletions

View File

@ -1,8 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import errno
import os import os
import socket import ssl
import time import time
import random import random
@ -10,6 +9,7 @@ from .common import FileDownloader
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_http_client
) )
from ..utils import ( from ..utils import (
ContentTooShortError, ContentTooShortError,
@ -23,6 +23,8 @@ from ..utils import (
XAttrUnavailableError, XAttrUnavailableError,
) )
RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
@ -124,15 +126,8 @@ class HttpFD(FileDownloader):
if has_range: if has_range:
set_range(request, range_start, range_end) set_range(request, range_start, range_end)
# Establish connection # Establish connection
try:
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err:
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err)
raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers # to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range # that don't support resuming and serve a whole file with no Content-Range
@ -202,13 +197,14 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error # Unexpected HTTP error
raise raise
raise RetryDownload(err) raise RetryDownload(err)
except socket.timeout as err: except compat_urllib_error.URLError as err:
raise RetryDownload(err) if isinstance(err.reason, ssl.CertificateError):
except socket.error as err:
if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT):
# Connection reset is no problem, just retry
raise RetryDownload(err)
raise raise
raise RetryDownload(err)
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
# Any errors that occur during this will not be wrapped by URLError
except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err)
def download(): def download():
nonlocal throttle_start nonlocal throttle_start
@ -254,16 +250,8 @@ class HttpFD(FileDownloader):
try: try:
# Download and write # Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have except RESPONSE_READ_EXCEPTIONS as err:
# errno set retry(err)
except socket.timeout as e:
retry(e)
except socket.error as e:
# SSLError on python 2 (inherits socket.error) may have
# no errno set but this error message
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
retry(e)
raise
byte_counter += len(data_block) byte_counter += len(data_block)