Implement a content type allow list for URL previews (#11936)
This implements an allow list of content types for which Synapse will attempt a URL preview. If a URL resolves to a resource whose content type isn't in the list, the download will terminate immediately. This makes sense given that Synapse would never successfully generate a URL preview for such files in the first place, and it helps prevent issues with streaming media servers, such as #8302.

Signed-off-by: Denis Kasak <dkasak@termina.org.uk>
This commit is contained in:
parent
06e5a76322
commit
337f38cac3
|
@ -0,0 +1 @@
|
||||||
|
Implement an allow list of content types for which we will attempt to preview a URL. This prevents Synapse from making useless longer-lived connections to streaming media servers.
|
|
@ -20,6 +20,7 @@ from typing import (
|
||||||
TYPE_CHECKING,
|
TYPE_CHECKING,
|
||||||
Any,
|
Any,
|
||||||
BinaryIO,
|
BinaryIO,
|
||||||
|
Callable,
|
||||||
Dict,
|
Dict,
|
||||||
Iterable,
|
Iterable,
|
||||||
List,
|
List,
|
||||||
|
@ -693,12 +694,18 @@ class SimpleHttpClient:
|
||||||
output_stream: BinaryIO,
|
output_stream: BinaryIO,
|
||||||
max_size: Optional[int] = None,
|
max_size: Optional[int] = None,
|
||||||
headers: Optional[RawHeaders] = None,
|
headers: Optional[RawHeaders] = None,
|
||||||
|
is_allowed_content_type: Optional[Callable[[str], bool]] = None,
|
||||||
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
|
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
|
||||||
"""GETs a file from a given URL
|
"""GETs a file from a given URL
|
||||||
Args:
|
Args:
|
||||||
url: The URL to GET
|
url: The URL to GET
|
||||||
output_stream: File to write the response body to.
|
output_stream: File to write the response body to.
|
||||||
headers: A map from header name to a list of values for that header
|
headers: A map from header name to a list of values for that header
|
||||||
|
is_allowed_content_type: A predicate to determine whether the
|
||||||
|
content type of the file we're downloading is allowed. If set and
|
||||||
|
it evaluates to False when called with the content type, the
|
||||||
|
request will be terminated before completing the download by
|
||||||
|
raising SynapseError.
|
||||||
Returns:
|
Returns:
|
||||||
A tuple of the file length, dict of the response
|
A tuple of the file length, dict of the response
|
||||||
headers, absolute URI of the response and HTTP response code.
|
headers, absolute URI of the response and HTTP response code.
|
||||||
|
@ -726,6 +733,17 @@ class SimpleHttpClient:
|
||||||
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
|
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if is_allowed_content_type and b"Content-Type" in resp_headers:
|
||||||
|
content_type = resp_headers[b"Content-Type"][0].decode("ascii")
|
||||||
|
if not is_allowed_content_type(content_type):
|
||||||
|
raise SynapseError(
|
||||||
|
HTTPStatus.BAD_GATEWAY,
|
||||||
|
(
|
||||||
|
"Requested file's content type not allowed for this operation: %s"
|
||||||
|
% content_type
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: if our Content-Type is HTML or something, just read the first
|
# TODO: if our Content-Type is HTML or something, just read the first
|
||||||
# N bytes into RAM rather than saving it all to disk only to read it
|
# N bytes into RAM rather than saving it all to disk only to read it
|
||||||
# straight back in again
|
# straight back in again
|
||||||
|
|
|
@ -403,6 +403,7 @@ class PreviewUrlResource(DirectServeJsonResource):
|
||||||
output_stream=output_stream,
|
output_stream=output_stream,
|
||||||
max_size=self.max_spider_size,
|
max_size=self.max_spider_size,
|
||||||
headers={"Accept-Language": self.url_preview_accept_language},
|
headers={"Accept-Language": self.url_preview_accept_language},
|
||||||
|
is_allowed_content_type=_is_previewable,
|
||||||
)
|
)
|
||||||
except SynapseError:
|
except SynapseError:
|
||||||
# Pass SynapseErrors through directly, so that the servlet
|
# Pass SynapseErrors through directly, so that the servlet
|
||||||
|
@ -761,3 +762,10 @@ def _is_html(content_type: str) -> bool:
|
||||||
|
|
||||||
def _is_json(content_type: str) -> bool:
|
def _is_json(content_type: str) -> bool:
|
||||||
return content_type.lower().startswith("application/json")
|
return content_type.lower().startswith("application/json")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_previewable(content_type: str) -> bool:
    """Tell whether a URL preview will be attempted for ``content_type``.

    A content type is accepted as soon as one of the HTML, media or JSON
    checks accepts it (tried in that order); otherwise it is rejected.
    """
    if _is_html(content_type):
        return True
    if _is_media(content_type):
        return True
    return _is_json(content_type)
|
||||||
|
|
|
@ -243,6 +243,78 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
|
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
|
||||||
|
|
||||||
|
def test_video_rejected(self):
    """A preview request whose target answers with ``video/mp4`` yields a 502 error."""
    self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

    channel = self.make_request(
        "GET",
        "preview_url?url=http://matrix.org",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # Build the client protocol from the first recorded TCP client and hook
    # it up to an accumulating server protocol through fake transports.
    factory = self.reactor.tcpClients[0][2]
    client = factory.buildProtocol(None)
    server = AccumulatingProtocol()
    server.makeConnection(FakeTransport(client, self.reactor))
    client.makeConnection(FakeTransport(server, self.reactor))

    # Feed the client a complete response carrying the video content type.
    body = b"anything"
    response_head = (
        b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
        b"Content-Type: video/mp4\r\n\r\n"
    ) % (len(body),)
    client.dataReceived(response_head + body)
    self.pump()

    expected_error = {
        "errcode": "M_UNKNOWN",
        "error": "Requested file's content type not allowed for this operation: video/mp4",
    }
    self.assertEqual(channel.code, 502)
    self.assertEqual(channel.json_body, expected_error)
|
||||||
|
|
||||||
|
def test_audio_rejected(self):
    """A preview request whose target answers with ``audio/aac`` yields a 502 error."""
    self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

    channel = self.make_request(
        "GET",
        "preview_url?url=http://matrix.org",
        shorthand=False,
        await_result=False,
    )
    self.pump()

    # Build the client protocol from the first recorded TCP client and hook
    # it up to an accumulating server protocol through fake transports.
    factory = self.reactor.tcpClients[0][2]
    client = factory.buildProtocol(None)
    server = AccumulatingProtocol()
    server.makeConnection(FakeTransport(client, self.reactor))
    client.makeConnection(FakeTransport(server, self.reactor))

    # Feed the client a complete response carrying the audio content type.
    body = b"anything"
    response_head = (
        b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
        b"Content-Type: audio/aac\r\n\r\n"
    ) % (len(body),)
    client.dataReceived(response_head + body)
    self.pump()

    expected_error = {
        "errcode": "M_UNKNOWN",
        "error": "Requested file's content type not allowed for this operation: audio/aac",
    }
    self.assertEqual(channel.code, 502)
    self.assertEqual(channel.json_body, expected_error)
|
||||||
|
|
||||||
def test_non_ascii_preview_content_type(self):
|
def test_non_ascii_preview_content_type(self):
|
||||||
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue